Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp993
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h186
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h124
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp870
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp714
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp73
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h65
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp4166
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp305
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp519
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h145
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp3444
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h530
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp872
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp440
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h112
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def55
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp602
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp427
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h290
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h200
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp151
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp1708
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h380
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp3638
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h849
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h110
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp740
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h439
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h185
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp128
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h66
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp1851
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h390
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp850
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h165
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp117
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp182
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h47
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h44
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp125
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h57
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp1345
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h121
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp2576
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp1974
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp406
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp200
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp2046
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h200
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp637
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp305
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp120
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp225
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp449
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp323
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp231
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp292
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp143
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp293
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp8660
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp727
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp2077
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp698
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h112
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp288
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp151
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp566
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp380
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp1244
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExecutionDomainFix.cpp470
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp139
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp664
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp916
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp240
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp769
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp114
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp628
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GCMetadataPrinter.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp328
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp452
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp354
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp1241
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp166
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp6029
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp771
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp3698
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp687
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp330
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp383
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp213
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp112
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp385
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp8119
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp435
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp971
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp220
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp113
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp1318
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp1110
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp1381
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp706
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp606
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp2360
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp818
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp270
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp1691
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp258
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h243
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp538
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp1363
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp474
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp233
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp301
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp147
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp347
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp4230
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h1441
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp139
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h43
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp2405
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp1970
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h68
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp1409
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp196
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp215
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp1748
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp340
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp451
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp508
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp245
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h61
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp248
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp159
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp888
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp442
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp423
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp202
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp767
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h253
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp3620
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp1133
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp989
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp406
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp174
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h97
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRYamlMapping.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp1164
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h93
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp357
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp1740
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp291
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp3701
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp947
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp127
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp769
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp1424
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp151
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp207
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp152
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp256
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp1521
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp188
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp224
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp2462
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp387
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp1522
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp249
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp214
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp134
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp247
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp1256
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp1213
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp108
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp3276
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp149
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp667
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp373
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp4332
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp1892
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp236
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp108
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp1356
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp264
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp3465
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp213
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp2208
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp206
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp759
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp64
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.h24
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp2128
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp96
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp696
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp415
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp168
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp1580
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp146
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp1799
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp1177
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp444
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp712
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp192
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h131
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp339
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp311
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h223
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp1673
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp2669
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h447
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp954
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp112
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h96
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h73
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp215
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp112
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp817
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp236
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp4220
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h114
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp1392
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp686
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp227
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp405
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp251
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp939
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp152
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h84
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp754
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp1531
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp241
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp1046
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp27593
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp2382
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp560
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp1414
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h169
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp5533
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp3207
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp5977
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp1060
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h1137
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp601
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp1771
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp7262
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp624
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h264
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp819
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp3210
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp1086
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h193
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp271
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp12710
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp324
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp11977
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h803
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1096
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp3894
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp314
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp1313
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h126
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp10800
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp386
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp997
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp507
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp272
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp398
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h172
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp1888
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h557
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp1379
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp254
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp171
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp760
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp660
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp550
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp311
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp494
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp102
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp1071
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp168
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp1726
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp2405
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp2680
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp1569
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp678
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp343
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp1967
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp1047
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp196
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp1007
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp642
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp647
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp377
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp1396
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp269
350 files changed, 334221 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 000000000000..886c4db069f1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,993 @@
+//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
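+// Editor's note (illustrative only, not part of the upstream file or this
+// patch): an anti-dependence is a write-after-read ordering constraint.
+// With hypothetical registers it looks like:
+//
+//   %r1 = LDR ...          ; def of %r1
+//   ...  = ADD ..., %r1    ; last use of %r1
+//   %r1 = MOV ...          ; anti-dependence: must stay below the ADD
+//
+// Renaming the second definition (and its later uses) to a free register,
+// say %r2, removes the ordering constraint and gives the post-RA scheduler
+// more freedom to reorder the instructions.
+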
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB)
+ : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) {
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) {
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg) {
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
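+
+// Editor's sketch (illustrative only, not part of the upstream file or this
+// patch): GetGroup/UnionGroups/LeaveGroup above form a small union-find
+// forest without path compression, with group 0 reserved as the
+// "live-out / not renameable" sentinel. With hypothetical register numbers:
+//
+//   GetGroup(5) == 5;      // every register starts in its own group
+//   UnionGroups(5, 0);     // group 0 always becomes the parent, so
+//                          // GetGroup(5) == 0 and reg 5 is never renamed
+//   LeaveGroup(5);         // allocates a fresh GroupNode, so reg 5 is again
+//                          // in a singleton group of its own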
+
+AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
+ : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
+ /* Collect a bitset of all registers whose anti-dependencies are only
+ broken if they are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ LLVM_DEBUG(for (unsigned r
+ : CriticalPathSet.set_bits()) dbgs()
+ << " " << printReg(r, TRI));
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(!State);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = BB->isReturnBlock();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock *Succ : BB->successors())
+ for (const auto &LI : Succ->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI.getPristineRegs(MF);
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg))
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = nullptr;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ LLVM_DEBUG(dbgs() << "Observe: ");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "\tRegs:");
+
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs()
+ << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)
+ << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
+ MachineOperand &MO) {
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = nullptr;
+ if (MO.isDef())
+ Op = MI.findRegisterUseOperand(Reg, true);
+ else
+ Op = MI.findRegisterDefOperand(Reg);
+
+ return(Op && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(
+ MachineInstr &MI, std::set<unsigned> &PassthruRegs) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const Register Reg = MO.getReg();
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ PassthruRegs.insert(SubReg);
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output-dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (const SDep &Pred : SU->Preds) {
+ if ((Pred.getKind() == SDep::Anti) || (Pred.getKind() == SDep::Output)) {
+ if (RegSet.insert(Pred.getReg()).second)
+ Edges.push_back(&Pred);
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = nullptr;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU) {
+ for (const SDep &Pred : SU->Preds) {
+ const SUnit *PredSU = Pred.getSUnit();
+ unsigned PredLatency = Pred.getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && Pred.getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &Pred;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : nullptr;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // FIXME: We must leave subregisters of live super registers as live, so that
+ // we don't clear out the register tracking information for subregisters of
+ // super registers we're still tracking (and with which we're unioning
+ // subregister definitions).
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) {
+ LLVM_DEBUG(if (!header && footer) dbgs() << footer);
+ return;
+ }
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ LLVM_DEBUG(if (header) {
+ dbgs() << header << printReg(Reg, TRI);
+ header = nullptr;
+ });
+ LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ // Repeat for subregisters. Note that we only do this if the superregister
+ // was not live because otherwise, regardless of whether we have an explicit
+ // use of the subregister, the subregister's contents are needed for the
+ // uses of the superregister.
+ for (MCPhysReg SubregReg : TRI->subregs(Reg)) {
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ LLVM_DEBUG(if (header) {
+ dbgs() << header << printReg(Reg, TRI);
+ header = nullptr;
+ });
+ LLVM_DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g"
+ << State->GetGroup(SubregReg) << tag);
+ }
+ }
+ }
+
+ LLVM_DEBUG(if (!header && footer) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(
+ MachineInstr &MI, unsigned Count, std::set<unsigned> &PassthruRegs) {
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (const MachineOperand &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ LLVM_DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
+ << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI). Inline assembly may
+ // reference either system calls or the register directly. Skip it until we
+ // can tell user-specified registers from compiler-specified ones.
+ if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) ||
+ MI.isInlineAsm()) {
+ LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via "
+ << printReg(AliasReg, TRI) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = nullptr;
+ if (i < MI.getDesc().getNumOperands())
+ RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ LLVM_DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI.isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ // We need to be careful here not to define already-live super registers.
+ // If the super register is already live, then this definition is not
+ // a definition of the whole super register (just a partial insertion
+ // into it). Earlier subregister definitions (which we've not yet visited
+ // because we're iterating bottom-up) need to be linked to the same group
+ // as this definition.
+ if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI))
+ continue;
+
+ DefIndices[*AI] = Count;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
+ unsigned Count) {
+ LLVM_DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // If MI's uses have a special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // Inline Assembly register uses also cannot be safely changed.
+ // FIXME: The issue with predicated instructions is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395]
+ // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12]
+ // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI.isCall() || MI.hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI) || MI.isInlineAsm();
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
+ << State->GetGroup(Reg));
+
+ // If it wasn't previously live but now it is, this is a kill. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ if (Special) {
+ LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = nullptr;
+ if (i < MI.getDesc().getNumOperands())
+ RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ LLVM_DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI.isKill()) {
+ LLVM_DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ LLVM_DEBUG(dbgs() << "=" << printReg(Reg, TRI));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
+ FirstReg = Reg;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) {
+ const TargetRegisterClass *RC = Q.second.RC;
+ if (!RC) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ LLVM_DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(!Regs.empty() && "Empty register group!");
+ if (Regs.empty())
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned Reg : Regs) {
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");
+
+ BitVector &BV = RenameRegisterMap[Reg];
+ assert(BV.empty());
+ BV = GetRenameRegisters(Reg);
+
+ LLVM_DEBUG({
+ dbgs() << " ::";
+ for (unsigned r : BV.set_bits())
+ dbgs() << " " << printReg(r, TRI);
+ dbgs() << "\n";
+ });
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned Reg : Regs) {
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ // FIXME: remove this once PR18663 has been properly fixed. For now,
+ // return a conservative answer:
+ // assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << printReg(SuperReg, TRI)
+ << " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
+ const TargetRegisterClass *SuperRC =
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
+
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
+ LLVM_DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "\tFind Registers:");
+
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
+ do {
+ if (R == 0) R = Order.size();
+ --R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ LLVM_DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned Reg : Regs) {
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ LLVM_DEBUG(dbgs() << " " << printReg(NewReg, TRI));
+
+ // Check if Reg can be renamed to NewReg.
+ if (!RenameRegisterMap[Reg].test(NewReg)) {
+ LLVM_DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Reg's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ LLVM_DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ LLVM_DEBUG(dbgs()
+ << "(alias " << printReg(AliasReg, TRI) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also
+ // defines 'NewReg' via an early-clobber operand.
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ MachineInstr *UseMI = Q.second.Operand->getParent();
+ int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
+ if (Idx == -1)
+ continue;
+
+ if (UseMI->getOperand(Idx).isEarlyClobber()) {
+ LLVM_DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
+ // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining
+ // 'Reg' is an early-clobber define and that instruction also uses
+ // 'NewReg'.
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber())
+ continue;
+
+ MachineInstr *DefMI = Q.second.Operand->getParent();
+ if (DefMI->readsRegister(NewReg, TRI)) {
+ LLVM_DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+ // If we fall-out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ LLVM_DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ LLVM_DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ LLVM_DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
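+// Editor's note (illustrative only, not part of the upstream file or this
+// patch): RenameOrder acts as a per-register-class round-robin cursor. For a
+// hypothetical class with allocation order { R0, R1, R2, R3 }, the first
+// search walks R3, R2, R1, R0; if it succeeds at, say, R1 (index 1),
+// RenameOrder remembers index 1 and the next rename for the same class
+// resumes just below it (wrapping around), spreading renames across the
+// class instead of reusing the same register repeatedly.
+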
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ const std::vector<SUnit> &SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
+ for (const SUnit &SU : SUnits)
+ MISUnitMap.insert(std::make_pair(SU.getInstr(), &SU));
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ const SUnit *CriticalPathSU = nullptr;
+ MachineInstr *CriticalPathMI = nullptr;
+ if (CriticalPathSet.any()) {
+ for (const SUnit &SU : SUnits) {
+ if (!CriticalPathSU ||
+ ((SU.getDepth() + SU.Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = &SU;
+ }
+ }
+ assert(CriticalPathSU && "Failed to find SUnit critical path");
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ LLVM_DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+#endif
+
+ BitVector RegAliases(TRI->getNumRegs());
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr &MI = *--I;
+
+ if (MI.isDebugInstr())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Anti: ");
+ LLVM_DEBUG(MI.dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[&MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = nullptr;
+ if (&MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr;
+ } else if (CriticalPathSet.any()) {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI.isKill()) {
+ // Attempt to break each anti-dependency...
+ for (const SDep *Edge : Edges) {
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ LLVM_DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!MRI.isAllocatable(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ LLVM_DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ LLVM_DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ LLVM_DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp && "Can't find index for defined register operand");
+ if (!AntiDepOp || AntiDepOp->isImplicit()) {
+ LLVM_DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (const SDep &Pred : PathSU->Preds) {
+ if (Pred.getSUnit() == NextSU ? (Pred.getKind() != SDep::Anti ||
+ Pred.getReg() != AntiDepReg)
+ : (Pred.getKind() == SDep::Data &&
+ Pred.getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (const SDep &Pred : PathSU->Preds) {
+ if ((Pred.getSUnit() == NextSU) && (Pred.getKind() != SDep::Anti) &&
+ (Pred.getKind() != SDep::Output)) {
+ LLVM_DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((Pred.getSUnit() != NextSU) &&
+ (Pred.getKind() == SDep::Data) &&
+ (Pred.getReg() == AntiDepReg)) {
+ LLVM_DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+
+ // If the definition of the anti-dependency register does not start
+ // a new live range, bail out. This can happen if the anti-dep
+ // register is a sub-register of another register whose live range
+          // spans over PathSU. In such a case, PathSU defines only a part of
+ // the larger register.
+ RegAliases.reset();
+ for (MCRegAliasIterator AI(AntiDepReg, TRI, true); AI.isValid(); ++AI)
+ RegAliases.set(*AI);
+ for (SDep S : PathSU->Succs) {
+ SDep::Kind K = S.getKind();
+ if (K != SDep::Data && K != SDep::Output && K != SDep::Anti)
+ continue;
+ unsigned R = S.getReg();
+ if (!RegAliases[R])
+ continue;
+ if (R == AntiDepReg || TRI->isSubRegister(AntiDepReg, R))
+ continue;
+ AntiDepReg = 0;
+ break;
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ LLVM_DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << printReg(AntiDepReg, TRI) << ":");
+
+ // Handle each group register...
+ for (const auto &P : RenameMap) {
+ unsigned CurrReg = P.first;
+ unsigned NewReg = P.second;
+
+ LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->"
+ << printReg(NewReg, TRI) << "("
+ << RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) {
+ Q.second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()];
+ if (!SU) continue;
+ UpdateDbgValues(DbgValues, Q.second.Operand->getParent(),
+ AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
+
+AntiDepBreaker *llvm::createAggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) {
+ return new AggressiveAntiDepBreaker(MFi, RCI, CriticalPathRCs);
+}
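
The loop above breaks a write-after-read (anti) dependence by renaming the later definition, and every reference in its live range, to a free register. A minimal standalone sketch of just that renaming idea, in plain C++ with invented register names rather than the LLVM data structures:

// Standalone illustration (not LLVM code): a write-after-read (anti)
// dependence on r1 is removed by renaming the later definition.
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct ToyInst {
  std::string Def;               // register written ("" if none)
  std::vector<std::string> Uses; // registers read
};

static void dump(const std::vector<ToyInst> &Block) {
  for (const ToyInst &TI : Block) {
    std::cout << (TI.Def.empty() ? std::string("--") : TI.Def) << " = op(";
    for (const std::string &U : TI.Uses)
      std::cout << ' ' << U;
    std::cout << " )\n";
  }
}

int main() {
  // r2 = op(r1)   -- reads r1
  // r1 = op()     -- redefines r1: anti-dependence on the read above
  std::vector<ToyInst> Block = {{"r2", {"r1"}}, {"r1", {}}};
  std::set<std::string> FreeRegs = {"r3", "r4"};

  // Rename the redefinition to any free register; the two instructions no
  // longer share r1, so a scheduler may now reorder them.
  if (!FreeRegs.empty()) {
    Block[1].Def = *FreeRegs.begin();
    FreeRegs.erase(FreeRegs.begin());
  }
  dump(Block);
}

The real pass additionally consults the register groups and the def/kill indices tracked above so that all references in a live range are rewritten consistently and the liveness maps stay valid.
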
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 000000000000..cece217e645c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,186 @@
+//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/AntiDepBreaker.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+ /// Contains all the state necessary for anti-dep breaking.
+class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
+ public:
+ /// Information about a register reference within a liverange
+ struct RegisterReference {
+    /// The register's operand
+ MachineOperand *Operand;
+
+ /// The register class
+ const TargetRegisterClass *RC;
+ };
+
+ private:
+ /// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
+
+ /// For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+  /// composed of registers that are not eligible for renaming.
+ std::vector<unsigned> GroupNodeIndices;
+
+ /// Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// The index of the most recent complete def (proceeding bottom
+ /// up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// Return the kill indices.
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+ /// Return the define indices.
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+ /// Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // Return a vector of the registers belonging to a group.
+  // If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // Union Reg1's and Reg2's groups to form a new group.
+ // Return the index of the GroupNode representing the group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+  // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// Return true if Reg is live.
+ bool IsLive(unsigned Reg);
+ };
+
+ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker
+ : public AntiDepBreaker {
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// The state used to identify and rename anti-dependence registers.
+ AggressiveAntiDepState *State = nullptr;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ AggressiveAntiDepBreaker &
+ operator=(const AggressiveAntiDepBreaker &other) = delete;
+ AggressiveAntiDepBreaker(const AggressiveAntiDepBreaker &other) = delete;
+ ~AggressiveAntiDepBreaker() override;
+
+ /// Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB) override;
+
+  /// Identify anti-dependencies along the critical path
+ /// of the ScheduleDAG and break them by renaming registers.
+ unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) override;
+
+ /// Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) override;
+
+ /// Finish anti-dep breaking for a basic block.
+ void FinishBlock() override;
+
+ private:
+ /// Keep track of a position in the allocation order for each regclass.
+ using RenameOrderType = std::map<const TargetRegisterClass *, unsigned>;
+
+ /// Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr &MI, MachineOperand &MO);
+
+ /// If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr &MI, std::set<unsigned> &PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header = nullptr,
+ const char *footer = nullptr);
+
+ void PrescanInstruction(MachineInstr &MI, unsigned Count,
+ std::set<unsigned> &PassthruRegs);
+ void ScanInstruction(MachineInstr &MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
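
GroupNodes and GroupNodeIndices above describe a small disjoint-union (union-find) structure over registers. A self-contained sketch of that bookkeeping, with hypothetical names and without the special treatment the real class gives to group 0:

// Minimal sketch of the disjoint-union "register group" bookkeeping the
// header describes; illustrative only, not the LLVM class.
#include <cassert>
#include <vector>

struct ToyGroups {
  std::vector<unsigned> GroupNodes;       // parent links (index == value -> root)
  std::vector<unsigned> GroupNodeIndices; // register -> its group node

  explicit ToyGroups(unsigned NumRegs) {
    for (unsigned R = 0; R < NumRegs; ++R) {
      GroupNodes.push_back(R);       // every node starts as its own root
      GroupNodeIndices.push_back(R); // every register in its own group
    }
  }

  unsigned GetGroup(unsigned Reg) {
    unsigned Node = GroupNodeIndices[Reg];
    while (GroupNodes[Node] != Node)
      Node = GroupNodes[Node]; // chase parents to the root
    return Node;
  }

  unsigned UnionGroups(unsigned Reg1, unsigned Reg2) {
    unsigned G1 = GetGroup(Reg1), G2 = GetGroup(Reg2);
    if (G1 != G2)
      GroupNodes[G2] = G1; // attach one root under the other
    return G1;
  }

  unsigned LeaveGroup(unsigned Reg) {
    // Give Reg a brand-new singleton group node.
    unsigned NewNode = static_cast<unsigned>(GroupNodes.size());
    GroupNodes.push_back(NewNode);
    GroupNodeIndices[Reg] = NewNode;
    return NewNode;
  }
};

int main() {
  ToyGroups G(8);
  G.UnionGroups(3, 5);
  assert(G.GetGroup(3) == G.GetGroup(5));
  G.LeaveGroup(5);
  assert(G.GetGroup(3) != G.GetGroup(5));
}
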
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..2aef1234ac0e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,53 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix) {
+ const MachineFunction &MF = VRM.getMachineFunction();
+ const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+ auto Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ SmallVector<MCPhysReg, 16> Hints;
+ bool HardHints =
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
+
+ LLVM_DEBUG({
+ if (!Hints.empty()) {
+ dbgs() << "hints:";
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ dbgs() << ' ' << printReg(Hints[I], TRI);
+ dbgs() << '\n';
+ }
+ });
+#ifndef NDEBUG
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ assert(is_contained(Order, Hints[I]) &&
+ "Target hint is outside allocation order.");
+#endif
+ return AllocationOrder(std::move(Hints), Order, HardHints);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..0701e6810100
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,124 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+class LiveRegMatrix;
+
+class LLVM_LIBRARY_VISIBILITY AllocationOrder {
+ const SmallVector<MCPhysReg, 16> Hints;
+ ArrayRef<MCPhysReg> Order;
+ // How far into the Order we can iterate. This is 0 if the AllocationOrder is
+ // constructed with HardHints = true, Order.size() otherwise. While
+ // technically a size_t, it will participate in comparisons with the
+ // Iterator's Pos, which must be signed, so it's typed here as signed, too, to
+ // avoid warnings and under the assumption that the size of Order is
+ // relatively small.
+ // IterationLimit defines an invalid iterator position.
+ const int IterationLimit;
+
+public:
+ /// Forward iterator for an AllocationOrder.
+ class Iterator final {
+ const AllocationOrder &AO;
+ int Pos = 0;
+
+ public:
+ Iterator(const AllocationOrder &AO, int Pos) : AO(AO), Pos(Pos) {}
+
+    /// Return true if the current position is that of a preferred register.
+ bool isHint() const { return Pos < 0; }
+
+ /// Return the next physical register in the allocation order.
+ MCRegister operator*() const {
+ if (Pos < 0)
+ return AO.Hints.end()[Pos];
+ assert(Pos < AO.IterationLimit);
+ return AO.Order[Pos];
+ }
+
+ /// Advance the iterator to the next position. If that's past the Hints
+ /// list, advance to the first value that's not also in the Hints list.
+ Iterator &operator++() {
+ if (Pos < AO.IterationLimit)
+ ++Pos;
+ while (Pos >= 0 && Pos < AO.IterationLimit && AO.isHint(AO.Order[Pos]))
+ ++Pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &Other) const {
+ assert(&AO == &Other.AO);
+ return Pos == Other.Pos;
+ }
+
+ bool operator!=(const Iterator &Other) const { return !(*this == Other); }
+ };
+
+ /// Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Information about reserved and allocatable registers.
+ static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix);
+
+  /// Create an AllocationOrder given the Hints, Order, and HardHints values.
+ /// Use the create method above - the ctor is for unittests.
+ AllocationOrder(SmallVector<MCPhysReg, 16> &&Hints, ArrayRef<MCPhysReg> Order,
+ bool HardHints)
+ : Hints(std::move(Hints)), Order(Order),
+ IterationLimit(HardHints ? 0 : static_cast<int>(Order.size())) {}
+
+ Iterator begin() const {
+ return Iterator(*this, -(static_cast<int>(Hints.size())));
+ }
+
+ Iterator end() const { return Iterator(*this, IterationLimit); }
+
+ Iterator getOrderLimitEnd(unsigned OrderLimit) const {
+ assert(OrderLimit <= Order.size());
+ if (OrderLimit == 0)
+ return end();
+ Iterator Ret(*this,
+ std::min(static_cast<int>(OrderLimit) - 1, IterationLimit));
+ return ++Ret;
+ }
+
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
+
+ /// Return true if Reg is a preferred physical register.
+ bool isHint(Register Reg) const {
+ assert(!Reg.isPhysical() ||
+ Reg.id() <
+ static_cast<uint32_t>(std::numeric_limits<MCPhysReg>::max()));
+ return Reg.isPhysical() && is_contained(Hints, Reg.id());
+ }
+};
+
+} // end namespace llvm
+
+#endif
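
The Iterator above visits hint registers first (negative positions index Hints from its end) and then the remaining allocation order, skipping registers already tried as hints; with HardHints only the hints are visited. A plain-C++ model of that visiting order, using made-up register numbers:

// Rough model of the visiting order provided by AllocationOrder; a plain
// stand-in, not the LLVM type.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  using Reg = uint16_t;
  std::vector<Reg> Hints = {7};           // preferred registers, tried first
  std::vector<Reg> Order = {3, 7, 9, 12}; // register-class allocation order
  bool HardHints = false;                 // if true, only the hints are tried

  std::vector<Reg> Visit(Hints);
  if (!HardHints)
    for (Reg R : Order)
      if (std::find(Hints.begin(), Hints.end(), R) == Hints.end())
        Visit.push_back(R); // order entries that are not also hints
  for (Reg R : Visit)
    std::cout << R << ' '; // prints: 7 3 9 12
  std::cout << '\n';
}
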
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 000000000000..2065bfbd1c44
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,870 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+/// Compute the linearized index of a member in a nested aggregate/struct/array
+/// by recursing and accumulating CurIndex as long as there are indices in the
+/// index list.
+unsigned llvm::ComputeLinearIndex(Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (auto I : llvm::enumerate(STy->elements())) {
+ Type *ET = I.value();
+ if (Indices && *Indices == I.index())
+ return ComputeLinearIndex(ET, Indices + 1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(ET, nullptr, nullptr, CurIndex);
+ }
+ assert(!Indices && "Unexpected out of bound");
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ unsigned NumElts = ATy->getNumElements();
+ // Compute the Linear offset when jumping one element of the array
+ unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
+ if (Indices) {
+ assert(*Indices < NumElts && "Unexpected out of bound");
+      // If the index is inside the array, compute the linear index of the
+      // requested element and recurse into it with the rest of the indices.
+ CurIndex += EltLinearOffset* *Indices;
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ }
+ CurIndex += EltLinearOffset*NumElts;
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ // If the Offsets aren't needed, don't query the struct layout. This allows
+ // us to support structs with scalable vectors for operations that don't
+ // need offsets.
+ const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ // Don't compute the element offset if we didn't get a StructLayout above.
+ TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
+ : TypeSize::get(0, StartingOffset.isScalable());
+ ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
+ StartingOffset + EltOffset);
+ }
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ TypeSize EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(DL, Ty));
+ if (MemVTs)
+ MemVTs->push_back(TLI.getMemValueType(DL, Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
+ StartingOffset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ SmallVector<TypeSize, 4> Offsets;
+ if (FixedOffsets)
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
+ else
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
+
+ if (FixedOffsets)
+ for (TypeSize Offset : Offsets)
+ FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ SmallVector<TypeSize, 4> Offsets;
+ if (FixedOffsets)
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
+ else
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
+
+ if (FixedOffsets)
+ for (TypeSize Offset : Offsets)
+ FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
+void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
+ SmallVectorImpl<LLT> &ValueTys,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+ // If the Offsets aren't needed, don't query the struct layout. This allows
+ // us to support structs with scalable vectors for operations that don't
+ // need offsets.
+ const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
+ uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
+ computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+ StartingOffset + EltOffset);
+ }
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty.isVoidTy())
+ return;
+ // Base case: we can get an LLT for this LLVM IR type.
+ ValueTys.push_back(getLLTForType(Ty, DL));
+ if (Offsets != nullptr)
+ Offsets->push_back(StartingOffset * 8);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalValue *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalValue *GV = dyn_cast<GlobalValue>(V);
+ GlobalVariable *Var = dyn_cast<GlobalVariable>(V);
+
+ if (Var && Var->getName() == "llvm.eh.catch.all.value") {
+ assert(Var->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = Var->getInitializer();
+ GV = dyn_cast<GlobalValue>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
+ }
+}
+
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ }
+}
+
+ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
+ switch (Pred) {
+ case ISD::SETEQ:
+ return ICmpInst::ICMP_EQ;
+ case ISD::SETNE:
+ return ICmpInst::ICMP_NE;
+ case ISD::SETLE:
+ return ICmpInst::ICMP_SLE;
+ case ISD::SETULE:
+ return ICmpInst::ICMP_ULE;
+ case ISD::SETGE:
+ return ICmpInst::ICMP_SGE;
+ case ISD::SETUGE:
+ return ICmpInst::ICMP_UGE;
+ case ISD::SETLT:
+ return ICmpInst::ICMP_SLT;
+ case ISD::SETULT:
+ return ICmpInst::ICMP_ULT;
+ case ISD::SETGT:
+ return ICmpInst::ICMP_SGT;
+ case ISD::SETUGT:
+ return ICmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("Invalid ISD integer condition code!");
+ }
+}
+
+static bool isNoopBitcast(Type *T1, Type *T2,
+ const TargetLoweringBase& TLI) {
+ return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
+ (isa<VectorType>(T1) && isa<VectorType>(T2) &&
+ TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
+}
+
+/// Look through operations that will be free to find the earliest source of
+/// this value.
+///
+/// @param ValLoc If V has aggregate type, we will be interested in a particular
+/// scalar component. This records its address; the reverse of this list gives a
+/// sequence of indices appropriate for an extractvalue to locate the important
+/// value. This value is updated during the function and on exit will indicate
+/// similar information for the Value returned.
+///
+/// @param DataBits If this function looks through truncate instructions, this
+/// will record the smallest size attained.
+static const Value *getNoopInput(const Value *V,
+ SmallVectorImpl<unsigned> &ValLoc,
+ unsigned &DataBits,
+ const TargetLoweringBase &TLI,
+ const DataLayout &DL) {
+ while (true) {
+    // Try to look through V; if V is not an instruction, it can't be looked
+ // through.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || I->getNumOperands() == 0) return V;
+ const Value *NoopInput = nullptr;
+
+ Value *Op = I->getOperand(0);
+ if (isa<BitCastInst>(I)) {
+ // Look through truly no-op bitcasts.
+ if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+ NoopInput = Op;
+ } else if (isa<GetElementPtrInst>(I)) {
+ // Look through getelementptr
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ NoopInput = Op;
+ } else if (isa<IntToPtrInst>(I)) {
+ // Look through inttoptr.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ DL.getPointerSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<PtrToIntInst>(I)) {
+ // Look through ptrtoint.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ DL.getPointerSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<TruncInst>(I) &&
+ TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
+ DataBits =
+ std::min((uint64_t)DataBits,
+ I->getType()->getPrimitiveSizeInBits().getFixedValue());
+ NoopInput = Op;
+ } else if (auto *CB = dyn_cast<CallBase>(I)) {
+ const Value *ReturnedOp = CB->getReturnedArgOperand();
+ if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
+ NoopInput = ReturnedOp;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
+ // Value may come from either the aggregate or the scalar
+ ArrayRef<unsigned> InsertLoc = IVI->getIndices();
+ if (ValLoc.size() >= InsertLoc.size() &&
+ std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
+ // The type being inserted is a nested sub-type of the aggregate; we
+ // have to remove those initial indices to get the location we're
+ // interested in for the operand.
+ ValLoc.resize(ValLoc.size() - InsertLoc.size());
+ NoopInput = IVI->getInsertedValueOperand();
+ } else {
+ // The struct we're inserting into has the value we're interested in, no
+ // change of address.
+ NoopInput = Op;
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+ // The part we're interested in will inevitably be some sub-section of the
+ // previous aggregate. Combine the two paths to obtain the true address of
+ // our element.
+ ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
+ ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
+ NoopInput = Op;
+ }
+ // Terminate if we couldn't find anything to look through.
+ if (!NoopInput)
+ return V;
+
+ V = NoopInput;
+ }
+}
+
+/// Return true if this scalar return value only has bits discarded on its path
+/// from the "tail call" to the "ret". This includes the obvious noop
+/// instructions handled by getNoopInput above as well as free truncations (or
+/// extensions prior to the call).
+static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
+ SmallVectorImpl<unsigned> &RetIndices,
+ SmallVectorImpl<unsigned> &CallIndices,
+ bool AllowDifferingSizes,
+ const TargetLoweringBase &TLI,
+ const DataLayout &DL) {
+
+ // Trace the sub-value needed by the return value as far back up the graph as
+ // possible, in the hope that it will intersect with the value produced by the
+ // call. In the simple case with no "returned" attribute, the hope is actually
+ // that we end up back at the tail call instruction itself.
+ unsigned BitsRequired = UINT_MAX;
+ RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);
+
+ // If this slot in the value returned is undef, it doesn't matter what the
+  // call puts there; it'll be fine.
+ if (isa<UndefValue>(RetVal))
+ return true;
+
+ // Now do a similar search up through the graph to find where the value
+ // actually returned by the "tail call" comes from. In the simple case without
+ // a "returned" attribute, the search will be blocked immediately and the loop
+ // a Noop.
+ unsigned BitsProvided = UINT_MAX;
+ CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);
+
+ // There's no hope if we can't actually trace them to (the same part of!) the
+ // same value.
+ if (CallVal != RetVal || CallIndices != RetIndices)
+ return false;
+
+ // However, intervening truncates may have made the call non-tail. Make sure
+ // all the bits that are needed by the "ret" have been provided by the "tail
+ // call". FIXME: with sufficiently cunning bit-tracking, we could look through
+ // extensions too.
+ if (BitsProvided < BitsRequired ||
+ (!AllowDifferingSizes && BitsProvided != BitsRequired))
+ return false;
+
+ return true;
+}
+
+/// For an aggregate type, determine whether a given index is within bounds or
+/// not.
+static bool indexReallyValid(Type *T, unsigned Idx) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
+ return Idx < AT->getNumElements();
+
+ return Idx < cast<StructType>(T)->getNumElements();
+}
+
+/// Move the given iterators to the next leaf type in depth first traversal.
+///
+/// Performs a depth-first traversal of the type as specified by its arguments,
+/// stopping at the next leaf node (which may be a legitimate scalar type or an
+/// empty struct or array).
+///
+/// @param SubTypes List of the partial components making up the type from
+/// outermost to innermost non-empty aggregate. The element currently
+/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
+///
+/// @param Path Set of extractvalue indices leading from the outermost type
+/// (SubTypes[0]) to the leaf node currently represented.
+///
+/// @returns true if a new type was found, false otherwise. Calling this
+/// function again on a finished iterator will repeatedly return
+/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
+/// aggregate or a non-aggregate
+static bool advanceToNextLeafType(SmallVectorImpl<Type *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
+ // First march back up the tree until we can successfully increment one of the
+ // coordinates in Path.
+ while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
+ Path.pop_back();
+ SubTypes.pop_back();
+ }
+
+ // If we reached the top, then the iterator is done.
+ if (Path.empty())
+ return false;
+
+ // We know there's *some* valid leaf now, so march back down the tree picking
+ // out the left-most element at each node.
+ ++Path.back();
+ Type *DeeperType =
+ ExtractValueInst::getIndexedType(SubTypes.back(), Path.back());
+ while (DeeperType->isAggregateType()) {
+ if (!indexReallyValid(DeeperType, 0))
+ return true;
+
+ SubTypes.push_back(DeeperType);
+ Path.push_back(0);
+
+ DeeperType = ExtractValueInst::getIndexedType(DeeperType, 0);
+ }
+
+ return true;
+}
+
+/// Find the first non-empty, scalar-like type in Next and setup the iterator
+/// components.
+///
+/// Assuming Next is an aggregate of some kind, this function will traverse the
+/// tree from left to right (i.e. depth-first) looking for the first
+/// non-aggregate type which will play a role in function return.
+///
+/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
+/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
+/// i32 in that type.
+static bool firstRealType(Type *Next, SmallVectorImpl<Type *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
+ // First initialise the iterator components to the first "leaf" node
+ // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
+ // despite nominally being an aggregate).
+ while (Type *FirstInner = ExtractValueInst::getIndexedType(Next, 0)) {
+ SubTypes.push_back(Next);
+ Path.push_back(0);
+ Next = FirstInner;
+ }
+
+ // If there's no Path now, Next was originally scalar already (or empty
+ // leaf). We're done.
+ if (Path.empty())
+ return true;
+
+ // Otherwise, use normal iteration to keep looking through the tree until we
+ // find a non-aggregate type.
+ while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
+ ->isAggregateType()) {
+ if (!advanceToNextLeafType(SubTypes, Path))
+ return false;
+ }
+
+ return true;
+}
+
+/// Set the iterator data-structures to the next non-empty, non-aggregate
+/// subtype.
+static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
+ do {
+ if (!advanceToNextLeafType(SubTypes, Path))
+ return false;
+
+ assert(!Path.empty() && "found a leaf but didn't set the path?");
+ } while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
+ ->isAggregateType());
+
+ return true;
+}
+
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
+ const BasicBlock *ExitBB = Call.getParent();
+ const Instruction *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret && ((!TM.Options.GuaranteedTailCallOpt &&
+ Call.getCallingConv() != CallingConv::Tail &&
+ Call.getCallingConv() != CallingConv::SwiftTail) ||
+ !isa<UnreachableInst>(Term)))
+ return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ // Check for all calls including speculatable functions.
+ for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
+ if (&*BBI == &Call)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ // Pseudo probe intrinsics do not block tail call optimization either.
+ if (BBI->isDebugOrPseudoInst())
+ continue;
+ // A lifetime end, assume or noalias.decl intrinsic should not stop tail
+ // call optimization.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
+ II->getIntrinsicID() == Intrinsic::assume ||
+ II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(&*BBI))
+ return false;
+ }
+
+ const Function *F = ExitBB->getParent();
+ return returnTypeIsEligibleForTailCall(
+ F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
+}
+
+bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI,
+ bool *AllowDifferingSizes) {
+ // ADS may be null, so don't write to it directly.
+ bool DummyADS;
+ bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
+ ADS = true;
+
+ AttrBuilder CallerAttrs(F->getContext(), F->getAttributes().getRetAttrs());
+ AttrBuilder CalleeAttrs(F->getContext(),
+ cast<CallInst>(I)->getAttributes().getRetAttrs());
+
+ // Following attributes are completely benign as far as calling convention
+ // goes, they shouldn't affect whether the call is a tail call.
+ for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull, Attribute::NoAlias,
+ Attribute::NonNull, Attribute::NoUndef}) {
+ CallerAttrs.removeAttribute(Attr);
+ CalleeAttrs.removeAttribute(Attr);
+ }
+
+ if (CallerAttrs.contains(Attribute::ZExt)) {
+ if (!CalleeAttrs.contains(Attribute::ZExt))
+ return false;
+
+ ADS = false;
+ CallerAttrs.removeAttribute(Attribute::ZExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ } else if (CallerAttrs.contains(Attribute::SExt)) {
+ if (!CalleeAttrs.contains(Attribute::SExt))
+ return false;
+
+ ADS = false;
+ CallerAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ }
+
+ // Drop sext and zext return attributes if the result is not used.
+ // This enables tail calls for code like:
+ //
+ // define void @caller() {
+ // entry:
+ // %unused_result = tail call zeroext i1 @callee()
+ // br label %retlabel
+ // retlabel:
+ // ret void
+ // }
+ if (I->use_empty()) {
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ }
+
+ // If they're still different, there's some facet we don't understand
+ // (currently only "inreg", but in future who knows). It may be OK but the
+ // only safe option is to reject the tail call.
+ return CallerAttrs == CalleeAttrs;
+}
+
+/// Check whether B is a bitcast of a pointer type to another pointer type,
+/// which is equal to A.
+static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
+ assert(A && B && "Expected non-null inputs!");
+
+ auto *BitCastIn = dyn_cast<BitCastInst>(B);
+
+ if (!BitCastIn)
+ return false;
+
+ if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
+ return false;
+
+ return A == BitCastIn->getOperand(0);
+}
+
+bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
+ const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI) {
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Make sure the attributes attached to each return are compatible.
+ bool AllowDifferingSizes;
+ if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
+ return false;
+
+ const Value *RetVal = Ret->getOperand(0), *CallVal = I;
+  // An intrinsic like llvm.memcpy has no return value, but the expanded
+  // libcall may or may not have one. On most platforms, it will be expanded
+  // as memcpy in libc, which returns the first argument. On other platforms,
+  // like arm-none-eabi, memcpy may be expanded as a library call without a
+  // return value, like __aeabi_memcpy.
+ const CallInst *Call = cast<CallInst>(I);
+ if (Function *F = Call->getCalledFunction()) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ if (((IID == Intrinsic::memcpy &&
+ TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
+ (IID == Intrinsic::memmove &&
+ TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
+ (IID == Intrinsic::memset &&
+ TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
+ (RetVal == Call->getArgOperand(0) ||
+ isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
+ return true;
+ }
+
+ SmallVector<unsigned, 4> RetPath, CallPath;
+ SmallVector<Type *, 4> RetSubTypes, CallSubTypes;
+
+ bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
+ bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);
+
+  // Nothing's actually returned, so it doesn't matter what the callee put
+  // there; it's a valid tail call.
+ if (RetEmpty)
+ return true;
+
+ // Iterate pairwise through each of the value types making up the tail call
+ // and the corresponding return. For each one we want to know whether it's
+ // essentially going directly from the tail call to the ret, via operations
+ // that end up not generating any code.
+ //
+ // We allow a certain amount of covariance here. For example it's permitted
+ // for the tail call to define more bits than the ret actually cares about
+ // (e.g. via a truncate).
+ do {
+ if (CallEmpty) {
+ // We've exhausted the values produced by the tail call instruction, the
+ // rest are essentially undef. The type doesn't really matter, but we need
+ // *something*.
+ Type *SlotType =
+ ExtractValueInst::getIndexedType(RetSubTypes.back(), RetPath.back());
+ CallVal = UndefValue::get(SlotType);
+ }
+
+ // The manipulations performed when we're looking through an insertvalue or
+ // an extractvalue would happen at the front of the RetPath list, so since
+ // we have to copy it anyway it's more efficient to create a reversed copy.
+ SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath));
+ SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath));
+
+ // Finally, we can check whether the value produced by the tail call at this
+ // index is compatible with the value we return.
+ if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
+ AllowDifferingSizes, TLI,
+ F->getParent()->getDataLayout()))
+ return false;
+
+ CallEmpty = !nextRealType(CallSubTypes, CallPath);
+ } while(nextRealType(RetSubTypes, RetPath));
+
+ return true;
+}
+
+static void collectEHScopeMembers(
+ DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
+ const MachineBasicBlock *MBB) {
+ SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
+ while (!Worklist.empty()) {
+ const MachineBasicBlock *Visiting = Worklist.pop_back_val();
+ // Don't follow blocks which start new scopes.
+ if (Visiting->isEHPad() && Visiting != MBB)
+ continue;
+
+ // Add this MBB to our scope.
+ auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));
+
+ // Don't revisit blocks.
+ if (!P.second) {
+ assert(P.first->second == EHScope && "MBB is part of two scopes!");
+ continue;
+ }
+
+ // Returns are boundaries where scope transfer can occur, don't follow
+ // successors.
+ if (Visiting->isEHScopeReturnBlock())
+ continue;
+
+ append_range(Worklist, Visiting->successors());
+ }
+}
+
+DenseMap<const MachineBasicBlock *, int>
+llvm::getEHScopeMembership(const MachineFunction &MF) {
+ DenseMap<const MachineBasicBlock *, int> EHScopeMembership;
+
+ // We don't have anything to do if there aren't any EH pads.
+ if (!MF.hasEHScopes())
+ return EHScopeMembership;
+
+ int EntryBBNumber = MF.front().getNumber();
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction().getPersonalityFn()));
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;
+ SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
+ SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
+ SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHScopeEntry()) {
+ EHScopeBlocks.push_back(&MBB);
+ } else if (IsSEH && MBB.isEHPad()) {
+ SEHCatchPads.push_back(&MBB);
+ } else if (MBB.pred_empty()) {
+ UnreachableBlocks.push_back(&MBB);
+ }
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+
+ // CatchPads are not scopes for SEH so do not consider CatchRet to
+ // transfer control to another scope.
+ if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
+ continue;
+
+ // FIXME: SEH CatchPads are not necessarily in the parent function:
+ // they could be inside a finally block.
+ const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
+ const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
+ CatchRetSuccessors.push_back(
+ {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
+ }
+
+ // We don't have anything to do if there aren't any EH pads.
+ if (EHScopeBlocks.empty())
+ return EHScopeMembership;
+
+ // Identify all the basic blocks reachable from the function entry.
+ collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front());
+ // All blocks not part of a scope are in the parent function.
+ for (const MachineBasicBlock *MBB : UnreachableBlocks)
+ collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
+ // Next, identify all the blocks inside the scopes.
+ for (const MachineBasicBlock *MBB : EHScopeBlocks)
+ collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB);
+ // SEH CatchPads aren't really scopes, handle them separately.
+ for (const MachineBasicBlock *MBB : SEHCatchPads)
+ collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
+ // Finally, identify all the targets of a catchret.
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
+ CatchRetSuccessors)
+ collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,
+ CatchRetPair.first);
+ return EHScopeMembership;
+}
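
collectEHScopeMembers above is a worklist flood-fill: it assigns every block reachable from a scope entry to that scope, refuses to cross into blocks that start new scopes, and never revisits a block. A standalone sketch of the same walk on a toy CFG (plain integers instead of MachineBasicBlocks):

// Simplified scope-coloring walk; a sketch, not the LLVM implementation.
#include <algorithm>
#include <iostream>
#include <map>
#include <vector>

int main() {
  // Toy CFG: block -> successors. Block 0 is the function entry; block 3
  // starts a separate scope.
  std::map<int, std::vector<int>> Succs = {
      {0, {1, 2}}, {1, {2}}, {2, {}}, {3, {4}}, {4, {}}};
  std::vector<int> ScopeEntries = {0, 3};

  std::map<int, int> Membership; // block -> scope (identified by its entry)
  for (int Entry : ScopeEntries) {
    std::vector<int> Worklist = {Entry};
    while (!Worklist.empty()) {
      int BB = Worklist.back();
      Worklist.pop_back();
      // Don't follow blocks which start new scopes...
      if (BB != Entry &&
          std::find(ScopeEntries.begin(), ScopeEntries.end(), BB) !=
              ScopeEntries.end())
        continue;
      // ...and don't revisit blocks already assigned to a scope.
      if (!Membership.insert({BB, Entry}).second)
        continue;
      for (int S : Succs[BB])
        Worklist.push_back(S);
    }
  }

  for (const auto &KV : Membership)
    std::cout << "bb" << KV.first << " -> scope bb" << KV.second << '\n';
}
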
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
new file mode 100644
index 000000000000..82b5ccdc70ea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -0,0 +1,93 @@
+//===-- CodeGen/AsmPrinter/AIXException.cpp - AIX Exception Impl ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing AIX exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {}
+
+void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
+ const MCSymbol *PerSym) {
+ // Generate EH Info Table.
+  // The EH Info Table, a.k.a. the 'compat unwind section' on AIX, has the
+  // following format: struct eh_info_t {
+  // unsigned version; /* EH info version 0 */
+ // #if defined(__64BIT__)
+ // char _pad[4]; /* padding */
+ // #endif
+ // unsigned long lsda; /* Pointer to LSDA */
+ // unsigned long personality; /* Pointer to the personality routine */
+ // }
+
+ auto *EHInfo =
+ cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection());
+ if (Asm->TM.getFunctionSections()) {
+ // If option -ffunction-sections is on, append the function name to the
+ // name of EH Info Table csect so that each function has its own EH Info
+ // Table csect. This helps the linker to garbage-collect EH info of unused
+ // functions.
+ SmallString<128> NameStr = EHInfo->getName();
+ raw_svector_ostream(NameStr) << '.' << Asm->MF->getFunction().getName();
+ EHInfo = Asm->OutContext.getXCOFFSection(NameStr, EHInfo->getKind(),
+ EHInfo->getCsectProp());
+ }
+ Asm->OutStreamer->switchSection(EHInfo);
+ MCSymbol *EHInfoLabel =
+ TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF);
+ Asm->OutStreamer->emitLabel(EHInfoLabel);
+
+ // Version number.
+ Asm->emitInt32(0);
+
+ const DataLayout &DL = MMI->getModule()->getDataLayout();
+ const unsigned PointerSize = DL.getPointerSize();
+
+  // Add the necessary padding in 64-bit mode.
+ Asm->OutStreamer->emitValueToAlignment(Align(PointerSize));
+
+ // LSDA location.
+ Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(LSDA, Asm->OutContext),
+ PointerSize);
+
+ // Personality routine.
+ Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(PerSym, Asm->OutContext),
+ PointerSize);
+}
+
+void AIXException::endFunction(const MachineFunction *MF) {
+  // There is no easy way to access register information in the `AIXException`
+  // class. When ShouldEmitEHBlock is false and VRs are saved, a dummy EH info
+  // table is emitted in PPCAIXAsmPrinter::emitFunctionBodyEnd.
+ if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF))
+ return;
+
+ const MCSymbol *LSDALabel = emitExceptionTable();
+
+ const Function &F = MF->getFunction();
+ assert(F.hasPersonalityFn() &&
+ "Landingpads are presented, but no personality routine is found.");
+ const auto *Per =
+ cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PerSym = Asm->TM.getSymbol(Per);
+
+ emitExceptionInfoTable(LSDALabel, PerSym);
+}
+
+} // End of namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 000000000000..de6ebcf0c341
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,132 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A) : EHStreamer(A) {}
+
+ARMException::~ARMException() = default;
+
+ARMTargetStreamer &ARMException::getTargetStreamer() {
+ MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+}
+
+void ARMException::beginFunction(const MachineFunction *MF) {
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ getTargetStreamer().emitFnStart();
+ // See if we need call frame info.
+ AsmPrinter::CFISection CFISecType = Asm->getFunctionCFISectionType(*MF);
+ assert(CFISecType != AsmPrinter::CFISection::EH &&
+ "non-EH CFI not yet supported in prologue with EHABI lowering");
+
+ if (CFISecType == AsmPrinter::CFISection::Debug) {
+ if (!hasEmittedCFISections) {
+ if (Asm->getModuleCFISectionType() == AsmPrinter::CFISection::Debug)
+ Asm->OutStreamer->emitCFISections(false, true);
+ hasEmittedCFISections = true;
+ }
+
+ shouldEmitCFI = true;
+ Asm->OutStreamer->emitCFIStartProc(false);
+ }
+}
+
+void ARMException::markFunctionEnd() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->emitCFIEndProc();
+}
+
+/// endFunction - Gather and emit post-function exception information.
+///
+void ARMException::endFunction(const MachineFunction *MF) {
+ ARMTargetStreamer &ATS = getTargetStreamer();
+ const Function &F = MF->getFunction();
+ const Function *Per = nullptr;
+ if (F.hasPersonalityFn())
+ Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ bool forceEmitPersonality =
+ F.hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ F.needsUnwindTableEntry();
+ bool shouldEmitPersonality = forceEmitPersonality ||
+ !MF->getLandingPads().empty();
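+  // Functions that need neither an unwind table entry nor a personality can be
+  // marked .cantunwind; those with a personality also get .handlerdata and an
+  // LSDA.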
+ if (!Asm->MF->getFunction().needsUnwindTableEntry() &&
+ !shouldEmitPersonality)
+ ATS.emitCantUnwind();
+ else if (shouldEmitPersonality) {
+ // Emit references to personality.
+ if (Per) {
+ MCSymbol *PerSym = Asm->getSymbol(Per);
+ ATS.emitPersonality(PerSym);
+ }
+
+ // Emit .handlerdata directive.
+ ATS.emitHandlerData();
+
+ // Emit actual exception table
+ emitExceptionTable();
+ }
+
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ ATS.emitFnEnd();
+}
+
+void ARMException::emitTypeInfos(unsigned TTypeEncoding,
+ MCSymbol *TTBaseLabel) {
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer->addBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (const GlobalValue *GV : reverse(TypeInfos)) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
+ Asm->emitTTypeReference(GV, TTypeEncoding);
+ }
+
+ Asm->OutStreamer->emitLabel(TTBaseLabel);
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer->addBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->emitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
new file mode 100644
index 000000000000..aab3c2681339
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -0,0 +1,714 @@
+//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AccelTable.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <vector>
+
+using namespace llvm;
+
+void AccelTableBase::computeBucketCount() {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> Uniques;
+ Uniques.reserve(Entries.size());
+ for (const auto &E : Entries)
+ Uniques.push_back(E.second.HashValue);
+ array_pod_sort(Uniques.begin(), Uniques.end());
+ std::vector<uint32_t>::iterator P =
+ std::unique(Uniques.begin(), Uniques.end());
+
+ UniqueHashCount = std::distance(Uniques.begin(), P);
+
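+  // Then pick a bucket count: use fewer buckets than unique hashes to keep the
+  // table compact, scaling the divisor with the table size, but always emit at
+  // least one bucket.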
+ if (UniqueHashCount > 1024)
+ BucketCount = UniqueHashCount / 4;
+ else if (UniqueHashCount > 16)
+ BucketCount = UniqueHashCount / 2;
+ else
+ BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
+}
+
+void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
+ // Create the individual hash data outputs.
+ for (auto &E : Entries) {
+ // Unique the entries.
+ llvm::stable_sort(E.second.Values,
+ [](const AccelTableData *A, const AccelTableData *B) {
+ return *A < *B;
+ });
+ E.second.Values.erase(
+ std::unique(E.second.Values.begin(), E.second.Values.end()),
+ E.second.Values.end());
+ }
+
+ // Figure out how many buckets we need, then compute the bucket contents and
+ // the final ordering. The hashes and offsets can be emitted by walking these
+ // data structures. We add temporary symbols to the data so they can be
+ // referenced when emitting the offsets.
+ computeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(BucketCount);
+ for (auto &E : Entries) {
+ uint32_t Bucket = E.second.HashValue % BucketCount;
+ Buckets[Bucket].push_back(&E.second);
+ E.second.Sym = Asm->createTempSymbol(Prefix);
+ }
+
+ // Sort the contents of the buckets by hash value so that hash collisions end
+ // up together. Stable sort makes testing easier and doesn't cost much more.
+ for (auto &Bucket : Buckets)
+ llvm::stable_sort(Bucket, [](HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
+}
+
+namespace {
+/// Base class for writing out Accelerator tables. It holds the common
+/// functionality for the two Accelerator table types.
+class AccelTableWriter {
+protected:
+ AsmPrinter *const Asm; ///< Destination.
+ const AccelTableBase &Contents; ///< Data to emit.
+
+ /// Controls whether to emit duplicate hash and offset table entries for names
+  /// with identical hashes. Apple tables don't emit duplicate entries; DWARF v5
+ /// tables do.
+ const bool SkipIdenticalHashes;
+
+ void emitHashes() const;
+
+ /// Emit offsets to lists of entries with identical names. The offsets are
+ /// relative to the Base argument.
+ void emitOffsets(const MCSymbol *Base) const;
+
+public:
+ AccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents,
+ bool SkipIdenticalHashes)
+ : Asm(Asm), Contents(Contents), SkipIdenticalHashes(SkipIdenticalHashes) {
+ }
+};
+
+class AppleAccelTableWriter : public AccelTableWriter {
+ using Atom = AppleAccelTableData::Atom;
+
+ /// The fixed header of an Apple Accelerator Table.
+ struct Header {
+ uint32_t Magic = MagicHash;
+ uint16_t Version = 1;
+ uint16_t HashFunction = dwarf::DW_hash_function_djb;
+ uint32_t BucketCount;
+ uint32_t HashCount;
+ uint32_t HeaderDataLength;
+
+ /// 'HASH' magic value to detect endianness.
+ static const uint32_t MagicHash = 0x48415348;
+
+ Header(uint32_t BucketCount, uint32_t UniqueHashCount, uint32_t DataLength)
+ : BucketCount(BucketCount), HashCount(UniqueHashCount),
+ HeaderDataLength(DataLength) {}
+
+ void emit(AsmPrinter *Asm) const;
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+#endif
+ };
+
+ /// The HeaderData describes the structure of an Apple accelerator table
+ /// through a list of Atoms.
+ struct HeaderData {
+ /// In the case of data that is referenced via DW_FORM_ref_* the offset
+ /// base is used to describe the offset for all forms in the list of atoms.
+ uint32_t DieOffsetBase;
+
+ const SmallVector<Atom, 4> Atoms;
+
+ HeaderData(ArrayRef<Atom> AtomList, uint32_t Offset = 0)
+ : DieOffsetBase(Offset), Atoms(AtomList.begin(), AtomList.end()) {}
+
+ void emit(AsmPrinter *Asm) const;
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+#endif
+ };
+
+ Header Header;
+ HeaderData HeaderData;
+ const MCSymbol *SecBegin;
+
+ void emitBuckets() const;
+ void emitData() const;
+
+public:
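+  /// The header data length passed to Header is 8 bytes (DieOffsetBase plus
+  /// the atom count) plus 4 bytes per atom (a 2-byte type and a 2-byte form).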
+ AppleAccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents,
+ ArrayRef<Atom> Atoms, const MCSymbol *SecBegin)
+ : AccelTableWriter(Asm, Contents, true),
+ Header(Contents.getBucketCount(), Contents.getUniqueHashCount(),
+ 8 + (Atoms.size() * 4)),
+ HeaderData(Atoms), SecBegin(SecBegin) {}
+
+ void emit() const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+#endif
+};
+
+/// Class responsible for emitting a DWARF v5 Accelerator Table. The only
+/// public function is emit(), which performs the actual emission.
+///
+/// The class is templated on its data type. This allows us to emit both dynamic
+/// and static data entries. A callback abstracts the logic of providing a CU
+/// index for a given entry, which is different per data type, but identical
+/// for every entry in the same table.
+template <typename DataT>
+class Dwarf5AccelTableWriter : public AccelTableWriter {
+ struct Header {
+ uint16_t Version = 5;
+ uint16_t Padding = 0;
+ uint32_t CompUnitCount;
+ uint32_t LocalTypeUnitCount = 0;
+ uint32_t ForeignTypeUnitCount = 0;
+ uint32_t BucketCount = 0;
+ uint32_t NameCount = 0;
+ uint32_t AbbrevTableSize = 0;
+ uint32_t AugmentationStringSize = sizeof(AugmentationString);
+ char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'};
+
+ Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount)
+ : CompUnitCount(CompUnitCount), BucketCount(BucketCount),
+ NameCount(NameCount) {}
+
+ void emit(Dwarf5AccelTableWriter &Ctx);
+ };
+ struct AttributeEncoding {
+ dwarf::Index Index;
+ dwarf::Form Form;
+ };
+
+ Header Header;
+ DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations;
+ ArrayRef<MCSymbol *> CompUnits;
+ llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry;
+ MCSymbol *ContributionEnd = nullptr;
+ MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start");
+ MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
+ MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
+
+ DenseSet<uint32_t> getUniqueTags() const;
+
+ // Right now, we emit uniform attributes for all tags.
+ SmallVector<AttributeEncoding, 2> getUniformAttributes() const;
+
+ void emitCUList() const;
+ void emitBuckets() const;
+ void emitStringOffsets() const;
+ void emitAbbrevs() const;
+ void emitEntry(const DataT &Entry) const;
+ void emitData() const;
+
+public:
+ Dwarf5AccelTableWriter(
+ AsmPrinter *Asm, const AccelTableBase &Contents,
+ ArrayRef<MCSymbol *> CompUnits,
+ llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry);
+
+ void emit();
+};
+} // namespace
+
+void AccelTableWriter::emitHashes() const {
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ unsigned BucketIdx = 0;
+ for (const auto &Bucket : Contents.getBuckets()) {
+ for (const auto &Hash : Bucket) {
+ uint32_t HashValue = Hash->HashValue;
+ if (SkipIdenticalHashes && PrevHash == HashValue)
+ continue;
+ Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(BucketIdx));
+ Asm->emitInt32(HashValue);
+ PrevHash = HashValue;
+ }
+ BucketIdx++;
+ }
+}
+
+void AccelTableWriter::emitOffsets(const MCSymbol *Base) const {
+ const auto &Buckets = Contents.getBuckets();
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (auto *Hash : Buckets[i]) {
+ uint32_t HashValue = Hash->HashValue;
+ if (SkipIdenticalHashes && PrevHash == HashValue)
+ continue;
+ PrevHash = HashValue;
+ Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
+ Asm->emitLabelDifference(Hash->Sym, Base, Asm->getDwarfOffsetByteSize());
+ }
+ }
+}
+
+void AppleAccelTableWriter::Header::emit(AsmPrinter *Asm) const {
+ Asm->OutStreamer->AddComment("Header Magic");
+ Asm->emitInt32(Magic);
+ Asm->OutStreamer->AddComment("Header Version");
+ Asm->emitInt16(Version);
+ Asm->OutStreamer->AddComment("Header Hash Function");
+ Asm->emitInt16(HashFunction);
+ Asm->OutStreamer->AddComment("Header Bucket Count");
+ Asm->emitInt32(BucketCount);
+ Asm->OutStreamer->AddComment("Header Hash Count");
+ Asm->emitInt32(HashCount);
+ Asm->OutStreamer->AddComment("Header Data Length");
+ Asm->emitInt32(HeaderDataLength);
+}
+
+void AppleAccelTableWriter::HeaderData::emit(AsmPrinter *Asm) const {
+ Asm->OutStreamer->AddComment("HeaderData Die Offset Base");
+ Asm->emitInt32(DieOffsetBase);
+ Asm->OutStreamer->AddComment("HeaderData Atom Count");
+ Asm->emitInt32(Atoms.size());
+
+ for (const Atom &A : Atoms) {
+ Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.Type));
+ Asm->emitInt16(A.Type);
+ Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.Form));
+ Asm->emitInt16(A.Form);
+ }
+}
+
+void AppleAccelTableWriter::emitBuckets() const {
+ const auto &Buckets = Contents.getBuckets();
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer->AddComment("Bucket " + Twine(i));
+ if (!Buckets[i].empty())
+ Asm->emitInt32(index);
+ else
+ Asm->emitInt32(std::numeric_limits<uint32_t>::max());
+    // Buckets point into the list of hashes, not to the data. Do not increment
+ // the index multiple times in case of hash collisions.
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ for (auto *HD : Buckets[i]) {
+ uint32_t HashValue = HD->HashValue;
+ if (PrevHash != HashValue)
+ ++index;
+ PrevHash = HashValue;
+ }
+ }
+}
+
+void AppleAccelTableWriter::emitData() const {
+ const auto &Buckets = Contents.getBuckets();
+ for (const AccelTableBase::HashList &Bucket : Buckets) {
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ for (const auto &Hash : Bucket) {
+ // Terminate the previous entry if there is no hash collision with the
+ // current one.
+ if (PrevHash != std::numeric_limits<uint64_t>::max() &&
+ PrevHash != Hash->HashValue)
+ Asm->emitInt32(0);
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer->emitLabel(Hash->Sym);
+ Asm->OutStreamer->AddComment(Hash->Name.getString());
+ Asm->emitDwarfStringOffset(Hash->Name);
+ Asm->OutStreamer->AddComment("Num DIEs");
+ Asm->emitInt32(Hash->Values.size());
+ for (const auto *V : Hash->Values)
+ static_cast<const AppleAccelTableData *>(V)->emit(Asm);
+ PrevHash = Hash->HashValue;
+ }
+ // Emit the final end marker for the bucket.
+ if (!Bucket.empty())
+ Asm->emitInt32(0);
+ }
+}
+
+void AppleAccelTableWriter::emit() const {
+ Header.emit(Asm);
+ HeaderData.emit(Asm);
+ emitBuckets();
+ emitHashes();
+ emitOffsets(SecBegin);
+ emitData();
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
+ assert(CompUnitCount > 0 && "Index must have at least one CU.");
+
+ AsmPrinter *Asm = Ctx.Asm;
+ Ctx.ContributionEnd =
+ Asm->emitDwarfUnitLength("names", "Header: unit length");
+ Asm->OutStreamer->AddComment("Header: version");
+ Asm->emitInt16(Version);
+ Asm->OutStreamer->AddComment("Header: padding");
+ Asm->emitInt16(Padding);
+ Asm->OutStreamer->AddComment("Header: compilation unit count");
+ Asm->emitInt32(CompUnitCount);
+ Asm->OutStreamer->AddComment("Header: local type unit count");
+ Asm->emitInt32(LocalTypeUnitCount);
+ Asm->OutStreamer->AddComment("Header: foreign type unit count");
+ Asm->emitInt32(ForeignTypeUnitCount);
+ Asm->OutStreamer->AddComment("Header: bucket count");
+ Asm->emitInt32(BucketCount);
+ Asm->OutStreamer->AddComment("Header: name count");
+ Asm->emitInt32(NameCount);
+ Asm->OutStreamer->AddComment("Header: abbreviation table size");
+ Asm->emitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t));
+ Asm->OutStreamer->AddComment("Header: augmentation string size");
+ assert(AugmentationStringSize % 4 == 0);
+ Asm->emitInt32(AugmentationStringSize);
+ Asm->OutStreamer->AddComment("Header: augmentation string");
+ Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
+}
+
+template <typename DataT>
+DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const {
+ DenseSet<uint32_t> UniqueTags;
+ for (auto &Bucket : Contents.getBuckets()) {
+ for (auto *Hash : Bucket) {
+ for (auto *Value : Hash->Values) {
+ unsigned Tag = static_cast<const DataT *>(Value)->getDieTag();
+ UniqueTags.insert(Tag);
+ }
+ }
+ }
+ return UniqueTags;
+}
+
+template <typename DataT>
+SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2>
+Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const {
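+  // Only emit a DW_IDX_compile_unit attribute when the index covers more than
+  // one CU, using the smallest form that can hold the largest CU index.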
+ SmallVector<AttributeEncoding, 2> UA;
+ if (CompUnits.size() > 1) {
+ size_t LargestCUIndex = CompUnits.size() - 1;
+ dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex);
+ UA.push_back({dwarf::DW_IDX_compile_unit, Form});
+ }
+ UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+ return UA;
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
+ for (const auto &CU : enumerate(CompUnits)) {
+ Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index()));
+ Asm->emitDwarfSymbolReference(CU.value());
+ }
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitBuckets() const {
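+  // Each bucket holds the 1-based index of its first name entry, or 0 if the
+  // bucket is empty.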
+ uint32_t Index = 1;
+ for (const auto &Bucket : enumerate(Contents.getBuckets())) {
+ Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index()));
+ Asm->emitInt32(Bucket.value().empty() ? 0 : Index);
+ Index += Bucket.value().size();
+ }
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const {
+ for (const auto &Bucket : enumerate(Contents.getBuckets())) {
+ for (auto *Hash : Bucket.value()) {
+ DwarfStringPoolEntryRef String = Hash->Name;
+ Asm->OutStreamer->AddComment("String in Bucket " + Twine(Bucket.index()) +
+ ": " + String.getString());
+ Asm->emitDwarfStringOffset(String);
+ }
+ }
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
+ Asm->OutStreamer->emitLabel(AbbrevStart);
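+  // The abbreviation code is the DIE tag itself, so the same value is emitted
+  // twice: once as the code and once as the tag.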
+ for (const auto &Abbrev : Abbreviations) {
+ Asm->OutStreamer->AddComment("Abbrev code");
+ assert(Abbrev.first != 0);
+ Asm->emitULEB128(Abbrev.first);
+ Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first));
+ Asm->emitULEB128(Abbrev.first);
+ for (const auto &AttrEnc : Abbrev.second) {
+ Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
+ Asm->emitULEB128(AttrEnc.Form,
+ dwarf::FormEncodingString(AttrEnc.Form).data());
+ }
+ Asm->emitULEB128(0, "End of abbrev");
+ Asm->emitULEB128(0, "End of abbrev");
+ }
+ Asm->emitULEB128(0, "End of abbrev list");
+ Asm->OutStreamer->emitLabel(AbbrevEnd);
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
+ auto AbbrevIt = Abbreviations.find(Entry.getDieTag());
+ assert(AbbrevIt != Abbreviations.end() &&
+ "Why wasn't this abbrev generated?");
+
+ Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+ for (const auto &AttrEnc : AbbrevIt->second) {
+ Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
+ switch (AttrEnc.Index) {
+ case dwarf::DW_IDX_compile_unit: {
+ DIEInteger ID(getCUIndexForEntry(Entry));
+ ID.emitValue(Asm, AttrEnc.Form);
+ break;
+ }
+ case dwarf::DW_IDX_die_offset:
+ assert(AttrEnc.Form == dwarf::DW_FORM_ref4);
+ Asm->emitInt32(Entry.getDieOffset());
+ break;
+ default:
+ llvm_unreachable("Unexpected index attribute!");
+ }
+ }
+}
+
+template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
+ Asm->OutStreamer->emitLabel(EntryPool);
+ for (auto &Bucket : Contents.getBuckets()) {
+ for (auto *Hash : Bucket) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer->emitLabel(Hash->Sym);
+ for (const auto *Value : Hash->Values)
+ emitEntry(*static_cast<const DataT *>(Value));
+ Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString());
+ Asm->emitInt8(0);
+ }
+ }
+}
+
+template <typename DataT>
+Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
+ AsmPrinter *Asm, const AccelTableBase &Contents,
+ ArrayRef<MCSymbol *> CompUnits,
+ llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry)
+ : AccelTableWriter(Asm, Contents, false),
+ Header(CompUnits.size(), Contents.getBucketCount(),
+ Contents.getUniqueNameCount()),
+ CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) {
+ DenseSet<uint32_t> UniqueTags = getUniqueTags();
+ SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes();
+
+ Abbreviations.reserve(UniqueTags.size());
+ for (uint32_t Tag : UniqueTags)
+ Abbreviations.try_emplace(Tag, UniformAttributes);
+}
+
+template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
+ Header.emit(*this);
+ emitCUList();
+ emitBuckets();
+ emitHashes();
+ emitStringOffsets();
+ emitOffsets(EntryPool);
+ emitAbbrevs();
+ emitData();
+ Asm->OutStreamer->emitValueToAlignment(Align(4), 0);
+ Asm->OutStreamer->emitLabel(ContributionEnd);
+}
+
+void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
+ StringRef Prefix, const MCSymbol *SecBegin,
+ ArrayRef<AppleAccelTableData::Atom> Atoms) {
+ Contents.finalize(Asm, Prefix);
+ AppleAccelTableWriter(Asm, Contents, Atoms, SecBegin).emit();
+}
+
+void llvm::emitDWARF5AccelTable(
+ AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
+ const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
+ std::vector<MCSymbol *> CompUnits;
+ SmallVector<unsigned, 1> CUIndex(CUs.size());
+ int Count = 0;
+ for (const auto &CU : enumerate(CUs)) {
+ switch (CU.value()->getCUNode()->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::Default:
+ case DICompileUnit::DebugNameTableKind::Apple:
+ break;
+ default:
+ continue;
+ }
+ CUIndex[CU.index()] = Count++;
+ assert(CU.index() == CU.value()->getUniqueID());
+ const DwarfCompileUnit *MainCU =
+ DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
+ CompUnits.push_back(MainCU->getLabelBegin());
+ }
+
+ if (CompUnits.empty())
+ return;
+
+ Asm->OutStreamer->switchSection(
+ Asm->getObjFileLowering().getDwarfDebugNamesSection());
+
+ Contents.finalize(Asm, "names");
+ Dwarf5AccelTableWriter<DWARF5AccelTableData>(
+ Asm, Contents, CompUnits,
+ [&](const DWARF5AccelTableData &Entry) {
+ const DIE *CUDie = Entry.getDie().getUnitDie();
+ return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
+ })
+ .emit();
+}
+
+void llvm::emitDWARF5AccelTable(
+ AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
+ ArrayRef<MCSymbol *> CUs,
+ llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
+ getCUIndexForEntry) {
+ Contents.finalize(Asm, "names");
+ Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs,
+ getCUIndexForEntry)
+ .emit();
+}
+
+void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const {
+ assert(Die.getDebugSectionOffset() <= UINT32_MAX &&
+ "The section offset exceeds the limit.");
+ Asm->emitInt32(Die.getDebugSectionOffset());
+}
+
+void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const {
+ assert(Die.getDebugSectionOffset() <= UINT32_MAX &&
+ "The section offset exceeds the limit.");
+ Asm->emitInt32(Die.getDebugSectionOffset());
+ Asm->emitInt16(Die.getTag());
+ Asm->emitInt8(0);
+}
+
+void AppleAccelTableStaticOffsetData::emit(AsmPrinter *Asm) const {
+ Asm->emitInt32(Offset);
+}
+
+void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
+ Asm->emitInt32(Offset);
+ Asm->emitInt16(Tag);
+ Asm->emitInt8(ObjCClassIsImplementation ? dwarf::DW_FLAG_type_implementation
+ : 0);
+ Asm->emitInt32(QualifiedNameHash);
+}
+
+constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
+constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
+constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
+constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
+
+#ifndef NDEBUG
+void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
+ OS << "Magic: " << format("0x%x", Magic) << "\n"
+ << "Version: " << Version << "\n"
+ << "Hash Function: " << HashFunction << "\n"
+ << "Bucket Count: " << BucketCount << "\n"
+ << "Header Data Length: " << HeaderDataLength << "\n";
+}
+
+void AppleAccelTableData::Atom::print(raw_ostream &OS) const {
+ OS << "Type: " << dwarf::AtomTypeString(Type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(Form) << "\n";
+}
+
+void AppleAccelTableWriter::HeaderData::print(raw_ostream &OS) const {
+ OS << "DIE Offset Base: " << DieOffsetBase << "\n";
+ for (auto Atom : Atoms)
+ Atom.print(OS);
+}
+
+void AppleAccelTableWriter::print(raw_ostream &OS) const {
+ Header.print(OS);
+ HeaderData.print(OS);
+ Contents.print(OS);
+ SecBegin->print(OS, nullptr);
+}
+
+void AccelTableBase::HashData::print(raw_ostream &OS) const {
+ OS << "Name: " << Name.getString() << "\n";
+ OS << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ OS << " Symbol: ";
+ if (Sym)
+ OS << *Sym;
+ else
+ OS << "<none>";
+ OS << "\n";
+ for (auto *Value : Values)
+ Value->print(OS);
+}
+
+void AccelTableBase::print(raw_ostream &OS) const {
+ // Print Content.
+ OS << "Entries: \n";
+ for (const auto &[Name, Data] : Entries) {
+ OS << "Name: " << Name << "\n";
+ for (auto *V : Data.Values)
+ V->print(OS);
+ }
+
+ OS << "Buckets and Hashes: \n";
+ for (const auto &Bucket : Buckets)
+ for (const auto &Hash : Bucket)
+ Hash->print(OS);
+
+ OS << "Data: \n";
+ for (const auto &E : Entries)
+ E.second.print(OS);
+}
+
+void DWARF5AccelTableData::print(raw_ostream &OS) const {
+ OS << " Offset: " << getDieOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n";
+}
+
+void DWARF5AccelTableStaticData::print(raw_ostream &OS) const {
+ OS << " Offset: " << getDieOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n";
+}
+
+void AppleAccelTableOffsetData::print(raw_ostream &OS) const {
+ OS << " Offset: " << Die.getOffset() << "\n";
+}
+
+void AppleAccelTableTypeData::print(raw_ostream &OS) const {
+ OS << " Offset: " << Die.getOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(Die.getTag()) << "\n";
+}
+
+void AppleAccelTableStaticOffsetData::print(raw_ostream &OS) const {
+ OS << " Static Offset: " << Offset << "\n";
+}
+
+void AppleAccelTableStaticTypeData::print(raw_ostream &OS) const {
+ OS << " Static Offset: " << Offset << "\n";
+ OS << " QualifiedNameHash: " << format("%x\n", QualifiedNameHash) << "\n";
+ OS << " Tag: " << dwarf::TagString(Tag) << "\n";
+ OS << " ObjCClassIsImplementation: "
+ << (ObjCClassIsImplementation ? "true" : "false");
+ OS << "\n";
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
new file mode 100644
index 000000000000..00ee4e1b47a8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -0,0 +1,73 @@
+//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddressPool.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <utility>
+
+using namespace llvm;
+
+unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
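+  // Mark the pool as used (see HasBeenUsed in AddressPool.h), and return the
+  // existing index if the symbol is already in the pool.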
+ resetUsedFlag(true);
+ auto IterBool =
+ Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
+ return IterBool.first->second.Number;
+}
+
+MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
+ static const uint8_t AddrSize = Asm.MAI->getCodePointerSize();
+
+ MCSymbol *EndLabel =
+ Asm.emitDwarfUnitLength("debug_addr", "Length of contribution");
+ Asm.OutStreamer->AddComment("DWARF version number");
+ Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.OutStreamer->AddComment("Address size");
+ Asm.emitInt8(AddrSize);
+ Asm.OutStreamer->AddComment("Segment selector size");
+ Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
+
+ return EndLabel;
+}
+
+// Emit addresses into the section given.
+void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
+ if (isEmpty())
+ return;
+
+ // Start the dwarf addr section.
+ Asm.OutStreamer->switchSection(AddrSection);
+
+ MCSymbol *EndLabel = nullptr;
+
+ if (Asm.getDwarfVersion() >= 5)
+ EndLabel = emitHeader(Asm, AddrSection);
+
+ // Define the symbol that marks the start of the contribution.
+ // It is referenced via DW_AT_addr_base.
+ Asm.OutStreamer->emitLabel(AddressTableBaseSym);
+
+ // Order the address pool entries by ID
+ SmallVector<const MCExpr *, 64> Entries(Pool.size());
+
+ for (const auto &I : Pool)
+ Entries[I.second.Number] =
+ I.second.TLS
+ ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first)
+ : MCSymbolRefExpr::create(I.first, Asm.OutContext);
+
+ for (const MCExpr *Entry : Entries)
+ Asm.OutStreamer->emitValue(Entry, Asm.MAI->getCodePointerSize());
+
+ if (EndLabel)
+ Asm.OutStreamer->emitLabel(EndLabel);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
new file mode 100644
index 000000000000..f1edc6c330d5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -0,0 +1,65 @@
+//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MCSection;
+class MCSymbol;
+
+// Collection of addresses for this unit and assorted labels.
+// A Symbol->unsigned mapping of addresses used by indirect
+// references.
+class AddressPool {
+ struct AddressPoolEntry {
+ unsigned Number;
+ bool TLS;
+
+ AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
+ };
+ DenseMap<const MCSymbol *, AddressPoolEntry> Pool;
+
+ /// Record whether the AddressPool has been queried for an address index since
+ /// the last "resetUsedFlag" call. Used to implement type unit fallback - a
+ /// type that references addresses cannot be placed in a type unit when using
+ /// fission.
+ bool HasBeenUsed = false;
+
+public:
+ AddressPool() = default;
+
+ /// Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getIndex(const MCSymbol *Sym, bool TLS = false);
+
+ void emit(AsmPrinter &Asm, MCSection *AddrSection);
+
+ bool isEmpty() { return Pool.empty(); }
+
+ bool hasBeenUsed() const { return HasBeenUsed; }
+
+ void resetUsedFlag(bool HasBeenUsed = false) { this->HasBeenUsed = HasBeenUsed; }
+
+ MCSymbol *getLabel() { return AddressTableBaseSym; }
+ void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
+
+private:
+ MCSymbol *emitHeader(AsmPrinter &Asm, MCSection *Section);
+
+ /// Symbol designates the start of the contribution to the address table.
+ MCSymbol *AddressTableBaseSym = nullptr;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 000000000000..5381dfdd184c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,4166 @@
+//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "CodeViewDebug.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "PseudoProbePrinter.h"
+#include "WasmException.h"
+#include "WinCFGuard.h"
+#include "WinException.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GCStrategy.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Pass.h"
+#include "llvm/Remarks/RemarkStreamer.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+static cl::opt<std::string> BasicBlockProfileDump(
+ "mbb-profile-dump", cl::Hidden,
+ cl::desc("Basic block profile dump for external cost modelling. If "
+ "matching up BBs with afterwards, the compilation must be "
+ "performed with -basic-block-sections=labels. Enabling this "
+ "flag during in-process ThinLTO is not supported."));
+
+const char DWARFGroupName[] = "dwarf";
+const char DWARFGroupDescription[] = "DWARF Emission";
+const char DbgTimerName[] = "emit";
+const char DbgTimerDescription[] = "Debug Info Emission";
+const char EHTimerName[] = "write_exception";
+const char EHTimerDescription[] = "DWARF Exception Writer";
+const char CFGuardName[] = "Control Flow Guard";
+const char CFGuardDescription[] = "Control Flow Guard";
+const char CodeViewLineTablesGroupName[] = "linetables";
+const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables";
+const char PPTimerName[] = "emit";
+const char PPTimerDescription[] = "Pseudo Probe Emission";
+const char PPGroupName[] = "pseudo probe";
+const char PPGroupDescription[] = "Pseudo Probe Emission";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+namespace {
+class AddrLabelMapCallbackPtr final : CallbackVH {
+ AddrLabelMap *Map = nullptr;
+
+public:
+ AddrLabelMapCallbackPtr() = default;
+ AddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(AddrLabelMap *map) { Map = map; }
+
+ void deleted() override;
+ void allUsesReplacedWith(Value *V2) override;
+};
+} // namespace
+
+class llvm::AddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// The symbols for the label.
+ TinyPtrVector<MCSymbol *> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+  /// Callbacks for the BasicBlocks that we have entries for. We use this so
+ /// we get notified if a block is deleted or RAUWd.
+ std::vector<AddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// This is a per-function list of symbols whose corresponding BasicBlock got
+ /// deleted. These symbols need to be emitted at some point in the file, so
+ /// AsmPrinter emits them after the function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>
+ DeletedAddrLabelsNeedingEmission;
+
+public:
+ AddrLabelMap(MCContext &context) : Context(context) {}
+
+ ~AddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+ }
+
+ ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol *> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+
+ArrayRef<MCSymbol *> AddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.empty()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ return Entry.Symbols;
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.emplace_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size() - 1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
+ : Context.createTempSymbol();
+ Entry.Symbols.push_back(Sym);
+ return Entry.Symbols;
+}
+
+/// If we have any deleted symbols for F, return them.
+void AddrLabelMap::takeDeletedSymbolsForFunction(
+ Function *F, std::vector<MCSymbol *> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end())
+ return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+ArrayRef<MCSymbol *>
+AsmPrinter::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (!AddrLabelSymbols)
+ AddrLabelSymbols = std::make_unique<AddrLabelMap>(OutContext);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(
+ const_cast<BasicBlock *>(BB));
+}
+
+void AsmPrinter::takeDeletedSymbolsForFunction(
+ const Function *F, std::vector<MCSymbol *> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (!AddrLabelSymbols)
+ return;
+ return AddrLabelSymbols->takeDeletedSymbolsForFunction(
+ const_cast<Function *>(F), Result);
+}
+
+void AddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
+
+#if !LLVM_MEMORY_SANITIZER_BUILD
+ // BasicBlock is destroyed already, so this access is UB detectable by msan.
+ assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+#endif
+
+ for (MCSymbol *Sym : Entry.Symbols) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+    // parent may already be removed, so we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+}
+
+void AddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.empty()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = std::move(OldEntry); // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
+
+ // Otherwise, we need to add the old symbols to the new block's set.
+ llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
+}
+
+void AddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void AddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+/// getGVAlignment - Return the alignment to use for the specified global
+/// value. This rounds up to the preferred alignment if possible and legal.
+Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
+ Align InAlign) {
+ Align Alignment;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ Alignment = DL.getPreferredAlign(GVar);
+
+  // If InAlign is specified, round up to it.
+ if (InAlign > Alignment)
+ Alignment = InAlign;
+
+ // If the GV has a specified alignment, take it into account.
+ const MaybeAlign GVAlign(GV->getAlign());
+ if (!GVAlign)
+ return Alignment;
+
+ assert(GVAlign && "GVAlign must be set");
+
+  // If GVAlign is larger than the alignment computed so far, or if we must
+  // obey GVAlign because the GV has an assigned section, obey it.
+ if (*GVAlign > Alignment || GV->hasSection())
+ Alignment = *GVAlign;
+ return Alignment;
+}
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
+ : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
+ SM(*this) {
+ VerboseAsm = OutStreamer->isVerboseAsm();
+ DwarfUsesRelocationsAcrossSections =
+ MAI->doesDwarfUseRelocationsAcrossSections();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(!DD && Handlers.size() == NumUserHandlers &&
+ "Debug/EH info didn't get finalized");
+}
+
+bool AsmPrinter::isPositionIndependent() const {
+ return TM.isPositionIndependent();
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return *TM.getObjFileLowering();
+}
+
+const DataLayout &AsmPrinter::getDataLayout() const {
+ return MMI->getModule()->getDataLayout();
+}
+
+// Do not use the cached DataLayout because some clients use it without a Module
+// (dsymutil, llvm-dwarfdump).
+unsigned AsmPrinter::getPointerSize() const {
+ return TM.getPointerSize(0); // FIXME: Default address space
+}
+
+const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
+ assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
+ return MF->getSubtarget<MCSubtargetInfo>();
+}
+
+void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
+ S.emitInstruction(Inst, getSubtargetInfo());
+}
+
+void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
+ if (DD) {
+ assert(OutStreamer->hasRawTextSupport() &&
+ "Expected assembly output mode.");
+ // This is NVPTX specific and it's unclear why.
+ // PR51079: If we have code without debug information we need to give up.
+ DISubprogram *MFSP = MF.getFunction().getSubprogram();
+ if (!MFSP)
+ return;
+ (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+ }
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer->getCurrentSectionOnly();
+}
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
+ HasSplitStack = false;
+ HasNoSplitStack = false;
+
+ AddrLabelSymbols = nullptr;
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .getModuleMetadata(M);
+
+ OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
+
+ // Emit the version-min deployment target directive if needed.
+ //
+ // FIXME: If we end up with a collection of these sorts of Darwin-specific
+ // or ELF-specific things, it may make sense to have a platform helper class
+ // that will work with the target helper class. For now keep it here, as the
+ // alternative is duplicated code in each of the target asm printers that
+ // use the directive, where it would need the same conditionalization
+ // anyway.
+ const Triple &Target = TM.getTargetTriple();
+ Triple TVT(M.getDarwinTargetVariantTriple());
+ OutStreamer->emitVersionForTarget(
+ Target, M.getSDKVersion(),
+ M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
+ M.getDarwinTargetVariantSDKVersion());
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ emitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+
+ SmallString<128> FileName;
+ if (MAI->hasBasenameOnlyForFileDirective())
+ FileName = llvm::sys::path::filename(M.getSourceFileName());
+ else
+ FileName = M.getSourceFileName();
+ if (MAI->hasFourStringsDotFile()) {
+#ifdef PACKAGE_VENDOR
+ const char VerStr[] =
+ PACKAGE_VENDOR " " PACKAGE_NAME " version " PACKAGE_VERSION;
+#else
+ const char VerStr[] = PACKAGE_NAME " version " PACKAGE_VERSION;
+#endif
+ // TODO: Add timestamp and description.
+ OutStreamer->emitFileDirective(FileName, VerStr, "", "");
+ } else {
+ OutStreamer->emitFileDirective(FileName);
+ }
+ }
+
+ // On AIX, emit bytes for llvm.commandline metadata after .file so that the
+ // C_INFO symbol is preserved if any csect is kept by the linker.
+ if (TM.getTargetTriple().isOSBinFormatXCOFF())
+ emitModuleCommandLines(M);
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (const auto &I : *MI)
+ if (GCMetadataPrinter *MP = getOrCreateGCPrinter(*I))
+ MP->beginAssembly(M, *MI, *this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ OutStreamer->AddComment("Start of file scope inline assembly");
+ OutStreamer->addBlankLine();
+ emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+ TM.Options.MCOptions);
+ OutStreamer->AddComment("End of file scope inline assembly");
+ OutStreamer->addBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation()) {
+ bool EmitCodeView = M.getCodeViewFlag();
+ if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
+ Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
+ DbgTimerName, DbgTimerDescription,
+ CodeViewLineTablesGroupName,
+ CodeViewLineTablesGroupDescription);
+ }
+ if (!EmitCodeView || M.getDwarfVersion()) {
+ if (MMI->hasDebugInfo()) {
+ DD = new DwarfDebug(this);
+ Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
+ DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
+ }
+ }
+ }
+
+ if (M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ PP = new PseudoProbeHandler(this);
+ Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName,
+ PPTimerDescription, PPGroupName, PPGroupDescription);
+ }
+
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ // We may want to emit CFI for debug.
+ [[fallthrough]];
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ for (auto &F : M.getFunctionList()) {
+ if (getFunctionCFISectionType(F) != CFISection::None)
+ ModuleCFISection = getFunctionCFISectionType(F);
+ // If any function needsUnwindTableEntry(), it needs .eh_frame and hence
+ // the module needs .eh_frame. If we have found that case, we are done.
+ if (ModuleCFISection == CFISection::EH)
+ break;
+ }
+ assert(MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI ||
+ usesCFIWithoutEH() || ModuleCFISection != CFISection::EH);
+ break;
+ default:
+ break;
+ }
+
+ EHStreamer *ES = nullptr;
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ if (!usesCFIWithoutEH())
+ break;
+ [[fallthrough]];
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ ES = new DwarfCFIException(this);
+ break;
+ case ExceptionHandling::ARM:
+ ES = new ARMException(this);
+ break;
+ case ExceptionHandling::WinEH:
+ switch (MAI->getWinEHEncodingType()) {
+ default: llvm_unreachable("unsupported unwinding information encoding");
+ case WinEH::EncodingType::Invalid:
+ break;
+ case WinEH::EncodingType::X86:
+ case WinEH::EncodingType::Itanium:
+ ES = new WinException(this);
+ break;
+ }
+ break;
+ case ExceptionHandling::Wasm:
+ ES = new WasmException(this);
+ break;
+ case ExceptionHandling::AIX:
+ ES = new AIXException(this);
+ break;
+ }
+ if (ES)
+ Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
+ EHTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
+
+ // Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2).
+ if (mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
+ Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
+ CFGuardDescription, DWARFGroupName,
+ DWARFGroupDescription);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginModule(&M);
+ }
+
+ if (!BasicBlockProfileDump.empty()) {
+ std::error_code PossibleFileError;
+ MBBProfileDumpFileOutput = std::make_unique<raw_fd_ostream>(
+ BasicBlockProfileDump, PossibleFileError);
+ if (PossibleFileError) {
+ M.getContext().emitError("Failed to open file for MBB Profile Dump: " +
+ PossibleFileError.message() + "\n");
+ }
+ }
+
+ return false;
+}
+
+static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
+ if (!MAI.hasWeakDefCanBeHiddenDirective())
+ return false;
+
+ return GV->canBeOmittedFromSymbolTable();
+}
+
+void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
+ GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+ switch (Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ if (MAI->hasWeakDefDirective()) {
+ // .globl _foo
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
+
+ if (!canBeHidden(GV, *MAI))
+ // .weak_definition _foo
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->avoidWeakIfComdat() && GV->hasComdat()) {
+ // .globl _foo
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
+      // NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ return;
+ case GlobalValue::ExternalLinkage:
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
+ return;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ return;
+ case GlobalValue::ExternalWeakLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::AppendingLinkage:
+ llvm_unreachable("Should never emit this");
+ }
+ llvm_unreachable("Unknown linkage type!");
+}
+
+void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name,
+ const GlobalValue *GV) const {
+ TM.getNameWithPrefix(Name, GV, getObjFileLowering().getMangler());
+}
+
+MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
+ return TM.getSymbol(GV);
+}
+
+MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const {
+ // On ELF, use .Lfoo$local if GV is a non-interposable GlobalObject with an
+  // exact definition (intersection of GlobalValue::hasExactDefinition() and
+ // !isInterposable()). These linkages include: external, appending, internal,
+ // private. It may be profitable to use a local alias for external. The
+ // assembler would otherwise be conservative and assume a global default
+ // visibility symbol can be interposable, even if the code generator already
+ // assumed it.
+ if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) {
+ const Module &M = *GV.getParent();
+ if (TM.getRelocationModel() != Reloc::Static &&
+ M.getPIELevel() == PIELevel::Default && GV.isDSOLocal())
+ return getSymbolWithGlobalValueBase(&GV, "$local");
+ }
+ return TM.getSymbol(&GV);
+}
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
+ bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal();
+ assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
+ "No emulated TLS variables in the common section");
+
+ // Never emit TLS variable xyz in emulated TLS model.
+ // The initialization value is in __emutls_t.xyz instead of xyz.
+ if (IsEmuTLSVar)
+ return;
+
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (emitSpecialLLVMGlobal(GV))
+ return;
+
+ // Skip the emission of global equivalents. The symbol can be emitted later
+ // on by emitGlobalGOTEquivs in case it turns out to be needed.
+ if (GlobalGOTEquivs.count(getSymbol(GV)))
+ return;
+
+ if (isVerbose()) {
+ // When printing the control variable __emutls_v.*,
+ // we don't need to print the original TLS variable name.
+ GV->printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer->getCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = getSymbol(GV);
+ MCSymbol *EmittedSym = GVSym;
+
+ // getOrCreateEmuTLSControlSym only creates the symbol with name and default
+ // attributes.
+ // GV's or GVSym's attributes will be used for the EmittedSym.
+ emitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (GV->isTagged()) {
+ Triple T = TM.getTargetTriple();
+
+ if (T.getArch() != Triple::aarch64 || !T.isAndroid())
+ OutContext.reportError(SMLoc(),
+ "tagged symbols (-fsanitize=memtag-globals) are "
+ "only supported on AArch64 Android");
+ OutStreamer->emitSymbolAttribute(EmittedSym, MAI->getMemtagAttr());
+ }
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ GVSym->redefineIfPossible();
+ if (GVSym->isDefined() || GVSym->isVariable())
+ OutContext.reportError(SMLoc(), "symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ const Align Alignment = getGVAlignment(GV, DL);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
+ TimePassesIsEnabled);
+ HI.Handler->setSymbolSize(GVSym, Size);
+ }
+
+ // Handle common symbols
+ if (GVKind.isCommon()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ // .comm _foo, 42, 4
+ OutStreamer->emitCommonSymbol(GVSym, Size, Alignment);
+ return;
+ }
+
+ // Determine to which section this global should be emitted.
+ MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM);
+
+ // If we have a bss global going to a section that supports the
+ // zerofill directive, do so here.
+ if (GVKind.isBSS() && MAI->hasMachoZeroFillDirective() &&
+ TheSection->isVirtualSection()) {
+ if (Size == 0)
+ Size = 1; // zerofill of 0 bytes is undefined.
+ emitLinkage(GV, GVSym);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment);
+ return;
+ }
+
+ // If this is a BSS local symbol and we are emitting in the BSS
+ // section use .lcomm/.comm directive.
+ if (GVKind.isBSSLocal() &&
+ getObjFileLowering().getBSSSection() == TheSection) {
+ if (Size == 0)
+ Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ // Use .lcomm only if it supports user-specified alignment.
+ // Otherwise, while it would still be correct to use .lcomm in some
+ // cases (e.g. when Align == 1), the external assembler might enforce
+ // some -unknown- default alignment behavior, which could cause
+ // spurious differences between external and integrated assembler.
+ // Prefer to simply fall back to .local / .comm in this case.
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+ // .lcomm _foo, 42
+ OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment);
+ return;
+ }
+
+ // .local _foo
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer->emitCommonSymbol(GVSym, Size, Alignment);
+ return;
+ }
+
+ // Handle thread local data for mach-o which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS()) {
+ TheSection = getObjFileLowering().getTLSBSSSection();
+ OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment);
+ } else if (GVKind.isThreadData()) {
+ OutStreamer->switchSection(TheSection);
+
+ emitAlignment(Alignment, GV);
+ OutStreamer->emitLabel(MangSym);
+
+ emitGlobalConstant(GV->getParent()->getDataLayout(),
+ GV->getInitializer());
+ }
+
+ OutStreamer->addBlankLine();
+
+ // Emit the variable struct for the runtime.
+ MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer->switchSection(TLVSect);
+ // Emit the linkage here.
+ emitLinkage(GV, GVSym);
+ OutStreamer->emitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
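+ // For a thread-local global "x" on Mach-O this emits, roughly:
+ //   _x: .quad __tlv_bootstrap
+ //       .quad 0
+ //       .quad _x$tlv$init
+ // (shown with .quad for a 64-bit target; "x" is illustrative).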
+ unsigned PtrSize = DL.getPointerTypeSize(GV->getType());
+ OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize);
+ OutStreamer->emitIntValue(0, PtrSize);
+ OutStreamer->emitSymbolValue(MangSym, PtrSize);
+
+ OutStreamer->addBlankLine();
+ return;
+ }
+
+ MCSymbol *EmittedInitSym = GVSym;
+
+ OutStreamer->switchSection(TheSection);
+
+ emitLinkage(GV, EmittedInitSym);
+ emitAlignment(Alignment, GV);
+
+ OutStreamer->emitLabel(EmittedInitSym);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(*GV);
+ if (LocalAlias != EmittedInitSym)
+ OutStreamer->emitLabel(LocalAlias);
+
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer->emitELFSize(EmittedInitSym,
+ MCConstantExpr::create(Size, OutContext));
+
+ OutStreamer->addBlankLine();
+}
+
+/// Emit the directive and value for debug thread local expression
+///
+/// \p Value - The value to emit.
+/// \p Size - The size of the integer (in bytes) to emit.
+void AsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const {
+ OutStreamer->emitValue(Value, Size);
+}
+
+void AsmPrinter::emitFunctionHeaderComment() {}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::emitFunctionHeader() {
+ const Function &F = MF->getFunction();
+
+ if (isVerbose())
+ OutStreamer->getCommentOS()
+ << "-- Begin function "
+ << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
+
+ // Print out constants referenced by the function
+ emitConstantPool();
+
+ // Print the 'header' of the function.
+ // If basic block sections are desired, explicitly request a unique section
+ // for this function's entry block.
+ if (MF->front().isBeginSection())
+ MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM));
+ else
+ MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM));
+ OutStreamer->switchSection(MF->getSection());
+
+ if (!MAI->hasVisibilityOnlyWithLinkage())
+ emitVisibility(CurrentFnSym, F.getVisibility());
+
+ if (MAI->needsFunctionDescriptors())
+ emitLinkage(&F, CurrentFnDescSym);
+
+ emitLinkage(&F, CurrentFnSym);
+ if (MAI->hasFunctionAlignment())
+ emitAlignment(MF->getAlignment(), &F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (F.hasFnAttribute(Attribute::Cold))
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
+
+ // Emit the prefix data.
+ if (F.hasPrefixData()) {
+ if (MAI->hasSubsectionsViaSymbols()) {
+ // Preserving prefix data on platforms which use subsections-via-symbols
+ // is a bit tricky. Here we introduce a symbol for the prefix data
+ // and use the .alt_entry attribute to mark the function's real entry point
+ // as an alternative entry point to the prefix-data symbol.
+ MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->emitLabel(PrefixSym);
+
+ emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
+
+ // Emit an .alt_entry directive for the actual function symbol.
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+ } else {
+ emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
+ }
+ }
+
+ // Emit KCFI type information before patchable-function-prefix nops.
+ emitKCFITypeId(*MF);
+
+ // Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily
+ // place prefix data before NOPs.
+ unsigned PatchableFunctionPrefix = 0;
+ unsigned PatchableFunctionEntry = 0;
+ (void)F.getFnAttribute("patchable-function-prefix")
+ .getValueAsString()
+ .getAsInteger(10, PatchableFunctionPrefix);
+ (void)F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, PatchableFunctionEntry);
+ if (PatchableFunctionPrefix) {
+ CurrentPatchableFunctionEntrySym =
+ OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
+ emitNops(PatchableFunctionPrefix);
+ } else if (PatchableFunctionEntry) {
+ // May be reassigned when emitting the body, to reference the label after
+ // the initial BTI (AArch64) or endbr32/endbr64 (x86).
+ CurrentPatchableFunctionEntrySym = CurrentFnBegin;
+ }
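+ // For example, "patchable-function-prefix"="2" emits two NOPs here, ahead
+ // of the function's entry label, and CurrentPatchableFunctionEntrySym marks
+ // where they start; emitPatchableFunctionEntries() later references that
+ // symbol when emitting the __patchable_function_entries section (on ELF).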
+
+ // Emit the function prologue data for the indirect call sanitizer.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+ assert(MD->getNumOperands() == 2);
+
+ auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+ auto *TypeHash = mdconst::extract<Constant>(MD->getOperand(1));
+ emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+ emitGlobalConstant(F.getParent()->getDataLayout(), TypeHash);
+ }
+
+ if (isVerbose()) {
+ F.printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, F.getParent());
+ emitFunctionHeaderComment();
+ OutStreamer->getCommentOS() << '\n';
+ }
+
+ // Emit the function descriptor. This is a virtual function to allow targets
+ // to emit their specific function descriptor. Right now it is only used by
+ // the AIX target. The PowerPC 64-bit V1 ELF target also uses function
+ // descriptors and should be converted to use this hook as well.
+ if (MAI->needsFunctionDescriptors())
+ emitFunctionDescriptor();
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to do
+ // their wild and crazy things as required.
+ emitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
+ for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
+ OutStreamer->AddComment("Address taken block that was later removed");
+ OutStreamer->emitLabel(DeadBlockSym);
+ }
+
+ if (CurrentFnBegin) {
+ if (MAI->useAssignmentForEHBegin()) {
+ MCSymbol *CurPos = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(CurPos);
+ OutStreamer->emitAssignment(CurrentFnBegin,
+ MCSymbolRefExpr::create(CurPos, OutContext));
+ } else {
+ OutStreamer->emitLabel(CurrentFnBegin);
+ }
+ }
+
+ // Emit pre-function debug and/or EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginFunction(MF);
+ }
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginBasicBlockSection(MF->front());
+ }
+
+ // Emit the prologue data.
+ if (F.hasPrologueData())
+ emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::emitFunctionEntryLabel() {
+ CurrentFnSym->redefineIfPossible();
+
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isVariable())
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' is a protected alias");
+
+ OutStreamer->emitLabel(CurrentFnSym);
+
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ MCSymbol *Sym = getSymbolPreferLocal(MF->getFunction());
+ if (Sym != CurrentFnSym) {
+ cast<MCSymbolELF>(Sym)->setType(ELF::STT_FUNC);
+ CurrentFnBeginLocal = Sym;
+ OutStreamer->emitLabel(Sym);
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+ }
+ }
+}
+
+/// emitComments - Pretty-print comments for instructions.
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getMF();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // Check for spills and reloads
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ std::optional<unsigned> Size;
+ if ((Size = MI.getRestoreSize(TII))) {
+ CommentOS << *Size << "-byte Reload\n";
+ } else if ((Size = MI.getFoldedRestoreSize(TII))) {
+ if (*Size) {
+ if (*Size == unsigned(MemoryLocation::UnknownSize))
+ CommentOS << "Unknown-size Folded Reload\n";
+ else
+ CommentOS << *Size << "-byte Folded Reload\n";
+ }
+ } else if ((Size = MI.getSpillSize(TII))) {
+ CommentOS << *Size << "-byte Spill\n";
+ } else if ((Size = MI.getFoldedSpillSize(TII))) {
+ if (*Size) {
+ if (*Size == unsigned(MemoryLocation::UnknownSize))
+ CommentOS << "Unknown-size Folded Spill\n";
+ else
+ CommentOS << *Size << "-byte Folded Spill\n";
+ }
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
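+ // In verbose assembly these show up as trailing comments on the
+ // instruction, e.g. "8-byte Spill" or "16-byte Folded Reload" (the comment
+ // marker itself is target dependent).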
+
+/// emitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+ Register RegNo = MI->getOperand(0).getReg();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+ OutStreamer->AddComment(OS.str());
+ OutStreamer->addBlankLine();
+}
+
+static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "kill:";
+ for (const MachineOperand &Op : MI->operands()) {
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ OS << ' ' << (Op.isDef() ? "def " : "killed ")
+ << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
+ }
+ AP.OutStreamer->AddComment(OS.str());
+ AP.OutStreamer->addBlankLine();
+}
+
+/// emitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 4-operand target-independent form.
+ if (MI->isNonListDebugValue() && MI->getNumOperands() != 4)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "DEBUG_VALUE: ";
+
+ const DILocalVariable *V = MI->getDebugVariable();
+ if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+ StringRef Name = SP->getName();
+ if (!Name.empty())
+ OS << Name << ":";
+ }
+ OS << V->getName();
+ OS << " <- ";
+
+ const DIExpression *Expr = MI->getDebugExpression();
+ // First convert this to a non-variadic expression if possible, to simplify
+ // the output.
+ if (auto NonVariadicExpr = DIExpression::convertToNonVariadicExpression(Expr))
+ Expr = *NonVariadicExpr;
+ // Then, output the possibly-simplified expression.
+ if (Expr->getNumElements()) {
+ OS << '[';
+ ListSeparator LS;
+ for (auto &Op : Expr->expr_ops()) {
+ OS << LS << dwarf::OperationEncodingString(Op.getOp());
+ for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+ OS << ' ' << Op.getArg(I);
+ }
+ OS << "] ";
+ }
+
+ // Register or immediate value. Register 0 means undef.
+ for (const MachineOperand &Op : MI->debug_operands()) {
+ if (&Op != MI->debug_operands().begin())
+ OS << ", ";
+ switch (Op.getType()) {
+ case MachineOperand::MO_FPImmediate: {
+ APFloat APF = APFloat(Op.getFPImm()->getValueAPF());
+ Type *ImmTy = Op.getFPImm()->getType();
+ if (ImmTy->isBFloatTy() || ImmTy->isHalfTy() || ImmTy->isFloatTy() ||
+ ImmTy->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ OS << Op.getImm();
+ break;
+ }
+ case MachineOperand::MO_CImmediate: {
+ Op.getCImm()->getValue().print(OS, false /*isSigned*/);
+ break;
+ }
+ case MachineOperand::MO_TargetIndex: {
+ OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
+ break;
+ }
+ case MachineOperand::MO_Register:
+ case MachineOperand::MO_FrameIndex: {
+ Register Reg;
+ std::optional<StackOffset> Offset;
+ if (Op.isReg()) {
+ Reg = Op.getReg();
+ } else {
+ const TargetFrameLowering *TFI =
+ AP.MF->getSubtarget().getFrameLowering();
+ Offset = TFI->getFrameIndexReference(*AP.MF, Op.getIndex(), Reg);
+ }
+ if (!Reg) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ break;
+ }
+ // The second operand is only an offset if it's an immediate.
+ if (MI->isIndirectDebugValue())
+ Offset = StackOffset::getFixed(MI->getDebugOffset().getImm());
+ if (Offset)
+ OS << '[';
+ OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
+ if (Offset)
+ OS << '+' << Offset->getFixed() << ']';
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type");
+ }
+ }
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+}
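+ // The resulting raw comment looks roughly like:
+ //   # DEBUG_VALUE: foo:x <- $edi
+ // or, with a DIExpression:
+ //   # DEBUG_VALUE: foo:p <- [DW_OP_plus_uconst 8] $rsp
+ // ("foo", "x", "p" and the registers are illustrative; the comment marker
+ // is target dependent).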
+
+/// This method handles the target-independent form of DBG_LABEL, returning
+/// true if it was able to do so. A false return means the target will need
+/// to handle MI in EmitInstruction.
+static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) {
+ if (MI->getNumOperands() != 1)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "DEBUG_LABEL: ";
+
+ const DILabel *V = MI->getDebugLabel();
+ if (auto *SP = dyn_cast<DISubprogram>(
+ V->getScope()->getNonLexicalBlockFileScope())) {
+ StringRef Name = SP->getName();
+ if (!Name.empty())
+ OS << Name << ":";
+ }
+ OS << V->getName();
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+}
+
+AsmPrinter::CFISection
+AsmPrinter::getFunctionCFISectionType(const Function &F) const {
+ // Ignore functions that won't get emitted.
+ if (F.isDeclarationForLinker())
+ return CFISection::None;
+
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ F.needsUnwindTableEntry())
+ return CFISection::EH;
+
+ if (MAI->usesCFIWithoutEH() && F.hasUWTable())
+ return CFISection::EH;
+
+ assert(MMI != nullptr && "Invalid machine module info");
+ if (MMI->hasDebugInfo() || TM.Options.ForceDwarfFrameSection)
+ return CFISection::Debug;
+
+ return CFISection::None;
+}
+
+AsmPrinter::CFISection
+AsmPrinter::getFunctionCFISectionType(const MachineFunction &MF) const {
+ return getFunctionCFISectionType(MF.getFunction());
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry();
+}
+
+bool AsmPrinter::usesCFIWithoutEH() const {
+ return MAI->usesCFIWithoutEH() && ModuleCFISection != CFISection::None;
+}
+
+void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+ if (!usesCFIWithoutEH() &&
+ ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ ExceptionHandlingType != ExceptionHandling::ARM)
+ return;
+
+ if (getFunctionCFISectionType(*MF) == CFISection::None)
+ return;
+
+ // If there is no "real" instruction following this CFI instruction, skip
+ // emitting it; it would be beyond the end of the function's FDE range.
+ auto *MBB = MI.getParent();
+ auto I = std::next(MI.getIterator());
+ while (I != MBB->end() && I->isTransient())
+ ++I;
+ if (I == MBB->instr_end() &&
+ MBB->getReverseIterator() == MBB->getParent()->rbegin())
+ return;
+
+ const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
+ unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+ const MCCFIInstruction &CFI = Instrs[CFIIndex];
+ emitCFIInstruction(CFI);
+}
+
+void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
+ // The operands are the MCSymbol and the frame offset of the allocation.
+ MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol();
+ int FrameOffset = MI.getOperand(1).getImm();
+
+ // Emit a symbol assignment.
+ OutStreamer->emitAssignment(FrameAllocSym,
+ MCConstantExpr::create(FrameOffset, OutContext));
+}
+
+/// Returns the BB metadata to be emitted in the SHT_LLVM_BB_ADDR_MAP section
+/// for a given basic block. This can be used to capture more precise profile
+/// information.
+static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ return object::BBAddrMap::BBEntry::Metadata{
+ MBB.isReturnBlock(), !MBB.empty() && TII->isTailCall(MBB.back()),
+ MBB.isEHPad(), const_cast<MachineBasicBlock &>(MBB).canFallThrough(),
+ !MBB.empty() && MBB.rbegin()->isIndirectBranch()}
+ .encode();
+}
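+ // The metadata packs five flags of the block (is-return, has-tail-call,
+ // is-EH-pad, can-fall-through, ends-with-indirect-branch) into one integer
+ // via BBEntry::Metadata::encode().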
+
+void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
+ MCSection *BBAddrMapSection =
+ getObjFileLowering().getBBAddrMapSection(*MF.getSection());
+ assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized.");
+
+ const MCSymbol *FunctionSymbol = getFunctionBegin();
+
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(BBAddrMapSection);
+ OutStreamer->AddComment("version");
+ uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
+ OutStreamer->emitInt8(BBAddrMapVersion);
+ OutStreamer->AddComment("feature");
+ OutStreamer->emitInt8(0);
+ OutStreamer->AddComment("function address");
+ OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
+ OutStreamer->AddComment("number of basic blocks");
+ OutStreamer->emitULEB128IntValue(MF.size());
+ const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
+ // Emit BB Information for each basic block in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ const MCSymbol *MBBSymbol =
+ MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
+ // TODO: Remove this check when version 1 is deprecated.
+ if (BBAddrMapVersion > 1) {
+ OutStreamer->AddComment("BB id");
+ // Emit the BB ID for this basic block.
+ OutStreamer->emitULEB128IntValue(*MBB.getBBID());
+ }
+ // Emit the basic block offset relative to the end of the previous block.
+ // This is zero unless the block is padded due to alignment.
+ emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol);
+ // Emit the basic block size. When BBs have alignments, their size cannot
+ // always be computed from their offsets.
+ emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
+ // Emit the Metadata.
+ OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+ PrevMBBEndSymbol = MBB.getEndSymbol();
+ }
+ OutStreamer->popSection();
+}
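+ // Overall, each function's entry in the section is: a version byte, a
+ // feature byte, the function address, a ULEB128 block count, and then per
+ // block: the ULEB128 BB id (version > 1), the ULEB128 offset from the end
+ // of the previous block, the ULEB128 block size, and the ULEB128 metadata.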
+
+void AsmPrinter::emitKCFITrapEntry(const MachineFunction &MF,
+ const MCSymbol *Symbol) {
+ MCSection *Section =
+ getObjFileLowering().getKCFITrapSection(*MF.getSection());
+ if (!Section)
+ return;
+
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(Section);
+
+ MCSymbol *Loc = OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->emitLabel(Loc);
+ OutStreamer->emitAbsoluteSymbolDiff(Symbol, Loc, 4);
+
+ OutStreamer->popSection();
+}
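+ // Each entry in the KCFI trap section (".kcfi_traps" on ELF) is a 4-byte
+ // difference "Symbol - Loc", i.e. the offset from the entry's own location
+ // to the trap instruction it describes.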
+
+void AsmPrinter::emitKCFITypeId(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_kcfi_type))
+ emitGlobalConstant(F.getParent()->getDataLayout(),
+ mdconst::extract<ConstantInt>(MD->getOperand(0)));
+}
+
+void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
+ if (PP) {
+ auto GUID = MI.getOperand(0).getImm();
+ auto Index = MI.getOperand(1).getImm();
+ auto Type = MI.getOperand(2).getImm();
+ auto Attr = MI.getOperand(3).getImm();
+ DILocation *DebugLoc = MI.getDebugLoc();
+ PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+ }
+}
+
+void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
+ if (!MF.getTarget().Options.EmitStackSizeSection)
+ return;
+
+ MCSection *StackSizeSection =
+ getObjFileLowering().getStackSizesSection(*getCurrentSection());
+ if (!StackSizeSection)
+ return;
+
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ // Don't emit functions with dynamic stack allocations.
+ if (FrameInfo.hasVarSizedObjects())
+ return;
+
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(StackSizeSection);
+
+ const MCSymbol *FunctionSymbol = getFunctionBegin();
+ uint64_t StackSize =
+ FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize();
+ OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+ OutStreamer->emitULEB128IntValue(StackSize);
+
+ OutStreamer->popSection();
+}
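+ // Each record in the stack-sizes section (".stack_sizes" on ELF) is the
+ // function's begin address at program-pointer width followed by the
+ // ULEB128-encoded frame size computed above.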
+
+void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
+ const std::string &OutputFilename = MF.getTarget().Options.StackUsageOutput;
+
+ // OutputFilename empty implies -fstack-usage is not passed.
+ if (OutputFilename.empty())
+ return;
+
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ uint64_t StackSize =
+ FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize();
+
+ if (StackUsageStream == nullptr) {
+ std::error_code EC;
+ StackUsageStream =
+ std::make_unique<raw_fd_ostream>(OutputFilename, EC, sys::fs::OF_Text);
+ if (EC) {
+ errs() << "Could not open file: " << EC.message();
+ return;
+ }
+ }
+
+ *StackUsageStream << MF.getFunction().getParent()->getName();
+ if (const DISubprogram *DSP = MF.getFunction().getSubprogram())
+ *StackUsageStream << ':' << DSP->getLine();
+
+ *StackUsageStream << ':' << MF.getName() << '\t' << StackSize << '\t';
+ if (FrameInfo.hasVarSizedObjects())
+ *StackUsageStream << "dynamic\n";
+ else
+ *StackUsageStream << "static\n";
+}
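+ // A resulting .su line looks roughly like (fields are tab-separated):
+ //   foo.c:42:myfunc   128   static
+ // ("foo.c", "myfunc", and the numbers are illustrative; "dynamic" is
+ // printed when the frame has variable-sized objects).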
+
+void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
+ const MDNode &MD) {
+ MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
+ OutStreamer->emitLabel(S);
+ PCSectionsSymbols[&MD].emplace_back(S);
+}
+
+void AsmPrinter::emitPCSections(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (PCSectionsSymbols.empty() && !F.hasMetadata(LLVMContext::MD_pcsections))
+ return;
+
+ const CodeModel::Model CM = MF.getTarget().getCodeModel();
+ const unsigned RelativeRelocSize =
+ (CM == CodeModel::Medium || CM == CodeModel::Large) ? getPointerSize()
+ : 4;
+
+ // Switch to PCSection, short-circuiting the common case where the current
+ // section is still valid (assume most MD_pcsections contain just 1 section).
+ auto SwitchSection = [&, Prev = StringRef()](const StringRef &Sec) mutable {
+ if (Sec == Prev)
+ return;
+ MCSection *S = getObjFileLowering().getPCSection(Sec, MF.getSection());
+ assert(S && "PC section is not initialized");
+ OutStreamer->switchSection(S);
+ Prev = Sec;
+ };
+ // Emit symbols into sections and data as specified in the pcsections MDNode.
+ auto EmitForMD = [&](const MDNode &MD, ArrayRef<const MCSymbol *> Syms,
+ bool Deltas) {
+ // Expect the first operand to be a section name. After that, a tuple of
+ // constants may appear, which will simply be emitted into the current
+ // section (the user of MD_pcsections decides the format of encoded data).
+ assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+ bool ConstULEB128 = false;
+ for (const MDOperand &MDO : MD.operands()) {
+ if (auto *S = dyn_cast<MDString>(MDO)) {
+ // Found string, start of new section!
+ // Find options for this section "<section>!<opts>" - supported options:
+ // C = Compress constant integers of size 2-8 bytes as ULEB128.
+ const StringRef SecWithOpt = S->getString();
+ const size_t OptStart = SecWithOpt.find('!'); // likely npos
+ const StringRef Sec = SecWithOpt.substr(0, OptStart);
+ const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+ ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+ for (char O : Opts)
+ assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+ SwitchSection(Sec);
+ const MCSymbol *Prev = Syms.front();
+ for (const MCSymbol *Sym : Syms) {
+ if (Sym == Prev || !Deltas) {
+ // Use the entry itself as the base of the relative offset.
+ MCSymbol *Base = MF.getContext().createTempSymbol("pcsection_base");
+ OutStreamer->emitLabel(Base);
+ // Emit relative relocation `addr - base`, which avoids a dynamic
+ // relocation in the final binary. User will get the address with
+ // `base + addr`.
+ emitLabelDifference(Sym, Base, RelativeRelocSize);
+ } else {
+ // Emit delta between symbol and previous symbol.
+ if (ConstULEB128)
+ emitLabelDifferenceAsULEB128(Sym, Prev);
+ else
+ emitLabelDifference(Sym, Prev, 4);
+ }
+ Prev = Sym;
+ }
+ } else {
+ // Emit auxiliary data after PC.
+ assert(isa<MDNode>(MDO) && "expecting either string or tuple");
+ const auto *AuxMDs = cast<MDNode>(MDO);
+ for (const MDOperand &AuxMDO : AuxMDs->operands()) {
+ assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
+ const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+ if (auto *CI = dyn_cast<ConstantInt>(C);
+ CI && ConstULEB128 && Size > 1 && Size <= 8) {
+ emitULEB128(CI->getZExtValue());
+ } else {
+ emitGlobalConstant(DL, C);
+ }
+ }
+ }
+ }
+ };
+
+ OutStreamer->pushSection();
+ // Emit PCs for function start and function size.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_pcsections))
+ EmitForMD(*MD, {getFunctionBegin(), getFunctionEnd()}, true);
+ // Emit PCs for instructions collected.
+ for (const auto &MS : PCSectionsSymbols)
+ EmitForMD(*MS.first, MS.second, false);
+ OutStreamer->popSection();
+ PCSectionsSymbols.clear();
+}
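+ // Note that a section name in !pcsections may carry options after a '!'
+ // separator; per the parsing above, the only recognized option is 'C',
+ // which requests ULEB128 encoding for constant operands of 2 to 8 bytes.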
+
+/// Returns true if function begin and end labels should be emitted.
+static bool needFuncLabels(const MachineFunction &MF) {
+ MachineModuleInfo &MMI = MF.getMMI();
+ if (!MF.getLandingPads().empty() || MF.hasEHFunclets() ||
+ MMI.hasDebugInfo() ||
+ MF.getFunction().hasMetadata(LLVMContext::MD_pcsections))
+ return true;
+
+ // We might emit an EH table that uses function begin and end labels even if
+ // we don't have any landingpads.
+ if (!MF.getFunction().hasPersonalityFn())
+ return false;
+ return !isNoOpWithoutInvoke(
+ classifyEHPersonality(MF.getFunction().getPersonalityFn()));
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::emitFunctionBody() {
+ emitFunctionHeader();
+
+ // Emit target-specific gunk before the function body.
+ emitFunctionBodyStart();
+
+ if (isVerbose()) {
+ // Get MachineDominatorTree or compute it on the fly if it's unavailable
+ MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ if (!MDT) {
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(*MF);
+ MDT = OwnedMDT.get();
+ }
+
+ // Get MachineLoopInfo or compute it on the fly if it's unavailable
+ MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ if (!MLI) {
+ OwnedMLI = std::make_unique<MachineLoopInfo>();
+ OwnedMLI->getBase().analyze(MDT->getBase());
+ MLI = OwnedMLI.get();
+ }
+ }
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ int NumInstsInFunction = 0;
+ bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
+
+ bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+ for (auto &MBB : *MF) {
+ // Print a label for the basic block.
+ emitBasicBlockStart(MBB);
+ DenseMap<StringRef, unsigned> MnemonicCounts;
+ for (auto &MI : MBB) {
+ // Print the assembly for the instruction.
+ if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
+ !MI.isDebugInstr()) {
+ HasAnyRealCode = true;
+ ++NumInstsInFunction;
+ }
+
+ // If there is a pre-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPreInstrSymbol())
+ OutStreamer->emitLabel(S);
+
+ if (MDNode *MD = MI.getPCSections())
+ emitPCSectionsLabel(*MF, *MD);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginInstruction(&MI);
+ }
+
+ if (isVerbose())
+ emitComments(MI, OutStreamer->getCommentOS());
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::CFI_INSTRUCTION:
+ emitCFIInstruction(MI);
+ break;
+ case TargetOpcode::LOCAL_ESCAPE:
+ emitFrameAlloc(MI);
+ break;
+ case TargetOpcode::ANNOTATION_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::EH_LABEL:
+ OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
+ // For async EH, insert a NOP if the label is followed by a trapping
+ // instruction, or the exception won't be caught.
+ // (see MCConstantExpr::create(1,..) in WinException.cpp)
+ // Ignore SDiv/UDiv because a DIV with a constant-zero divisor
+ // must have been turned into an UndefValue.
+ // A DIV with variable operands won't be the first instruction in
+ // an EH region, as it must be preceded by at least a load.
+ {
+ auto MI2 = std::next(MI.getIterator());
+ if (IsEHa && MI2 != MBB.end() &&
+ (MI2->mayLoadOrStore() || MI2->mayRaiseFPException()))
+ emitNops(1);
+ }
+ break;
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ emitInlineAsm(&MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ case TargetOpcode::DBG_VALUE_LIST:
+ if (isVerbose()) {
+ if (!emitDebugValueComment(&MI, *this))
+ emitInstruction(&MI);
+ }
+ break;
+ case TargetOpcode::DBG_INSTR_REF:
+ // This instruction reference will have been resolved to a machine
+ // location, and a nearby DBG_VALUE created. We can safely ignore
+ // the instruction reference.
+ break;
+ case TargetOpcode::DBG_PHI:
+ // This instruction is only used to label a program point, it's purely
+ // meta information.
+ break;
+ case TargetOpcode::DBG_LABEL:
+ if (isVerbose()) {
+ if (!emitDebugLabelComment(&MI, *this))
+ emitInstruction(&MI);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) emitImplicitDef(&MI);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) emitKill(&MI, *this);
+ break;
+ case TargetOpcode::PSEUDO_PROBE:
+ emitPseudoProbe(MI);
+ break;
+ case TargetOpcode::ARITH_FENCE:
+ if (isVerbose())
+ OutStreamer->emitRawComment("ARITH_FENCE");
+ break;
+ case TargetOpcode::MEMBARRIER:
+ OutStreamer->emitRawComment("MEMBARRIER");
+ break;
+ default:
+ emitInstruction(&MI);
+ if (CanDoExtraAnalysis) {
+ MCInst MCI;
+ MCI.setOpcode(MI.getOpcode());
+ auto Name = OutStreamer->getMnemonic(MCI);
+ auto I = MnemonicCounts.insert({Name, 0u});
+ I.first->second++;
+ }
+ break;
+ }
+
+ // If there is a post-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPostInstrSymbol())
+ OutStreamer->emitLabel(S);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endInstruction();
+ }
+ }
+
+ // We must emit a temporary symbol for the end of this basic block, if
+ // either we have BBLabels enabled or this basic block marks the end of a
+ // section.
+ if (MF->hasBBLabels() ||
+ (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection()))
+ OutStreamer->emitLabel(MBB.getEndSymbol());
+
+ if (MBB.isEndSection()) {
+ // The size directive for the section containing the entry block is
+ // handled separately by the function section.
+ if (!MBB.sameSection(&MF->front())) {
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Emit the size directive for the basic block section.
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
+ MCSymbolRefExpr::create(CurrentSectionBeginSym, OutContext),
+ OutContext);
+ OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
+ }
+ MBBSectionRanges[MBB.getSectionIDNum()] =
+ MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
+ }
+ }
+ emitBasicBlockEnd(MBB);
+
+ if (CanDoExtraAnalysis) {
+ // Skip empty blocks.
+ if (MBB.empty())
+ continue;
+
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionMix",
+ MBB.begin()->getDebugLoc(), &MBB);
+
+ // Generate instruction mix remark. First, sort counts in descending order
+ // by count and name.
+ SmallVector<std::pair<StringRef, unsigned>, 128> MnemonicVec;
+ for (auto &KV : MnemonicCounts)
+ MnemonicVec.emplace_back(KV.first, KV.second);
+
+ sort(MnemonicVec, [](const std::pair<StringRef, unsigned> &A,
+ const std::pair<StringRef, unsigned> &B) {
+ if (A.second > B.second)
+ return true;
+ if (A.second == B.second)
+ return StringRef(A.first) < StringRef(B.first);
+ return false;
+ });
+ R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
+ for (auto &KV : MnemonicVec) {
+ auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str();
+ R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
+ }
+ ORE->emit(R);
+ }
+ }
+
+ EmittedInsts += NumInstsInFunction;
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount",
+ MF->getFunction().getSubprogram(),
+ &MF->front());
+ R << ore::NV("NumInstructions", NumInstsInFunction)
+ << " instructions in function";
+ ORE->emit(R);
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+ // Similarly, don't emit empty functions on Windows either. It can lead to
+ // duplicate entries (two functions with the same RVA) in the Guard CF Table
+ // after linking, causing the kernel not to load the binary:
+ // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html
+ // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer.
+ const Triple &TT = TM.getTargetTriple();
+ if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() ||
+ (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) {
+ MCInst Noop = MF->getSubtarget().getInstrInfo()->getNop();
+
+ // Targets can opt-out of emitting the noop here by leaving the opcode
+ // unspecified.
+ if (Noop.getOpcode()) {
+ OutStreamer->AddComment("avoids zero-length function");
+ emitNops(1);
+ }
+ }
+
+ // Switch to the original section in case basic block sections was used.
+ OutStreamer->switchSection(MF->getSection());
+
+ const Function &F = MF->getFunction();
+ for (const auto &BB : F) {
+ if (!BB.hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(&BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer->AddComment("Address of block that was removed by CodeGen");
+ OutStreamer->emitLabel(Sym);
+ }
+
+ // Emit target-specific gunk after the function body.
+ emitFunctionBodyEnd();
+
+ // Even though wasm supports .type and .size in general, function symbols
+ // are automatically sized.
+ bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm();
+
+ if (needFuncLabels(*MF) || EmitFunctionSize) {
+ // Create a symbol for the end of function.
+ CurrentFnEnd = createTempSymbol("func_end");
+ OutStreamer->emitLabel(CurrentFnEnd);
+ }
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (EmitFunctionSize) {
+ // We can get the size as difference between the function label and the
+ // temp label.
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
+ MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext);
+ OutStreamer->emitELFSize(CurrentFnSym, SizeExp);
+ if (CurrentFnBeginLocal)
+ OutStreamer->emitELFSize(CurrentFnBeginLocal, SizeExp);
+ }
+
+ // Call endBasicBlockSection on the last block now, if it wasn't already
+ // called.
+ if (!MF->back().isEndSection()) {
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endBasicBlockSection(MF->back());
+ }
+ }
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->markFunctionEnd();
+ }
+
+ MBBSectionRanges[MF->front().getSectionIDNum()] =
+ MBBSectionRange{CurrentFnBegin, CurrentFnEnd};
+
+ // Print out jump tables referenced by the function.
+ emitJumpTableInfo();
+
+ // Emit post-function debug and/or EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endFunction(MF);
+ }
+
+ // Emit section containing BB address offsets and their metadata, when
+ // BB labels are requested for this function. Skip empty functions.
+ if (MF->hasBBLabels() && HasAnyRealCode)
+ emitBBAddrMapSection(*MF);
+
+ // Emit sections containing instruction and function PCs.
+ emitPCSections(*MF);
+
+ // Emit section containing stack size metadata.
+ emitStackSizeSection(*MF);
+
+ // Emit .su file containing function stack size information.
+ emitStackUsage(*MF);
+
+ emitPatchableFunctionEntries();
+
+ if (isVerbose())
+ OutStreamer->getCommentOS() << "-- End function\n";
+
+ OutStreamer->addBlankLine();
+
+ // Output MBB ids, function names, and frequencies if the flag to dump
+ // MBB profile information has been set
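+ // (one CSV-style line per block:
+ //  "<function-name>,<bb-id>,<frequency relative to the entry block>").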
+ if (MBBProfileDumpFileOutput) {
+ if (!MF->hasBBLabels())
+ MF->getContext().reportError(
+ SMLoc(),
+ "Unable to find BB labels for MBB profile dump. -mbb-profile-dump "
+ "must be called with -basic-block-sections=labels");
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ for (const auto &MBB : *MF) {
+ *MBBProfileDumpFileOutput.get()
+ << MF->getName() << "," << MBB.getBBID() << ","
+ << MBFI.getBlockFreqRelativeToEntryBlock(&MBB) << "\n";
+ }
+ }
+}
+
+ /// Compute the number of Global Variables that use a Constant.
+static unsigned getNumGlobalVariableUses(const Constant *C) {
+ if (!C)
+ return 0;
+
+ if (isa<GlobalVariable>(C))
+ return 1;
+
+ unsigned NumUses = 0;
+ for (const auto *CU : C->users())
+ NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
+
+ return NumUses;
+}
+
+/// Only consider global GOT equivalents if at least one user is a
+ /// cstexpr inside an initializer of another global variable. Also, don't
+/// handle cstexpr inside instructions. During global variable emission,
+/// candidates are skipped and are emitted later in case at least one cstexpr
+/// isn't replaced by a PC relative GOT entry access.
+static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
+ unsigned &NumGOTEquivUsers) {
+ // Global GOT equivalents are unnamed private globals with a constant
+ // pointer initializer to another global symbol. They must point to a
+ // GlobalVariable or Function, i.e., as GlobalValue.
+ if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() ||
+ !GV->isConstant() || !GV->isDiscardableIfUnused() ||
+ !isa<GlobalValue>(GV->getOperand(0)))
+ return false;
+
+ // To be a got equivalent, at least one of its users need to be a constant
+ // expression used by another global variable.
+ for (const auto *U : GV->users())
+ NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
+
+ return NumGOTEquivUsers > 0;
+}
+
+ /// An unnamed constant global variable solely containing a pointer to
+ /// another global variable is equivalent to a GOT table entry: it contains
+ /// the address of another symbol. Optimize it and replace accesses to these
+ /// "GOT equivalents" by using the GOT entry for the final global instead.
+ /// Compute GOT equivalent candidates among all global variables so that
+ /// they can be skipped during emission and only emitted later if their use
+ /// is not replaced by a GOT entry access.
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ for (const auto &G : M.globals()) {
+ unsigned NumGOTEquivUsers = 0;
+ if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers))
+ continue;
+
+ const MCSymbol *GOTEquivSym = getSymbol(&G);
+ GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers);
+ }
+}
+
+/// Constant expressions using GOT equivalent globals may not be eligible
+/// for PC relative GOT entry conversion, in such cases we need to emit such
+/// globals we previously omitted in EmitGlobalVariable.
+void AsmPrinter::emitGlobalGOTEquivs() {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ SmallVector<const GlobalVariable *, 8> FailedCandidates;
+ for (auto &I : GlobalGOTEquivs) {
+ const GlobalVariable *GV = I.second.first;
+ unsigned Cnt = I.second.second;
+ if (Cnt)
+ FailedCandidates.push_back(GV);
+ }
+ GlobalGOTEquivs.clear();
+
+ for (const auto *GV : FailedCandidates)
+ emitGlobalVariable(GV);
+}
+
+void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+ MCSymbol *Name = getSymbol(&GA);
+ bool IsFunction = GA.getValueType()->isFunctionTy();
+ // Treat bitcasts of functions as functions also. This is important at least
+ // on WebAssembly where object and function addresses can't alias each other.
+ if (!IsFunction)
+ IsFunction = isa<Function>(GA.getAliasee()->stripPointerCasts());
+
+ // AIX's assembly directive `.set` is not usable for aliasing purposes,
+ // so AIX has to use the extra-label-at-definition strategy. At this
+ // point, all the extra labels have been emitted; we just have to emit
+ // linkage for those labels.
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ assert(MAI->hasVisibilityOnlyWithLinkage() &&
+ "Visibility should be handled with emitLinkage() on AIX.");
+
+ // Linkage for alias of global variable has been emitted.
+ if (isa<GlobalVariable>(GA.getAliaseeObject()))
+ return;
+
+ emitLinkage(&GA, Name);
+ // If it's a function, also emit linkage for aliases of function entry
+ // point.
+ if (IsFunction)
+ emitLinkage(&GA,
+ getObjFileLowering().getFunctionEntryPointSymbol(&GA, TM));
+ return;
+ }
+
+ if (GA.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GA.hasWeakLinkage() || GA.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GA.hasLocalLinkage() && "Invalid alias linkage");
+
+ // Set the symbol type to function if the alias has a function type.
+ // This affects codegen when the aliasee is not a function.
+ if (IsFunction) {
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ OutStreamer->beginCOFFSymbolDef(Name);
+ OutStreamer->emitCOFFSymbolStorageClass(
+ GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer->endCOFFSymbolDef();
+ }
+ }
+
+ emitVisibility(Name, GA.getVisibility());
+
+ const MCExpr *Expr = lowerConstant(GA.getAliasee());
+
+ if (MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+ OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry);
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GA);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
+
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations, as an alias and aliasee having differing types but the
+ // same size may be intentional.
+ const GlobalObject *BaseObject = GA.getAliaseeObject();
+ if (MAI->hasDotTypeDotSizeDirective() && GA.getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GA.getValueType());
+ OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
+ }
+}
+
+void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
+ assert(!TM.getTargetTriple().isOSBinFormatXCOFF() &&
+ "IFunc is not supported on AIX.");
+
+ MCSymbol *Name = getSymbol(&GI);
+
+ if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ emitVisibility(Name, GI.getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ const MCExpr *Expr = lowerConstant(GI.getResolver());
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
+}
+
+void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
+ if (!RS.needsSection())
+ return;
+
+ remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer();
+
+ std::optional<SmallString<128>> Filename;
+ if (std::optional<StringRef> FilenameRef = RS.getFilename()) {
+ Filename = *FilenameRef;
+ sys::fs::make_absolute(*Filename);
+ assert(!Filename->empty() && "The filename can't be empty.");
+ }
+
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ std::unique_ptr<remarks::MetaSerializer> MetaSerializer =
+ Filename ? RemarkSerializer.metaSerializer(OS, Filename->str())
+ : RemarkSerializer.metaSerializer(OS);
+ MetaSerializer->emit();
+
+ // Switch to the remarks section.
+ MCSection *RemarksSection =
+ OutContext.getObjectFileInfo()->getRemarksSection();
+ OutStreamer->switchSection(RemarksSection);
+
+ OutStreamer->emitBinaryData(OS.str());
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Set the MachineFunction to nullptr so that we can catch attempted
+ // accesses to MF specific features at the module level and so that
+ // we can conditionalize accesses based on whether or not it is nullptr.
+ MF = nullptr;
+
+ // Gather all GOT equivalent globals in the module. We really need two
+ // passes over the globals: one to compute the candidates and another to
+ // avoid emitting them in emitGlobalVariable; otherwise we would not be
+ // able to handle cases where the GOT equivalent shows up before its use.
+ computeGlobalGOTEquivs(M);
+
+ // Emit global variables.
+ for (const auto &G : M.globals())
+ emitGlobalVariable(&G);
+
+ // Emit remaining GOT equivalent globals.
+ emitGlobalGOTEquivs();
+
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ // Emit linkage(XCOFF) and visibility info for declarations
+ for (const Function &F : M) {
+ if (!F.isDeclarationForLinker())
+ continue;
+
+ MCSymbol *Name = getSymbol(&F);
+ // Function getSymbol gives us the function descriptor symbol for XCOFF.
+
+ if (!TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ emitVisibility(Name, V, false);
+ continue;
+ }
+
+ if (F.isIntrinsic())
+ continue;
+
+ // Handle the XCOFF case.
+ // Variable `Name` is the function descriptor symbol (see above). Get the
+ // function entry point symbol.
+ MCSymbol *FnEntryPointSym = TLOF.getFunctionEntryPointSymbol(&F, TM);
+ // Emit linkage for the function entry point.
+ emitLinkage(&F, FnEntryPointSym);
+
+ // Emit linkage for the function descriptor.
+ emitLinkage(&F, Name);
+ }
+
+ // Emit the remarks section contents.
+ // FIXME: Figure out when is the safest time to emit this section. It should
+ // not come after debug info.
+ if (remarks::RemarkStreamer *RS = M.getContext().getMainRemarkStreamer())
+ emitRemarksSection(*RS);
+
+ TLOF.emitModuleMetadata(*OutStreamer, M);
+
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer->switchSection(TLOF.getDataSection());
+ const DataLayout &DL = M.getDataLayout();
+
+ emitAlignment(Align(DL.getPointerSize()));
+ for (const auto &Stub : Stubs) {
+ OutStreamer->emitLabel(Stub.first);
+ OutStreamer->emitSymbolValue(Stub.second.getPointer(),
+ DL.getPointerSize());
+ }
+ }
+ }
+
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoCOFF::SymbolListTy Stubs = MMICOFF.GetGVStubList();
+ if (!Stubs.empty()) {
+ const DataLayout &DL = M.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ SmallString<256> SectionName = StringRef(".rdata$");
+ SectionName += Stub.first->getName();
+ OutStreamer->switchSection(OutContext.getCOFFSection(
+ SectionName,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT,
+ SectionKind::getReadOnly(), Stub.first->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ANY));
+ emitAlignment(Align(DL.getPointerSize()));
+ OutStreamer->emitSymbolAttribute(Stub.first, MCSA_Global);
+ OutStreamer->emitLabel(Stub.first);
+ OutStreamer->emitSymbolValue(Stub.second.getPointer(),
+ DL.getPointerSize());
+ }
+ }
+ }
+
+ // This needs to happen before emitting debug information since that can end
+ // arbitrary sections.
+ if (auto *TS = OutStreamer->getTargetStreamer())
+ TS->emitConstantPools();
+
+ // Emit Stack maps before any debug info. Mach-O requires that no data or
+ // text sections come after debug info has been emitted. This matters for
+ // stack maps as they are arbitrary data, and may even have a custom format
+ // through user plugins.
+ emitStackMaps();
+
+ // Finalize debug and EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endModule();
+ }
+
+ // This deletes all the ephemeral handlers that AsmPrinter added, while
+ // keeping all the user-added handlers alive until the AsmPrinter is
+ // destroyed.
+ Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end());
+ DD = nullptr;
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy, it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global objects here.
+ for (const auto &GO : M.global_objects()) {
+ if (!GO.hasExternalWeakLinkage())
+ continue;
+ OutStreamer->emitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
+ }
+ if (shouldEmitWeakSwiftAsyncExtendedFramePointerFlags()) {
+ auto SymbolName = "swift_async_extendedFramePointerFlags";
+ auto Global = M.getGlobalVariable(SymbolName);
+ if (!Global) {
+ auto Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ Global = new GlobalVariable(M, Int8PtrTy, false,
+ GlobalValue::ExternalWeakLinkage, nullptr,
+ SymbolName);
+ OutStreamer->emitSymbolAttribute(getSymbol(Global), MCSA_WeakReference);
+ }
+ }
+ }
+
+ // Print aliases in topological order, that is, for each alias a = b,
+ // b must be printed before a.
+ // This is because on some targets (e.g. PowerPC) the linker expects aliases in
+ // such an order to generate correct TOC information.
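+ // For illustration (hypothetical IR; the names are made up): given
+ //   @c = global i32 0
+ //   @b = alias i32, i32* @c
+ //   @a = alias i32, i32* @b
+ // the loop below walks the chain from @a and then emits the collected stack
+ // in reverse, so @b is printed before @a.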
+ SmallVector<const GlobalAlias *, 16> AliasStack;
+ SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
+ for (const auto &Alias : M.aliases()) {
+ if (Alias.hasAvailableExternallyLinkage())
+ continue;
+ for (const GlobalAlias *Cur = &Alias; Cur;
+ Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
+ if (!AliasVisited.insert(Cur).second)
+ break;
+ AliasStack.push_back(Cur);
+ }
+ for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
+ emitGlobalAlias(M, *AncestorAlias);
+ AliasStack.clear();
+ }
+ for (const auto &IFunc : M.ifuncs())
+ emitGlobalIFunc(M, IFunc);
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = getOrCreateGCPrinter(**--I))
+ MP->finishAssembly(M, *MI, *this);
+
+ // Emit llvm.ident metadata in an '.ident' directive.
+ emitModuleIdents(M);
+
+ // Emit bytes for llvm.commandline metadata.
+ // The command line metadata is emitted earlier on XCOFF.
+ if (!TM.getTargetTriple().isOSBinFormatXCOFF())
+ emitModuleCommandLines(M);
+
+ // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
+ // split-stack is used.
+ if (TM.getTargetTriple().isOSBinFormatELF() && HasSplitStack) {
+ OutStreamer->switchSection(OutContext.getELFSection(".note.GNU-split-stack",
+ ELF::SHT_PROGBITS, 0));
+ if (HasNoSplitStack)
+ OutStreamer->switchSection(OutContext.getELFSection(
+ ".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
+ }
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer->switchSection(S);
+
+ if (TM.Options.EmitAddrsig) {
+ // Emit address-significance attributes for all globals.
+ OutStreamer->emitAddrsig();
+ for (const GlobalValue &GV : M.global_values()) {
+ if (!GV.use_empty() && !GV.isThreadLocal() &&
+ !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
+ !GV.hasAtLeastLocalUnnamedAddr())
+ OutStreamer->emitAddrsigSym(getSymbol(&GV));
+ }
+ }
+
+ // Emit symbol partition specifications (ELF only).
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ unsigned UniqueID = 0;
+ for (const GlobalValue &GV : M.global_values()) {
+ if (!GV.hasPartition() || GV.isDeclarationForLinker() ||
+ GV.getVisibility() != GlobalValue::DefaultVisibility)
+ continue;
+
+ OutStreamer->switchSection(
+ OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0,
+ "", false, ++UniqueID, nullptr));
+ OutStreamer->emitBytes(GV.getPartition());
+ OutStreamer->emitZeros(1);
+ OutStreamer->emitValue(
+ MCSymbolRefExpr::create(getSymbol(&GV), OutContext),
+ MAI->getCodePointerSize());
+ }
+ }
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ emitEndOfAsmFile(M);
+
+ MMI = nullptr;
+ AddrLabelSymbols = nullptr;
+
+ OutStreamer->finish();
+ OutStreamer->reset();
+ OwnedMLI.reset();
+ OwnedMDT.reset();
+
+ return false;
+}
+
+MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) {
+ auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum());
+ if (Res.second)
+ Res.first->second = createTempSymbol("exception");
+ return Res.first->second;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ const Function &F = MF.getFunction();
+
+ // Record that there are split-stack functions, so we will emit a special
+ // section to tell the linker.
+ if (MF.shouldSplitStack()) {
+ HasSplitStack = true;
+
+ if (!MF.getFrameInfo().needsSplitStackProlog())
+ HasNoSplitStack = true;
+ } else
+ HasNoSplitStack = true;
+
+ // Get the function symbol.
+ if (!MAI->needsFunctionDescriptors()) {
+ CurrentFnSym = getSymbol(&MF.getFunction());
+ } else {
+ assert(TM.getTargetTriple().isOSAIX() &&
+ "Only AIX uses the function descriptor hooks.");
+ // AIX is unique here in that the symbol emitted for the function body does
+ // not have the same name as the source function's C-linkage name.
+ assert(CurrentFnDescSym && "The function descriptor symbol needs to be"
+ " initialized first.");
+
+ // Get the function entry point symbol.
+ CurrentFnSym = getObjFileLowering().getFunctionEntryPointSymbol(&F, TM);
+ }
+
+ CurrentFnSymForSize = CurrentFnSym;
+ CurrentFnBegin = nullptr;
+ CurrentFnBeginLocal = nullptr;
+ CurrentSectionBeginSym = nullptr;
+ MBBSectionRanges.clear();
+ MBBSectionExceptionSyms.clear();
+ bool NeedsLocalForSize = MAI->needsLocalForSize();
+ if (F.hasFnAttribute("patchable-function-entry") ||
+ F.hasFnAttribute("function-instrument") ||
+ F.hasFnAttribute("xray-instruction-threshold") ||
+ needFuncLabels(MF) || NeedsLocalForSize ||
+ MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
+ CurrentFnBegin = createTempSymbol("func_begin");
+ if (NeedsLocalForSize)
+ CurrentFnSymForSize = CurrentFnBegin;
+ }
+
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+}
+
+namespace {
+
+// Keep track of the alignment and the constant pool entries per section.
+ struct SectionCPs {
+ MCSection *S;
+ Align Alignment;
+ SmallVector<unsigned, 4> CPEs;
+
+ SectionCPs(MCSection *s, Align a) : S(s), Alignment(a) {}
+ };
+
+} // end anonymous namespace
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
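+/// For illustration, a spilled 4-byte float constant is typically emitted as
+/// something like the following (the exact label and directive vary by
+/// target):
+///   .LCPI0_0:
+///           .long 0x40490fdb          # float 3.14159274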
+void AsmPrinter::emitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries that go
+ // into the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ Align Alignment = CPE.getAlign();
+
+ SectionKind Kind = CPE.getSectionKind(&getDataLayout());
+
+ const Constant *C = nullptr;
+ if (!CPE.isMachineConstantPoolEntry())
+ C = CPE.Val.ConstVal;
+
+ MCSection *S = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), Kind, C, Alignment);
+
+ // The number of sections is small, so just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Alignment));
+ }
+
+ if (Alignment > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Alignment;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ const MCSection *CurSection = nullptr;
+ unsigned Offset = 0;
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MCSymbol *Sym = GetCPISymbol(CPI);
+ if (!Sym->isUndefined())
+ continue;
+
+ if (CurSection != CPSections[i].S) {
+ OutStreamer->switchSection(CPSections[i].S);
+ emitAlignment(Align(CPSections[i].Alignment));
+ CurSection = CPSections[i].S;
+ Offset = 0;
+ }
+
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned NewOffset = alignTo(Offset, CPE.getAlign());
+ OutStreamer->emitZeros(NewOffset - Offset);
+
+ Offset = NewOffset + CPE.getSizeInBytes(getDataLayout());
+
+ OutStreamer->emitLabel(Sym);
+ if (CPE.isMachineConstantPoolEntry())
+ emitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ emitGlobalConstant(getDataLayout(), CPE.Val.ConstVal);
+ }
+ }
+}
+
+// Print assembly representations of the jump tables used by the current
+// function.
+void AsmPrinter::emitJumpTableInfo() {
+ const DataLayout &DL = MF->getDataLayout();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function &F = MF->getFunction();
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+ F);
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
+ MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM);
+ OutStreamer->switchSection(ReadOnlySection);
+ }
+
+ emitAlignment(Align(MJTI->getEntryAlignment(DL)));
+
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer->emitDataRegion(MCDR_DataRegionJT32);
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if using .set avoids a relocation,
+ // emit a .set directive for each unique entry.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->doesSetDirectiveSuppressReloc()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (const MachineBasicBlock *MBB : JTBBs) {
+ if (!EmittedSets.insert(MBB).second)
+ continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ OutStreamer->emitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::createSub(LHS, Base,
+ OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix())
+ // FIXME: This doesn't have to have any specific name; any randomly named
+ // and numbered local label starting with 'l' would work. Simplify
+ // GetJTISymbol.
+ OutStreamer->emitLabel(GetJTISymbol(JTI, true));
+
+ MCSymbol* JTISymbol = GetJTISymbol(JTI);
+ OutStreamer->emitLabel(JTISymbol);
+
+ for (const MachineBasicBlock *MBB : JTBBs)
+ emitJumpTableEntry(MJTI, MBB, JTI);
+ }
+ if (!JTInDiffSection)
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = nullptr;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry(
+ MJTI, MBB, UID, OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer->emitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+ // EK_GPRel64BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer->emitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // Each entry is the address of the block minus the address of the jump
+ // table. This is used for PIC jump tables where gprel32 is not supported.
+ // e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive avoids relocations, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+ if (MAI->doesSetDirectiveSuppressReloc()) {
+ Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
+ OutStreamer->emitValue(Value, EntrySize);
+}
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (MAI->hasNoDeadStrip()) // No need to emit this at all.
+ emitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ if (GV->getName() == "llvm.global_ctors") {
+ emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ true);
+
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ false);
+
+ return true;
+ }
+
+ report_fatal_error("unknown special variable");
+}
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list.
+void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) {
+ // Should be an array of 'i8*'.
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV)
+ OutStreamer->emitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+void AsmPrinter::preprocessXXStructorList(const DataLayout &DL,
+ const Constant *List,
+ SmallVector<Structor, 8> &Structors) {
+ // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is
+ // the init priority.
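+ // For illustration, a hypothetical module might contain:
+ //   @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
+ //     [{ i32, void ()*, i8* } { i32 65535, void ()* @init, i8* null }]
+ // which yields a single Structor with Priority 65535, Func @init, and no
+ // comdat key.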
+ if (!isa<ConstantArray>(List))
+ return;
+
+ // Gather the structors in a form that's convenient for sorting by priority.
+ for (Value *O : cast<ConstantArray>(List)->operands()) {
+ auto *CS = cast<ConstantStruct>(O);
+ if (CS->getOperand(1)->isNullValue())
+ break; // Found a null terminator, skip the rest.
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority)
+ continue; // Malformed.
+ Structors.push_back(Structor());
+ Structor &S = Structors.back();
+ S.Priority = Priority->getLimitedValue(65535);
+ S.Func = CS->getOperand(1);
+ if (!CS->getOperand(2)->isNullValue()) {
+ if (TM.getTargetTriple().isOSAIX())
+ llvm::report_fatal_error(
+ "associated data of XXStructor list is not yet supported on AIX");
+ S.ComdatKey =
+ dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
+ }
+ }
+
+ // Sort the structors by ascending init priority; the caller emits them in
+ // the target-specific order.
+ llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
+ return L.Priority < R.Priority;
+ });
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor) {
+ SmallVector<Structor, 8> Structors;
+ preprocessXXStructorList(DL, List, Structors);
+ if (Structors.empty())
+ return;
+
+ // Emit the structors in reverse order if we are using the .ctor/.dtor
+ // initialization scheme.
+ if (!TM.Options.UseInitArray)
+ std::reverse(Structors.begin(), Structors.end());
+
+ const Align Align = DL.getPointerPrefAlignment();
+ for (Structor &S : Structors) {
+ const TargetLoweringObjectFile &Obj = getObjFileLowering();
+ const MCSymbol *KeySym = nullptr;
+ if (GlobalValue *GV = S.ComdatKey) {
+ if (GV->isDeclarationForLinker())
+ // If the associated variable is not defined in this module
+ // (it might be available_externally, or have been an
+ // available_externally definition that was dropped by the
+ // EliminateAvailableExternally pass), some other TU
+ // will provide its dynamic initializer.
+ continue;
+
+ KeySym = getSymbol(GV);
+ }
+
+ MCSection *OutputSection =
+ (IsCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+ : Obj.getStaticDtorSection(S.Priority, KeySym));
+ OutStreamer->switchSection(OutputSection);
+ if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
+ emitAlignment(Align);
+ emitXXStructor(DL, S.Func);
+ }
+}
+
+void AsmPrinter::emitModuleIdents(Module &M) {
+ if (!MAI->hasIdentDirective())
+ return;
+
+ if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.ident metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer->emitIdent(S->getString());
+ }
+ }
+}
+
+void AsmPrinter::emitModuleCommandLines(Module &M) {
+ MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines();
+ if (!CommandLine)
+ return;
+
+ const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline");
+ if (!NMD || !NMD->getNumOperands())
+ return;
+
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(CommandLine);
+ OutStreamer->emitZeros(1);
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.commandline metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer->emitBytes(S->getString());
+ OutStreamer->emitZeros(1);
+ }
+ OutStreamer->popSection();
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// Emit a byte directive and value.
+///
+void AsmPrinter::emitInt8(int Value) const { OutStreamer->emitInt8(Value); }
+
+/// Emit a short directive and value.
+void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); }
+
+/// Emit a long directive and value.
+void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->emitSLEB128IntValue(Value);
+}
+
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->emitULEB128IntValue(Value, PadTo);
+}
+
+/// Emit a long long directive and value.
+void AsmPrinter::emitInt64(uint64_t Value) const {
+ OutStreamer->emitInt64(Value);
+}
+
+/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
+/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
+/// .set if it avoids relocations.
+void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
+}
+
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+ const MCSymbol *Lo) const {
+ OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size,
+ bool IsSectionRelative) const {
+ if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
+ OutStreamer->emitCOFFSecRel32(Label, Offset);
+ if (Size > 4)
+ OutStreamer->emitZeros(Size - 4);
+ return;
+ }
+
+ // Emit Label+Offset (or just Label if Offset is zero)
+ const MCExpr *Expr = MCSymbolRefExpr::create(Label, OutContext);
+ if (Offset)
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
+
+ OutStreamer->emitValue(Expr, Size);
+}
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
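+// For example, emitAlignment(Align(16)) in a data section typically lowers to
+// a ".p2align 4" directive on GAS-style targets, while Align(1) emits nothing.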
+void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV,
+ unsigned MaxBytesToEmit) const {
+ if (GV)
+ Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
+
+ if (Alignment == Align(1))
+ return; // 1-byte aligned: no need to emit alignment.
+
+ if (getCurrentSection()->getKind().isText()) {
+ const MCSubtargetInfo *STI = nullptr;
+ if (this->MF)
+ STI = &getSubtargetInfo();
+ else
+ STI = TM.getMCSubtargetInfo();
+ OutStreamer->emitCodeAlignment(Alignment, STI, MaxBytesToEmit);
+ } else
+ OutStreamer->emitValueToAlignment(Alignment, 0, 1, MaxBytesToEmit);
+}
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
+ MCContext &Ctx = OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::create(getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
+
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
+ return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
+
+ if (const NoCFIValue *NC = dyn_cast<NoCFIValue>(CV))
+ return MCSymbolRefExpr::create(getSymbol(NC->getGlobalValue()), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (!CE) {
+ llvm_unreachable("Unknown constant value to lower!");
+ }
+
+ // The constant expression opcodes are limited to those that are necessary
+ // to represent relocations on supported targets. Expressions involving only
+ // constant addresses are constant folded instead.
+ switch (CE->getOpcode()) {
+ default:
+ break; // Error
+ case Instruction::AddrSpaceCast: {
+ const Constant *Op = CE->getOperand(0);
+ unsigned DstAS = CE->getType()->getPointerAddressSpace();
+ unsigned SrcAS = Op->getType()->getPointerAddressSpace();
+ if (TM.isNoopAddrSpaceCast(SrcAS, DstAS))
+ return lowerConstant(Op);
+
+ break; // Error
+ }
+ case Instruction::GetElementPtr: {
+ // Generate a symbolic expression for the byte address
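+ // For example, a constant GEP over a hypothetical global @arr such as
+ //   getelementptr inbounds ([10 x i32], [10 x i32]* @arr, i64 0, i64 4)
+ // folds to the MCExpr "arr+16" (assuming 4-byte i32).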
+ APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI);
+
+ const MCExpr *Base = lowerConstant(CE->getOperand(0));
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ [[fallthrough]];
+ case Instruction::BitCast:
+ return lowerConstant(CE->getOperand(0));
+
+ case Instruction::IntToPtr: {
+ const DataLayout &DL = getDataLayout();
+
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+ false/*ZExt*/);
+ return lowerConstant(Op);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &DL = getDataLayout();
+
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = lowerConstant(Op);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ //
+ // If the pointer is larger than the resultant integer, then
+ // as with Trunc just depend on the assembler to truncate it.
+ if (DL.getTypeAllocSize(Ty).getFixedValue() <=
+ DL.getTypeAllocSize(Op->getType()).getFixedValue())
+ return OpExpr;
+
+ break; // Error
+ }
+
+ case Instruction::Sub: {
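+ // For illustration: a constant such as sub(ptrtoint @a, ptrtoint @b), where
+ // both operands are constant offsets from globals, typically lowers to the
+ // assembler expression "a-b" plus any constant addend, as used for
+ // PIC-relative tables.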
+ GlobalValue *LHSGV;
+ APInt LHSOffset;
+ DSOLocalEquivalent *DSOEquiv;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset,
+ getDataLayout(), &DSOEquiv)) {
+ GlobalValue *RHSGV;
+ APInt RHSOffset;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
+ getDataLayout())) {
+ const MCExpr *RelocExpr =
+ getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM);
+ if (!RelocExpr) {
+ const MCExpr *LHSExpr =
+ MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx);
+ if (DSOEquiv &&
+ getObjFileLowering().supportDSOLocalEquivalentLowering())
+ LHSExpr =
+ getObjFileLowering().lowerDSOLocalEquivalent(DSOEquiv, TM);
+ RelocExpr = MCBinaryExpr::createSub(
+ LHSExpr, MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ }
+ int64_t Addend = (LHSOffset - RHSOffset).getSExtValue();
+ if (Addend != 0)
+ RelocExpr = MCBinaryExpr::createAdd(
+ RelocExpr, MCConstantExpr::create(Addend, Ctx), Ctx);
+ return RelocExpr;
+ }
+ }
+
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1));
+ return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+ break;
+ }
+
+ case Instruction::Add: {
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1));
+ return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
+ }
+ }
+
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ Constant *C = ConstantFoldConstant(CE, getDataLayout());
+ if (C != CE)
+ return lowerConstant(C);
+
+ // Otherwise report the problem to the user.
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction().getParent());
+ report_fatal_error(Twine(OS.str()));
+}
+
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
+ AsmPrinter &AP,
+ const Constant *BaseCV = nullptr,
+ uint64_t Offset = 0,
+ AsmPrinter::AliasMapTy *AliasList = nullptr);
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
+static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP);
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
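+/// For example, the bytes of c"aaaa" yield 97 ('a'), while c"abcd" yields -1.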
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint64_t Size = DL.getTypeAllocSizeInBits(V->getType());
+ assert(Size % 8 == 0);
+
+ // Extend the element to take zero padding into account.
+ APInt Value = CI->getValue().zext(Size);
+ if (!Value.isSplat(8))
+ return -1;
+
+ return Value.zextOrTrunc(8).getZExtValue();
+ }
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ // Make sure all array elements are sequences of the same repeated
+ // byte.
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
+ Constant *Op0 = CA->getOperand(0);
+ int Byte = isRepeatedByteSequence(Op0, DL);
+ if (Byte == -1)
+ return -1;
+
+ // All array elements must be equal.
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i)
+ if (CA->getOperand(i) != Op0)
+ return -1;
+ return Byte;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
+
+ return -1;
+}
+
+static void emitGlobalAliasInline(AsmPrinter &AP, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ if (AliasList) {
+ auto AliasIt = AliasList->find(Offset);
+ if (AliasIt != AliasList->end()) {
+ for (const GlobalAlias *GA : AliasIt->second)
+ AP.OutStreamer->emitLabel(AP.getSymbol(GA));
+ AliasList->erase(Offset);
+ }
+ }
+}
+
+static void emitGlobalConstantDataSequential(
+ const DataLayout &DL, const ConstantDataSequential *CDS, AsmPrinter &AP,
+ AsmPrinter::AliasMapTy *AliasList) {
+ // See if we can aggregate this into a .fill; if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, DL);
+ if (Value != -1) {
+ uint64_t Bytes = DL.getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer->emitFill(Bytes, Value);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer->emitBytes(CDS->getAsString());
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
+ if (AP.isVerbose())
+ AP.OutStreamer->getCommentOS()
+ << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(I));
+ AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(I),
+ ElementByteSize);
+ }
+ } else {
+ Type *ET = CDS->getElementType();
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
+ emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP);
+ }
+ }
+
+ unsigned Size = DL.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize =
+ DL.getTypeAllocSize(CDS->getElementType()) * CDS->getNumElements();
+ assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!");
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer->emitZeros(Padding);
+}
+
+static void emitGlobalConstantArray(const DataLayout &DL,
+ const ConstantArray *CA, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, DL);
+
+ if (Value != -1) {
+ uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
+ AP.OutStreamer->emitFill(Bytes, Value);
+ } else {
+ for (unsigned I = 0, E = CA->getNumOperands(); I != E; ++I) {
+ emitGlobalConstantImpl(DL, CA->getOperand(I), AP, BaseCV, Offset,
+ AliasList);
+ Offset += DL.getTypeAllocSize(CA->getOperand(I)->getType());
+ }
+ }
+}
+
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP);
+
+static void emitGlobalConstantVector(const DataLayout &DL,
+ const ConstantVector *CV, AsmPrinter &AP,
+ AsmPrinter::AliasMapTy *AliasList) {
+ Type *ElementType = CV->getType()->getElementType();
+ uint64_t ElementSizeInBits = DL.getTypeSizeInBits(ElementType);
+ uint64_t ElementAllocSizeInBits = DL.getTypeAllocSizeInBits(ElementType);
+ uint64_t EmittedSize;
+ if (ElementSizeInBits != ElementAllocSizeInBits) {
+ // If the allocation size of an element is different from the size in bits,
+ // printing each element separately will insert incorrect padding.
+ //
+ // The general algorithm here is complicated; instead of writing it out
+ // here, just use the existing code in ConstantFolding.
+ Type *IntT =
+ IntegerType::get(CV->getContext(), DL.getTypeSizeInBits(CV->getType()));
+ ConstantInt *CI = dyn_cast_or_null<ConstantInt>(ConstantFoldConstant(
+ ConstantExpr::getBitCast(const_cast<ConstantVector *>(CV), IntT), DL));
+ if (!CI) {
+ report_fatal_error(
+ "Cannot lower vector global with unusual element type");
+ }
+ emitGlobalAliasInline(AP, 0, AliasList);
+ emitGlobalConstantLargeInt(CI, AP);
+ EmittedSize = DL.getTypeStoreSize(CV->getType());
+ } else {
+ for (unsigned I = 0, E = CV->getType()->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, DL.getTypeAllocSize(CV->getType()) * I, AliasList);
+ emitGlobalConstantImpl(DL, CV->getOperand(I), AP);
+ }
+ EmittedSize =
+ DL.getTypeAllocSize(ElementType) * CV->getType()->getNumElements();
+ }
+
+ unsigned Size = DL.getTypeAllocSize(CV->getType());
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer->emitZeros(Padding);
+}
+
+static void emitGlobalConstantStruct(const DataLayout &DL,
+ const ConstantStruct *CS, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ // Print the fields in successive locations. Pad to align if needed!
+ unsigned Size = DL.getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = DL.getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) {
+ const Constant *Field = CS->getOperand(I);
+
+ // Print the actual field value.
+ emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar,
+ AliasList);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((I == E - 1 ? Size : Layout->getElementOffset(I + 1)) -
+ Layout->getElementOffset(I)) -
+ FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer->emitZeros(PadSize);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
+ assert(ET && "Unknown float type");
+ APInt API = APF.bitcastToAPInt();
+
+ // First print a comment with what we think the original floating-point value
+ // should have been.
+ if (AP.isVerbose()) {
+ SmallString<8> StrVal;
+ APF.toString(StrVal);
+ ET->print(AP.OutStreamer->getCommentOS());
+ AP.OutStreamer->getCommentOS() << ' ' << StrVal << '\n';
+ }
+
+ // Now iterate through the APInt chunks, emitting them in endian-correct
+ // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+ // floats).
+ unsigned NumBytes = API.getBitWidth() / 8;
+ unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+ const uint64_t *p = API.getRawData();
+
+ // PPC's long double has odd notions of endianness compared to how LLVM
+ // handles it: p[0] goes first for *big* endian on PPC.
+ if (AP.getDataLayout().isBigEndian() && !ET->isPPC_FP128Ty()) {
+ int Chunk = API.getNumWords() - 1;
+
+ if (TrailingBytes)
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk--], TrailingBytes);
+
+ for (; Chunk >= 0; --Chunk)
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], sizeof(uint64_t));
+ } else {
+ unsigned Chunk;
+ for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], sizeof(uint64_t));
+
+ if (TrailingBytes)
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], TrailingBytes);
+ }
+
+ // Emit the tail padding for the long double.
+ const DataLayout &DL = AP.getDataLayout();
+ AP.OutStreamer->emitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET));
+}
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
+ emitGlobalConstantFP(CFP->getValueAPF(), CFP->getType(), AP);
+}
+
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
+ const DataLayout &DL = AP.getDataLayout();
+ unsigned BitWidth = CI->getBitWidth();
+
+ // Copy the value as we may massage the layout for constants whose bit width
+ // is not a multiple of 64 bits.
+ APInt Realigned(CI->getValue());
+ uint64_t ExtraBits = 0;
+ unsigned ExtraBitsSize = BitWidth & 63;
+
+ if (ExtraBitsSize) {
+ // The bit width of the data is not a multiple of 64 bits.
+ // The extra bits are expected to be at the end of the chunk of memory.
+ // Little endian:
+ // * Nothing to be done, just record the extra bits to emit.
+ // Big endian:
+ // * Record the extra bits to emit.
+ // * Realign the raw data to emit the 64-bit chunks.
+ if (DL.isBigEndian()) {
+ // Basically the structure of the raw data is a chunk of 64-bits cells:
+ // 0 1 BitWidth / 64
+ // [chunk1][chunk2] ... [chunkN].
+ // The most significant chunk is chunkN and it should be emitted first.
+ // However, due to the alignment issue chunkN contains useless bits.
+ // Realign the chunks so that they contain only useful information:
+ // ExtraBits 0 1 (BitWidth / 64) - 1
+ // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
+ ExtraBitsSize = alignTo(ExtraBitsSize, 8);
+ ExtraBits = Realigned.getRawData()[0] &
+ (((uint64_t)-1) >> (64 - ExtraBitsSize));
+ if (BitWidth >= 64)
+ Realigned.lshrInPlace(ExtraBitsSize);
+ } else
+ ExtraBits = Realigned.getRawData()[BitWidth / 64];
+ }
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = Realigned.getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer->emitIntValue(Val, 8);
+ }
+
+ if (ExtraBitsSize) {
+ // Emit the extra bits after the 64-bit chunks.
+
+ // Emit a directive that fills the expected size.
+ uint64_t Size = AP.getDataLayout().getTypeStoreSize(CI->getType());
+ Size -= (BitWidth / 64) * 8;
+ assert(Size && Size * 8 >= ExtraBitsSize &&
+ (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
+ == ExtraBits && "Directive too small for extra bits.");
+ AP.OutStreamer->emitIntValue(ExtraBits, Size);
+ }
+}
+
+/// Transform a non-absolute MCExpr containing a reference to a GOT-equivalent
+/// global into a target-specific GOT PC-relative access to the final symbol.
+static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
+ const Constant *BaseCst,
+ uint64_t Offset) {
+ // The global @foo below illustrates a global that uses a got equivalent.
+ //
+ // @bar = global i32 42
+ // @gotequiv = private unnamed_addr constant i32* @bar
+ // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64),
+ // i64 ptrtoint (i32* @foo to i64))
+ // to i32)
+ //
+ // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually
+ // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the
+ // form:
+ //
+ // foo = cstexpr, where
+ // cstexpr := <gotequiv> - "." + <cst>
+ // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst>
+ //
+ // After canonicalization by evaluateAsRelocatable `ME` turns into:
+ //
+ // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ MCValue MV;
+ if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
+ return;
+ const MCSymbolRefExpr *SymA = MV.getSymA();
+ if (!SymA)
+ return;
+
+ // Check that GOT equivalent symbol is cached.
+ const MCSymbol *GOTEquivSym = &SymA->getSymbol();
+ if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
+ return;
+
+ const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst);
+ if (!BaseGV)
+ return;
+
+ // Check for a valid base symbol
+ const MCSymbol *BaseSym = AP.getSymbol(BaseGV);
+ const MCSymbolRefExpr *SymB = MV.getSymB();
+
+ if (!SymB || BaseSym != &SymB->getSymbol())
+ return;
+
+ // Make sure to match:
+ //
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ // If gotpcrelcst is positive it means that we can safely fold the pc rel
+ // displacement into the GOTPCREL. We can also have an extra offset <cst>
+ // if the target knows how to encode it.
+ int64_t GOTPCRelCst = Offset + MV.getConstant();
+ if (GOTPCRelCst < 0)
+ return;
+ if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
+ return;
+
+ // Emit the GOT PC relative to replace the got equivalent global, i.e.:
+ //
+ // bar:
+ // .long 42
+ // gotequiv:
+ // .quad bar
+ // foo:
+ // .long gotequiv - "." + <cst>
+ //
+ // is replaced by the target specific equivalent to:
+ //
+ // bar:
+ // .long 42
+ // foo:
+ // .long bar@GOTPCREL+<gotpcrelcst>
+ AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
+ const GlobalVariable *GV = Result.first;
+ int NumUses = (int)Result.second;
+ const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
+ const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
+ *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
+ FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
+
+ // Update GOT equivalent usage information
+ --NumUses;
+ if (NumUses >= 0)
+ AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
+}
+
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
+ AsmPrinter &AP, const Constant *BaseCV,
+ uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ emitGlobalAliasInline(AP, Offset, AliasList);
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
+
+ // Globals with sub-elements such as combinations of arrays and structs
+ // are handled recursively by emitGlobalConstantImpl. Keep track of the
+ // constant symbol base and the current position with BaseCV and Offset.
+ if (!BaseCV && CV->hasOneUse())
+ BaseCV = dyn_cast<Constant>(CV->user_back());
+
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
+ return AP.OutStreamer->emitZeros(Size);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType());
+
+ if (StoreSize <= 8) {
+ if (AP.isVerbose())
+ AP.OutStreamer->getCommentOS()
+ << format("0x%" PRIx64 "\n", CI->getZExtValue());
+ AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize);
+ } else {
+ emitGlobalConstantLargeInt(CI, AP);
+ }
+
+ // Emit tail padding if needed
+ if (Size != StoreSize)
+ AP.OutStreamer->emitZeros(Size - StoreSize);
+
+ return;
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return emitGlobalConstantFP(CFP, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ AP.OutStreamer->emitIntValue(0, Size);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return emitGlobalConstantDataSequential(DL, CDS, AP, AliasList);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset, AliasList);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset, AliasList);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return emitGlobalConstantImpl(DL, CE->getOperand(0), AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstant(CE, DL);
+ if (New != CE)
+ return emitGlobalConstantImpl(DL, New, AP);
+ }
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return emitGlobalConstantVector(DL, V, AP, AliasList);
+
+ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+ // through the streamer with emitValue.
+ const MCExpr *ME = AP.lowerConstant(CV);
+
+ // Since lowerConstant already folded and got rid of all IR pointer and
+ // integer casts, detect GOT equivalent accesses by looking into the MCExpr
+ // directly.
+ if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset);
+
+ AP.OutStreamer->emitValue(ME, Size);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV,
+ AliasMapTy *AliasList) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
+ if (Size)
+ emitGlobalConstantImpl(DL, CV, *this, nullptr, 0, AliasList);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer->emitIntValue(0, 1);
+ }
+ if (!AliasList)
+ return;
+ // TODO: These remaining aliases are not emitted in the correct location. Need
+ // to handle the case where the alias offset doesn't refer to any sub-element.
+ for (auto &AliasPair : *AliasList) {
+ for (const GlobalAlias *GA : AliasPair.second)
+ OutStreamer->emitLabel(getSymbol(GA));
+ }
+}
+
+void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+void AsmPrinter::emitNops(unsigned N) {
+ MCInst Nop = MF->getSubtarget().getInstrInfo()->getNop();
+ for (; N; --N)
+ EmitToStreamer(*OutStreamer, Nop);
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
+ return OutContext.createTempSymbol(Name, true);
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(
+ BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
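+/// With the usual ELF private prefix this produces a name like ".LCPI3_7"
+/// (function 3, constant pool index 7); MSVC environments may instead return
+/// the COMDAT symbol of the constant's section, as handled below.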
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) {
+ const MachineConstantPoolEntry &CPE =
+ MF->getConstantPool()->getConstants()[CPID];
+ if (!CPE.isMachineConstantPoolEntry()) {
+ const DataLayout &DL = MF->getDataLayout();
+ SectionKind Kind = CPE.getSectionKind(&DL);
+ const Constant *C = CPE.Val.ConstVal;
+ Align Alignment = CPE.Alignment;
+ if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
+ getObjFileLowering().getSectionForConstant(DL, Kind, C,
+ Alignment))) {
+ if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+ if (Sym->isUndefined())
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
+ return Sym;
+ }
+ }
+ }
+ }
+
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "CPI" + Twine(getFunctionNumber()) + "_" +
+ Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
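+/// With the usual ELF private prefix this produces a name like ".L2_0_set_5"
+/// (function 2, jump table 0, basic block 5).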
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix) const {
+ return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, TM);
+}
+
+/// Return the MCSymbol for the specified ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout());
+ return OutContext.getOrCreateSymbol(NameStr);
+}
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (!Loop) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (const MachineLoop *CL : *Loop) {
+ OS.indent(CL->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, CL, FunctionNumber);
+ }
+}
+
+/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (!Loop) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+ // If this block is not a loop header, just print out which block is the loop
+ // header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer->AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer->getCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->isInnermost())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+/// emitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
+ // End the previous funclet and start a new one.
+ if (MBB.isEHFuncletEntry()) {
+ for (const HandlerInfo &HI : Handlers) {
+ HI.Handler->endFunclet();
+ HI.Handler->beginFunclet(MBB);
+ }
+ }
+
+ // Switch to a new section if this basic block must begin a section. The
+ // entry block is always placed in the function section and is handled
+ // separately.
+ if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
+ OutStreamer->switchSection(
+ getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
+ MBB, TM));
+ CurrentSectionBeginSym = MBB.getSymbol();
+ }
+
+ // Emit an alignment directive for this block, if needed.
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment != Align(1))
+ emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment());
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB.isIRBlockAddressTaken()) {
+ if (isVerbose())
+ OutStreamer->AddComment("Block address taken");
+
+ BasicBlock *BB = MBB.getAddressTakenIRBlock();
+ assert(BB && BB->hasAddressTaken() && "Missing BB");
+ for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB))
+ OutStreamer->emitLabel(Sym);
+ } else if (isVerbose() && MBB.isMachineBlockAddressTaken()) {
+ OutStreamer->AddComment("Block address taken");
+ }
+
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ BB->printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, BB->getModule());
+ OutStreamer->getCommentOS() << '\n';
+ }
+ }
+
+ assert(MLI != nullptr && "MachineLoopInfo should have been computed");
+ emitBasicBlockLoopComments(MBB, MLI, *this);
+ }
+
+ // Print the main label for the block.
+ if (shouldEmitLabelForBasicBlock(MBB)) {
+ if (isVerbose() && MBB.hasLabelMustBeEmitted())
+ OutStreamer->AddComment("Label of block must be emitted");
+ OutStreamer->emitLabel(MBB.getSymbol());
+ } else {
+ if (isVerbose()) {
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
+ false);
+ }
+ }
+
+ if (MBB.isEHCatchretTarget() &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::WinEH) {
+ OutStreamer->emitLabel(MBB.getEHCatchretSymbol());
+ }
+
+ // With BB sections, each basic block must handle CFI information on its own
+ // if it begins a section (Entry block call is handled separately, next to
+ // beginFunction).
+ if (MBB.isBeginSection() && !MBB.isEntryBlock())
+ for (const HandlerInfo &HI : Handlers)
+ HI.Handler->beginBasicBlockSection(MBB);
+}
+
+void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
+ // Check if CFI information needs to be updated for this MBB with basic block
+ // sections.
+ if (MBB.isEndSection())
+ for (const HandlerInfo &HI : Handlers)
+ HI.Handler->endBasicBlockSection(MBB);
+}
+
+void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer->emitSymbolAttribute(Sym, Attr);
+}
+
+bool AsmPrinter::shouldEmitLabelForBasicBlock(
+ const MachineBasicBlock &MBB) const {
+ // With `-fbasic-block-sections=`, a label is needed for every non-entry block
+ // in the labels mode (option `=labels`) and every section beginning in the
+ // sections mode (`=all` and `=list=`).
+ if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock())
+ return true;
+ // A label is needed for any block with at least one predecessor (when that
+ // predecessor is not the fallthrough predecessor, or if it is an EH funclet
+ // entry, or if a label is forced).
+ return !MBB.pred_empty() &&
+ (!isBlockOnlyReachableByFallthrough(&MBB) || MBB.isEHFuncletEntry() ||
+ MBB.hasLabelMustBeEmitted());
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isEHPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ if (MBB->pred_size() > 1)
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check the terminators in the previous block.
+ for (const auto &MI : Pred->terminators()) {
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.isBranch() || MI.isIndirectBranch())
+ return false;
+
+ // If this block is an operand of one of the branches, this is not a fall
+ // through. Note that targets with delay slots will usually bundle
+ // terminators with the delay slot instruction.
+ for (ConstMIBundleOperands OP(MI); OP.isValid(); ++OP) {
+ if (OP->isJTI())
+ return false;
+ if (OP->isMBB() && OP->getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+GCMetadataPrinter *AsmPrinter::getOrCreateGCPrinter(GCStrategy &S) {
+ if (!S.usesMetadata())
+ return nullptr;
+
+ auto [GCPI, Inserted] = GCMetadataPrinters.insert({&S, nullptr});
+ if (!Inserted)
+ return GCPI->second.get();
+
+ auto Name = S.getName();
+
+ for (const GCMetadataPrinterRegistry::entry &GCMetaPrinter :
+ GCMetadataPrinterRegistry::entries())
+ if (Name == GCMetaPrinter.getName()) {
+ std::unique_ptr<GCMetadataPrinter> GMP = GCMetaPrinter.instantiate();
+ GMP->S = &S;
+ GCPI->second = std::move(GMP);
+ return GCPI->second.get();
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+}
+
+void AsmPrinter::emitStackMaps() {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ bool NeedsDefault = false;
+ if (MI->begin() == MI->end())
+ // No GC strategy, use the default format.
+ NeedsDefault = true;
+ else
+ for (const auto &I : *MI) {
+ if (GCMetadataPrinter *MP = getOrCreateGCPrinter(*I))
+ if (MP->emitStackMaps(SM, *this))
+ continue;
+ // The strategy doesn't have a printer or doesn't emit custom stack maps.
+ // Use the default format.
+ NeedsDefault = true;
+ }
+
+ if (NeedsDefault)
+ SM.serializeToStackMapSection();
+}
+
+/// Pin vtable to this file.
+AsmPrinterHandler::~AsmPrinterHandler() = default;
+
+void AsmPrinterHandler::markFunctionEnd() {}
+
+// In the binary's "xray_instr_map" section, an array of these function entries
+// describes each instrumentation point. When XRay patches your code, the index
+// into this table will be given to your handler as a patch point identifier.
+void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out) const {
+ auto Kind8 = static_cast<uint8_t>(Kind);
+ Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
+ Out->emitBinaryData(
+ StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
+ Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1));
+ auto Padding = (4 * Bytes) - ((2 * Bytes) + 3);
+ assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size");
+ Out->emitZeros(Padding);
+}
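+
+ // As a rough illustration (a sketch, not part of the emitted output): taken
+ // together with the two PC-relative words written by emitXRayTable below,
+ // each xray_instr_map entry spans 4 * word-size bytes and can be pictured as
+ // the following hypothetical layout:
+ //
+ //   struct XRaySledEntryOnDisk {
+ //     intptr_t SledAddrPCRel;     // sled label minus the entry's address
+ //     intptr_t FunctionAddrPCRel; // CurrentFnBegin minus (entry + one word)
+ //     uint8_t  Kind;              // SledKind, e.g. FUNCTION_ENTER
+ //     uint8_t  AlwaysInstrument;  // from the "function-instrument" attribute
+ //     uint8_t  Version;
+ //     // padded with zeros up to 4 * word size
+ //   };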
+
+void AsmPrinter::emitXRayTable() {
+ if (Sleds.empty())
+ return;
+
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ const Function &F = MF->getFunction();
+ MCSection *InstMap = nullptr;
+ MCSection *FnSledIndex = nullptr;
+ const Triple &TT = TM.getTargetTriple();
+ // Use PC-relative addresses on all targets.
+ if (TT.isOSBinFormatELF()) {
+ auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
+ StringRef GroupName;
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName();
+ }
+ InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ Flags, 0, GroupName, F.hasComdat(),
+ MCSection::NonUniqueID, LinkedToSym);
+
+ if (TM.Options.XRayFunctionIndex)
+ FnSledIndex = OutContext.getELFSection(
+ "xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(),
+ MCSection::NonUniqueID, LinkedToSym);
+ } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
+ InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map",
+ MachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnlyWithRel());
+ if (TM.Options.XRayFunctionIndex)
+ FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx",
+ MachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+ } else {
+ llvm_unreachable("Unsupported target");
+ }
+
+ auto WordSizeBytes = MAI->getCodePointerSize();
+
+ // Now we switch to the instrumentation map section. Because this is done
+ // per-function, we are able to create an index entry that will represent the
+ // range of sleds associated with a function.
+ auto &Ctx = OutContext;
+ MCSymbol *SledsStart =
+ OutContext.createLinkerPrivateSymbol("xray_sleds_start");
+ OutStreamer->switchSection(InstMap);
+ OutStreamer->emitLabel(SledsStart);
+ for (const auto &Sled : Sleds) {
+ MCSymbol *Dot = Ctx.createTempSymbol();
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnBegin, Ctx),
+ MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Dot, Ctx),
+ MCConstantExpr::create(WordSizeBytes, Ctx),
+ Ctx),
+ Ctx),
+ WordSizeBytes);
+ Sled.emit(WordSizeBytes, OutStreamer.get());
+ }
+ MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
+ OutStreamer->emitLabel(SledsEnd);
+
+ // We then emit a single entry in the index per function. We use the symbols
+ // that bound the instrumentation map as the range for a specific function.
+ // Each entry here will be 2 * word size aligned, as we're writing down two
+ // pointers. This should work for both 32-bit and 64-bit platforms.
+ if (FnSledIndex) {
+ OutStreamer->switchSection(FnSledIndex);
+ OutStreamer->emitCodeAlignment(Align(2 * WordSizeBytes),
+ &getSubtargetInfo());
+ // For Mach-O, use an "l" symbol as the atom of this subsection. The label
+ // difference uses a SUBTRACTOR external relocation which references the
+ // symbol.
+ MCSymbol *Dot = Ctx.createLinkerPrivateSymbol("xray_fn_idx");
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(SledsStart, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(MCConstantExpr::create(Sleds.size(), Ctx),
+ WordSizeBytes);
+ OutStreamer->switchSection(PrevSection);
+ }
+ Sleds.clear();
+}
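+
+ // For illustration only: each xray_fn_idx entry emitted above is two words,
+ // roughly equivalent to this hypothetical view of the section contents:
+ //
+ //   struct XRayFnIdxEntry {
+ //     intptr_t  SledsStartPCRel; // xray_sleds_start label minus this entry
+ //     uintptr_t NumSleds;        // number of sleds recorded for the function
+ //   };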
+
+void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
+ SledKind Kind, uint8_t Version) {
+ const Function &F = MI.getMF()->getFunction();
+ auto Attr = F.getFnAttribute("function-instrument");
+ bool LogArgs = F.hasFnAttribute("xray-log-args");
+ bool AlwaysInstrument =
+ Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
+ Kind = SledKind::LOG_ARGS_ENTER;
+ Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind,
+ AlwaysInstrument, &F, Version});
+}
+
+void AsmPrinter::emitPatchableFunctionEntries() {
+ const Function &F = MF->getFunction();
+ unsigned PatchableFunctionPrefix = 0, PatchableFunctionEntry = 0;
+ (void)F.getFnAttribute("patchable-function-prefix")
+ .getValueAsString()
+ .getAsInteger(10, PatchableFunctionPrefix);
+ (void)F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, PatchableFunctionEntry);
+ if (!PatchableFunctionPrefix && !PatchableFunctionEntry)
+ return;
+ const unsigned PointerSize = getPointerSize();
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC;
+ const MCSymbolELF *LinkedToSym = nullptr;
+ StringRef GroupName;
+
+ // GNU as < 2.35 did not support section flag 'o'. GNU ld < 2.36 did not
+ // support mixed SHF_LINK_ORDER and non-SHF_LINK_ORDER sections.
+ if (MAI->useIntegratedAssembler() || MAI->binutilsIsAtLeast(2, 36)) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName();
+ }
+ LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ }
+ OutStreamer->switchSection(OutContext.getELFSection(
+ "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName,
+ F.hasComdat(), MCSection::NonUniqueID, LinkedToSym));
+ emitAlignment(Align(PointerSize));
+ OutStreamer->emitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize);
+ }
+}
+
+uint16_t AsmPrinter::getDwarfVersion() const {
+ return OutStreamer->getContext().getDwarfVersion();
+}
+
+void AsmPrinter::setDwarfVersion(uint16_t Version) {
+ OutStreamer->getContext().setDwarfVersion(Version);
+}
+
+bool AsmPrinter::isDwarf64() const {
+ return OutStreamer->getContext().getDwarfFormat() == dwarf::DWARF64;
+}
+
+unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
+ return dwarf::getDwarfOffsetByteSize(
+ OutStreamer->getContext().getDwarfFormat());
+}
+
+dwarf::FormParams AsmPrinter::getDwarfFormParams() const {
+ return {getDwarfVersion(), uint8_t(MAI->getCodePointerSize()),
+ OutStreamer->getContext().getDwarfFormat(),
+ doesDwarfUseRelocationsAcrossSections()};
+}
+
+unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
+ return dwarf::getUnitLengthFieldByteSize(
+ OutStreamer->getContext().getDwarfFormat());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 000000000000..21d0d070c247
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,305 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cstdint>
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr:
+ return "absptr";
+ case dwarf::DW_EH_PE_omit:
+ return "omit";
+ case dwarf::DW_EH_PE_pcrel:
+ return "pcrel";
+ case dwarf::DW_EH_PE_uleb128:
+ return "uleb128";
+ case dwarf::DW_EH_PE_sleb128:
+ return "sleb128";
+ case dwarf::DW_EH_PE_udata4:
+ return "udata4";
+ case dwarf::DW_EH_PE_udata8:
+ return "udata8";
+ case dwarf::DW_EH_PE_sdata4:
+ return "sdata4";
+ case dwarf::DW_EH_PE_sdata8:
+ return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ dwarf::DW_EH_PE_sdata4:
+ return "indirect datarel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ dwarf::DW_EH_PE_sdata8:
+ return "indirect datarel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
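+
+ // For example, the common personality/LSDA encoding 0x9b decodes as
+ // DW_EH_PE_indirect (0x80) | DW_EH_PE_pcrel (0x10) | DW_EH_PE_sdata4 (0x0b),
+ // which the switch above renders as "indirect pcrel sdata4".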
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::emitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc)
+ OutStreamer->AddComment(Twine(Desc) + " Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer->emitIntValue(Val, 1);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ default:
+ llvm_unreachable("Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr:
+ return MAI->getCodePointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ return 8;
+ }
+}
+
+void AsmPrinter::emitTTypeReference(const GlobalValue *GV, unsigned Encoding) {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer);
+ OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
+ bool ForceOffset) const {
+ if (!ForceOffset) {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (MAI->needsDwarfSectionOffsetDirective()) {
+ assert(!isDwarf64() &&
+ "emitting DWARF64 is not implemented for COFF targets");
+ OutStreamer->emitCOFFSecRel32(Label, /*Offset=*/0);
+ return;
+ }
+
+ // If the format uses relocations with dwarf, refer to the symbol directly.
+ if (doesDwarfUseRelocationsAcrossSections()) {
+ OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize());
+ return;
+ }
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ emitLabelDifference(Label, Label->getSection().getBeginSymbol(),
+ getDwarfOffsetByteSize());
+}
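+
+ // As a rough sketch of the three cases above (directive spellings vary by
+ // target assembler), a DWARF32 reference to .Lsym would come out as one of:
+ //
+ //   .secrel32 .Lsym                    // COFF section-relative relocation
+ //   .long     .Lsym                    // direct relocation against the symbol
+ //   .long     .Lsym - .Lsection_start  // offset from the section begin label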
+
+void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
+ if (doesDwarfUseRelocationsAcrossSections()) {
+ assert(S.Symbol && "No symbol available");
+ emitDwarfSymbolReference(S.Symbol);
+ return;
+ }
+
+ // Just emit the offset directly; no need for symbol math.
+ OutStreamer->emitIntValue(S.Offset, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
+ emitLabelPlusOffset(Label, Offset, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const {
+ assert(isDwarf64() || Value <= UINT32_MAX);
+ OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfUnitLength(uint64_t Length,
+ const Twine &Comment) const {
+ OutStreamer->emitDwarfUnitLength(Length, Comment);
+}
+
+MCSymbol *AsmPrinter::emitDwarfUnitLength(const Twine &Prefix,
+ const Twine &Comment) const {
+ return OutStreamer->emitDwarfUnitLength(Prefix, Comment);
+}
+
+void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Encoding) const {
+ // The least significant 3 bits specify the width of the encoding
+ if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+ emitLabelDifferenceAsULEB128(Hi, Lo);
+ else
+ emitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::emitCallSiteValue(uint64_t Value, unsigned Encoding) const {
+ // The least significant 3 bits specify the width of the encoding
+ if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+ emitULEB128(Value);
+ else
+ OutStreamer->emitIntValue(Value, GetSizeOfEncodedValue(Encoding));
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
+ SMLoc Loc = Inst.getLoc();
+ switch (Inst.getOperation()) {
+ default:
+ llvm_unreachable("Unexpected instruction");
+ case MCCFIInstruction::OpDefCfaOffset:
+ OutStreamer->emitCFIDefCfaOffset(Inst.getOffset(), Loc);
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset(), Loc);
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset(), Loc);
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OutStreamer->emitCFIDefCfaRegister(Inst.getRegister(), Loc);
+ break;
+ case MCCFIInstruction::OpLLVMDefAspaceCfa:
+ OutStreamer->emitCFILLVMDefAspaceCfa(Inst.getRegister(), Inst.getOffset(),
+ Inst.getAddressSpace(), Loc);
+ break;
+ case MCCFIInstruction::OpOffset:
+ OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset(), Loc);
+ break;
+ case MCCFIInstruction::OpRegister:
+ OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2(), Loc);
+ break;
+ case MCCFIInstruction::OpWindowSave:
+ OutStreamer->emitCFIWindowSave(Loc);
+ break;
+ case MCCFIInstruction::OpNegateRAState:
+ OutStreamer->emitCFINegateRAState(Loc);
+ break;
+ case MCCFIInstruction::OpSameValue:
+ OutStreamer->emitCFISameValue(Inst.getRegister(), Loc);
+ break;
+ case MCCFIInstruction::OpGnuArgsSize:
+ OutStreamer->emitCFIGnuArgsSize(Inst.getOffset(), Loc);
+ break;
+ case MCCFIInstruction::OpEscape:
+ OutStreamer->AddComment(Inst.getComment());
+ OutStreamer->emitCFIEscape(Inst.getValues(), Loc);
+ break;
+ case MCCFIInstruction::OpRestore:
+ OutStreamer->emitCFIRestore(Inst.getRegister(), Loc);
+ break;
+ case MCCFIInstruction::OpUndefined:
+ OutStreamer->emitCFIUndefined(Inst.getRegister(), Loc);
+ break;
+ case MCCFIInstruction::OpRememberState:
+ OutStreamer->emitCFIRememberState(Loc);
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ OutStreamer->emitCFIRestoreState(Loc);
+ break;
+ }
+}
+
+void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
+ // Emit the code (index) for the abbreviation.
+ if (isVerbose())
+ OutStreamer->AddComment("Abbrev [" + Twine(Die.getAbbrevNumber()) + "] 0x" +
+ Twine::utohexstr(Die.getOffset()) + ":0x" +
+ Twine::utohexstr(Die.getSize()) + " " +
+ dwarf::TagString(Die.getTag()));
+ emitULEB128(Die.getAbbrevNumber());
+
+ // Emit the DIE attribute values.
+ for (const auto &V : Die.values()) {
+ dwarf::Attribute Attr = V.getAttribute();
+ assert(V.getForm() && "Too many attributes for DIE (check abbreviation)");
+
+ if (isVerbose()) {
+ OutStreamer->AddComment(dwarf::AttributeString(Attr));
+ if (Attr == dwarf::DW_AT_accessibility)
+ OutStreamer->AddComment(
+ dwarf::AccessibilityString(V.getDIEInteger().getValue()));
+ }
+
+ // Emit an attribute using the defined form.
+ V.emitValue(this);
+ }
+
+ // Emit the DIE children if any.
+ if (Die.hasChildren()) {
+ for (const auto &Child : Die.children())
+ emitDwarfDIE(Child);
+
+ OutStreamer->AddComment("End Of Children Mark");
+ emitInt8(0);
+ }
+}
+
+void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const {
+ // Emit the abbreviations code (base 1 index.)
+ emitULEB128(Abbrev.getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev.Emit(this);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 000000000000..32674bbeb061
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,519 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
+ const MDNode *LocMDNode) const {
+ MCContext &Context = MMI->getContext();
+ Context.initInlineSourceManager();
+ SourceMgr &SrcMgr = *Context.getInlineSourceManager();
+ std::vector<const MDNode *> &LocInfos = Context.getLocInfos();
+
+ std::unique_ptr<MemoryBuffer> Buffer;
+ // The inline asm source manager will outlive AsmStr, so make a copy of the
+ // string for SourceMgr to own.
+ Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ LocInfos.resize(BufNum);
+ LocInfos[BufNum - 1] = LocMDNode;
+ }
+
+ return BufNum;
+}
+
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+ const MCTargetOptions &MCOptions,
+ const MDNode *LocMDNode,
+ InlineAsm::AsmDialect Dialect) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer does not have mature MC support or the integrated
+ // assembler has been disabled or not required, just emit the blob textually.
+ // Otherwise parse the asm and emit it via MC support.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ const MCAsmInfo *MCAI = TM.getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ if (!MCAI->useIntegratedAssembler() &&
+ !MCAI->parseInlineAsmUsingAsmParser() &&
+ !OutStreamer->isIntegratedAssemblerRequired()) {
+ emitInlineAsmStart();
+ OutStreamer->emitRawText(Str);
+ emitInlineAsmEnd(STI, nullptr);
+ return;
+ }
+
+ unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
+ SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
+
+ // Do not use assembler-level information for parsing inline assembly.
+ OutStreamer->setUseAssemblerInfoForParsing(false);
+
+ // We create a new MCInstrInfo here since we might be at the module level
+ // and not have a MachineFunction to initialize the TargetInstrInfo from and
+ // we only need MCInstrInfo for asm parsing. We create one unconditionally
+ // because it's not subtarget dependent.
+ std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
+ assert(MII && "Failed to create instruction info");
+ std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
+ STI, *Parser, *MII, MCOptions));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setAssemblerDialect(Dialect);
+ Parser->setTargetParser(*TAP);
+ // Enable lexing MASM binary and hex integer literals in Intel inline
+ // assembly.
+ if (Dialect == InlineAsm::AD_Intel)
+ Parser->getLexer().setLexMasmIntegers(true);
+
+ emitInlineAsmStart();
+ // Don't implicitly switch to the text section before the asm.
+ (void)Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ emitInlineAsmEnd(STI, &TAP->getSTI());
+}
+
+static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
+ bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel;
+
+ if (InputIsIntelDialect) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
+ }
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ int AsmPrinterVariant;
+ if (InputIsIntelDialect)
+ AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
+ else
+ AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
+
+ // FIXME: Should this happen for `asm inteldialect` as well?
+ if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker())
+ OS << '\t';
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (!InputIsIntelDialect)
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // Consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // This is gcc's behavior for | outside a variant.
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // This is gcc's behavior for } outside a variant.
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (!StrEnd)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (isDigit(*IDEnd))
+ ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ if (Val >= NumOperands - 1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands())
+ break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have location metadata attached to the end of the
+ // instruction, and we should never see metadata at any other
+ // point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ // FIXME: Shouldn't arch-independent output template handling go into
+ // PrintAsmOperand?
+ // Labels are target independent.
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
+ } else if (MI->getOperand(OpNo).isMBB()) {
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ if (InputIsIntelDialect)
+ OS << "\n\t.att_syntax";
+ OS << '\n' << (char)0; // null terminate string.
+}
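+
+ // A small worked example of the escapes handled above (illustrative only):
+ // with AsmPrinterVariant == 1, the template "mov $0, $(42$|#42$)" prints
+ // operand 0 (via PrintAsmOperand or PrintAsmMemoryOperand, depending on its
+ // kind) followed by the second variant's text "#42", while "${0:c}" prints
+ // operand 0 with the 'c' modifier (GCC's "%c0").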
+
+/// This method formats and emits the specified machine instruction that is an
+/// inline asm.
+void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful to see where empty asms wound up.
+ if (AsmStr[0] == 0) {
+ OutStreamer->emitRawComment(MAI->getInlineAsmStart());
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use emitRawComment.
+ OutStreamer->emitRawComment(MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ uint64_t LocCookie = 0;
+ const MDNode *LocMD = nullptr;
+ for (const MachineOperand &MO : llvm::reverse(MI->operands())) {
+ if (MO.isMetadata() && (LocMD = MO.getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
+
+ // Emit warnings if we use reserved registers on the clobber list, as
+ // that might lead to undefined behaviour.
+ SmallVector<Register, 8> RestrRegs;
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ // Start with the first operand descriptor, and iterate over them.
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
+ I < NumOps; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isImm())
+ continue;
+ unsigned Flags = MO.getImm();
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) {
+ Register Reg = MI->getOperand(I + 1).getReg();
+ if (!TRI->isAsmClobberable(*MF, Reg))
+ RestrRegs.push_back(Reg);
+ }
+ // Skip to one before the next operand descriptor, if it exists.
+ I += InlineAsm::getNumOperandRegisters(Flags);
+ }
+
+ if (!RestrRegs.empty()) {
+ std::string Msg = "inline asm clobber list contains reserved registers: ";
+ ListSeparator LS;
+ for (const Register RR : RestrRegs) {
+ Msg += LS;
+ Msg += TRI->getRegAsmName(RR);
+ }
+ const char *Note =
+ "Reserved registers on the clobber list may not be "
+ "preserved across the asm statement, and clobbering them may "
+ "lead to undefined behaviour.";
+ MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
+ LocCookie, Msg, DiagnosticSeverity::DS_Warning));
+ MMI->getModule()->getContext().diagnose(
+ DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
+
+ for (const Register RR : RestrRegs) {
+ if (std::optional<std::string> reason =
+ TRI->explainReservedReg(*MF, RR)) {
+ MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
+ LocCookie, *reason, DiagnosticSeverity::DS_Note));
+ }
+ }
+ }
+
+ emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
+ MI->getInlineAsmDialect());
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use emitRawComment.
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ StringRef Code) const {
+ if (Code == "private") {
+ const DataLayout &DL = MF->getDataLayout();
+ OS << DL.getPrivateGlobalPrefix();
+ } else if (Code == "comment") {
+ OS << MAI->getCommentString();
+ } else if (Code == "uid") {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+
+ // If this is a new LastFn instruction, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Twine(Msg.str()));
+ }
+}
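+
+ // For instance (illustrative only), on a target whose comment string is "#"
+ // and whose private global prefix is ".L", the template
+ //   "${:comment} spin ${:uid}\n\tjmp ${:private}retry"
+ // would print roughly as "# spin <uid>" followed by "jmp .Lretry", where
+ // <uid> is the per-instruction counter value maintained above.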
+
+void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) {
+ assert(MO.isGlobal() && "caller should check MO.isGlobal");
+ getSymbolPreferLocal(*MO.getGlobal())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate for machine specific ExtraCodes
+/// or when the arch-independent handling would be too complex otherwise.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'a': // Print as memory address.
+ if (MO.isReg()) {
+ PrintAsmMemoryOperand(MI, OpNo, nullptr, O);
+ return false;
+ }
+ [[fallthrough]]; // GCC allows '%a' to behave like '%c' with immediates.
+ case 'c': // Substitute immediate value without immediate syntax
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal()) {
+ PrintSymbolOperand(MO, O);
+ return false;
+ }
+ return true;
+ case 'n': // Negate the immediate constant.
+ if (!MO.isImm())
+ return true;
+ O << -MO.getImm();
+ return false;
+ case 's': // The deprecated GCC 's' modifier.
+ if (!MO.isImm())
+ return true;
+ O << ((32 - MO.getImm()) & 31);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+void AsmPrinter::emitInlineAsmStart() const {}
+
+void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+ const MCSubtargetInfo *EndInfo) const {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
new file mode 100644
index 000000000000..bd2c60eadd61
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -0,0 +1,145 @@
+//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a class that can take bytes that would normally be
+// streamed via the AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+
+#include "DIEHash.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include <string>
+
+namespace llvm {
+class ByteStreamer {
+ protected:
+ ~ByteStreamer() = default;
+ ByteStreamer(const ByteStreamer&) = default;
+ ByteStreamer() = default;
+
+ public:
+ // For now we're just handling the calls we need for dwarf emission/hashing.
+ virtual void emitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
+ virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+ virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
+ unsigned PadTo = 0) = 0;
+ virtual unsigned emitDIERef(const DIE &D) = 0;
+};
+
+class APByteStreamer final : public ByteStreamer {
+private:
+ AsmPrinter &AP;
+
+public:
+ APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
+ AP.OutStreamer->AddComment(Comment);
+ AP.emitInt8(Byte);
+ }
+ void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ AP.OutStreamer->AddComment(Comment);
+ AP.emitSLEB128(DWord);
+ }
+ void emitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
+ AP.OutStreamer->AddComment(Comment);
+ AP.emitULEB128(DWord, nullptr, PadTo);
+ }
+ unsigned emitDIERef(const DIE &D) override {
+ uint64_t Offset = D.getOffset();
+ static constexpr unsigned ULEB128PadSize = 4;
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
+ emitULEB128(Offset, "", ULEB128PadSize);
+ // Return how many comments to skip in DwarfDebug::emitDebugLocEntry to keep
+ // comments aligned with debug loc entries.
+ return ULEB128PadSize;
+ }
+};
+
+class HashingByteStreamer final : public ByteStreamer {
+ private:
+ DIEHash &Hash;
+ public:
+ HashingByteStreamer(DIEHash &H) : Hash(H) {}
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
+ Hash.update(Byte);
+ }
+ void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ Hash.addSLEB128(DWord);
+ }
+ void emitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
+ Hash.addULEB128(DWord);
+ }
+ unsigned emitDIERef(const DIE &D) override {
+ Hash.hashRawTypeReference(D);
+ return 0; // Only used together with the APByteStreamer.
+ }
+};
+
+class BufferByteStreamer final : public ByteStreamer {
+private:
+ SmallVectorImpl<char> &Buffer;
+ std::vector<std::string> &Comments;
+
+public:
+ /// Only verbose textual output needs comments. This will be set to
+ /// true for that case, and false otherwise. If false, comments passed in to
+ /// the emit methods will be ignored.
+ const bool GenerateComments;
+
+ BufferByteStreamer(SmallVectorImpl<char> &Buffer,
+ std::vector<std::string> &Comments, bool GenerateComments)
+ : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {
+ }
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
+ Buffer.push_back(Byte);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+ void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ unsigned Length = encodeSLEB128(DWord, OSE);
+ if (GenerateComments) {
+ Comments.push_back(Comment.str());
+ // Add some empty comments to keep the Buffer and Comments vectors aligned
+ // with each other.
+ for (size_t i = 1; i < Length; ++i)
+ Comments.push_back("");
+
+ }
+ }
+ void emitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
+ raw_svector_ostream OSE(Buffer);
+ unsigned Length = encodeULEB128(DWord, OSE, PadTo);
+ if (GenerateComments) {
+ Comments.push_back(Comment.str());
+ // Add some empty comments to keep the Buffer and Comments vectors aligned
+ // with each other.
+ for (size_t i = 1; i < Length; ++i)
+ Comments.push_back("");
+ }
+ }
+ unsigned emitDIERef(const DIE &D) override {
+ uint64_t Offset = D.getOffset();
+ static constexpr unsigned ULEB128PadSize = 4;
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
+ emitULEB128(Offset, "", ULEB128PadSize);
+ return 0; // Only used together with the APByteStreamer.
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
new file mode 100644
index 000000000000..8161de57b58e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -0,0 +1,3444 @@
+//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Microsoft CodeView debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeViewDebug.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
+#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+namespace {
+class CVMCAdapter : public CodeViewRecordStreamer {
+public:
+ CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable)
+ : OS(&OS), TypeTable(TypeTable) {}
+
+ void emitBytes(StringRef Data) override { OS->emitBytes(Data); }
+
+ void emitIntValue(uint64_t Value, unsigned Size) override {
+ OS->emitIntValueInHex(Value, Size);
+ }
+
+ void emitBinaryData(StringRef Data) override { OS->emitBinaryData(Data); }
+
+ void AddComment(const Twine &T) override { OS->AddComment(T); }
+
+ void AddRawComment(const Twine &T) override { OS->emitRawComment(T); }
+
+ bool isVerboseAsm() override { return OS->isVerboseAsm(); }
+
+ std::string getTypeName(TypeIndex TI) override {
+ std::string TypeName;
+ if (!TI.isNoneType()) {
+ if (TI.isSimple())
+ TypeName = std::string(TypeIndex::simpleTypeName(TI));
+ else
+ TypeName = std::string(TypeTable.getTypeName(TI));
+ }
+ return TypeName;
+ }
+
+private:
+ MCStreamer *OS = nullptr;
+ TypeCollection &TypeTable;
+};
+} // namespace
+
+static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
+ switch (Type) {
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ // LLVM currently doesn't support Windows CE and so thumb
+ // here is indiscriminately mapped to ARMNT specifically.
+ return CPUType::ARMNT;
+ case Triple::ArchType::aarch64:
+ return CPUType::ARM64;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView CPUType");
+ }
+}
+
+CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {}
+
+StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
+ std::string &Filepath = FileToFilepathMap[File];
+ if (!Filepath.empty())
+ return Filepath;
+
+ StringRef Dir = File->getDirectory(), Filename = File->getFilename();
+
+ // If this is a Unix-style path, just use it as is. Don't try to canonicalize
+ // it textually because one of the path components could be a symlink.
+ if (Dir.startswith("/") || Filename.startswith("/")) {
+ if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix))
+ return Filename;
+ Filepath = std::string(Dir);
+ if (Dir.back() != '/')
+ Filepath += '/';
+ Filepath += Filename;
+ return Filepath;
+ }
+
+ // Clang emits directory and relative filename info into the IR, but CodeView
+ // operates on full paths. We could change Clang to emit full paths too, but
+ // that would increase the IR size and is probably not needed for other users.
+ // For now, just concatenate and canonicalize the path here.
+ if (Filename.find(':') == 1)
+ Filepath = std::string(Filename);
+ else
+ Filepath = (Dir + "\\" + Filename).str();
+
+ // Canonicalize the path. We have to do it textually because we may no longer
+ // have access to the file in the filesystem.
+ // First, replace all slashes with backslashes.
+ std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
+
+ // Replace all "\.\" with "\".
+ size_t Cursor = 0;
+ while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
+ Filepath.erase(Cursor, 2);
+
+ // Replace all "\XXX\..\" with "\". Don't try too hard though as the original
+ // path should be well-formatted, e.g. start with a drive letter, etc.
+ Cursor = 0;
+ while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
+ // Something's wrong if the path starts with "\..\", abort.
+ if (Cursor == 0)
+ break;
+
+ size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
+ if (PrevSlash == std::string::npos)
+ // Something's wrong, abort.
+ break;
+
+ Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
+ // The next ".." might be following the one we've just erased.
+ Cursor = PrevSlash;
+ }
+
+ // Remove all duplicate backslashes.
+ Cursor = 0;
+ while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
+ Filepath.erase(Cursor, 1);
+
+ return Filepath;
+}
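+
+ // As an illustrative example: with Dir = "D:\src" and
+ // Filename = "sub/./other\..\file.cpp", the canonicalization above yields
+ // "D:\src\sub\file.cpp": slashes become backslashes, "\.\" is dropped,
+ // "\other\..\" collapses to "\", and duplicate backslashes are merged.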
+
+unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
+ StringRef FullPath = getFullFilepath(F);
+ unsigned NextId = FileIdMap.size() + 1;
+ auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId));
+ if (Insertion.second) {
+ // We have to compute the full filepath and emit a .cv_file directive.
+ ArrayRef<uint8_t> ChecksumAsBytes;
+ FileChecksumKind CSKind = FileChecksumKind::None;
+ if (F->getChecksum()) {
+ std::string Checksum = fromHex(F->getChecksum()->Value);
+ void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
+ memcpy(CKMem, Checksum.data(), Checksum.size());
+ ChecksumAsBytes = ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(CKMem), Checksum.size());
+ switch (F->getChecksum()->Kind) {
+ case DIFile::CSK_MD5:
+ CSKind = FileChecksumKind::MD5;
+ break;
+ case DIFile::CSK_SHA1:
+ CSKind = FileChecksumKind::SHA1;
+ break;
+ case DIFile::CSK_SHA256:
+ CSKind = FileChecksumKind::SHA256;
+ break;
+ }
+ }
+ bool Success = OS.emitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
+ static_cast<unsigned>(CSKind));
+ (void)Success;
+ assert(Success && ".cv_file directive failed");
+ }
+ return Insertion.first->second;
+}
+
+CodeViewDebug::InlineSite &
+CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
+ const DISubprogram *Inlinee) {
+ auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
+ InlineSite *Site = &SiteInsertion.first->second;
+ if (SiteInsertion.second) {
+ unsigned ParentFuncId = CurFn->FuncId;
+ if (const DILocation *OuterIA = InlinedAt->getInlinedAt())
+ ParentFuncId =
+ getInlineSite(OuterIA, InlinedAt->getScope()->getSubprogram())
+ .SiteFuncId;
+
+ Site->SiteFuncId = NextFuncId++;
+ OS.emitCVInlineSiteIdDirective(
+ Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()),
+ InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc());
+ Site->Inlinee = Inlinee;
+ InlinedSubprograms.insert(Inlinee);
+ getFuncIdForSubprogram(Inlinee);
+ }
+ return *Site;
+}
+
+static StringRef getPrettyScopeName(const DIScope *Scope) {
+ StringRef ScopeName = Scope->getName();
+ if (!ScopeName.empty())
+ return ScopeName;
+
+ switch (Scope->getTag()) {
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ return "<unnamed-tag>";
+ case dwarf::DW_TAG_namespace:
+ return "`anonymous namespace'";
+ default:
+ return StringRef();
+ }
+}
+
+const DISubprogram *CodeViewDebug::collectParentScopeNames(
+ const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) {
+ const DISubprogram *ClosestSubprogram = nullptr;
+ while (Scope != nullptr) {
+ if (ClosestSubprogram == nullptr)
+ ClosestSubprogram = dyn_cast<DISubprogram>(Scope);
+
+ // If a type appears in a scope chain, make sure it gets emitted. The
+ // frontend will be responsible for deciding if this should be a forward
+ // declaration or a complete type.
+ if (const auto *Ty = dyn_cast<DICompositeType>(Scope))
+ DeferredCompleteTypes.push_back(Ty);
+
+ StringRef ScopeName = getPrettyScopeName(Scope);
+ if (!ScopeName.empty())
+ QualifiedNameComponents.push_back(ScopeName);
+ Scope = Scope->getScope();
+ }
+ return ClosestSubprogram;
+}
+
+static std::string formatNestedName(ArrayRef<StringRef> QualifiedNameComponents,
+ StringRef TypeName) {
+ std::string FullyQualifiedName;
+ for (StringRef QualifiedNameComponent :
+ llvm::reverse(QualifiedNameComponents)) {
+ FullyQualifiedName.append(std::string(QualifiedNameComponent));
+ FullyQualifiedName.append("::");
+ }
+ FullyQualifiedName.append(std::string(TypeName));
+ return FullyQualifiedName;
+}
+
+struct CodeViewDebug::TypeLoweringScope {
+ TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; }
+ ~TypeLoweringScope() {
+ // Don't decrement TypeEmissionLevel until after emitting deferred types, so
+ // inner TypeLoweringScopes don't attempt to emit deferred types.
+ if (CVD.TypeEmissionLevel == 1)
+ CVD.emitDeferredCompleteTypes();
+ --CVD.TypeEmissionLevel;
+ }
+ CodeViewDebug &CVD;
+};
+
+std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Scope,
+ StringRef Name) {
+ // Ensure types in the scope chain are emitted as soon as possible.
+ // Otherwise, this can create a situation where S_UDTs are emitted while
+ // looping in emitDebugInfoForUDTs.
+ TypeLoweringScope S(*this);
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ collectParentScopeNames(Scope, QualifiedNameComponents);
+ return formatNestedName(QualifiedNameComponents, Name);
+}
+
+std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) {
+ const DIScope *Scope = Ty->getScope();
+ return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
+}
+
+TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
+ // No scope means global scope and that uses the zero index.
+ //
+ // We also use zero index when the scope is a DISubprogram
+ // to suppress the emission of LF_STRING_ID for the function,
+ // which can trigger a link-time error with the linker in
+ // VS2019 version 16.11.2 or newer.
+ // Note, however, skipping the debug info emission for the DISubprogram
+ // is a temporary fix. The root issue here is that we need to figure out
+ // the proper way to encode a function nested in another function
+ // (as introduced by the Fortran 'contains' keyword) in CodeView.
+ if (!Scope || isa<DIFile>(Scope) || isa<DISubprogram>(Scope))
+ return TypeIndex();
+
+ assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
+
+ // Check if we've already translated this scope.
+ auto I = TypeIndices.find({Scope, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Build the fully qualified name of the scope.
+ std::string ScopeName = getFullyQualifiedName(Scope);
+ StringIdRecord SID(TypeIndex(), ScopeName);
+ auto TI = TypeTable.writeLeafType(SID);
+ return recordTypeIndexForDINode(Scope, TI);
+}
+
+static StringRef removeTemplateArgs(StringRef Name) {
+ // Remove template args from the display name. Assume that the template args
+ // are the last thing in the name.
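+  // For example, "make_pair<int, Foo<int>>" becomes "make_pair".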
+ if (Name.empty() || Name.back() != '>')
+ return Name;
+
+ int OpenBrackets = 0;
+ for (int i = Name.size() - 1; i >= 0; --i) {
+ if (Name[i] == '>')
+ ++OpenBrackets;
+ else if (Name[i] == '<') {
+ --OpenBrackets;
+ if (OpenBrackets == 0)
+ return Name.substr(0, i);
+ }
+ }
+ return Name;
+}
+
+TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
+ assert(SP);
+
+ // Check if we've already translated this subprogram.
+ auto I = TypeIndices.find({SP, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // The display name includes function template arguments. Drop them to match
+ // MSVC. We need to have the template arguments in the DISubprogram name
+ // because they are used in other symbol records, such as S_GPROC32_IDs.
+ StringRef DisplayName = removeTemplateArgs(SP->getName());
+
+ const DIScope *Scope = SP->getScope();
+ TypeIndex TI;
+ if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) {
+ // If the scope is a DICompositeType, then this must be a method. Member
+ // function types take some special handling, and require access to the
+ // subprogram.
+ TypeIndex ClassType = getTypeIndex(Class);
+ MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
+ DisplayName);
+ TI = TypeTable.writeLeafType(MFuncId);
+ } else {
+ // Otherwise, this must be a free function.
+ TypeIndex ParentScope = getScopeIndex(Scope);
+ FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
+ TI = TypeTable.writeLeafType(FuncId);
+ }
+
+ return recordTypeIndexForDINode(SP, TI);
+}
+
+static bool isNonTrivial(const DICompositeType *DCTy) {
+ return ((DCTy->getFlags() & DINode::FlagNonTrivial) == DINode::FlagNonTrivial);
+}
+
+static FunctionOptions
+getFunctionOptions(const DISubroutineType *Ty,
+ const DICompositeType *ClassTy = nullptr,
+ StringRef SPName = StringRef("")) {
+ FunctionOptions FO = FunctionOptions::None;
+ const DIType *ReturnTy = nullptr;
+ if (auto TypeArray = Ty->getTypeArray()) {
+ if (TypeArray.size())
+ ReturnTy = TypeArray[0];
+ }
+
+ // Add CxxReturnUdt option to functions that return nontrivial record types
+ // or methods that return record types.
+ if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy))
+ if (isNonTrivial(ReturnDCTy) || ClassTy)
+ FO |= FunctionOptions::CxxReturnUdt;
+
+  // DISubroutineType is unnamed. Use the DISubprogram's name, i.e. SPName,
+  // for the comparison.
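+  // For example, a method named "Foo" on a non-trivial class also named "Foo"
+  // is recognized as a constructor here.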
+ if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) {
+ FO |= FunctionOptions::Constructor;
+
+    // TODO: Set the FunctionOptions::ConstructorWithVirtualBases flag.
+
+ }
+ return FO;
+}
+
+TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
+ const DICompositeType *Class) {
+ // Always use the method declaration as the key for the function type. The
+ // method declaration contains the this adjustment.
+ if (SP->getDeclaration())
+ SP = SP->getDeclaration();
+ assert(!SP->getDeclaration() && "should use declaration as key");
+
+ // Key the MemberFunctionRecord into the map as {SP, Class}. It won't collide
+ // with the MemberFuncIdRecord, which is keyed in as {SP, nullptr}.
+ auto I = TypeIndices.find({SP, Class});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Make sure complete type info for the class is emitted *after* the member
+ // function type, as the complete class type is likely to reference this
+ // member function type.
+ TypeLoweringScope S(*this);
+ const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0;
+
+ FunctionOptions FO = getFunctionOptions(SP->getType(), Class, SP->getName());
+ TypeIndex TI = lowerTypeMemberFunction(
+ SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod, FO);
+ return recordTypeIndexForDINode(SP, TI, Class);
+}
+
+TypeIndex CodeViewDebug::recordTypeIndexForDINode(const DINode *Node,
+ TypeIndex TI,
+ const DIType *ClassTy) {
+ auto InsertResult = TypeIndices.insert({{Node, ClassTy}, TI});
+ (void)InsertResult;
+ assert(InsertResult.second && "DINode was already assigned a type index");
+ return TI;
+}
+
+unsigned CodeViewDebug::getPointerSizeInBytes() {
+ return MMI->getModule()->getDataLayout().getPointerSizeInBits() / 8;
+}
+
+void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
+ const LexicalScope *LS) {
+ if (const DILocation *InlinedAt = LS->getInlinedAt()) {
+ // This variable was inlined. Associate it with the InlineSite.
+ const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();
+ InlineSite &Site = getInlineSite(InlinedAt, Inlinee);
+ Site.InlinedLocals.emplace_back(std::move(Var));
+ } else {
+ // This variable goes into the corresponding lexical scope.
+ ScopeVariables[LS].emplace_back(std::move(Var));
+ }
+}
+
+static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
+ const DILocation *Loc) {
+ if (!llvm::is_contained(Locs, Loc))
+ Locs.push_back(Loc);
+}
+
+void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
+ const MachineFunction *MF) {
+ // Skip this instruction if it has the same location as the previous one.
+ if (!DL || DL == PrevInstLoc)
+ return;
+
+ const DIScope *Scope = DL->getScope();
+ if (!Scope)
+ return;
+
+ // Skip this line if it is longer than the maximum we can record.
+ LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
+ if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
+ LI.isNeverStepInto())
+ return;
+
+ ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
+ if (CI.getStartColumn() != DL.getCol())
+ return;
+
+ if (!CurFn->HaveLineInfo)
+ CurFn->HaveLineInfo = true;
+ unsigned FileId = 0;
+ if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile())
+ FileId = CurFn->LastFileId;
+ else
+ FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
+ PrevInstLoc = DL;
+
+ unsigned FuncId = CurFn->FuncId;
+ if (const DILocation *SiteLoc = DL->getInlinedAt()) {
+ const DILocation *Loc = DL.get();
+
+ // If this location was actually inlined from somewhere else, give it the ID
+ // of the inline call site.
+ FuncId =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram()).SiteFuncId;
+
+ // Ensure we have links in the tree of inline call sites.
+ bool FirstLoc = true;
+ while ((SiteLoc = Loc->getInlinedAt())) {
+ InlineSite &Site =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram());
+ if (!FirstLoc)
+ addLocIfNotPresent(Site.ChildSites, Loc);
+ FirstLoc = false;
+ Loc = SiteLoc;
+ }
+ addLocIfNotPresent(CurFn->ChildSites, Loc);
+ }
+
+ OS.emitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
+ /*PrologueEnd=*/false, /*IsStmt=*/false,
+ DL->getFilename(), SMLoc());
+}
+
+void CodeViewDebug::emitCodeViewMagicVersion() {
+ OS.emitValueToAlignment(Align(4));
+ OS.AddComment("Debug section magic");
+ OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
+}
+
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+ switch (DWLang) {
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C11:
+ return SourceLanguage::C;
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ return SourceLanguage::Cpp;
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ return SourceLanguage::Fortran;
+ case dwarf::DW_LANG_Pascal83:
+ return SourceLanguage::Pascal;
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ return SourceLanguage::Cobol;
+ case dwarf::DW_LANG_Java:
+ return SourceLanguage::Java;
+ case dwarf::DW_LANG_D:
+ return SourceLanguage::D;
+ case dwarf::DW_LANG_Swift:
+ return SourceLanguage::Swift;
+ case dwarf::DW_LANG_Rust:
+ return SourceLanguage::Rust;
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::ObjC;
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return SourceLanguage::ObjCpp;
+ default:
+ // There's no CodeView representation for this language, and CV doesn't
+ // have an "unknown" option for the language field, so we'll use MASM,
+ // as it's very low level.
+ return SourceLanguage::Masm;
+ }
+}
+
+void CodeViewDebug::beginModule(Module *M) {
+ // If module doesn't have named metadata anchors or COFF debug section
+ // is not available, skip any debug info related stuff.
+ if (!MMI->hasDebugInfo() ||
+ !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ Asm = nullptr;
+ return;
+ }
+
+ TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
+
+ // Get the current source language.
+ const MDNode *Node = *M->debug_compile_units_begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
+
+ collectGlobalVariableInfo();
+
+ // Check if we should emit type record hashes.
+ ConstantInt *GH =
+ mdconst::extract_or_null<ConstantInt>(M->getModuleFlag("CodeViewGHash"));
+ EmitDebugGlobalHashes = GH && !GH->isZero();
+}
+
+void CodeViewDebug::endModule() {
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ // The COFF .debug$S section consists of several subsections, each starting
+ // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+ // of the payload followed by the payload itself. The subsections are 4-byte
+ // aligned.
+
+ // Use the generic .debug$S section, and make a subsection for all the inlined
+ // subprograms.
+ switchToDebugSectionForSymbol(nullptr);
+
+ MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitObjName();
+ emitCompilerInformation();
+ endCVSubsection(CompilerInfo);
+
+ emitInlineeLinesSubsection();
+
+ // Emit per-function debug information.
+ for (auto &P : FnDebugInfo)
+ if (!P.first->isDeclarationForLinker())
+ emitDebugInfoForFunction(P.first, *P.second);
+
+ // Get types used by globals without emitting anything.
+ // This is meant to collect all static const data members so they can be
+ // emitted as globals.
+ collectDebugInfoForGlobals();
+
+ // Emit retained types.
+ emitDebugInfoForRetainedTypes();
+
+ // Emit global variable debug information.
+ setCurrentSubprogram(nullptr);
+ emitDebugInfoForGlobals();
+
+ // Switch back to the generic .debug$S section after potentially processing
+ // comdat symbol sections.
+ switchToDebugSectionForSymbol(nullptr);
+
+ // Emit UDT records for any types used by global variables.
+ if (!GlobalUDTs.empty()) {
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitDebugInfoForUDTs(GlobalUDTs);
+ endCVSubsection(SymbolsEnd);
+ }
+
+  // This subsection holds the file index to string table offset table.
+ OS.AddComment("File index to string table offset subsection");
+ OS.emitCVFileChecksumsDirective();
+
+ // This subsection holds the string table.
+ OS.AddComment("String table");
+ OS.emitCVStringTableDirective();
+
+ // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol
+ // subsection in the generic .debug$S section at the end. There is no
+ // particular reason for this ordering other than to match MSVC.
+ emitBuildInfo();
+
+ // Emit type information and hashes last, so that any types we translate while
+ // emitting function info are included.
+ emitTypeInformation();
+
+ if (EmitDebugGlobalHashes)
+ emitTypeGlobalHashes();
+
+ clear();
+}
+
+static void
+emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
+ unsigned MaxFixedRecordLength = 0xF00) {
+ // The maximum CV record length is 0xFF00. Most of the strings we emit appear
+ // after a fixed length portion of the record. The fixed length portion should
+ // always be less than 0xF00 (3840) bytes, so truncate the string so that the
+ // overall record size is less than the maximum allowed.
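+  // With the default MaxFixedRecordLength of 0xF00, names are truncated to
+  // 0xEFFF (61439) bytes before the null terminator is appended.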
+ SmallString<32> NullTerminatedString(
+ S.take_front(MaxRecordLength - MaxFixedRecordLength - 1));
+ NullTerminatedString.push_back('\0');
+ OS.emitBytes(NullTerminatedString);
+}
+
+void CodeViewDebug::emitTypeInformation() {
+ if (TypeTable.empty())
+ return;
+
+ // Start the .debug$T or .debug$P section with 0x4.
+ OS.switchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
+ emitCodeViewMagicVersion();
+
+ TypeTableCollection Table(TypeTable.records());
+ TypeVisitorCallbackPipeline Pipeline;
+
+  // Emit type records using the CodeView MCStreamer adapter.
+ CVMCAdapter CVMCOS(OS, Table);
+ TypeRecordMapping typeMapping(CVMCOS);
+ Pipeline.addCallbackToPipeline(typeMapping);
+
+ std::optional<TypeIndex> B = Table.getFirst();
+ while (B) {
+ // This will fail if the record data is invalid.
+ CVType Record = Table.getType(*B);
+
+ Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
+
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+
+ B = Table.getNext(*B);
+ }
+}
+
+void CodeViewDebug::emitTypeGlobalHashes() {
+ if (TypeTable.empty())
+ return;
+
+  // Start the .debug$H section with the version and hash algorithm, currently
+  // hardcoded to version 0 and the truncated BLAKE3 hash.
+ OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
+
+ OS.emitValueToAlignment(Align(4));
+ OS.AddComment("Magic");
+ OS.emitInt32(COFF::DEBUG_HASHES_SECTION_MAGIC);
+ OS.AddComment("Section Version");
+ OS.emitInt16(0);
+ OS.AddComment("Hash Algorithm");
+ OS.emitInt16(uint16_t(GlobalTypeHashAlg::BLAKE3));
+
+ TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
+ for (const auto &GHR : TypeTable.hashes()) {
+ if (OS.isVerboseAsm()) {
+      // Emit an EOL-comment describing which TypeIndex this hash corresponds
+      // to, as well as the stringified hash value.
+ SmallString<32> Comment;
+ raw_svector_ostream CommentOS(Comment);
+ CommentOS << formatv("{0:X+} [{1}]", TI.getIndex(), GHR);
+ OS.AddComment(Comment);
+ ++TI;
+ }
+ assert(GHR.Hash.size() == 8);
+ StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()),
+ GHR.Hash.size());
+ OS.emitBinaryData(S);
+ }
+}
+
+void CodeViewDebug::emitObjName() {
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_OBJNAME);
+
+ StringRef PathRef(Asm->TM.Options.ObjectFilenameForDebug);
+ llvm::SmallString<256> PathStore(PathRef);
+
+ if (PathRef.empty() || PathRef == "-") {
+ // Don't emit the filename if we're writing to stdout or to /dev/null.
+ PathRef = {};
+ } else {
+ PathRef = PathStore;
+ }
+
+ OS.AddComment("Signature");
+ OS.emitIntValue(0, 4);
+
+ OS.AddComment("Object name");
+ emitNullTerminatedSymbolName(OS, PathRef);
+
+ endSymbolRecord(CompilerEnd);
+}
+
+namespace {
+struct Version {
+ int Part[4];
+};
+} // end anonymous namespace
+
+// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out
+// the version number.
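+// For example, "clang version 15.0.7" yields {15, 0, 7, 0}.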
+static Version parseVersion(StringRef Name) {
+ Version V = {{0}};
+ int N = 0;
+ for (const char C : Name) {
+ if (isdigit(C)) {
+ V.Part[N] *= 10;
+ V.Part[N] += C - '0';
+ V.Part[N] =
+ std::min<int>(V.Part[N], std::numeric_limits<uint16_t>::max());
+ } else if (C == '.') {
+ ++N;
+ if (N >= 4)
+ return V;
+ } else if (N > 0)
+ return V;
+ }
+ return V;
+}
+
+void CodeViewDebug::emitCompilerInformation() {
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
+ uint32_t Flags = 0;
+
+ // The low byte of the flags indicates the source language.
+ Flags = CurrentSourceLanguage;
+ // TODO: Figure out which other flags need to be set.
+ if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
+ Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
+ }
+ using ArchType = llvm::Triple::ArchType;
+ ArchType Arch = Triple(MMI->getModule()->getTargetTriple()).getArch();
+ if (Asm->TM.Options.Hotpatch || Arch == ArchType::thumb ||
+ Arch == ArchType::aarch64) {
+ Flags |= static_cast<uint32_t>(CompileSym3Flags::HotPatch);
+ }
+
+ OS.AddComment("Flags and language");
+ OS.emitInt32(Flags);
+
+ OS.AddComment("CPUType");
+ OS.emitInt16(static_cast<uint64_t>(TheCPU));
+
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ StringRef CompilerVersion = CU->getProducer();
+ Version FrontVer = parseVersion(CompilerVersion);
+ OS.AddComment("Frontend version");
+ for (int N : FrontVer.Part) {
+ OS.emitInt16(N);
+ }
+
+ // Some Microsoft tools, like Binscope, expect a backend version number of at
+ // least 8.something, so we'll coerce the LLVM version into a form that
+ // guarantees it'll be big enough without really lying about the version.
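+  // For example, LLVM 17.0.6 becomes a backend major version of
+  // 1000 * 17 + 10 * 0 + 6 = 17006.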
+ int Major = 1000 * LLVM_VERSION_MAJOR +
+ 10 * LLVM_VERSION_MINOR +
+ LLVM_VERSION_PATCH;
+ // Clamp it for builds that use unusually large version numbers.
+ Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max());
+ Version BackVer = {{ Major, 0, 0, 0 }};
+ OS.AddComment("Backend version");
+ for (int N : BackVer.Part)
+ OS.emitInt16(N);
+
+ OS.AddComment("Null-terminated compiler version string");
+ emitNullTerminatedSymbolName(OS, CompilerVersion);
+
+ endSymbolRecord(CompilerEnd);
+}
+
+static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable,
+ StringRef S) {
+ StringIdRecord SIR(TypeIndex(0x0), S);
+ return TypeTable.writeLeafType(SIR);
+}
+
+static std::string flattenCommandLine(ArrayRef<std::string> Args,
+ StringRef MainFilename) {
+ std::string FlatCmdLine;
+ raw_string_ostream OS(FlatCmdLine);
+ bool PrintedOneArg = false;
+ if (!StringRef(Args[0]).contains("-cc1")) {
+ llvm::sys::printArg(OS, "-cc1", /*Quote=*/true);
+ PrintedOneArg = true;
+ }
+ for (unsigned i = 0; i < Args.size(); i++) {
+ StringRef Arg = Args[i];
+ if (Arg.empty())
+ continue;
+ if (Arg == "-main-file-name" || Arg == "-o") {
+ i++; // Skip this argument and next one.
+ continue;
+ }
+ if (Arg.startswith("-object-file-name") || Arg == MainFilename)
+ continue;
+    // Skip -fmessage-length for reproducibility.
+ if (Arg.startswith("-fmessage-length"))
+ continue;
+ if (PrintedOneArg)
+ OS << " ";
+ llvm::sys::printArg(OS, Arg, /*Quote=*/true);
+ PrintedOneArg = true;
+ }
+ OS.flush();
+ return FlatCmdLine;
+}
+
+void CodeViewDebug::emitBuildInfo() {
+ // First, make LF_BUILDINFO. It's a sequence of strings with various bits of
+ // build info. The known prefix is:
+ // - Absolute path of current directory
+ // - Compiler path
+ // - Main source file path, relative to CWD or absolute
+ // - Type server PDB file
+ // - Canonical compiler command line
+ // If frontend and backend compilation are separated (think llc or LTO), it's
+ // not clear if the compiler path should refer to the executable for the
+ // frontend or the backend. Leave it blank for now.
+ TypeIndex BuildInfoArgs[BuildInfoRecord::MaxArgs] = {};
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin(); // FIXME: Multiple CUs.
+ const auto *CU = cast<DICompileUnit>(Node);
+ const DIFile *MainSourceFile = CU->getFile();
+ BuildInfoArgs[BuildInfoRecord::CurrentDirectory] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory());
+ BuildInfoArgs[BuildInfoRecord::SourceFile] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename());
+ // FIXME: PDB is intentionally blank unless we implement /Zi type servers.
+ BuildInfoArgs[BuildInfoRecord::TypeServerPDB] =
+ getStringIdTypeIdx(TypeTable, "");
+ if (Asm->TM.Options.MCOptions.Argv0 != nullptr) {
+ BuildInfoArgs[BuildInfoRecord::BuildTool] =
+ getStringIdTypeIdx(TypeTable, Asm->TM.Options.MCOptions.Argv0);
+ BuildInfoArgs[BuildInfoRecord::CommandLine] = getStringIdTypeIdx(
+ TypeTable, flattenCommandLine(Asm->TM.Options.MCOptions.CommandLineArgs,
+ MainSourceFile->getFilename()));
+ }
+ BuildInfoRecord BIR(BuildInfoArgs);
+ TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR);
+
+ // Make a new .debug$S subsection for the S_BUILDINFO record, which points
+ // from the module symbols into the type stream.
+ MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+ MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO);
+ OS.AddComment("LF_BUILDINFO index");
+ OS.emitInt32(BuildInfoIndex.getIndex());
+ endSymbolRecord(BIEnd);
+ endCVSubsection(BISubsecEnd);
+}
+
+void CodeViewDebug::emitInlineeLinesSubsection() {
+ if (InlinedSubprograms.empty())
+ return;
+
+ OS.AddComment("Inlinee lines subsection");
+ MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines);
+
+ // We emit the checksum info for files. This is used by debuggers to
+ // determine if a pdb matches the source before loading it. Visual Studio,
+ // for instance, will display a warning that the breakpoints are not valid if
+ // the pdb does not match the source.
+ OS.AddComment("Inlinee lines signature");
+ OS.emitInt32(unsigned(InlineeLinesSignature::Normal));
+
+ for (const DISubprogram *SP : InlinedSubprograms) {
+ assert(TypeIndices.count({SP, nullptr}));
+ TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}];
+
+ OS.addBlankLine();
+ unsigned FileId = maybeRecordFile(SP->getFile());
+ OS.AddComment("Inlined function " + SP->getName() + " starts at " +
+ SP->getFilename() + Twine(':') + Twine(SP->getLine()));
+ OS.addBlankLine();
+ OS.AddComment("Type index of inlined function");
+ OS.emitInt32(InlineeIdx.getIndex());
+ OS.AddComment("Offset into filechecksum table");
+ OS.emitCVFileChecksumOffsetDirective(FileId);
+ OS.AddComment("Starting line number");
+ OS.emitInt32(SP->getLine());
+ }
+
+ endCVSubsection(InlineEnd);
+}
+
+void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
+ const DILocation *InlinedAt,
+ const InlineSite &Site) {
+ assert(TypeIndices.count({Site.Inlinee, nullptr}));
+ TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}];
+
+ // SymbolRecord
+ MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE);
+
+ OS.AddComment("PtrParent");
+ OS.emitInt32(0);
+ OS.AddComment("PtrEnd");
+ OS.emitInt32(0);
+ OS.AddComment("Inlinee type index");
+ OS.emitInt32(InlineeIdx.getIndex());
+
+ unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
+ unsigned StartLineNum = Site.Inlinee->getLine();
+
+ OS.emitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
+ FI.Begin, FI.End);
+
+ endSymbolRecord(InlineEnd);
+
+ emitLocalVariableList(FI, Site.InlinedLocals);
+
+ // Recurse on child inlined call sites before closing the scope.
+ for (const DILocation *ChildSite : Site.ChildSites) {
+ auto I = FI.InlineSites.find(ChildSite);
+ assert(I != FI.InlineSites.end() &&
+ "child site not in function inline site map");
+ emitInlinedCallSite(FI, ChildSite, I->second);
+ }
+
+ // Close the scope.
+ emitEndSymbolRecord(SymbolKind::S_INLINESITE_END);
+}
+
+void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
+ // If we have a symbol, it may be in a section that is COMDAT. If so, find the
+ // comdat key. A section may be comdat because of -ffunction-sections or
+ // because it is comdat in the IR.
+ MCSectionCOFF *GVSec =
+ GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr;
+ const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr;
+
+ MCSectionCOFF *DebugSec = cast<MCSectionCOFF>(
+ Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+ DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
+
+ OS.switchSection(DebugSec);
+
+ // Emit the magic version number if this is the first time we've switched to
+ // this section.
+ if (ComdatDebugSections.insert(DebugSec).second)
+ emitCodeViewMagicVersion();
+}
+
+// Emit an S_THUNK32/S_END symbol pair for a thunk routine.
+// The only supported thunk ordinal is currently the standard type.
+void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
+ FunctionInfo &FI,
+ const MCSymbol *Fn) {
+ std::string FuncName =
+ std::string(GlobalValue::dropLLVMManglingEscape(GV->getName()));
+ const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind.
+
+ OS.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+
+ // Emit S_THUNK32
+ MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32);
+ OS.AddComment("PtrParent");
+ OS.emitInt32(0);
+ OS.AddComment("PtrEnd");
+ OS.emitInt32(0);
+ OS.AddComment("PtrNext");
+ OS.emitInt32(0);
+ OS.AddComment("Thunk section relative address");
+ OS.emitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.AddComment("Thunk section index");
+ OS.emitCOFFSectionIndex(Fn);
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2);
+ OS.AddComment("Ordinal");
+ OS.emitInt8(unsigned(ordinal));
+ OS.AddComment("Function name");
+ emitNullTerminatedSymbolName(OS, FuncName);
+ // Additional fields specific to the thunk ordinal would go here.
+ endSymbolRecord(ThunkRecordEnd);
+
+ // Local variables/inlined routines are purposely omitted here. The point of
+ // marking this as a thunk is so Visual Studio will NOT stop in this routine.
+
+ // Emit S_PROC_ID_END
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
+
+ endCVSubsection(SymbolsEnd);
+}
+
+void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
+ FunctionInfo &FI) {
+ // For each function there is a separate subsection which holds the PC to
+ // file:line table.
+ const MCSymbol *Fn = Asm->getSymbol(GV);
+ assert(Fn);
+
+  // Switch to the comdat section, if appropriate.
+ switchToDebugSectionForSymbol(Fn);
+
+ std::string FuncName;
+ auto *SP = GV->getSubprogram();
+ assert(SP);
+ setCurrentSubprogram(SP);
+
+ if (SP->isThunk()) {
+ emitDebugInfoForThunk(GV, FI, Fn);
+ return;
+ }
+
+ // If we have a display name, build the fully qualified name by walking the
+ // chain of scopes.
+ if (!SP->getName().empty())
+ FuncName = getFullyQualifiedName(SP->getScope(), SP->getName());
+
+ // If our DISubprogram name is empty, use the mangled name.
+ if (FuncName.empty())
+ FuncName = std::string(GlobalValue::dropLLVMManglingEscape(GV->getName()));
+
+ // Emit FPO data, but only on 32-bit x86. No other platforms use it.
+ if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86)
+ OS.emitCVFPOData(Fn);
+
+ // Emit a symbol subsection, required by VS2012+ to find function boundaries.
+ OS.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+ {
+ SymbolKind ProcKind = GV->hasLocalLinkage() ? SymbolKind::S_LPROC32_ID
+ : SymbolKind::S_GPROC32_ID;
+ MCSymbol *ProcRecordEnd = beginSymbolRecord(ProcKind);
+
+ // These fields are filled in by tools like CVPACK which run after the fact.
+ OS.AddComment("PtrParent");
+ OS.emitInt32(0);
+ OS.AddComment("PtrEnd");
+ OS.emitInt32(0);
+ OS.AddComment("PtrNext");
+ OS.emitInt32(0);
+    // This is the important bit that tells the debugger where the function
+    // code is located and what its size is:
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4);
+ OS.AddComment("Offset after prologue");
+ OS.emitInt32(0);
+ OS.AddComment("Offset before epilogue");
+ OS.emitInt32(0);
+ OS.AddComment("Function type index");
+ OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex());
+ OS.AddComment("Function section relative address");
+ OS.emitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.AddComment("Function section index");
+ OS.emitCOFFSectionIndex(Fn);
+ OS.AddComment("Flags");
+ ProcSymFlags ProcFlags = ProcSymFlags::HasOptimizedDebugInfo;
+ if (FI.HasFramePointer)
+ ProcFlags |= ProcSymFlags::HasFP;
+ if (GV->hasFnAttribute(Attribute::NoReturn))
+ ProcFlags |= ProcSymFlags::IsNoReturn;
+ if (GV->hasFnAttribute(Attribute::NoInline))
+ ProcFlags |= ProcSymFlags::IsNoInline;
+ OS.emitInt8(static_cast<uint8_t>(ProcFlags));
+ // Emit the function display name as a null-terminated string.
+ OS.AddComment("Function name");
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, FuncName);
+ endSymbolRecord(ProcRecordEnd);
+
+ MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC);
+ // Subtract out the CSR size since MSVC excludes that and we include it.
+ OS.AddComment("FrameSize");
+ OS.emitInt32(FI.FrameSize - FI.CSRSize);
+ OS.AddComment("Padding");
+ OS.emitInt32(0);
+ OS.AddComment("Offset of padding");
+ OS.emitInt32(0);
+ OS.AddComment("Bytes of callee saved registers");
+ OS.emitInt32(FI.CSRSize);
+ OS.AddComment("Exception handler offset");
+ OS.emitInt32(0);
+ OS.AddComment("Exception handler section");
+ OS.emitInt16(0);
+ OS.AddComment("Flags (defines frame register)");
+ OS.emitInt32(uint32_t(FI.FrameProcOpts));
+ endSymbolRecord(FrameProcEnd);
+
+ emitLocalVariableList(FI, FI.Locals);
+ emitGlobalVariableList(FI.Globals);
+ emitLexicalBlockList(FI.ChildBlocks, FI);
+
+ // Emit inlined call site information. Only emit functions inlined directly
+ // into the parent function. We'll emit the other sites recursively as part
+ // of their parent inline site.
+ for (const DILocation *InlinedAt : FI.ChildSites) {
+ auto I = FI.InlineSites.find(InlinedAt);
+ assert(I != FI.InlineSites.end() &&
+ "child site not in function inline site map");
+ emitInlinedCallSite(FI, InlinedAt, I->second);
+ }
+
+ for (auto Annot : FI.Annotations) {
+ MCSymbol *Label = Annot.first;
+ MDTuple *Strs = cast<MDTuple>(Annot.second);
+ MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION);
+ OS.emitCOFFSecRel32(Label, /*Offset=*/0);
+ // FIXME: Make sure we don't overflow the max record size.
+ OS.emitCOFFSectionIndex(Label);
+ OS.emitInt16(Strs->getNumOperands());
+ for (Metadata *MD : Strs->operands()) {
+      // MDStrings are null terminated, so we can use emitBytes and get the
+ // nice .asciz directive.
+ StringRef Str = cast<MDString>(MD)->getString();
+ assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
+ OS.emitBytes(StringRef(Str.data(), Str.size() + 1));
+ }
+ endSymbolRecord(AnnotEnd);
+ }
+
+ for (auto HeapAllocSite : FI.HeapAllocSites) {
+ const MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
+ const MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
+ const DIType *DITy = std::get<2>(HeapAllocSite);
+ MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
+ OS.AddComment("Call site offset");
+ OS.emitCOFFSecRel32(BeginLabel, /*Offset=*/0);
+ OS.AddComment("Call site section index");
+ OS.emitCOFFSectionIndex(BeginLabel);
+ OS.AddComment("Call instruction length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.AddComment("Type index");
+ OS.emitInt32(getCompleteTypeIndex(DITy).getIndex());
+ endSymbolRecord(HeapAllocEnd);
+ }
+
+ if (SP != nullptr)
+ emitDebugInfoForUDTs(LocalUDTs);
+
+ // We're done with this function.
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
+ }
+ endCVSubsection(SymbolsEnd);
+
+ // We have an assembler directive that takes care of the whole line table.
+ OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End);
+}
+
+CodeViewDebug::LocalVarDef
+CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
+ LocalVarDef DR;
+ DR.InMemory = -1;
+ DR.DataOffset = Offset;
+ assert(DR.DataOffset == Offset && "truncation");
+ DR.IsSubfield = 0;
+ DR.StructOffset = 0;
+ DR.CVRegister = CVRegister;
+ return DR;
+}
+
+void CodeViewDebug::collectVariableInfoFromMFTable(
+ DenseSet<InlinedEntity> &Processed) {
+ const MachineFunction &MF = *Asm->MF;
+ const TargetSubtargetInfo &TSI = MF.getSubtarget();
+ const TargetFrameLowering *TFI = TSI.getFrameLowering();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+
+ for (const MachineFunction::VariableDbgInfo &VI :
+ MF.getInStackSlotVariableDbgInfo()) {
+ if (!VI.Var)
+ continue;
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ Processed.insert(InlinedEntity(VI.Var, VI.Loc->getInlinedAt()));
+ LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
+
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ // If the variable has an attached offset expression, extract it.
+ // FIXME: Try to handle DW_OP_deref as well.
+ int64_t ExprOffset = 0;
+ bool Deref = false;
+ if (VI.Expr) {
+ // If there is one DW_OP_deref element, use offset of 0 and keep going.
+ if (VI.Expr->getNumElements() == 1 &&
+ VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref)
+ Deref = true;
+ else if (!VI.Expr->extractIfOffset(ExprOffset))
+ continue;
+ }
+
+ // Get the frame register used and the offset.
+ Register FrameReg;
+ StackOffset FrameOffset =
+ TFI->getFrameIndexReference(*Asm->MF, VI.getStackSlot(), FrameReg);
+ uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
+
+ assert(!FrameOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+
+ // Calculate the label ranges.
+ LocalVarDef DefRange =
+ createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset);
+
+ LocalVariable Var;
+ Var.DIVar = VI.Var;
+
+ for (const InsnRange &Range : Scope->getRanges()) {
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ End = End ? End : Asm->getFunctionEnd();
+ Var.DefRanges[DefRange].emplace_back(Begin, End);
+ }
+
+ if (Deref)
+ Var.UseReferenceType = true;
+
+ recordLocalVariable(std::move(Var), Scope);
+ }
+}
+
+static bool canUseReferenceType(const DbgVariableLocation &Loc) {
+ return !Loc.LoadChain.empty() && Loc.LoadChain.back() == 0;
+}
+
+static bool needsReferenceType(const DbgVariableLocation &Loc) {
+ return Loc.LoadChain.size() == 2 && Loc.LoadChain.back() == 0;
+}
+
+void CodeViewDebug::calculateRanges(
+ LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries) {
+ const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
+
+ // Calculate the definition ranges.
+ for (auto I = Entries.begin(), E = Entries.end(); I != E; ++I) {
+ const auto &Entry = *I;
+ if (!Entry.isDbgValue())
+ continue;
+ const MachineInstr *DVInst = Entry.getInstr();
+ assert(DVInst->isDebugValue() && "Invalid History entry");
+ // FIXME: Find a way to represent constant variables, since they are
+ // relatively common.
+ std::optional<DbgVariableLocation> Location =
+ DbgVariableLocation::extractFromMachineInstruction(*DVInst);
+    if (!Location) {
+ // When we don't have a location this is usually because LLVM has
+ // transformed it into a constant and we only have an llvm.dbg.value. We
+ // can't represent these well in CodeView since S_LOCAL only works on
+ // registers and memory locations. Instead, we will pretend this to be a
+ // constant value to at least have it show up in the debugger.
+ auto Op = DVInst->getDebugOperand(0);
+ if (Op.isImm())
+ Var.ConstantValue = APSInt(APInt(64, Op.getImm()), false);
+ continue;
+ }
+
+    // CodeView can only express variables in registers and variables in
+    // memory at a constant offset from a register. However, for variables
+    // passed indirectly by pointer, it is common for that pointer to be
+    // spilled to a stack location. For the special case of a load at a
+    // nonzero offset followed by a zero-offset load (a pointer spilled to
+    // the stack), we change the local variable's type from a value type to
+    // a reference type. This tricks the debugger into doing the load for us.
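+    // For example, a LoadChain of {16, 0} (load the pointer at [reg+16], then
+    // load through it at offset 0) is instead described as a reference
+    // variable located at [reg+16].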
+ if (Var.UseReferenceType) {
+ // We're using a reference type. Drop the last zero offset load.
+ if (canUseReferenceType(*Location))
+ Location->LoadChain.pop_back();
+ else
+ continue;
+ } else if (needsReferenceType(*Location)) {
+ // This location can't be expressed without switching to a reference type.
+ // Start over using that.
+ Var.UseReferenceType = true;
+ Var.DefRanges.clear();
+ calculateRanges(Var, Entries);
+ return;
+ }
+
+    // We can only handle a register or a single offset load of a register.
+ if (Location->Register == 0 || Location->LoadChain.size() > 1)
+ continue;
+
+ LocalVarDef DR;
+ DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
+ DR.InMemory = !Location->LoadChain.empty();
+ DR.DataOffset =
+ !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
+ if (Location->FragmentInfo) {
+ DR.IsSubfield = true;
+ DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
+ } else {
+ DR.IsSubfield = false;
+ DR.StructOffset = 0;
+ }
+
+ // Compute the label range.
+ const MCSymbol *Begin = getLabelBeforeInsn(Entry.getInstr());
+ const MCSymbol *End;
+ if (Entry.getEndIndex() != DbgValueHistoryMap::NoEntry) {
+ auto &EndingEntry = Entries[Entry.getEndIndex()];
+ End = EndingEntry.isDbgValue()
+ ? getLabelBeforeInsn(EndingEntry.getInstr())
+ : getLabelAfterInsn(EndingEntry.getInstr());
+ } else
+ End = Asm->getFunctionEnd();
+
+ // If the last range end is our begin, just extend the last range.
+ // Otherwise make a new range.
+ SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R =
+ Var.DefRanges[DR];
+ if (!R.empty() && R.back().second == Begin)
+ R.back().second = End;
+ else
+ R.emplace_back(Begin, End);
+
+ // FIXME: Do more range combining.
+ }
+}
+
+void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
+ DenseSet<InlinedEntity> Processed;
+ // Grab the variable info that was squirreled away in the MMI side-table.
+ collectVariableInfoFromMFTable(Processed);
+
+ for (const auto &I : DbgValues) {
+ InlinedEntity IV = I.first;
+ if (Processed.count(IV))
+ continue;
+ const DILocalVariable *DIVar = cast<DILocalVariable>(IV.first);
+ const DILocation *InlinedAt = IV.second;
+
+ // Instruction ranges, specifying where IV is accessible.
+ const auto &Entries = I.second;
+
+ LexicalScope *Scope = nullptr;
+ if (InlinedAt)
+ Scope = LScopes.findInlinedScope(DIVar->getScope(), InlinedAt);
+ else
+ Scope = LScopes.findLexicalScope(DIVar->getScope());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ LocalVariable Var;
+ Var.DIVar = DIVar;
+
+ calculateRanges(Var, Entries);
+ recordLocalVariable(std::move(Var), Scope);
+ }
+}
+
+void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
+ const TargetSubtargetInfo &TSI = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const Function &GV = MF->getFunction();
+ auto Insertion = FnDebugInfo.insert({&GV, std::make_unique<FunctionInfo>()});
+ assert(Insertion.second && "function already has info");
+ CurFn = Insertion.first->second.get();
+ CurFn->FuncId = NextFuncId++;
+ CurFn->Begin = Asm->getFunctionBegin();
+
+ // The S_FRAMEPROC record reports the stack size, and how many bytes of
+ // callee-saved registers were used. For targets that don't use a PUSH
+ // instruction (AArch64), this will be zero.
+ CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters();
+ CurFn->FrameSize = MFI.getStackSize();
+ CurFn->OffsetAdjustment = MFI.getOffsetAdjustment();
+ CurFn->HasStackRealignment = TRI->hasStackRealignment(*MF);
+
+ // For this function S_FRAMEPROC record, figure out which codeview register
+ // will be the frame pointer.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::None; // None.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::None; // None.
+ if (CurFn->FrameSize > 0) {
+ if (!TSI.getFrameLowering()->hasFP(*MF)) {
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ CurFn->HasFramePointer = true;
+ // If there is an FP, parameters are always relative to it.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr;
+ if (CurFn->HasStackRealignment) {
+ // If the stack needs realignment, locals are relative to SP or VFRAME.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // Otherwise, locals are relative to EBP, and we probably have VLAs or
+ // other stack adjustments.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::FramePtr;
+ }
+ }
+ }
+
+ // Compute other frame procedure options.
+ FrameProcedureOptions FPO = FrameProcedureOptions::None;
+ if (MFI.hasVarSizedObjects())
+ FPO |= FrameProcedureOptions::HasAlloca;
+ if (MF->exposesReturnsTwice())
+ FPO |= FrameProcedureOptions::HasSetJmp;
+ // FIXME: Set HasLongJmp if we ever track that info.
+ if (MF->hasInlineAsm())
+ FPO |= FrameProcedureOptions::HasInlineAssembly;
+ if (GV.hasPersonalityFn()) {
+ if (isAsynchronousEHPersonality(
+ classifyEHPersonality(GV.getPersonalityFn())))
+ FPO |= FrameProcedureOptions::HasStructuredExceptionHandling;
+ else
+ FPO |= FrameProcedureOptions::HasExceptionHandling;
+ }
+ if (GV.hasFnAttribute(Attribute::InlineHint))
+ FPO |= FrameProcedureOptions::MarkedInline;
+ if (GV.hasFnAttribute(Attribute::Naked))
+ FPO |= FrameProcedureOptions::Naked;
+ if (MFI.hasStackProtectorIndex()) {
+ FPO |= FrameProcedureOptions::SecurityChecks;
+ if (GV.hasFnAttribute(Attribute::StackProtectStrong) ||
+ GV.hasFnAttribute(Attribute::StackProtectReq)) {
+ FPO |= FrameProcedureOptions::StrictSecurityChecks;
+ }
+ } else if (!GV.hasStackProtectorFnAttr()) {
+ // __declspec(safebuffers) disables stack guards.
+ FPO |= FrameProcedureOptions::SafeBuffers;
+ }
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
+ if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
+ !GV.hasOptSize() && !GV.hasOptNone())
+ FPO |= FrameProcedureOptions::OptimizedForSpeed;
+ if (GV.hasProfileData()) {
+ FPO |= FrameProcedureOptions::ValidProfileCounts;
+ FPO |= FrameProcedureOptions::ProfileGuidedOptimization;
+ }
+ // FIXME: Set GuardCfg when it is implemented.
+ CurFn->FrameProcOpts = FPO;
+
+ OS.emitCVFuncIdDirective(CurFn->FuncId);
+
+  // Find the end of the function prologue. The first known non-DBG_VALUE,
+  // non-frame-setup location marks the beginning of the function body.
+  // FIXME: is there a simpler way to do this? Can we just search
+  // for the first instruction of the function, not the last of the prologue?
+ DebugLoc PrologEndLoc;
+ bool EmptyPrologue = true;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ MI.getDebugLoc()) {
+ PrologEndLoc = MI.getDebugLoc();
+ break;
+ } else if (!MI.isMetaInstruction()) {
+ EmptyPrologue = false;
+ }
+ }
+ }
+
+ // Record beginning of function if we have a non-empty prologue.
+ if (PrologEndLoc && !EmptyPrologue) {
+ DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
+ maybeRecordLocation(FnStartDL, MF);
+ }
+
+ // Find heap alloc sites and emit labels around them.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MI.getHeapAllocMarker()) {
+ requestLabelBeforeInsn(&MI);
+ requestLabelAfterInsn(&MI);
+ }
+ }
+ }
+}
+
+static bool shouldEmitUdt(const DIType *T) {
+ if (!T)
+ return false;
+
+ // MSVC does not emit UDTs for typedefs that are scoped to classes.
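+  // For example, the nested typedef "T" in "struct S { typedef int T; };"
+  // gets no UDT record.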
+ if (T->getTag() == dwarf::DW_TAG_typedef) {
+ if (DIScope *Scope = T->getScope()) {
+ switch (Scope->getTag()) {
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_union_type:
+ return false;
+ default:
+        // Do nothing.
+        break;
+ }
+ }
+ }
+
+ while (true) {
+ if (!T || T->isForwardDecl())
+ return false;
+
+ const DIDerivedType *DT = dyn_cast<DIDerivedType>(T);
+ if (!DT)
+ return true;
+ T = DT->getBaseType();
+ }
+ return true;
+}
+
+void CodeViewDebug::addToUDTs(const DIType *Ty) {
+ // Don't record empty UDTs.
+ if (Ty->getName().empty())
+ return;
+ if (!shouldEmitUdt(Ty))
+ return;
+
+ SmallVector<StringRef, 5> ParentScopeNames;
+ const DISubprogram *ClosestSubprogram =
+ collectParentScopeNames(Ty->getScope(), ParentScopeNames);
+
+ std::string FullyQualifiedName =
+ formatNestedName(ParentScopeNames, getPrettyScopeName(Ty));
+
+ if (ClosestSubprogram == nullptr) {
+ GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
+ } else if (ClosestSubprogram == CurrentSubprogram) {
+ LocalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
+ }
+
+ // TODO: What if the ClosestSubprogram is neither null or the current
+ // subprogram? Currently, the UDT just gets dropped on the floor.
+ //
+ // The current behavior is not desirable. To get maximal fidelity, we would
+ // need to perform all type translation before beginning emission of .debug$S
+ // and then make LocalUDTs a member of FunctionInfo
+}
+
+TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
+ // Generic dispatch for lowering an unknown type.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_array_type:
+ return lowerTypeArray(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_typedef:
+ return lowerTypeAlias(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_base_type:
+ return lowerTypeBasic(cast<DIBasicType>(Ty));
+ case dwarf::DW_TAG_pointer_type:
+ if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type")
+ return lowerTypeVFTableShape(cast<DIDerivedType>(Ty));
+ [[fallthrough]];
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ return lowerTypePointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_ptr_to_member_type:
+ return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ // TODO: add support for DW_TAG_atomic_type here
+ return lowerTypeModifier(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_subroutine_type:
+ if (ClassTy) {
+ // The member function type of a member function pointer has no
+ // ThisAdjustment.
+ return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy,
+ /*ThisAdjustment=*/0,
+ /*IsStaticMethod=*/false);
+ }
+ return lowerTypeFunction(cast<DISubroutineType>(Ty));
+ case dwarf::DW_TAG_enumeration_type:
+ return lowerTypeEnum(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ return lowerTypeClass(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_union_type:
+ return lowerTypeUnion(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_string_type:
+ return lowerTypeString(cast<DIStringType>(Ty));
+ case dwarf::DW_TAG_unspecified_type:
+ if (Ty->getName() == "decltype(nullptr)")
+ return TypeIndex::NullptrT();
+ return TypeIndex::None();
+ default:
+ // Use the null type index.
+ return TypeIndex();
+ }
+}
+
+TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
+ TypeIndex UnderlyingTypeIndex = getTypeIndex(Ty->getBaseType());
+ StringRef TypeName = Ty->getName();
+
+ addToUDTs(Ty);
+
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) &&
+ TypeName == "HRESULT")
+ return TypeIndex(SimpleTypeKind::HResult);
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::UInt16Short) &&
+ TypeName == "wchar_t")
+ return TypeIndex(SimpleTypeKind::WideCharacter);
+
+ return UnderlyingTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
+ const DIType *ElementType = Ty->getBaseType();
+ TypeIndex ElementTypeIndex = getTypeIndex(ElementType);
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = getPointerSizeInBytes() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ uint64_t ElementSize = getBaseTypeSize(ElementType) / 8;
+
+ // Add subranges to array type.
+ DINodeArray Elements = Ty->getElements();
+ for (int i = Elements.size() - 1; i >= 0; --i) {
+ const DINode *Element = Elements[i];
+ assert(Element->getTag() == dwarf::DW_TAG_subrange_type);
+
+ const DISubrange *Subrange = cast<DISubrange>(Element);
+ int64_t Count = -1;
+
+ // If Subrange has a Count field, use it.
+    // Otherwise, if it has an upperbound, use (upperbound - lowerbound + 1),
+ // where lowerbound is from the LowerBound field of the Subrange,
+ // or the language default lowerbound if that field is unspecified.
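+    // For example, a subrange with lowerbound 2 and upperbound 5 yields a
+    // count of 5 - 2 + 1 = 4.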
+ if (auto *CI = dyn_cast_if_present<ConstantInt *>(Subrange->getCount()))
+ Count = CI->getSExtValue();
+ else if (auto *UI = dyn_cast_if_present<ConstantInt *>(
+ Subrange->getUpperBound())) {
+ // Fortran uses 1 as the default lowerbound; other languages use 0.
+ int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
+ auto *LI = dyn_cast_if_present<ConstantInt *>(Subrange->getLowerBound());
+ Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
+ Count = UI->getSExtValue() - Lowerbound + 1;
+ }
+
+ // Forward declarations of arrays without a size and VLAs use a count of -1.
+ // Emit a count of zero in these cases to match what MSVC does for arrays
+ // without a size. MSVC doesn't support VLAs, so it's not clear what we
+ // should do for them even if we could distinguish them.
+ if (Count == -1)
+ Count = 0;
+
+ // Update the element size and element type index for subsequent subranges.
+ ElementSize *= Count;
+
+ // If this is the outermost array, use the size from the array. It will be
+ // more accurate if we had a VLA or an incomplete element type size.
+ uint64_t ArraySize =
+ (i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize;
+
+ StringRef Name = (i == 0) ? Ty->getName() : "";
+ ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name);
+ ElementTypeIndex = TypeTable.writeLeafType(AR);
+ }
+
+ return ElementTypeIndex;
+}
+
+// This function lowers a Fortran character type (DIStringType).
+// Note that it handles only the character*n variant (using SizeInBits
+// field in DIString to describe the type size) at the moment.
+// Other variants (leveraging the StringLength and StringLengthExp
+// fields in DIStringType) remain TBD.
+TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) {
+ TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter);
+ uint64_t ArraySize = Ty->getSizeInBits() >> 3;
+ StringRef Name = Ty->getName();
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = getPointerSizeInBytes() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ // Create a type of character array of ArraySize.
+ ArrayRecord AR(CharType, IndexType, ArraySize, Name);
+
+ return TypeTable.writeLeafType(AR);
+}
+
+TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
+ TypeIndex Index;
+ dwarf::TypeKind Kind;
+ uint32_t ByteSize;
+
+ Kind = static_cast<dwarf::TypeKind>(Ty->getEncoding());
+ ByteSize = Ty->getSizeInBits() / 8;
+
+ SimpleTypeKind STK = SimpleTypeKind::None;
+ switch (Kind) {
+ case dwarf::DW_ATE_address:
+ // FIXME: Translate
+ break;
+ case dwarf::DW_ATE_boolean:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Boolean8; break;
+ case 2: STK = SimpleTypeKind::Boolean16; break;
+ case 4: STK = SimpleTypeKind::Boolean32; break;
+ case 8: STK = SimpleTypeKind::Boolean64; break;
+ case 16: STK = SimpleTypeKind::Boolean128; break;
+ }
+ break;
+ case dwarf::DW_ATE_complex_float:
+ // The CodeView size for a complex represents the size of
+ // an individual component.
+ switch (ByteSize) {
+ case 4: STK = SimpleTypeKind::Complex16; break;
+ case 8: STK = SimpleTypeKind::Complex32; break;
+ case 16: STK = SimpleTypeKind::Complex64; break;
+ case 20: STK = SimpleTypeKind::Complex80; break;
+ case 32: STK = SimpleTypeKind::Complex128; break;
+ }
+ break;
+ case dwarf::DW_ATE_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Float16; break;
+ case 4: STK = SimpleTypeKind::Float32; break;
+ case 6: STK = SimpleTypeKind::Float48; break;
+ case 8: STK = SimpleTypeKind::Float64; break;
+ case 10: STK = SimpleTypeKind::Float80; break;
+ case 16: STK = SimpleTypeKind::Float128; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::SignedCharacter; break;
+ case 2: STK = SimpleTypeKind::Int16Short; break;
+ case 4: STK = SimpleTypeKind::Int32; break;
+ case 8: STK = SimpleTypeKind::Int64Quad; break;
+ case 16: STK = SimpleTypeKind::Int128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_unsigned:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::UnsignedCharacter; break;
+ case 2: STK = SimpleTypeKind::UInt16Short; break;
+ case 4: STK = SimpleTypeKind::UInt32; break;
+ case 8: STK = SimpleTypeKind::UInt64Quad; break;
+ case 16: STK = SimpleTypeKind::UInt128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_UTF:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Character8; break;
+ case 2: STK = SimpleTypeKind::Character16; break;
+ case 4: STK = SimpleTypeKind::Character32; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::SignedCharacter;
+ break;
+ case dwarf::DW_ATE_unsigned_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::UnsignedCharacter;
+ break;
+ default:
+ break;
+ }
+
+ // Apply some fixups based on the source-level type name.
+ // Include some amount of canonicalization from an old naming scheme Clang
+ // used to use for integer types (in an outdated effort to be compatible with
+ // GCC's debug info/GDB's behavior, which has since been addressed).
+ if (STK == SimpleTypeKind::Int32 &&
+ (Ty->getName() == "long int" || Ty->getName() == "long"))
+ STK = SimpleTypeKind::Int32Long;
+ if (STK == SimpleTypeKind::UInt32 && (Ty->getName() == "long unsigned int" ||
+ Ty->getName() == "unsigned long"))
+ STK = SimpleTypeKind::UInt32Long;
+ if (STK == SimpleTypeKind::UInt16Short &&
+ (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
+ STK = SimpleTypeKind::WideCharacter;
+ if ((STK == SimpleTypeKind::SignedCharacter ||
+ STK == SimpleTypeKind::UnsignedCharacter) &&
+ Ty->getName() == "char")
+ STK = SimpleTypeKind::NarrowCharacter;
+
+ return TypeIndex(STK);
+}
+
+TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty,
+ PointerOptions PO) {
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
+
+ // Pointers to simple types without any options can use SimpleTypeMode, rather
+ // than having a dedicated pointer type record.
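+  // For example, a plain 64-bit "int *" is encoded as the Int32 simple kind
+  // with the NearPointer64 mode instead of a separate LF_POINTER record.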
+ if (PointeeTI.isSimple() && PO == PointerOptions::None &&
+ PointeeTI.getSimpleMode() == SimpleTypeMode::Direct &&
+ Ty->getTag() == dwarf::DW_TAG_pointer_type) {
+ SimpleTypeMode Mode = Ty->getSizeInBits() == 64
+ ? SimpleTypeMode::NearPointer64
+ : SimpleTypeMode::NearPointer32;
+ return TypeIndex(PointeeTI.getSimpleKind(), Mode);
+ }
+
+ PointerKind PK =
+ Ty->getSizeInBits() == 64 ? PointerKind::Near64 : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ switch (Ty->getTag()) {
+ default: llvm_unreachable("not a pointer tag type");
+ case dwarf::DW_TAG_pointer_type:
+ PM = PointerMode::Pointer;
+ break;
+ case dwarf::DW_TAG_reference_type:
+ PM = PointerMode::LValueReference;
+ break;
+ case dwarf::DW_TAG_rvalue_reference_type:
+ PM = PointerMode::RValueReference;
+ break;
+ }
+
+ if (Ty->isObjectPointer())
+ PO |= PointerOptions::Const;
+
+ PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
+ return TypeTable.writeLeafType(PR);
+}
+
+static PointerToMemberRepresentation
+translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {
+ // SizeInBytes being zero generally implies that the member pointer type was
+ // incomplete, which can happen if it is part of a function prototype. In this
+ // case, use the unknown model instead of the general model.
+ if (IsPMF) {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralFunction;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceFunction;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceFunction;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceFunction;
+ }
+ } else {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralData;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceData;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceData;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceData;
+ }
+ }
+ llvm_unreachable("invalid ptr to member representation");
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty,
+ PointerOptions PO) {
+ assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
+ TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
+ TypeIndex PointeeTI =
+ getTypeIndex(Ty->getBaseType(), IsPMF ? Ty->getClassType() : nullptr);
+ PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
+ : PointerMode::PointerToDataMember;
+
+ assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big");
+ uint8_t SizeInBytes = Ty->getSizeInBits() / 8;
+ MemberPointerInfo MPI(
+ ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
+ PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
+ return TypeTable.writeLeafType(PR);
+}
+
+/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
+/// have a translation, use the NearC convention.
+static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) {
+ switch (DwarfCC) {
+ case dwarf::DW_CC_normal: return CallingConvention::NearC;
+ case dwarf::DW_CC_BORLAND_msfastcall: return CallingConvention::NearFast;
+ case dwarf::DW_CC_BORLAND_thiscall: return CallingConvention::ThisCall;
+ case dwarf::DW_CC_BORLAND_stdcall: return CallingConvention::NearStdCall;
+ case dwarf::DW_CC_BORLAND_pascal: return CallingConvention::NearPascal;
+ case dwarf::DW_CC_LLVM_vectorcall: return CallingConvention::NearVector;
+ }
+ return CallingConvention::NearC;
+}
+
+TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
+ ModifierOptions Mods = ModifierOptions::None;
+ PointerOptions PO = PointerOptions::None;
+ bool IsModifier = true;
+ const DIType *BaseTy = Ty;
+ while (IsModifier && BaseTy) {
+ // FIXME: Need to add DWARF tags for __unaligned and _Atomic
+ switch (BaseTy->getTag()) {
+ case dwarf::DW_TAG_const_type:
+ Mods |= ModifierOptions::Const;
+ PO |= PointerOptions::Const;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Mods |= ModifierOptions::Volatile;
+ PO |= PointerOptions::Volatile;
+ break;
+ case dwarf::DW_TAG_restrict_type:
+ // Only pointer types can be marked with __restrict. There is no known flag
+ // for __restrict in LF_MODIFIER records.
+ PO |= PointerOptions::Restrict;
+ break;
+ default:
+ IsModifier = false;
+ break;
+ }
+ if (IsModifier)
+ BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType();
+ }
+
+ // Check if the inner type will use an LF_POINTER record. If so, the
+ // qualifiers will go in the LF_POINTER record. This comes up for types like
+ // 'int *const' and 'int *__restrict', not the more common cases like 'const
+ // char *'.
+ if (BaseTy) {
+ switch (BaseTy->getTag()) {
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ return lowerTypePointer(cast<DIDerivedType>(BaseTy), PO);
+ case dwarf::DW_TAG_ptr_to_member_type:
+ return lowerTypeMemberPointer(cast<DIDerivedType>(BaseTy), PO);
+ default:
+ break;
+ }
+ }
+
+ TypeIndex ModifiedTI = getTypeIndex(BaseTy);
+
+ // Return the base type index if there aren't any modifiers. For example, the
+ // metadata could contain restrict wrappers around non-pointer types.
+ if (Mods == ModifierOptions::None)
+ return ModifiedTI;
+
+ ModifierRecord MR(ModifiedTI, Mods);
+ return TypeTable.writeLeafType(MR);
+}
+
+TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
+ SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
+ for (const DIType *ArgType : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgType));
+
+ // MSVC uses the None type index for the variadic argument.
+ if (ReturnAndArgTypeIndices.size() > 1 &&
+ ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
+ ReturnAndArgTypeIndices.back() = TypeIndex::None();
+ }
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ ArrayRef<TypeIndex> ArgTypeIndices = std::nullopt;
+ if (!ReturnAndArgTypeIndices.empty()) {
+ auto ReturnAndArgTypesRef = ArrayRef(ReturnAndArgTypeIndices);
+ ReturnTypeIndex = ReturnAndArgTypesRef.front();
+ ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
+ }
+
+ ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
+ TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
+
+ CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
+
+ FunctionOptions FO = getFunctionOptions(Ty);
+ ProcedureRecord Procedure(ReturnTypeIndex, CC, FO, ArgTypeIndices.size(),
+ ArgListIndex);
+ return TypeTable.writeLeafType(Procedure);
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
+ const DIType *ClassTy,
+ int ThisAdjustment,
+ bool IsStaticMethod,
+ FunctionOptions FO) {
+ // Lower the containing class type.
+ TypeIndex ClassType = getTypeIndex(ClassTy);
+
+ DITypeRefArray ReturnAndArgs = Ty->getTypeArray();
+
+ unsigned Index = 0;
+ SmallVector<TypeIndex, 8> ArgTypeIndices;
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ if (ReturnAndArgs.size() > Index) {
+ ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+ }
+
+ // If the first argument is a pointer type and this isn't a static method,
+ // treat it as the special 'this' parameter, which is encoded separately from
+ // the arguments.
+ TypeIndex ThisTypeIndex;
+ if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
+ if (const DIDerivedType *PtrTy =
+ dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index])) {
+ if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
+ ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
+ Index++;
+ }
+ }
+ }
+
+ while (Index < ReturnAndArgs.size())
+ ArgTypeIndices.push_back(getTypeIndex(ReturnAndArgs[Index++]));
+
+ // MSVC uses the None type index for the variadic argument.
+ if (!ArgTypeIndices.empty() && ArgTypeIndices.back() == TypeIndex::Void())
+ ArgTypeIndices.back() = TypeIndex::None();
+
+ ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
+ TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
+
+ CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
+
+ MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FO,
+ ArgTypeIndices.size(), ArgListIndex, ThisAdjustment);
+ return TypeTable.writeLeafType(MFR);
+}
+
+TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
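+ // The shape of a vftable is just its slot count: the vtable's size in bits
+ // divided by the code pointer size. Every slot is emitted as a near slot.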
+ unsigned VSlotCount =
+ Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize());
+ SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
+
+ VFTableShapeRecord VFTSR(Slots);
+ return TypeTable.writeLeafType(VFTSR);
+}
+
+static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
+ switch (Flags & DINode::FlagAccessibility) {
+ case DINode::FlagPrivate: return MemberAccess::Private;
+ case DINode::FlagPublic: return MemberAccess::Public;
+ case DINode::FlagProtected: return MemberAccess::Protected;
+ case 0:
+ // If there was no explicit access control, provide the default for the tag.
+ return RecordTag == dwarf::DW_TAG_class_type ? MemberAccess::Private
+ : MemberAccess::Public;
+ }
+ llvm_unreachable("access flags are exclusive");
+}
+
+static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) {
+ if (SP->isArtificial())
+ return MethodOptions::CompilerGenerated;
+
+ // FIXME: Handle other MethodOptions.
+
+ return MethodOptions::None;
+}
+
+static MethodKind translateMethodKindFlags(const DISubprogram *SP,
+ bool Introduced) {
+ if (SP->getFlags() & DINode::FlagStaticMember)
+ return MethodKind::Static;
+
+ switch (SP->getVirtuality()) {
+ case dwarf::DW_VIRTUALITY_none:
+ break;
+ case dwarf::DW_VIRTUALITY_virtual:
+ return Introduced ? MethodKind::IntroducingVirtual : MethodKind::Virtual;
+ case dwarf::DW_VIRTUALITY_pure_virtual:
+ return Introduced ? MethodKind::PureIntroducingVirtual
+ : MethodKind::PureVirtual;
+ default:
+ llvm_unreachable("unhandled virtuality case");
+ }
+
+ return MethodKind::Vanilla;
+}
+
+static TypeRecordKind getRecordKind(const DICompositeType *Ty) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ return TypeRecordKind::Class;
+ case dwarf::DW_TAG_structure_type:
+ return TypeRecordKind::Struct;
+ default:
+ llvm_unreachable("unexpected tag");
+ }
+}
+
+/// Return ClassOptions that should be present on both the forward declaration
+ /// and the definition of a tag type.
+static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::None;
+
+ // MSVC always sets this flag, even for local types. Clang doesn't always
+ // appear to give every type a linkage name, which may be problematic for us.
+ // FIXME: Investigate the consequences of not following them here.
+ if (!Ty->getIdentifier().empty())
+ CO |= ClassOptions::HasUniqueName;
+
+ // Put the Nested flag on a type if it appears immediately inside a tag type.
+ // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
+ // here. That flag is only set on definitions, and not forward declarations.
+ const DIScope *ImmediateScope = Ty->getScope();
+ if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
+ CO |= ClassOptions::Nested;
+
+ // Put the Scoped flag on function-local types. MSVC sets this flag on enum
+ // types only when they have an immediate function scope. Clang never puts enums
+ // inside DILexicalBlock scopes. Enum types, as generated by clang, are
+ // always in function, class, or file scopes.
+ if (Ty->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (ImmediateScope && isa<DISubprogram>(ImmediateScope))
+ CO |= ClassOptions::Scoped;
+ } else {
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
+ }
+ }
+
+ return CO;
+}
+
+void CodeViewDebug::addUDTSrcLine(const DIType *Ty, TypeIndex TI) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ break;
+ default:
+ return;
+ }
+
+ if (const auto *File = Ty->getFile()) {
+ StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File));
+ TypeIndex SIDI = TypeTable.writeLeafType(SIDR);
+
+ UdtSourceLineRecord USLR(TI, SIDI, Ty->getLine());
+ TypeTable.writeLeafType(USLR);
+ }
+}
+
+TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FTI;
+ unsigned EnumeratorCount = 0;
+
+ if (Ty->isForwardDecl()) {
+ CO |= ClassOptions::ForwardReference;
+ } else {
+ ContinuationRecordBuilder ContinuationBuilder;
+ ContinuationBuilder.begin(ContinuationRecordKind::FieldList);
+ for (const DINode *Element : Ty->getElements()) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
+ // FIXME: Is it correct to always emit these as unsigned here?
+ EnumeratorRecord ER(MemberAccess::Public,
+ APSInt(Enumerator->getValue(), true),
+ Enumerator->getName());
+ ContinuationBuilder.writeMemberType(ER);
+ EnumeratorCount++;
+ }
+ }
+ FTI = TypeTable.insertRecord(ContinuationBuilder);
+ }
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),
+ getTypeIndex(Ty->getBaseType()));
+ TypeIndex EnumTI = TypeTable.writeLeafType(ER);
+
+ addUDTSrcLine(Ty, EnumTI);
+
+ return EnumTI;
+}
+
+//===----------------------------------------------------------------------===//
+// ClassInfo
+//===----------------------------------------------------------------------===//
+
+struct llvm::ClassInfo {
+ struct MemberInfo {
+ const DIDerivedType *MemberTypeNode;
+ uint64_t BaseOffset;
+ };
+ // [MemberInfo]
+ using MemberList = std::vector<MemberInfo>;
+
+ using MethodsList = TinyPtrVector<const DISubprogram *>;
+ // MethodName -> MethodsList
+ using MethodsMap = MapVector<MDString *, MethodsList>;
+
+ /// Base classes.
+ std::vector<const DIDerivedType *> Inheritance;
+
+ /// Direct members.
+ MemberList Members;
+ // Direct overloaded methods gathered by name.
+ MethodsMap Methods;
+
+ TypeIndex VShapeTI;
+
+ std::vector<const DIType *> NestedTypes;
+};
+
+void CodeViewDebug::clear() {
+ assert(CurFn == nullptr);
+ FileIdMap.clear();
+ FnDebugInfo.clear();
+ FileToFilepathMap.clear();
+ LocalUDTs.clear();
+ GlobalUDTs.clear();
+ TypeIndices.clear();
+ CompleteTypeIndices.clear();
+ ScopeGlobals.clear();
+ CVGlobalVariableOffsets.clear();
+}
+
+void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
+ const DIDerivedType *DDTy) {
+ if (!DDTy->getName().empty()) {
+ Info.Members.push_back({DDTy, 0});
+
+ // Collect static const data members with values.
+ if ((DDTy->getFlags() & DINode::FlagStaticMember) ==
+ DINode::FlagStaticMember) {
+ if (DDTy->getConstant() && (isa<ConstantInt>(DDTy->getConstant()) ||
+ isa<ConstantFP>(DDTy->getConstant())))
+ StaticConstMembers.push_back(DDTy);
+ }
+
+ return;
+ }
+
+ // An unnamed member may represent a nested struct or union. Attempt to
+ // interpret the unnamed member as a DICompositeType possibly wrapped in
+ // qualifier types. Add all the indirect fields to the current record if that
+ // succeeds, and drop the member if that fails.
+ assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
+ uint64_t Offset = DDTy->getOffsetInBits();
+ const DIType *Ty = DDTy->getBaseType();
+ bool FullyResolved = false;
+ while (!FullyResolved) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ // FIXME: we should apply the qualifier types to the indirect fields
+ // rather than dropping them.
+ Ty = cast<DIDerivedType>(Ty)->getBaseType();
+ break;
+ default:
+ FullyResolved = true;
+ break;
+ }
+ }
+
+ const DICompositeType *DCTy = dyn_cast<DICompositeType>(Ty);
+ if (!DCTy)
+ return;
+
+ ClassInfo NestedInfo = collectClassInfo(DCTy);
+ for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members)
+ Info.Members.push_back(
+ {IndirectField.MemberTypeNode, IndirectField.BaseOffset + Offset});
+}
+
+ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
+ ClassInfo Info;
+ // Add elements to structure type.
+ DINodeArray Elements = Ty->getElements();
+ for (auto *Element : Elements) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<DISubprogram>(Element)) {
+ Info.Methods[SP->getRawName()].push_back(SP);
+ } else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_member) {
+ collectMemberInfo(Info, DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) {
+ Info.Inheritance.push_back(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ DDTy->getName() == "__vtbl_ptr_type") {
+ Info.VShapeTI = getTypeIndex(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_typedef) {
+ Info.NestedTypes.push_back(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
+ // Ignore friend members. It appears that MSVC emitted info about
+ // friends in the past, but modern versions do not.
+ }
+ } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
+ Info.NestedTypes.push_back(Composite);
+ }
+ // Skip other unrecognized kinds of elements.
+ }
+ return Info;
+}
+
+static bool shouldAlwaysEmitCompleteClassType(const DICompositeType *Ty) {
+ // This routine is used by lowerTypeClass and lowerTypeUnion to determine
+ // if a complete type should be emitted instead of a forward reference.
+ return Ty->getName().empty() && Ty->getIdentifier().empty() &&
+ !Ty->isForwardDecl();
+}
+
+TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
+ // Emit the complete type for unnamed structs. C++ classes with methods
+ // which have a circular reference back to the class type are expected to
+ // be named by the front-end and should not be "unnamed". C unnamed
+ // structs should not have circular references.
+ if (shouldAlwaysEmitCompleteClassType(Ty)) {
+ // If this unnamed complete type is already in the process of being defined,
+ // then the description of the type is malformed and cannot be emitted
+ // into CodeView correctly, so report a fatal error.
+ auto I = CompleteTypeIndices.find(Ty);
+ if (I != CompleteTypeIndices.end() && I->second == TypeIndex())
+ report_fatal_error("cannot debug circular reference to unnamed type");
+ return getCompleteTypeIndex(Ty);
+ }
+
+ // First, construct the forward decl. Don't look into Ty to compute the
+ // forward decl options, since it might not be available in all TUs.
+ TypeRecordKind Kind = getRecordKind(Ty);
+ ClassOptions CO =
+ ClassOptions::ForwardReference | getCommonClassOptions(Ty);
+ std::string FullName = getFullyQualifiedName(Ty);
+ ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0,
+ FullName, Ty->getIdentifier());
+ TypeIndex FwdDeclTI = TypeTable.writeLeafType(CR);
+ if (!Ty->isForwardDecl())
+ DeferredCompleteTypes.push_back(Ty);
+ return FwdDeclTI;
+}
+
+TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
+ // Construct the field list and complete type record.
+ TypeRecordKind Kind = getRecordKind(Ty);
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FieldTI;
+ TypeIndex VShapeTI;
+ unsigned FieldCount;
+ bool ContainsNestedClass;
+ std::tie(FieldTI, VShapeTI, FieldCount, ContainsNestedClass) =
+ lowerRecordFieldList(Ty);
+
+ if (ContainsNestedClass)
+ CO |= ClassOptions::ContainsNestedClass;
+
+ // MSVC appears to set this flag by searching for any destructor or method
+ // with FunctionOptions::Constructor among the emitted members. The Clang AST
+ // has all the members, but special member functions are not yet emitted into
+ // debug information. For now, checking a class's non-triviality seems enough.
+ // FIXME: not true for a nested unnamed struct.
+ if (isNonTrivial(Ty))
+ CO |= ClassOptions::HasConstructorOrDestructor;
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
+
+ ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI,
+ SizeInBytes, FullName, Ty->getIdentifier());
+ TypeIndex ClassTI = TypeTable.writeLeafType(CR);
+
+ addUDTSrcLine(Ty, ClassTI);
+
+ addToUDTs(Ty);
+
+ return ClassTI;
+}
+
+TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
+ // Emit the complete type for unnamed unions.
+ if (shouldAlwaysEmitCompleteClassType(Ty))
+ return getCompleteTypeIndex(Ty);
+
+ ClassOptions CO =
+ ClassOptions::ForwardReference | getCommonClassOptions(Ty);
+ std::string FullName = getFullyQualifiedName(Ty);
+ UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier());
+ TypeIndex FwdDeclTI = TypeTable.writeLeafType(UR);
+ if (!Ty->isForwardDecl())
+ DeferredCompleteTypes.push_back(Ty);
+ return FwdDeclTI;
+}
+
+TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::Sealed | getCommonClassOptions(Ty);
+ TypeIndex FieldTI;
+ unsigned FieldCount;
+ bool ContainsNestedClass;
+ std::tie(FieldTI, std::ignore, FieldCount, ContainsNestedClass) =
+ lowerRecordFieldList(Ty);
+
+ if (ContainsNestedClass)
+ CO |= ClassOptions::ContainsNestedClass;
+
+ uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName,
+ Ty->getIdentifier());
+ TypeIndex UnionTI = TypeTable.writeLeafType(UR);
+
+ addUDTSrcLine(Ty, UnionTI);
+
+ addToUDTs(Ty);
+
+ return UnionTI;
+}
+
+std::tuple<TypeIndex, TypeIndex, unsigned, bool>
+CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
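+ // The result is (field list index, vftable shape index, member count,
+ // whether the record contains nested types).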
+ // Manually count members. MSVC appears to count everything that generates a
+ // field list record. Each individual overload in a method overload group
+ // contributes to this count, even though the overload group is a single field
+ // list record.
+ unsigned MemberCount = 0;
+ ClassInfo Info = collectClassInfo(Ty);
+ ContinuationRecordBuilder ContinuationBuilder;
+ ContinuationBuilder.begin(ContinuationRecordKind::FieldList);
+
+ // Create base classes.
+ for (const DIDerivedType *I : Info.Inheritance) {
+ if (I->getFlags() & DINode::FlagVirtual) {
+ // Virtual base.
+ unsigned VBPtrOffset = I->getVBPtrOffset();
+ // FIXME: Despite the accessor name, the offset is really in bytes.
+ unsigned VBTableIndex = I->getOffsetInBits() / 4;
+ auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase
+ ? TypeRecordKind::IndirectVirtualBaseClass
+ : TypeRecordKind::VirtualBaseClass;
+ VirtualBaseClassRecord VBCR(
+ RecordKind, translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
+ VBTableIndex);
+
+ ContinuationBuilder.writeMemberType(VBCR);
+ MemberCount++;
+ } else {
+ assert(I->getOffsetInBits() % 8 == 0 &&
+ "bases must be on byte boundaries");
+ BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()),
+ I->getOffsetInBits() / 8);
+ ContinuationBuilder.writeMemberType(BCR);
+ MemberCount++;
+ }
+ }
+
+ // Create members.
+ for (ClassInfo::MemberInfo &MemberInfo : Info.Members) {
+ const DIDerivedType *Member = MemberInfo.MemberTypeNode;
+ TypeIndex MemberBaseType = getTypeIndex(Member->getBaseType());
+ StringRef MemberName = Member->getName();
+ MemberAccess Access =
+ translateAccessFlags(Ty->getTag(), Member->getFlags());
+
+ if (Member->isStaticMember()) {
+ StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName);
+ ContinuationBuilder.writeMemberType(SDMR);
+ MemberCount++;
+ continue;
+ }
+
+ // Virtual function pointer member.
+ if ((Member->getFlags() & DINode::FlagArtificial) &&
+ Member->getName().startswith("_vptr$")) {
+ VFPtrRecord VFPR(getTypeIndex(Member->getBaseType()));
+ ContinuationBuilder.writeMemberType(VFPR);
+ MemberCount++;
+ continue;
+ }
+
+ // Data member.
+ uint64_t MemberOffsetInBits =
+ Member->getOffsetInBits() + MemberInfo.BaseOffset;
+ if (Member->isBitField()) {
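+ // For bit-fields, the member offset is the bit offset of the field itself,
+ // while the storage offset (when present) is the bit offset of the storage
+ // unit holding it. Their difference is the start bit within that unit, which
+ // goes into the LF_BITFIELD record; the data member offset below then points
+ // at the storage unit.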
+ uint64_t StartBitOffset = MemberOffsetInBits;
+ if (const auto *CI =
+ dyn_cast_or_null<ConstantInt>(Member->getStorageOffsetInBits())) {
+ MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset;
+ }
+ StartBitOffset -= MemberOffsetInBits;
+ BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(),
+ StartBitOffset);
+ MemberBaseType = TypeTable.writeLeafType(BFR);
+ }
+ uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
+ DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes,
+ MemberName);
+ ContinuationBuilder.writeMemberType(DMR);
+ MemberCount++;
+ }
+
+ // Create methods
+ for (auto &MethodItr : Info.Methods) {
+ StringRef Name = MethodItr.first->getString();
+
+ std::vector<OneMethodRecord> Methods;
+ for (const DISubprogram *SP : MethodItr.second) {
+ TypeIndex MethodType = getMemberFunctionType(SP, Ty);
+ bool Introduced = SP->getFlags() & DINode::FlagIntroducedVirtual;
+
+ unsigned VFTableOffset = -1;
+ if (Introduced)
+ VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes();
+
+ Methods.push_back(OneMethodRecord(
+ MethodType, translateAccessFlags(Ty->getTag(), SP->getFlags()),
+ translateMethodKindFlags(SP, Introduced),
+ translateMethodOptionFlags(SP), VFTableOffset, Name));
+ MemberCount++;
+ }
+ assert(!Methods.empty() && "Empty methods map entry");
+ if (Methods.size() == 1)
+ ContinuationBuilder.writeMemberType(Methods[0]);
+ else {
+ // FIXME: Make this use its own ContinuationBuilder so that
+ // MethodOverloadList can be split correctly.
+ MethodOverloadListRecord MOLR(Methods);
+ TypeIndex MethodList = TypeTable.writeLeafType(MOLR);
+
+ OverloadedMethodRecord OMR(Methods.size(), MethodList, Name);
+ ContinuationBuilder.writeMemberType(OMR);
+ }
+ }
+
+ // Create nested classes.
+ for (const DIType *Nested : Info.NestedTypes) {
+ NestedTypeRecord R(getTypeIndex(Nested), Nested->getName());
+ ContinuationBuilder.writeMemberType(R);
+ MemberCount++;
+ }
+
+ TypeIndex FieldTI = TypeTable.insertRecord(ContinuationBuilder);
+ return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
+ !Info.NestedTypes.empty());
+}
+
+TypeIndex CodeViewDebug::getVBPTypeIndex() {
+ if (!VBPType.getIndex()) {
+ // Make a 'const int *' type.
+ ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
+ TypeIndex ModifiedTI = TypeTable.writeLeafType(MR);
+
+ PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ PointerOptions PO = PointerOptions::None;
+ PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
+ VBPType = TypeTable.writeLeafType(PR);
+ }
+
+ return VBPType;
+}
+
+TypeIndex CodeViewDebug::getTypeIndex(const DIType *Ty, const DIType *ClassTy) {
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // Check if we've already translated this type. Don't try to do a
+ // get-or-create style insertion that caches the hash lookup across the
+ // lowerType call. It will update the TypeIndices map.
+ auto I = TypeIndices.find({Ty, ClassTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerType(Ty, ClassTy);
+ return recordTypeIndexForDINode(Ty, TI, ClassTy);
+}
+
+codeview::TypeIndex
+CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy) {
+ assert(PtrTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ "this type must be a pointer type");
+
+ PointerOptions Options = PointerOptions::None;
+ if (SubroutineTy->getFlags() & DINode::DIFlags::FlagLValueReference)
+ Options = PointerOptions::LValueRefThisPointer;
+ else if (SubroutineTy->getFlags() & DINode::DIFlags::FlagRValueReference)
+ Options = PointerOptions::RValueRefThisPointer;
+
+ // Check if we've already translated this type. If there is no ref qualifier
+ // on the function then we look up this pointer type with no associated class
+ // so that the TypeIndex for the this pointer can be shared with the type
+ // index for other pointers to this class type. If there is a ref qualifier
+ // then we lookup the pointer using the subroutine as the parent type.
+ auto I = TypeIndices.find({PtrTy, SubroutineTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerTypePointer(PtrTy, Options);
+ return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
+}
+
+TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(const DIType *Ty) {
+ PointerRecord PR(getTypeIndex(Ty),
+ getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32,
+ PointerMode::LValueReference, PointerOptions::None,
+ Ty->getSizeInBits() / 8);
+ return TypeTable.writeLeafType(PR);
+}
+
+TypeIndex CodeViewDebug::getCompleteTypeIndex(const DIType *Ty) {
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // Look through typedefs when getting the complete type index. Call
+ // getTypeIndex on the typedef to ensure that any UDTs are accumulated and are
+ // emitted only once.
+ if (Ty->getTag() == dwarf::DW_TAG_typedef)
+ (void)getTypeIndex(Ty);
+ while (Ty->getTag() == dwarf::DW_TAG_typedef)
+ Ty = cast<DIDerivedType>(Ty)->getBaseType();
+
+ // If this is a non-record type, the complete type index is the same as the
+ // normal type index. Just call getTypeIndex.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ break;
+ default:
+ return getTypeIndex(Ty);
+ }
+
+ const auto *CTy = cast<DICompositeType>(Ty);
+
+ TypeLoweringScope S(*this);
+
+ // Make sure the forward declaration is emitted first. It's unclear if this
+ // is necessary, but MSVC does it, and we should follow suit until we can show
+ // otherwise.
+ // We only emit a forward declaration for named types.
+ if (!CTy->getName().empty() || !CTy->getIdentifier().empty()) {
+ TypeIndex FwdDeclTI = getTypeIndex(CTy);
+
+ // Just use the forward decl if we don't have complete type info. This
+ // might happen if the frontend is using modules and expects the complete
+ // definition to be emitted elsewhere.
+ if (CTy->isForwardDecl())
+ return FwdDeclTI;
+ }
+
+ // Check if we've already translated the complete record type.
+ // Insert the type with a null TypeIndex to signify that the type is currently
+ // being lowered.
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+ if (!InsertResult.second)
+ return InsertResult.first->second;
+
+ TypeIndex TI;
+ switch (CTy->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ TI = lowerCompleteTypeClass(CTy);
+ break;
+ case dwarf::DW_TAG_union_type:
+ TI = lowerCompleteTypeUnion(CTy);
+ break;
+ default:
+ llvm_unreachable("not a record");
+ }
+
+ // Update the type index associated with this CompositeType. This cannot
+ // use the 'InsertResult' iterator above because it is potentially
+ // invalidated by map insertions which can occur while lowering the class
+ // type above.
+ CompleteTypeIndices[CTy] = TI;
+ return TI;
+}
+
+/// Emit all the deferred complete record types. Try to do this in FIFO order,
+/// and repeat until a fixpoint is reached, as each complete record type
+/// typically references many other record types.
+void CodeViewDebug::emitDeferredCompleteTypes() {
+ SmallVector<const DICompositeType *, 4> TypesToEmit;
+ while (!DeferredCompleteTypes.empty()) {
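+ // Swap the worklist out so that any types deferred while lowering this batch
+ // accumulate in DeferredCompleteTypes and are picked up on the next iteration.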
+ std::swap(DeferredCompleteTypes, TypesToEmit);
+ for (const DICompositeType *RecordTy : TypesToEmit)
+ getCompleteTypeIndex(RecordTy);
+ TypesToEmit.clear();
+ }
+}
+
+void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals) {
+ // Get the sorted list of parameters and emit them first.
+ SmallVector<const LocalVariable *, 6> Params;
+ for (const LocalVariable &L : Locals)
+ if (L.DIVar->isParameter())
+ Params.push_back(&L);
+ llvm::sort(Params, [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
+ for (const LocalVariable *L : Params)
+ emitLocalVariable(FI, *L);
+
+ // Next emit all non-parameters in the order that we found them.
+ for (const LocalVariable &L : Locals) {
+ if (!L.DIVar->isParameter()) {
+ if (L.ConstantValue) {
+ // If ConstantValue is set we will emit it as a S_CONSTANT instead of a
+ // S_LOCAL in order to be able to represent it at all.
+ const DIType *Ty = L.DIVar->getType();
+ APSInt Val(*L.ConstantValue);
+ emitConstantSymbolRecord(Ty, Val, std::string(L.DIVar->getName()));
+ } else {
+ emitLocalVariable(FI, L);
+ }
+ }
+ }
+}
+
+void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
+ const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
+ MCSymbol *LocalEnd = beginSymbolRecord(SymbolKind::S_LOCAL);
+
+ LocalSymFlags Flags = LocalSymFlags::None;
+ if (Var.DIVar->isParameter())
+ Flags |= LocalSymFlags::IsParameter;
+ if (Var.DefRanges.empty())
+ Flags |= LocalSymFlags::IsOptimizedOut;
+
+ OS.AddComment("TypeIndex");
+ TypeIndex TI = Var.UseReferenceType
+ ? getTypeIndexForReferenceTo(Var.DIVar->getType())
+ : getCompleteTypeIndex(Var.DIVar->getType());
+ OS.emitInt32(TI.getIndex());
+ OS.AddComment("Flags");
+ OS.emitInt16(static_cast<uint16_t>(Flags));
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
+ endSymbolRecord(LocalEnd);
+
+ // Calculate the on disk prefix of the appropriate def range record. The
+ // records and on disk formats are described in SymbolRecords.h. BytePrefix
+ // should be big enough to hold all forms without memory allocation.
+ SmallString<20> BytePrefix;
+ for (const auto &Pair : Var.DefRanges) {
+ LocalVarDef DefRange = Pair.first;
+ const auto &Ranges = Pair.second;
+ BytePrefix.clear();
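+ // Variables that live in memory are described relative to a frame pointer or
+ // a base register; enregistered variables name the register directly,
+ // optionally with a subfield offset for pieces of an aggregate.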
+ if (DefRange.InMemory) {
+ int Offset = DefRange.DataOffset;
+ unsigned Reg = DefRange.CVRegister;
+
+ // 32-bit x86 call sequences often use PUSH instructions, which disrupt
+ // ESP-relative offsets. Use the virtual frame pointer, VFRAME or $T0,
+ // instead. In frames without stack realignment, $T0 will be the CFA.
+ if (RegisterId(Reg) == RegisterId::ESP) {
+ Reg = unsigned(RegisterId::VFRAME);
+ Offset += FI.OffsetAdjustment;
+ }
+
+ // If we can use the chosen frame pointer for the frame and this isn't a
+ // sliced aggregate, use the smaller S_DEFRANGE_FRAMEPOINTER_REL record.
+ // Otherwise, use S_DEFRANGE_REGISTER_REL.
+ EncodedFramePtrReg EncFP = encodeFramePtrReg(RegisterId(Reg), TheCPU);
+ if (!DefRange.IsSubfield && EncFP != EncodedFramePtrReg::None &&
+ (bool(Flags & LocalSymFlags::IsParameter)
+ ? (EncFP == FI.EncodedParamFramePtrReg)
+ : (EncFP == FI.EncodedLocalFramePtrReg))) {
+ DefRangeFramePointerRelHeader DRHdr;
+ DRHdr.Offset = Offset;
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
+ } else {
+ uint16_t RegRelFlags = 0;
+ if (DefRange.IsSubfield) {
+ RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
+ (DefRange.StructOffset
+ << DefRangeRegisterRelSym::OffsetInParentShift);
+ }
+ DefRangeRegisterRelHeader DRHdr;
+ DRHdr.Register = Reg;
+ DRHdr.Flags = RegRelFlags;
+ DRHdr.BasePointerOffset = Offset;
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
+ }
+ } else {
+ assert(DefRange.DataOffset == 0 && "unexpected offset into register");
+ if (DefRange.IsSubfield) {
+ DefRangeSubfieldRegisterHeader DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ DRHdr.OffsetInParent = DefRange.StructOffset;
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
+ } else {
+ DefRangeRegisterHeader DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
+ }
+ }
+ }
+}
+
+void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
+ const FunctionInfo& FI) {
+ for (LexicalBlock *Block : Blocks)
+ emitLexicalBlock(*Block, FI);
+}
+
+/// Emit an S_BLOCK32 and S_END record pair delimiting the contents of a
+/// lexical block scope.
+void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
+ const FunctionInfo& FI) {
+ MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32);
+ OS.AddComment("PtrParent");
+ OS.emitInt32(0); // PtrParent
+ OS.AddComment("PtrEnd");
+ OS.emitInt32(0); // PtrEnd
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size
+ OS.AddComment("Function section relative address");
+ OS.emitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset
+ OS.AddComment("Function section index");
+ OS.emitCOFFSectionIndex(FI.Begin); // Func Symbol
+ OS.AddComment("Lexical block name");
+ emitNullTerminatedSymbolName(OS, Block.Name); // Name
+ endSymbolRecord(RecordEnd);
+
+ // Emit variables local to this lexical block.
+ emitLocalVariableList(FI, Block.Locals);
+ emitGlobalVariableList(Block.Globals);
+
+ // Emit lexical blocks contained within this block.
+ emitLexicalBlockList(Block.Children, FI);
+
+ // Close the lexical block scope.
+ emitEndSymbolRecord(SymbolKind::S_END);
+}
+
+/// Convenience routine for collecting lexical block information for a list
+/// of lexical scopes.
+void CodeViewDebug::collectLexicalBlockInfo(
+ SmallVectorImpl<LexicalScope *> &Scopes,
+ SmallVectorImpl<LexicalBlock *> &Blocks,
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals) {
+ for (LexicalScope *Scope : Scopes)
+ collectLexicalBlockInfo(*Scope, Blocks, Locals, Globals);
+}
+
+/// Populate the lexical blocks and local variable lists of the parent with
+/// information about the specified lexical scope.
+void CodeViewDebug::collectLexicalBlockInfo(
+ LexicalScope &Scope,
+ SmallVectorImpl<LexicalBlock *> &ParentBlocks,
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals) {
+ if (Scope.isAbstractScope())
+ return;
+
+ // Gather information about the lexical scope including local variables,
+ // global variables, and address ranges.
+ bool IgnoreScope = false;
+ auto LI = ScopeVariables.find(&Scope);
+ SmallVectorImpl<LocalVariable> *Locals =
+ LI != ScopeVariables.end() ? &LI->second : nullptr;
+ auto GI = ScopeGlobals.find(Scope.getScopeNode());
+ SmallVectorImpl<CVGlobalVariable> *Globals =
+ GI != ScopeGlobals.end() ? GI->second.get() : nullptr;
+ const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode());
+ const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges();
+
+ // Ignore lexical scopes which do not contain variables.
+ if (!Locals && !Globals)
+ IgnoreScope = true;
+
+ // Ignore lexical scopes which are not lexical blocks.
+ if (!DILB)
+ IgnoreScope = true;
+
+ // Ignore scopes which have too many address ranges to represent in the
+ // current CodeView format or do not have a valid address range.
+ //
+ // For lexical scopes with multiple address ranges you may be tempted to
+ // construct a single range covering every instruction where the block is
+ // live and everything in between. Unfortunately, Visual Studio only
+ // displays variables from the first matching lexical block scope. If the
+ // first lexical block contains exception handling code or cold code that is
+ // moved to the bottom of the routine, creating a single range covering
+ // nearly the entire routine, then it will hide all other lexical blocks
+ // and the variables they contain.
+ if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second))
+ IgnoreScope = true;
+
+ if (IgnoreScope) {
+ // This scope can be safely ignored and eliminating it will reduce the
+ // size of the debug information. Be sure to collect any variable and scope
+ // information from this scope or any of its children and collapse them
+ // into the parent scope.
+ if (Locals)
+ ParentLocals.append(Locals->begin(), Locals->end());
+ if (Globals)
+ ParentGlobals.append(Globals->begin(), Globals->end());
+ collectLexicalBlockInfo(Scope.getChildren(),
+ ParentBlocks,
+ ParentLocals,
+ ParentGlobals);
+ return;
+ }
+
+ // Create a new CodeView lexical block for this lexical scope. If we've
+ // seen this DILexicalBlock before then the scope tree is malformed and
+ // we can handle this gracefully by not processing it a second time.
+ auto BlockInsertion = CurFn->LexicalBlocks.insert({DILB, LexicalBlock()});
+ if (!BlockInsertion.second)
+ return;
+
+ // Create a lexical block containing the variables and collect the
+ // lexical block information for the children.
+ const InsnRange &Range = Ranges.front();
+ assert(Range.first && Range.second);
+ LexicalBlock &Block = BlockInsertion.first->second;
+ Block.Begin = getLabelBeforeInsn(Range.first);
+ Block.End = getLabelAfterInsn(Range.second);
+ assert(Block.Begin && "missing label for scope begin");
+ assert(Block.End && "missing label for scope end");
+ Block.Name = DILB->getName();
+ if (Locals)
+ Block.Locals = std::move(*Locals);
+ if (Globals)
+ Block.Globals = std::move(*Globals);
+ ParentBlocks.push_back(&Block);
+ collectLexicalBlockInfo(Scope.getChildren(),
+ Block.Children,
+ Block.Locals,
+ Block.Globals);
+}
+
+void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
+ const Function &GV = MF->getFunction();
+ assert(FnDebugInfo.count(&GV));
+ assert(CurFn == FnDebugInfo[&GV].get());
+
+ collectVariableInfo(GV.getSubprogram());
+
+ // Build the lexical block structure to emit for this routine.
+ if (LexicalScope *CFS = LScopes.getCurrentFunctionScope())
+ collectLexicalBlockInfo(*CFS,
+ CurFn->ChildBlocks,
+ CurFn->Locals,
+ CurFn->Globals);
+
+ // Clear the scope and variable information from the map which will not be
+ // valid after we have finished processing this routine. This also prepares
+ // the map for the subsequent routine.
+ ScopeVariables.clear();
+
+ // Don't emit anything if we don't have any line tables.
+ // Thunks are compiler-generated and probably won't have source correlation.
+ if (!CurFn->HaveLineInfo && !GV.getSubprogram()->isThunk()) {
+ FnDebugInfo.erase(&GV);
+ CurFn = nullptr;
+ return;
+ }
+
+ // Find heap alloc sites and add to list.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MDNode *MD = MI.getHeapAllocMarker()) {
+ CurFn->HeapAllocSites.push_back(std::make_tuple(getLabelBeforeInsn(&MI),
+ getLabelAfterInsn(&MI),
+ dyn_cast<DIType>(MD)));
+ }
+ }
+ }
+
+ CurFn->Annotations = MF->getCodeViewAnnotations();
+
+ CurFn->End = Asm->getFunctionEnd();
+
+ CurFn = nullptr;
+}
+
+// Usable locations are valid locations with non-zero line numbers. A line number of zero
+// corresponds to optimized code that doesn't have a distinct source location.
+// In this case, we try to use the previous or next source location depending on
+// the context.
+static bool isUsableDebugLoc(DebugLoc DL) {
+ return DL && DL.getLine() != 0;
+}
+
+void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+
+ // Ignore DBG_VALUE and DBG_LABEL locations and function prologue.
+ if (!Asm || !CurFn || MI->isDebugInstr() ||
+ MI->getFlag(MachineInstr::FrameSetup))
+ return;
+
+ // If the first instruction of a new MBB has no location, find the first
+ // instruction with a location and use that.
+ DebugLoc DL = MI->getDebugLoc();
+ if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) {
+ for (const auto &NextMI : *MI->getParent()) {
+ if (NextMI.isDebugInstr())
+ continue;
+ DL = NextMI.getDebugLoc();
+ if (isUsableDebugLoc(DL))
+ break;
+ }
+ // FIXME: Handle the case where the BB has no valid locations. This would
+ // probably require doing a real dataflow analysis.
+ }
+ PrevInstBB = MI->getParent();
+
+ // If we still don't have a debug location, don't record a location.
+ if (!isUsableDebugLoc(DL))
+ return;
+
+ maybeRecordLocation(DL, Asm->MF);
+}
+
+MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) {
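+ // A subsection is a 4-byte kind followed by a 4-byte size and the contents;
+ // the size is measured between the begin and end labels, so it excludes this
+ // header.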
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.emitInt32(unsigned(Kind));
+ OS.AddComment("Subsection size");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
+ OS.emitLabel(BeginLabel);
+ return EndLabel;
+}
+
+void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
+ OS.emitLabel(EndLabel);
+ // Every subsection must be aligned to a 4-byte boundary.
+ OS.emitValueToAlignment(Align(4));
+}
+
+static StringRef getSymbolName(SymbolKind SymKind) {
+ for (const EnumEntry<SymbolKind> &EE : getSymbolTypeNames())
+ if (EE.Value == SymKind)
+ return EE.Name;
+ return "";
+}
+
+MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) {
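+ // A symbol record is a 2-byte length (excluding the length field itself)
+ // followed by a 2-byte kind and the contents; endSymbolRecord pads the record
+ // to a 4-byte boundary, and that padding is covered by the length.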
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.emitLabel(BeginLabel);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(SymKind));
+ OS.emitInt16(unsigned(SymKind));
+ return EndLabel;
+}
+
+void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) {
+ // MSVC does not pad out symbol records to four bytes, but LLVM does to avoid
+ // an extra copy of every symbol record in LLD. This increases object file
+ // size by less than 1% in the clang build, and is compatible with the Visual
+ // C++ linker.
+ OS.emitValueToAlignment(Align(4));
+ OS.emitLabel(SymEnd);
+}
+
+void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) {
+ OS.AddComment("Record length");
+ OS.emitInt16(2);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(EndKind));
+ OS.emitInt16(uint16_t(EndKind)); // Record Kind
+}
+
+void CodeViewDebug::emitDebugInfoForUDTs(
+ const std::vector<std::pair<std::string, const DIType *>> &UDTs) {
+#ifndef NDEBUG
+ size_t OriginalSize = UDTs.size();
+#endif
+ for (const auto &UDT : UDTs) {
+ const DIType *T = UDT.second;
+ assert(shouldEmitUdt(T));
+ MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT);
+ OS.AddComment("Type");
+ OS.emitInt32(getCompleteTypeIndex(T).getIndex());
+ assert(OriginalSize == UDTs.size() &&
+ "getCompleteTypeIndex found new UDTs!");
+ emitNullTerminatedSymbolName(OS, UDT.first);
+ endSymbolRecord(UDTRecordEnd);
+ }
+}
+
+void CodeViewDebug::collectGlobalVariableInfo() {
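+ // Map each DIGlobalVariableExpression back to the GlobalVariable that carries
+ // it, so the compile units' global variable lists below can be matched with
+ // actual definitions.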
+ DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *>
+ GlobalMap;
+ for (const GlobalVariable &GV : MMI->getModule()->globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV.getDebugInfo(GVEs);
+ for (const auto *GVE : GVEs)
+ GlobalMap[GVE] = &GV;
+ }
+
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ for (const MDNode *Node : CUs->operands()) {
+ const auto *CU = cast<DICompileUnit>(Node);
+ for (const auto *GVE : CU->getGlobalVariables()) {
+ const DIGlobalVariable *DIGV = GVE->getVariable();
+ const DIExpression *DIE = GVE->getExpression();
+ // Don't emit string literals in CodeView, as the only useful parts are
+ // generally the filename and line number, which aren't possible to output
+ // in CodeView. String literals should be the only unnamed GlobalVariable
+ // with debug info.
+ if (DIGV->getName().empty()) continue;
+
+ if ((DIE->getNumElements() == 2) &&
+ (DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
+ // Record the constant offset for the variable.
+ //
+ // A Fortran common block uses this idiom to encode the offset
+ // of a variable from the common block's starting address.
+ CVGlobalVariableOffsets.insert(
+ std::make_pair(DIGV, DIE->getElement(1)));
+
+ // Emit constant global variables in a global symbol section.
+ if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
+ CVGlobalVariable CVGV = {DIGV, DIE};
+ GlobalVariables.emplace_back(std::move(CVGV));
+ }
+
+ const auto *GV = GlobalMap.lookup(GVE);
+ if (!GV || GV->isDeclarationForLinker())
+ continue;
+
+ DIScope *Scope = DIGV->getScope();
+ SmallVector<CVGlobalVariable, 1> *VariableList;
+ if (Scope && isa<DILocalScope>(Scope)) {
+ // Locate a global variable list for this scope, creating one if
+ // necessary.
+ auto Insertion = ScopeGlobals.insert(
+ {Scope, std::unique_ptr<GlobalVariableList>()});
+ if (Insertion.second)
+ Insertion.first->second = std::make_unique<GlobalVariableList>();
+ VariableList = Insertion.first->second.get();
+ } else if (GV->hasComdat())
+ // Emit this global variable into a COMDAT section.
+ VariableList = &ComdatVariables;
+ else
+ // Emit this global variable in a single global symbol section.
+ VariableList = &GlobalVariables;
+ CVGlobalVariable CVGV = {DIGV, GV};
+ VariableList->emplace_back(std::move(CVGV));
+ }
+ }
+}
+
+void CodeViewDebug::collectDebugInfoForGlobals() {
+ for (const CVGlobalVariable &CVGV : GlobalVariables) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ const DIScope *Scope = DIGV->getScope();
+ getCompleteTypeIndex(DIGV->getType());
+ getFullyQualifiedName(Scope, DIGV->getName());
+ }
+
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ const DIScope *Scope = DIGV->getScope();
+ getCompleteTypeIndex(DIGV->getType());
+ getFullyQualifiedName(Scope, DIGV->getName());
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobals() {
+ // First, emit all globals that are not in a comdat in a single symbol
+ // substream. MSVC doesn't like it if the substream is empty, so only open
+ // it if we have at least one global to emit.
+ switchToDebugSectionForSymbol(nullptr);
+ if (!GlobalVariables.empty() || !StaticConstMembers.empty()) {
+ OS.AddComment("Symbol subsection for globals");
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitGlobalVariableList(GlobalVariables);
+ emitStaticConstMemberList();
+ endCVSubsection(EndLabel);
+ }
+
+ // Second, emit each global that is in a comdat into its own .debug$S
+ // section along with its own symbol substream.
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ const GlobalVariable *GV = cast<const GlobalVariable *>(CVGV.GVInfo);
+ MCSymbol *GVSym = Asm->getSymbol(GV);
+ OS.AddComment("Symbol subsection for " +
+ Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
+ switchToDebugSectionForSymbol(GVSym);
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV);
+ endCVSubsection(EndLabel);
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForRetainedTypes() {
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ for (const MDNode *Node : CUs->operands()) {
+ for (auto *Ty : cast<DICompileUnit>(Node)->getRetainedTypes()) {
+ if (DIType *RT = dyn_cast<DIType>(Ty)) {
+ getTypeIndex(RT);
+ // FIXME: Add to global/local DTU list.
+ }
+ }
+ }
+}
+
+// Emit each global variable in the specified array.
+void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
+ for (const CVGlobalVariable &CVGV : Globals) {
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV);
+ }
+}
+
+void CodeViewDebug::emitConstantSymbolRecord(const DIType *DTy, APSInt &Value,
+ const std::string &QualifiedName) {
+ MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+ OS.AddComment("Type");
+ OS.emitInt32(getTypeIndex(DTy).getIndex());
+
+ OS.AddComment("Value");
+
+ // Encoded integers shouldn't need more than 10 bytes.
+ uint8_t Data[10];
+ BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
+ CodeViewRecordIO IO(Writer);
+ cantFail(IO.mapEncodedInteger(Value));
+ StringRef SRef((char *)Data, Writer.getOffset());
+ OS.emitBinaryData(SRef);
+
+ OS.AddComment("Name");
+ emitNullTerminatedSymbolName(OS, QualifiedName);
+ endSymbolRecord(SConstantEnd);
+}
+
+void CodeViewDebug::emitStaticConstMemberList() {
+ for (const DIDerivedType *DTy : StaticConstMembers) {
+ const DIScope *Scope = DTy->getScope();
+
+ APSInt Value;
+ if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DTy->getConstant()))
+ Value = APSInt(CI->getValue(),
+ DebugHandlerBase::isUnsignedDIType(DTy->getBaseType()));
+ else if (const ConstantFP *CFP =
+ dyn_cast_or_null<ConstantFP>(DTy->getConstant()))
+ Value = APSInt(CFP->getValueAPF().bitcastToAPInt(), true);
+ else
+ llvm_unreachable("cannot emit a constant without a value");
+
+ emitConstantSymbolRecord(DTy->getBaseType(), Value,
+ getFullyQualifiedName(Scope, DTy->getName()));
+ }
+}
+
+static bool isFloatDIType(const DIType *Ty) {
+ if (isa<DICompositeType>(Ty))
+ return false;
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return false;
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isFloatDIType(DTy->getBaseType());
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ return (BTy->getEncoding() == dwarf::DW_ATE_float);
+}
+
+void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+
+ const DIScope *Scope = DIGV->getScope();
+ // For static data members, get the scope from the declaration.
+ if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
+ DIGV->getRawStaticDataMemberDeclaration()))
+ Scope = MemberDecl->getScope();
+ // For static local variables and Fortran, the scoping portion is elided
+ // from the name so that we can reference the variable in the command line
+ // of the VS debugger.
+ std::string QualifiedName =
+ (moduleIsInFortran() || (Scope && isa<DILocalScope>(Scope)))
+ ? std::string(DIGV->getName())
+ : getFullyQualifiedName(Scope, DIGV->getName());
+
+ if (const GlobalVariable *GV =
+ dyn_cast_if_present<const GlobalVariable *>(CVGV.GVInfo)) {
+ // DataSym record, see SymbolRecord.h for more info. Thread local data
+ // happens to have the same format as global data.
+ MCSymbol *GVSym = Asm->getSymbol(GV);
+ SymbolKind DataSym = GV->isThreadLocal()
+ ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+ : SymbolKind::S_GTHREAD32)
+ : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+ : SymbolKind::S_GDATA32);
+ MCSymbol *DataEnd = beginSymbolRecord(DataSym);
+ OS.AddComment("Type");
+ OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex());
+ OS.AddComment("DataOffset");
+
+ uint64_t Offset = 0;
+ if (CVGlobalVariableOffsets.contains(DIGV))
+ // Use the offset seen while collecting info on globals.
+ Offset = CVGlobalVariableOffsets[DIGV];
+ OS.emitCOFFSecRel32(GVSym, Offset);
+
+ OS.AddComment("Segment");
+ OS.emitCOFFSectionIndex(GVSym);
+ OS.AddComment("Name");
+ const unsigned LengthOfDataRecord = 12;
+ emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord);
+ endSymbolRecord(DataEnd);
+ } else {
+ const DIExpression *DIE = cast<const DIExpression *>(CVGV.GVInfo);
+ assert(DIE->isConstant() &&
+ "Global constant variables must contain a constant expression.");
+
+ // Use unsigned for floats.
+ bool isUnsigned = isFloatDIType(DIGV->getType())
+ ? true
+ : DebugHandlerBase::isUnsignedDIType(DIGV->getType());
+ APSInt Value(APInt(/*BitWidth=*/64, DIE->getElement(1)), isUnsigned);
+ emitConstantSymbolRecord(DIGV->getType(), Value, QualifiedName);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
new file mode 100644
index 000000000000..1455ac417824
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -0,0 +1,530 @@
+//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Microsoft CodeView debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
+#include <map>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+struct ClassInfo;
+class StringRef;
+class AsmPrinter;
+class Function;
+class GlobalVariable;
+class MCSectionCOFF;
+class MCStreamer;
+class MCSymbol;
+class MachineFunction;
+
+/// Collects and handles line tables information in a CodeView format.
+class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
+public:
+ struct LocalVarDef {
+ /// Indicates that variable data is stored in memory relative to the
+ /// specified register.
+ int InMemory : 1;
+
+ /// Offset of variable data in memory.
+ int DataOffset : 31;
+
+ /// Non-zero if this is a piece of an aggregate.
+ uint16_t IsSubfield : 1;
+
+ /// Offset into aggregate.
+ uint16_t StructOffset : 15;
+
+ /// Register containing the data or the register base of the memory
+ /// location containing the data.
+ uint16_t CVRegister;
+
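+ // Pack the bit-field struct into a single integer and back. The
+ // static_assert below guarantees the sizes match, so the memcpy round-trips
+ // losslessly; this lets a LocalVarDef be treated as an opaque 64-bit key.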
+ uint64_t static toOpaqueValue(const LocalVarDef DR) {
+ uint64_t Val = 0;
+ std::memcpy(&Val, &DR, sizeof(Val));
+ return Val;
+ }
+
+ LocalVarDef static createFromOpaqueValue(uint64_t Val) {
+ LocalVarDef DR;
+ std::memcpy(&DR, &Val, sizeof(Val));
+ return DR;
+ }
+ };
+
+ static_assert(sizeof(uint64_t) == sizeof(LocalVarDef));
+
+private:
+ MCStreamer &OS;
+ BumpPtrAllocator Allocator;
+ codeview::GlobalTypeTableBuilder TypeTable;
+
+ /// Whether to emit type record hashes into .debug$H.
+ bool EmitDebugGlobalHashes = false;
+
+ /// The codeview CPU type used by the translation unit.
+ codeview::CPUType TheCPU;
+
+ static LocalVarDef createDefRangeMem(uint16_t CVRegister, int Offset);
+
+ /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
+ struct LocalVariable {
+ const DILocalVariable *DIVar = nullptr;
+ MapVector<LocalVarDef,
+ SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>>
+ DefRanges;
+ bool UseReferenceType = false;
+ std::optional<APSInt> ConstantValue;
+ };
+
+ struct CVGlobalVariable {
+ const DIGlobalVariable *DIGV;
+ PointerUnion<const GlobalVariable *, const DIExpression *> GVInfo;
+ };
+
+ struct InlineSite {
+ SmallVector<LocalVariable, 1> InlinedLocals;
+ SmallVector<const DILocation *, 1> ChildSites;
+ const DISubprogram *Inlinee = nullptr;
+
+ /// The ID of the inline site or function used with .cv_loc. Not a type
+ /// index.
+ unsigned SiteFuncId = 0;
+ };
+
+ // Combines information from DILexicalBlock and LexicalScope.
+ struct LexicalBlock {
+ SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
+ SmallVector<LexicalBlock *, 1> Children;
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ StringRef Name;
+ };
+
+ // For each function, store a vector of labels to its instructions, as well
+ // as a label for the end of the function.
+ struct FunctionInfo {
+ FunctionInfo() = default;
+
+ // Uncopyable.
+ FunctionInfo(const FunctionInfo &FI) = delete;
+
+ /// Map from inlined call site to inlined instructions and child inlined
+ /// call sites. Listed in program order.
+ std::unordered_map<const DILocation *, InlineSite> InlineSites;
+
+ /// Ordered list of top-level inlined call sites.
+ SmallVector<const DILocation *, 1> ChildSites;
+
+ SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
+
+ std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks;
+
+ // Lexical blocks containing local variables.
+ SmallVector<LexicalBlock *, 1> ChildBlocks;
+
+ std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
+ std::vector<std::tuple<const MCSymbol *, const MCSymbol *, const DIType *>>
+ HeapAllocSites;
+
+ const MCSymbol *Begin = nullptr;
+ const MCSymbol *End = nullptr;
+ unsigned FuncId = 0;
+ unsigned LastFileId = 0;
+
+ /// Number of bytes allocated in the prologue for all local stack objects.
+ unsigned FrameSize = 0;
+
+ /// Number of bytes of parameters on the stack.
+ unsigned ParamSize = 0;
+
+ /// Number of bytes pushed to save CSRs.
+ unsigned CSRSize = 0;
+
+ /// Adjustment to apply on x86 when using the VFRAME frame pointer.
+ int OffsetAdjustment = 0;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for local variables. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedLocalFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for stack parameters. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedParamFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ codeview::FrameProcedureOptions FrameProcOpts;
+
+ bool HasStackRealignment = false;
+
+ bool HaveLineInfo = false;
+
+ bool HasFramePointer = false;
+ };
+ FunctionInfo *CurFn = nullptr;
+
+ codeview::SourceLanguage CurrentSourceLanguage =
+ codeview::SourceLanguage::Masm;
+
+ // This map records the constant offset in DIExpression of the
+ // DIGlobalVariableExpression referencing the DIGlobalVariable.
+ DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets;
+
+ // Map used to separate variables according to the lexical scope they belong
+ // in. This is populated by recordLocalVariable() before
+ // collectLexicalBlocks() separates the variables between the FunctionInfo
+ // and LexicalBlocks.
+ DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables;
+
+ // Map to separate global variables according to the lexical scope they
+ // belong in. A null local scope represents the global scope.
+ typedef SmallVector<CVGlobalVariable, 1> GlobalVariableList;
+ DenseMap<const DIScope *, std::unique_ptr<GlobalVariableList>> ScopeGlobals;
+
+ // Array of global variables which need to be emitted into a COMDAT section.
+ SmallVector<CVGlobalVariable, 1> ComdatVariables;
+
+ // Array of non-COMDAT global variables.
+ SmallVector<CVGlobalVariable, 1> GlobalVariables;
+
+ /// List of static const data members to be emitted as S_CONSTANTs.
+ SmallVector<const DIDerivedType *, 4> StaticConstMembers;
+
+ /// The set of comdat .debug$S sections that we've seen so far. Each section
+ /// must start with a magic version number that must only be emitted once.
+ /// This set tracks which sections we've already opened.
+ DenseSet<MCSectionCOFF *> ComdatDebugSections;
+
+ /// Switch to the appropriate .debug$S section for GVSym. If GVSym, the symbol
+ /// of an emitted global value, is in a comdat COFF section, this will switch
+ /// to a new .debug$S section in that comdat. This method ensures that the
+ /// section starts with the magic version number on first use. If GVSym is
+ /// null, uses the main .debug$S section.
+ void switchToDebugSectionForSymbol(const MCSymbol *GVSym);
+
+ /// The next available function index for use with our .cv_* directives. Not
+ /// to be confused with type indices for LF_FUNC_ID records.
+ unsigned NextFuncId = 0;
+
+ InlineSite &getInlineSite(const DILocation *InlinedAt,
+ const DISubprogram *Inlinee);
+
+ codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
+
+ void calculateRanges(LocalVariable &Var,
+ const DbgValueHistoryMap::Entries &Entries);
+
+ /// Remember some debug info about each function. Keep it in a stable order to
+ /// emit at the end of the TU.
+ MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo;
+
+ /// Map from full file path to .cv_file id. Full paths are built from DIFiles
+ /// and are stored in FileToFilepathMap.
+ DenseMap<StringRef, unsigned> FileIdMap;
+
+ /// All inlined subprograms in the order they should be emitted.
+ SmallSetVector<const DISubprogram *, 4> InlinedSubprograms;
+
+ /// Map from a pair of a DI metadata node and its DI type (or scope), which
+ /// may be nullptr, to CodeView type indices. Primarily indexed by
+ /// {DIType*, DIType*} and {DISubprogram*, DIType*}.
+ ///
+ /// The second entry in the key is needed for methods, as the
+ /// DISubroutineType representing a static method type is shared with the
+ /// equivalent non-method function type.
+ DenseMap<std::pair<const DINode *, const DIType *>, codeview::TypeIndex>
+ TypeIndices;
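+
+ // Illustrative example (hypothetical): a static method and a free function
+ // may share a single DISubroutineType node, but they are keyed here as
+ // {SubTy, ClassTy} and {SubTy, nullptr} respectively, so each one receives
+ // its own CodeView type index.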
+
+ /// Map from DICompositeType* to complete type index. Non-record types are
+ /// always looked up in the normal TypeIndices map.
+ DenseMap<const DICompositeType *, codeview::TypeIndex> CompleteTypeIndices;
+
+ /// Complete record types to emit after all active type lowerings are
+ /// finished.
+ SmallVector<const DICompositeType *, 4> DeferredCompleteTypes;
+
+ /// Number of type lowering frames active on the stack.
+ unsigned TypeEmissionLevel = 0;
+
+ codeview::TypeIndex VBPType;
+
+ const DISubprogram *CurrentSubprogram = nullptr;
+
+ // The UDTs we have seen while processing types; each entry is a pair of the
+ // qualified type name and the DIType.
+ std::vector<std::pair<std::string, const DIType *>> LocalUDTs;
+ std::vector<std::pair<std::string, const DIType *>> GlobalUDTs;
+
+ using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
+ FileToFilepathMapTy FileToFilepathMap;
+
+ StringRef getFullFilepath(const DIFile *File);
+
+ unsigned maybeRecordFile(const DIFile *F);
+
+ void maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF);
+
+ void clear();
+
+ void setCurrentSubprogram(const DISubprogram *SP) {
+ CurrentSubprogram = SP;
+ LocalUDTs.clear();
+ }
+
+ /// Emit the magic version number at the start of a CodeView type or symbol
+ /// section. Appears at the front of every .debug$S or .debug$T or .debug$P
+ /// section.
+ void emitCodeViewMagicVersion();
+
+ void emitTypeInformation();
+
+ void emitTypeGlobalHashes();
+
+ void emitObjName();
+
+ void emitCompilerInformation();
+
+ void emitBuildInfo();
+
+ void emitInlineeLinesSubsection();
+
+ void emitDebugInfoForThunk(const Function *GV,
+ FunctionInfo &FI,
+ const MCSymbol *Fn);
+
+ void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
+
+ void emitDebugInfoForRetainedTypes();
+
+ void emitDebugInfoForUDTs(
+ const std::vector<std::pair<std::string, const DIType *>> &UDTs);
+
+ void collectDebugInfoForGlobals();
+ void emitDebugInfoForGlobals();
+ void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
+ void emitConstantSymbolRecord(const DIType *DTy, APSInt &Value,
+ const std::string &QualifiedName);
+ void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
+ void emitStaticConstMemberList();
+
+ /// Opens a subsection of the given kind in a .debug$S codeview section.
+ /// Returns an end label for use with endCVSubsection when the subsection is
+ /// finished.
+ MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind);
+ void endCVSubsection(MCSymbol *EndLabel);
+
+ /// Opens a symbol record of the given kind. Returns an end label for use with
+ /// endSymbolRecord.
+ MCSymbol *beginSymbolRecord(codeview::SymbolKind Kind);
+ void endSymbolRecord(MCSymbol *SymEnd);
+
+ /// Emits an S_END, S_INLINESITE_END, or S_PROC_ID_END record. These records
+ /// are empty, so we emit them with a simpler assembly sequence that doesn't
+ /// involve labels.
+ void emitEndSymbolRecord(codeview::SymbolKind EndKind);
+
+ void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
+ const InlineSite &Site);
+
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+
+ void collectGlobalVariableInfo();
+ void collectVariableInfo(const DISubprogram *SP);
+
+ void collectVariableInfoFromMFTable(DenseSet<InlinedEntity> &Processed);
+
+ // Construct the lexical block tree for a routine, pruning empty lexical
+ // scopes, and populate it with local variables.
+ void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes,
+ SmallVectorImpl<LexicalBlock *> &Blocks,
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals);
+ void collectLexicalBlockInfo(LexicalScope &Scope,
+ SmallVectorImpl<LexicalBlock *> &ParentBlocks,
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals);
+
+ /// Records information about a local variable in the appropriate scope. In
+ /// particular, locals from inlined code live inside the inlining site.
+ void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS);
+
+ /// Emits local variables in the appropriate order.
+ void emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals);
+
+ /// Emits an S_LOCAL record and its associated defined ranges.
+ void emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var);
+
+ /// Emits a sequence of lexical block scopes and their children.
+ void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
+ const FunctionInfo& FI);
+
+ /// Emit a lexical block scope and its children.
+ void emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI);
+
+ /// Translates the DIType to codeview if necessary and returns a type index
+ /// for it.
+ codeview::TypeIndex getTypeIndex(const DIType *Ty,
+ const DIType *ClassTy = nullptr);
+
+ codeview::TypeIndex
+ getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy);
+
+ codeview::TypeIndex getTypeIndexForReferenceTo(const DIType *Ty);
+
+ codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
+ const DICompositeType *Class);
+
+ codeview::TypeIndex getScopeIndex(const DIScope *Scope);
+
+ codeview::TypeIndex getVBPTypeIndex();
+
+ void addToUDTs(const DIType *Ty);
+
+ void addUDTSrcLine(const DIType *Ty, codeview::TypeIndex TI);
+
+ codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
+ codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeString(const DIStringType *Ty);
+ codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
+ codeview::TypeIndex lowerTypePointer(
+ const DIDerivedType *Ty,
+ codeview::PointerOptions PO = codeview::PointerOptions::None);
+ codeview::TypeIndex lowerTypeMemberPointer(
+ const DIDerivedType *Ty,
+ codeview::PointerOptions PO = codeview::PointerOptions::None);
+ codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
+ codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeMemberFunction(
+ const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment,
+ bool IsStaticMethod,
+ codeview::FunctionOptions FO = codeview::FunctionOptions::None);
+ codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
+
+ /// Symbol records should point to complete types, but type records should
+ /// always point to incomplete types to avoid cycles in the type graph. Only
+ /// use this entry point when generating symbol records. The complete and
+ /// incomplete type indices only differ for record types. All other types use
+ /// the same index.
+ codeview::TypeIndex getCompleteTypeIndex(const DIType *Ty);
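+
+ // Illustrative example for getCompleteTypeIndex (assumed output shape): for
+ //   struct S { S *Next; };
+ // the LF_POINTER type record for "S*" refers to the forward declaration of
+ // S (the incomplete index), breaking the cycle, while a symbol record such
+ // as S_GDATA32 describing a global of type S uses the complete index so the
+ // debugger can see the full member list.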
+
+ codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty);
+ codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty);
+
+ struct TypeLoweringScope;
+
+ void emitDeferredCompleteTypes();
+
+ void collectMemberInfo(ClassInfo &Info, const DIDerivedType *DDTy);
+ ClassInfo collectClassInfo(const DICompositeType *Ty);
+
+ /// Common record member lowering functionality for record types, which are
+ /// structs, classes, and unions. Returns the field list index and the member
+ /// count.
+ std::tuple<codeview::TypeIndex, codeview::TypeIndex, unsigned, bool>
+ lowerRecordFieldList(const DICompositeType *Ty);
+
+ /// Inserts {{Node, ClassTy}, TI} into TypeIndices and checks for duplicates.
+ codeview::TypeIndex recordTypeIndexForDINode(const DINode *Node,
+ codeview::TypeIndex TI,
+ const DIType *ClassTy = nullptr);
+
+ /// Collect the names of parent scopes, innermost to outermost. Return the
+ /// innermost subprogram scope if present. Ensure that parent type scopes are
+ /// inserted into the type table.
+ const DISubprogram *
+ collectParentScopeNames(const DIScope *Scope,
+ SmallVectorImpl<StringRef> &ParentScopeNames);
+ std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name);
+ std::string getFullyQualifiedName(const DIScope *Scope);
+
+ unsigned getPointerSizeInBytes();
+
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Gather post-function debug information.
+ void endFunctionImpl(const MachineFunction *) override;
+
+ /// Check if the current module is in Fortran.
+ bool moduleIsInFortran() {
+ return CurrentSourceLanguage == codeview::SourceLanguage::Fortran;
+ }
+
+public:
+ CodeViewDebug(AsmPrinter *AP);
+
+ void beginModule(Module *M) override;
+
+ void setSymbolSize(const MCSymbol *, uint64_t) override {}
+
+ /// Emit the COFF section that holds the line table information.
+ void endModule() override;
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+};
+
+template <> struct DenseMapInfo<CodeViewDebug::LocalVarDef> {
+
+ static inline CodeViewDebug::LocalVarDef getEmptyKey() {
+ return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL);
+ }
+
+ static inline CodeViewDebug::LocalVarDef getTombstoneKey() {
+ return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL - 1ULL);
+ }
+
+ static unsigned getHashValue(const CodeViewDebug::LocalVarDef &DR) {
+ return CodeViewDebug::LocalVarDef::toOpaqueValue(DR) * 37ULL;
+ }
+
+ static bool isEqual(const CodeViewDebug::LocalVarDef &LHS,
+ const CodeViewDebug::LocalVarDef &RHS) {
+ return CodeViewDebug::LocalVarDef::toOpaqueValue(LHS) ==
+ CodeViewDebug::LocalVarDef::toOpaqueValue(RHS);
+ }
+};
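+
+// Note (informal): this specialization is what allows LocalVarDef to key the
+// MapVector used for LocalVariable::DefRanges above. The all-ones and
+// all-ones-minus-one bit patterns are reserved as the empty and tombstone
+// keys and are assumed never to describe a real def range.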
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 000000000000..619155cafe92
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,872 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DIE.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ // Explicitly cast to an integer type for which FoldingSetNodeID has
+ // overloads. Otherwise MSVC 2010 thinks this call is ambiguous.
+ ID.AddInteger(unsigned(Attribute));
+ ID.AddInteger(unsigned(Form));
+ if (Form == dwarf::DW_FORM_implicit_const)
+ ID.AddInteger(Value);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(unsigned(Tag));
+ ID.AddInteger(unsigned(Children));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(const AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ AP->emitULEB128(Tag, dwarf::TagString(Tag).data());
+
+ // Emit whether it has children DIEs.
+ AP->emitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data());
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ AP->emitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()).data());
+
+ // Emit form type.
+#ifndef NDEBUG
+ // Could be an assertion, but this way we can see the failing form code
+ // easily, which helps track down where it came from.
+ if (!dwarf::isValidFormForVersion(AttrData.getForm(),
+ AP->getDwarfVersion())) {
+ LLVM_DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm())
+ << " for DWARF version " << AP->getDwarfVersion()
+ << "\n");
+ llvm_unreachable("Invalid form for specified DWARF version");
+ }
+#endif
+ AP->emitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()).data());
+
+ // Emit value for DW_FORM_implicit_const.
+ if (AttrData.getForm() == dwarf::DW_FORM_implicit_const)
+ AP->emitSLEB128(AttrData.getValue());
+ }
+
+ // Mark end of abbreviation.
+ AP->emitULEB128(0, "EOM(1)");
+ AP->emitULEB128(0, "EOM(2)");
+}
+
+LLVM_DUMP_METHOD
+void DIEAbbrev::print(raw_ostream &O) const {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(Children)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm());
+
+ if (Data[i].getForm() == dwarf::DW_FORM_implicit_const)
+ O << " " << Data[i].getValue();
+
+ O << '\n';
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEAbbrev::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevSet Implementation
+//===----------------------------------------------------------------------===//
+
+DIEAbbrevSet::~DIEAbbrevSet() {
+ for (DIEAbbrev *Abbrev : Abbreviations)
+ Abbrev->~DIEAbbrev();
+}
+
+DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) {
+
+ FoldingSetNodeID ID;
+ DIEAbbrev Abbrev = Die.generateAbbrev();
+ Abbrev.Profile(ID);
+
+ void *InsertPos;
+ if (DIEAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Die.setAbbrevNumber(Existing->getNumber());
+ return *Existing;
+ }
+
+ // Move the abbreviation to the heap and assign a number.
+ DIEAbbrev *New = new (Alloc) DIEAbbrev(std::move(Abbrev));
+ Abbreviations.push_back(New);
+ New->setNumber(Abbreviations.size());
+ Die.setAbbrevNumber(Abbreviations.size());
+
+ // Store it for lookup.
+ AbbreviationsSet.InsertNode(New, InsertPos);
+ return *New;
+}
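+
+// Illustrative consequence of the uniquing above: two DIEs with the same tag,
+// children flag, and attribute/form list (for example, most DW_TAG_variable
+// entries in a CU) end up sharing one abbreviation number, so .debug_abbrev
+// stays small no matter how many DIEs reference it.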
+
+void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ AP->OutStreamer->switchSection(Section);
+ AP->emitDwarfAbbrevs(Abbreviations);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE *DIE::getParent() const { return dyn_cast_if_present<DIE *>(Owner); }
+
+DIEAbbrev DIE::generateAbbrev() const {
+ DIEAbbrev Abbrev(Tag, hasChildren());
+ for (const DIEValue &V : values())
+ if (V.getForm() == dwarf::DW_FORM_implicit_const)
+ Abbrev.AddImplicitConstAttribute(V.getAttribute(),
+ V.getDIEInteger().getValue());
+ else
+ Abbrev.AddAttribute(V.getAttribute(), V.getForm());
+ return Abbrev;
+}
+
+uint64_t DIE::getDebugSectionOffset() const {
+ const DIEUnit *Unit = getUnit();
+ assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset");
+ return Unit->getDebugSectionOffset() + getOffset();
+}
+
+const DIE *DIE::getUnitDie() const {
+ const DIE *p = this;
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit ||
+ p->getTag() == dwarf::DW_TAG_skeleton_unit ||
+ p->getTag() == dwarf::DW_TAG_type_unit)
+ return p;
+ p = p->getParent();
+ }
+ return nullptr;
+}
+
+DIEUnit *DIE::getUnit() const {
+ const DIE *UnitDie = getUnitDie();
+ if (UnitDie)
+ return dyn_cast_if_present<DIEUnit *>(UnitDie->Owner);
+ return nullptr;
+}
+
+DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
+ // Iterate through all the attributes until we find the one we're
+ // looking for; if we can't find it, return an empty DIEValue.
+ for (const auto &V : values())
+ if (V.getAttribute() == Attribute)
+ return V;
+ return DIEValue();
+}
+
+LLVM_DUMP_METHOD
+static void printValues(raw_ostream &O, const DIEValueList &Values,
+ StringRef Type, unsigned Size, unsigned IndentCount) {
+ O << Type << ": Size: " << Size << "\n";
+
+ unsigned I = 0;
+ const std::string Indent(IndentCount, ' ');
+ for (const auto &V : Values.values()) {
+ O << Indent;
+ O << "Blk[" << I++ << "]";
+ O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
+ V.print(O);
+ O << "\n";
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
+ const std::string Indent(IndentCount, ' ');
+ O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this)
+ << ", Offset: " << Offset << ", Size: " << Size << "\n";
+
+ O << Indent << dwarf::TagString(getTag()) << " "
+ << dwarf::ChildrenString(hasChildren()) << "\n";
+
+ IndentCount += 2;
+ for (const auto &V : values()) {
+ O << Indent;
+ O << dwarf::AttributeString(V.getAttribute());
+ O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
+ V.print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (const auto &Child : children())
+ Child.print(O, IndentCount + 4);
+
+ O << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIE::dump() const {
+ print(dbgs());
+}
+#endif
+
+unsigned DIE::computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams,
+ DIEAbbrevSet &AbbrevSet,
+ unsigned CUOffset) {
+ // Unique the abbreviation and fill in the abbreviation number so this DIE
+ // can be emitted.
+ const DIEAbbrev &Abbrev = AbbrevSet.uniqueAbbreviation(*this);
+
+ // Set compile/type unit relative offset of this DIE.
+ setOffset(CUOffset);
+
+ // Add the byte size of the abbreviation code.
+ CUOffset += getULEB128Size(getAbbrevNumber());
+
+ // Add the byte size of all the DIE attribute values.
+ for (const auto &V : values())
+ CUOffset += V.sizeOf(FormParams);
+
+ // Let the children compute their offsets and abbreviation numbers.
+ if (hasChildren()) {
+ (void)Abbrev;
+ assert(Abbrev.hasChildren() && "Children flag not set");
+
+ for (auto &Child : children())
+ CUOffset =
+ Child.computeOffsetsAndAbbrevs(FormParams, AbbrevSet, CUOffset);
+
+ // Each child chain is terminated with a zero byte, adjust the offset.
+ CUOffset += sizeof(int8_t);
+ }
+
+ // Compute the byte size of this DIE and all of its children correctly. This
+ // is needed so that the top-level DIE can help the compile unit set its length
+ // correctly.
+ setSize(CUOffset - getOffset());
+ return CUOffset;
+}
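+
+// Worked example with hypothetical sizes: for a DIE whose abbreviation code
+// is 3 (one ULEB128 byte) and whose attribute values total 10 bytes, with a
+// single 6-byte child, the children start at CUOffset + 11 and the returned
+// offset is CUOffset + 11 + 6 + 1, where the trailing byte is the zero
+// terminator of the child chain; the DIE's size is therefore 18 bytes.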
+
+//===----------------------------------------------------------------------===//
+// DIEUnit Implementation
+//===----------------------------------------------------------------------===//
+DIEUnit::DIEUnit(dwarf::Tag UnitTag) : Die(UnitTag) {
+ Die.Owner = this;
+ assert((UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_skeleton_unit ||
+ UnitTag == dwarf::DW_TAG_type_unit ||
+ UnitTag == dwarf::DW_TAG_partial_unit) &&
+ "expected a unit TAG");
+}
+
+void DIEValue::emitValue(const AsmPrinter *AP) const {
+ switch (Ty) {
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+#define HANDLE_DIEVALUE(T) \
+ case is##T: \
+ getDIE##T().emitValue(AP, Form); \
+ break;
+#include "llvm/CodeGen/DIEValue.def"
+ }
+}
+
+unsigned DIEValue::sizeOf(const dwarf::FormParams &FormParams) const {
+ switch (Ty) {
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+#define HANDLE_DIEVALUE(T) \
+ case is##T: \
+ return getDIE##T().sizeOf(FormParams, Form);
+#include "llvm/CodeGen/DIEValue.def"
+ }
+ llvm_unreachable("Unknown DIE kind");
+}
+
+LLVM_DUMP_METHOD
+void DIEValue::print(raw_ostream &O) const {
+ switch (Ty) {
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+#define HANDLE_DIEVALUE(T) \
+ case is##T: \
+ getDIE##T().print(O); \
+ break;
+#include "llvm/CodeGen/DIEValue.def"
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEValue::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_implicit_const:
+ case dwarf::DW_FORM_flag_present:
+ // Emit something to keep the lines and comments in sync.
+ // FIXME: Is there a better way to do this?
+ Asm->OutStreamer->addBlankLine();
+ return;
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_addrx3:
+ case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_sig8:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_ref_sup8:
+ case dwarf::DW_FORM_GNU_ref_alt:
+ case dwarf::DW_FORM_GNU_strp_alt:
+ case dwarf::DW_FORM_line_strp:
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_strp_sup:
+ case dwarf::DW_FORM_addr:
+ case dwarf::DW_FORM_ref_addr:
+ Asm->OutStreamer->emitIntValue(Integer,
+ sizeOf(Asm->getDwarfFormParams(), Form));
+ return;
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_GNU_addr_index:
+ case dwarf::DW_FORM_ref_udata:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
+ case dwarf::DW_FORM_udata:
+ Asm->emitULEB128(Integer);
+ return;
+ case dwarf::DW_FORM_sdata:
+ Asm->emitSLEB128(Integer);
+ return;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+/// sizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ if (std::optional<uint8_t> FixedSize =
+ dwarf::getFixedFormByteSize(Form, FormParams))
+ return *FixedSize;
+
+ switch (Form) {
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_GNU_addr_index:
+ case dwarf::DW_FORM_ref_udata:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
+ case dwarf::DW_FORM_udata:
+ return getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata:
+ return getSLEB128Size(Integer);
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEInteger::print(raw_ostream &O) const {
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEExpr Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit expression value.
+///
+void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->emitDebugValue(Expr, sizeOf(AP->getDwarfFormParams(), Form));
+}
+
+/// SizeOf - Determine size of expression value in bytes.
+///
+unsigned DIEExpr::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return FormParams.getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ bool IsSectionRelative = Form != dwarf::DW_FORM_addr;
+ AP->emitLabelReference(Label, sizeOf(AP->getDwarfFormParams(), Form),
+ IsSectionRelative);
+}
+
+/// sizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_strp:
+ return FormParams.getDwarfOffsetByteSize();
+ case dwarf::DW_FORM_addr:
+ return FormParams.AddrSize;
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
+
+//===----------------------------------------------------------------------===//
+// DIEBaseTypeRef Implementation
+//===----------------------------------------------------------------------===//
+
+void DIEBaseTypeRef::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset();
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
+ AP->emitULEB128(Offset, nullptr, ULEB128PadSize);
+}
+
+unsigned DIEBaseTypeRef::sizeOf(const dwarf::FormParams &, dwarf::Form) const {
+ return ULEB128PadSize;
+}
+
+LLVM_DUMP_METHOD
+void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index; }
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->emitLabelDifference(LabelHi, LabelLo,
+ sizeOf(AP->getDwarfFormParams(), Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return FormParams.getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEDelta::print(raw_ostream &O) const {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ // Index of the string in the string table.
+ switch (Form) {
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_strx4:
+ DIEInteger(S.getIndex()).emitValue(AP, Form);
+ return;
+ case dwarf::DW_FORM_strp:
+ if (AP->doesDwarfUseRelocationsAcrossSections())
+ DIELabel(S.getSymbol()).emitValue(AP, Form);
+ else
+ DIEInteger(S.getOffset()).emitValue(AP, Form);
+ return;
+ default:
+ llvm_unreachable("Expected valid string form");
+ }
+}
+
+/// sizeOf - Determine size of string value in bytes.
+///
+unsigned DIEString::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ // Index of the string in the string table.
+ switch (Form) {
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_strx4:
+ return DIEInteger(S.getIndex()).sizeOf(FormParams, Form);
+ case dwarf::DW_FORM_strp:
+ if (FormParams.DwarfUsesRelocationsAcrossSections)
+ return DIELabel(S.getSymbol()).sizeOf(FormParams, Form);
+ return DIEInteger(S.getOffset()).sizeOf(FormParams, Form);
+ default:
+ llvm_unreachable("Expected valid string form");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEString::print(raw_ostream &O) const {
+ O << "String: " << S.getString();
+}
+
+//===----------------------------------------------------------------------===//
+// DIEInlineString Implementation
+//===----------------------------------------------------------------------===//
+void DIEInlineString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_string) {
+ AP->OutStreamer->emitBytes(S);
+ AP->emitInt8(0);
+ return;
+ }
+ llvm_unreachable("Expected valid string form");
+}
+
+unsigned DIEInlineString::sizeOf(const dwarf::FormParams &, dwarf::Form) const {
+ // Emit string bytes + NULL byte.
+ return S.size() + 1;
+}
+
+LLVM_DUMP_METHOD
+void DIEInlineString::print(raw_ostream &O) const {
+ O << "InlineString: " << S;
+}
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+
+ switch (Form) {
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_ref8:
+ AP->OutStreamer->emitIntValue(Entry->getOffset(),
+ sizeOf(AP->getDwarfFormParams(), Form));
+ return;
+
+ case dwarf::DW_FORM_ref_udata:
+ AP->emitULEB128(Entry->getOffset());
+ return;
+
+ case dwarf::DW_FORM_ref_addr: {
+ // Get the absolute offset for this DIE within the debug info/types section.
+ uint64_t Addr = Entry->getDebugSectionOffset();
+ if (const MCSymbol *SectionSym =
+ Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
+ AP->emitLabelPlusOffset(SectionSym, Addr,
+ sizeOf(AP->getDwarfFormParams(), Form), true);
+ return;
+ }
+
+ AP->OutStreamer->emitIntValue(Addr, sizeOf(AP->getDwarfFormParams(), Form));
+ return;
+ }
+ default:
+ llvm_unreachable("Improper form for DIE reference");
+ }
+}
+
+unsigned DIEEntry::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_ref1:
+ return 1;
+ case dwarf::DW_FORM_ref2:
+ return 2;
+ case dwarf::DW_FORM_ref4:
+ return 4;
+ case dwarf::DW_FORM_ref8:
+ return 8;
+ case dwarf::DW_FORM_ref_udata:
+ return getULEB128Size(Entry->getOffset());
+ case dwarf::DW_FORM_ref_addr:
+ return FormParams.getRefAddrByteSize();
+
+ default:
+ llvm_unreachable("Improper form for DIE reference");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEEntry::print(raw_ostream &O) const {
+ O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
+}
+
+//===----------------------------------------------------------------------===//
+// DIELoc Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIELoc::computeSize(const dwarf::FormParams &FormParams) const {
+ if (!Size) {
+ for (const auto &V : values())
+ Size += V.sizeOf(FormParams);
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit location data.
+///
+void DIELoc::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+ switch (Form) {
+ default: llvm_unreachable("Improper form for block");
+ case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
+ case dwarf::DW_FORM_block:
+ case dwarf::DW_FORM_exprloc:
+ Asm->emitULEB128(Size);
+ break;
+ }
+
+ for (const auto &V : values())
+ V.emitValue(Asm);
+}
+
+/// sizeOf - Determine size of location data in bytes.
+///
+unsigned DIELoc::sizeOf(const dwarf::FormParams &, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block:
+ case dwarf::DW_FORM_exprloc:
+ return Size + getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIELoc::print(raw_ostream &O) const {
+ printValues(O, *this, "ExprLoc", Size, 5);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIEBlock::computeSize(const dwarf::FormParams &FormParams) const {
+ if (!Size) {
+ for (const auto &V : values())
+ Size += V.sizeOf(FormParams);
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+ switch (Form) {
+ default: llvm_unreachable("Improper form for block");
+ case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
+ case dwarf::DW_FORM_exprloc:
+ case dwarf::DW_FORM_block:
+ Asm->emitULEB128(Size);
+ break;
+ case dwarf::DW_FORM_string: break;
+ case dwarf::DW_FORM_data16: break;
+ }
+
+ for (const auto &V : values())
+ V.emitValue(Asm);
+}
+
+/// sizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::sizeOf(const dwarf::FormParams &, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_exprloc:
+ case dwarf::DW_FORM_block: return Size + getULEB128Size(Size);
+ case dwarf::DW_FORM_data16: return 16;
+ default: llvm_unreachable("Improper form for block");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEBlock::print(raw_ostream &O) const {
+ printValues(O, *this, "Blk", Size, 5);
+}
+
+//===----------------------------------------------------------------------===//
+// DIELocList Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIELocList::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_loclistx:
+ return getULEB128Size(Index);
+ case dwarf::DW_FORM_data4:
+ assert(FormParams.Format != dwarf::DWARF64 &&
+ "DW_FORM_data4 is not suitable to emit a pointer to a location list "
+ "in the 64-bit DWARF format");
+ return 4;
+ case dwarf::DW_FORM_data8:
+ assert(FormParams.Format == dwarf::DWARF64 &&
+ "DW_FORM_data8 is not suitable to emit a pointer to a location list "
+ "in the 32-bit DWARF format");
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return FormParams.getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+/// EmitValue - Emit location list value.
+///
+void DIELocList::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_loclistx) {
+ AP->emitULEB128(Index);
+ return;
+ }
+ DwarfDebug *DD = AP->getDwarfDebug();
+ MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
+ AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
+}
+
+LLVM_DUMP_METHOD
+void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
+
+//===----------------------------------------------------------------------===//
+// DIEAddrOffset Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIEAddrOffset::sizeOf(const dwarf::FormParams &FormParams,
+ dwarf::Form) const {
+ return Addr.sizeOf(FormParams, dwarf::DW_FORM_addrx) +
+ Offset.sizeOf(FormParams, dwarf::DW_FORM_data4);
+}
+
+/// EmitValue - Emit address/offset value.
+///
+void DIEAddrOffset::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ Addr.emitValue(AP, dwarf::DW_FORM_addrx);
+ Offset.emitValue(AP, dwarf::DW_FORM_data4);
+}
+
+LLVM_DUMP_METHOD
+void DIEAddrOffset::print(raw_ostream &O) const {
+ O << "AddrOffset: ";
+ Addr.print(O);
+ O << " + ";
+ Offset.print(O);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
new file mode 100644
index 000000000000..08ed78eb20a1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -0,0 +1,440 @@
+//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIEHash.h"
+#include "ByteStreamer.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+/// Grabs the string in whichever attribute is passed in and returns
+/// a reference to it.
+static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
+ // Iterate through all the attributes until we find the one we're
+ // looking for, if we can't find it return an empty string.
+ for (const auto &V : Die.values())
+ if (V.getAttribute() == Attr)
+ return V.getDIEString().getString();
+
+ return StringRef("");
+}
+
+/// Adds the string in \p Str to the hash. This also hashes
+/// a trailing NULL with the string.
+void DIEHash::addString(StringRef Str) {
+ LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+ Hash.update(Str);
+ Hash.update(ArrayRef((uint8_t)'\0'));
+}
+
+// FIXME: The LEB128 routines are copied and only slightly modified out of
+// LEB128.h.
+
+/// Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+void DIEHash::addULEB128(uint64_t Value) {
+ LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+ Hash.update(Byte);
+ } while (Value != 0);
+}
+
+void DIEHash::addSLEB128(int64_t Value) {
+ LLVM_DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n");
+ bool More;
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ More = !((((Value == 0) && ((Byte & 0x40) == 0)) ||
+ ((Value == -1) && ((Byte & 0x40) != 0))));
+ if (More)
+ Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+ Hash.update(Byte);
+ } while (More);
+}
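+
+// Worked examples of the two encodings above (standard LEB128 behaviour):
+//   addULEB128(300) hashes the bytes 0xAC 0x02 (300 = 0b10'0101100);
+//   addSLEB128(-2)  hashes the single byte 0x7E (sign bit 0x40 set).
+// The hash consumes these byte sequences, not the raw integer values.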
+
+/// Adds the context of \p Parent to the hash.
+void DIEHash::addParentContext(const DIE &Parent) {
+
+ LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n");
+
+ // [7.27.2] For each surrounding type or namespace beginning with the
+ // outermost such construct...
+ SmallVector<const DIE *, 1> Parents;
+ const DIE *Cur = &Parent;
+ while (Cur->getParent()) {
+ Parents.push_back(Cur);
+ Cur = Cur->getParent();
+ }
+ assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||
+ Cur->getTag() == dwarf::DW_TAG_type_unit);
+
+ // Reverse iterate over our list to go from the outermost construct to the
+ // innermost.
+ for (const DIE *Die : llvm::reverse(Parents)) {
+ // ... Append the letter "C" to the sequence...
+ addULEB128('C');
+
+ // ... Followed by the DWARF tag of the construct...
+ addULEB128(Die->getTag());
+
+ // ... Then the name, taken from the DW_AT_name attribute.
+ StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name);
+ LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
+ if (!Name.empty())
+ addString(Name);
+ }
+}
+
+// Collect all of the attributes for a particular DIE in a single structure.
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
+
+ for (const auto &V : Die.values()) {
+ LLVM_DEBUG(dbgs() << "Attribute: "
+ << dwarf::AttributeString(V.getAttribute())
+ << " added.\n");
+ switch (V.getAttribute()) {
+#define HANDLE_DIE_HASH_ATTR(NAME) \
+ case dwarf::NAME: \
+ Attrs.NAME = V; \
+ break;
+#include "DIEHashAttributes.def"
+ default:
+ break;
+ }
+ }
+}
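+
+// Illustrative expansion: with HANDLE_DIE_HASH_ATTR(DW_AT_name) listed in
+// DIEHashAttributes.def, the switch above gains
+//   case dwarf::DW_AT_name: Attrs.DW_AT_name = V; break;
+// so only the attributes named in that .def file ever participate in the
+// hash; everything else falls through to the default case and is ignored.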
+
+void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,
+ const DIE &Entry, StringRef Name) {
+ // append the letter 'N'
+ addULEB128('N');
+
+ // the DWARF attribute code (DW_AT_type or DW_AT_friend),
+ addULEB128(Attribute);
+
+ // the context of the tag,
+ if (const DIE *Parent = Entry.getParent())
+ addParentContext(*Parent);
+
+ // the letter 'E',
+ addULEB128('E');
+
+ // and the name of the type.
+ addString(Name);
+
+ // Currently DW_TAG_friends are not used by Clang, but if they do become so,
+ // here's the relevant spec text to implement:
+ //
+ // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,
+ // the context is omitted and the name to be used is the ABI-specific name
+ // of the subprogram (e.g., the mangled linker name).
+}
+
+void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,
+ unsigned DieNumber) {
+ // a) If T is in the list of [previously hashed types], use the letter
+ // 'R' as the marker
+ addULEB128('R');
+
+ addULEB128(Attribute);
+
+ // and use the unsigned LEB128 encoding of [the index of T in the
+ // list] as the attribute value;
+ addULEB128(DieNumber);
+}
+
+void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+ const DIE &Entry) {
+ assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend "
+ "tags. Add support here when there's "
+ "a use case");
+ // Step 5
+ // If the tag in Step 3 is one of [the below tags]
+ if ((Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_reference_type ||
+ Tag == dwarf::DW_TAG_rvalue_reference_type ||
+ Tag == dwarf::DW_TAG_ptr_to_member_type) &&
+ // and the referenced type (via the [below attributes])
+ // FIXME: This seems overly restrictive, and causes hash mismatches when
+ // there's a decl/def difference in the containing type of a
+ // ptr_to_member_type, but it's what DWARF says, for some reason.
+ Attribute == dwarf::DW_AT_type) {
+ // ... has a DW_AT_name attribute,
+ StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);
+ if (!Name.empty()) {
+ hashShallowTypeReference(Attribute, Entry, Name);
+ return;
+ }
+ }
+
+ unsigned &DieNumber = Numbering[&Entry];
+ if (DieNumber) {
+ hashRepeatedTypeReference(Attribute, DieNumber);
+ return;
+ }
+
+ // otherwise, b) use the letter 'T' as the marker, ...
+ addULEB128('T');
+
+ addULEB128(Attribute);
+
+ // ... process the type T recursively by performing Steps 2 through 7, and
+ // use the result as the attribute value.
+ DieNumber = Numbering.size();
+ computeHash(Entry);
+}
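+
+// Illustrative example: for "struct S { S *Next; };" the pointer type's
+// DW_AT_type refers back to S, which has a DW_AT_name, so the shallow path
+// above hashes only 'N', DW_AT_type, S's context, 'E', "S" instead of
+// recursing into S again; a later reference to an already-numbered type DIE
+// would instead hash 'R' followed by its index.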
+
+void DIEHash::hashRawTypeReference(const DIE &Entry) {
+ unsigned &DieNumber = Numbering[&Entry];
+ if (DieNumber) {
+ addULEB128('R');
+ addULEB128(DieNumber);
+ return;
+ }
+ DieNumber = Numbering.size();
+ addULEB128('T');
+ computeHash(Entry);
+}
+
+// Hash all of the values in a block-like set of values. This assumes that
+// all of the data is going to be added as integers.
+void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
+ for (const auto &V : Values)
+ if (V.getType() == DIEValue::isBaseTypeRef) {
+ const DIE &C =
+ *CU->ExprRefedBaseTypes[V.getDIEBaseTypeRef().getIndex()].Die;
+ StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
+ assert(!Name.empty() &&
+ "Base types referenced from DW_OP_convert should have a name");
+ hashNestedType(C, Name);
+ } else
+ Hash.update((uint64_t)V.getDIEInteger().getValue());
+}
+
+// Hash the contents of a loclistptr class.
+void DIEHash::hashLocList(const DIELocList &LocList) {
+ HashingByteStreamer Streamer(*this);
+ DwarfDebug &DD = *AP->getDwarfDebug();
+ const DebugLocStream &Locs = DD.getDebugLocs();
+ const DebugLocStream::List &List = Locs.getList(LocList.getValue());
+ for (const DebugLocStream::Entry &Entry : Locs.getEntries(List))
+ DD.emitDebugLocEntry(Streamer, Entry, List.CU);
+}
+
+// Hash an individual attribute \param Value based on the type of attribute and
+// the form.
+void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
+ dwarf::Attribute Attribute = Value.getAttribute();
+
+ // Other attribute values use the letter 'A' as the marker, and the value
+ // consists of the form code (encoded as an unsigned LEB128 value) followed by
+ // the encoding of the value according to the form code. To ensure
+ // reproducibility of the signature, the set of forms used in the signature
+ // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
+ // DW_FORM_string, and DW_FORM_block.
+
+ switch (Value.getType()) {
+ case DIEValue::isNone:
+ llvm_unreachable("Expected valid DIEValue");
+
+ // 7.27 Step 3
+ // ... An attribute that refers to another type entry T is processed as
+ // follows:
+ case DIEValue::isEntry:
+ hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry());
+ break;
+ case DIEValue::isInteger: {
+ addULEB128('A');
+ addULEB128(Attribute);
+ switch (Value.getForm()) {
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_udata:
+ case dwarf::DW_FORM_sdata:
+ addULEB128(dwarf::DW_FORM_sdata);
+ addSLEB128((int64_t)Value.getDIEInteger().getValue());
+ break;
+ // DW_FORM_flag_present is just a flag with a value of one. We still give it
+ // a value, so just use the value.
+ case dwarf::DW_FORM_flag_present:
+ case dwarf::DW_FORM_flag:
+ addULEB128(dwarf::DW_FORM_flag);
+ addULEB128((int64_t)Value.getDIEInteger().getValue());
+ break;
+ default:
+ llvm_unreachable("Unknown integer form!");
+ }
+ break;
+ }
+ case DIEValue::isString:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_string);
+ addString(Value.getDIEString().getString());
+ break;
+ case DIEValue::isInlineString:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_string);
+ addString(Value.getDIEInlineString().getString());
+ break;
+ case DIEValue::isBlock:
+ case DIEValue::isLoc:
+ case DIEValue::isLocList:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_block);
+ if (Value.getType() == DIEValue::isBlock) {
+ addULEB128(Value.getDIEBlock().computeSize(AP->getDwarfFormParams()));
+ hashBlockData(Value.getDIEBlock().values());
+ } else if (Value.getType() == DIEValue::isLoc) {
+ addULEB128(Value.getDIELoc().computeSize(AP->getDwarfFormParams()));
+ hashBlockData(Value.getDIELoc().values());
+ } else {
+ // We could add the block length, but that would take
+ // a bit of work and not add a lot of uniqueness
+ // to the hash in any way we could test.
+ hashLocList(Value.getDIELocList());
+ }
+ break;
+ // FIXME: It's uncertain whether or not we should handle this at the moment.
+ case DIEValue::isExpr:
+ case DIEValue::isLabel:
+ case DIEValue::isBaseTypeRef:
+ case DIEValue::isDelta:
+ case DIEValue::isAddrOffset:
+ llvm_unreachable("Add support for additional value types.");
+ }
+}
+
+// Go through the attributes from \param Attrs in the order specified in 7.27.4
+// and hash them.
+void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
+#define HANDLE_DIE_HASH_ATTR(NAME) \
+ { \
+ if (Attrs.NAME) \
+ hashAttribute(Attrs.NAME, Tag); \
+ }
+#include "DIEHashAttributes.def"
+ // FIXME: Add the extended attributes.
+}
+
+// Add all of the attributes for \param Die to the hash.
+void DIEHash::addAttributes(const DIE &Die) {
+ DIEAttrs Attrs = {};
+ collectAttributes(Die, Attrs);
+ hashAttributes(Attrs, Die.getTag());
+}
+
+void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
+ // 7.27 Step 7
+ // ... append the letter 'S',
+ addULEB128('S');
+
+ // the tag of C,
+ addULEB128(Die.getTag());
+
+ // and the name.
+ addString(Name);
+}
+
+// Compute the hash of a DIE. This is based on the type signature computation
+// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
+// flattened description of the DIE.
+void DIEHash::computeHash(const DIE &Die) {
+ // Append the letter 'D', followed by the DWARF tag of the DIE.
+ addULEB128('D');
+ addULEB128(Die.getTag());
+
+ // Add each of the attributes of the DIE.
+ addAttributes(Die);
+
+ // Then hash each of the children of the DIE.
+ for (const auto &C : Die.children()) {
+ // 7.27 Step 7
+ // If C is a nested type entry or a member function entry, ...
+ if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram && isType(C.getParent()->getTag()))) {
+ StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
+ // ... and has a DW_AT_name attribute
+ if (!Name.empty()) {
+ hashNestedType(C, Name);
+ continue;
+ }
+ }
+ computeHash(C);
+ }
+
+ // Following the last child (or if there are no children), append a zero byte.
+ Hash.update(ArrayRef((uint8_t)'\0'));
+}
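+
+// Informal sketch of the flattening (hypothetical DIE): hashing a
+// DW_TAG_structure_type named "S" with one DW_TAG_member child "m" feeds the
+// hash roughly
+//   'D', DW_TAG_structure_type, 'A', DW_AT_name, DW_FORM_string, "S\0",
+//   'D', DW_TAG_member, 'A', DW_AT_name, DW_FORM_string, "m\0", ..., '\0'
+// i.e. markers, tags, and attribute encodings in document order, with a zero
+// byte terminating each child list.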
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of the full CU and all top level CU entities.
+// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
+uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) {
+ Numbering.clear();
+ Numbering[&Die] = 1;
+
+ if (!DWOName.empty())
+ Hash.update(DWOName);
+ // Hash the DIE.
+ computeHash(Die);
+
+ // Now return the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
+}
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of additional forms not specifically called out in the
+/// standard.
+uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
+ Numbering.clear();
+ Numbering[&Die] = 1;
+
+ if (const DIE *Parent = Die.getParent())
+ addParentContext(*Parent);
+
+ // Hash the DIE.
+ computeHash(Die);
+
+ // Now return the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
new file mode 100644
index 000000000000..24a973b39271
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -0,0 +1,112 @@
+//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+class AsmPrinter;
+
+/// An object containing the capability of hashing and adding hash
+/// attributes onto a DIE.
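+///
+/// Typical (illustrative) use:
+///   uint64_t Sig = DIEHash(Asm, &CU).computeTypeSignature(TypeDie);
+/// where Asm, CU, and TypeDie stand in for the caller's AsmPrinter, compile
+/// unit, and type DIE.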
+class DIEHash {
+ // Collection of all attributes used in hashing a particular DIE.
+ struct DIEAttrs {
+#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME;
+#include "DIEHashAttributes.def"
+ };
+
+public:
+ DIEHash(AsmPrinter *A = nullptr, DwarfCompileUnit *CU = nullptr)
+ : AP(A), CU(CU) {}
+
+ /// Computes the CU signature.
+ uint64_t computeCUSignature(StringRef DWOName, const DIE &Die);
+
+ /// Computes the type signature.
+ uint64_t computeTypeSignature(const DIE &Die);
+
+ // Helper routines to process parts of a DIE.
+private:
+ /// Adds the parent context of \param Parent to the hash.
+ void addParentContext(const DIE &Parent);
+
+ /// Adds the attributes of \param Die to the hash.
+ void addAttributes(const DIE &Die);
+
+ /// Computes the full DWARF4 7.27 hash of the DIE.
+ void computeHash(const DIE &Die);
+
+ // Routines that add DIEValues to the hash.
+public:
+ /// Adds \param Value to the hash.
+ void update(uint8_t Value) { Hash.update(Value); }
+
+ /// Encodes and adds \param Value to the hash as a ULEB128.
+ void addULEB128(uint64_t Value);
+
+ /// Encodes and adds \param Value to the hash as a SLEB128.
+ void addSLEB128(int64_t Value);
+
+ void hashRawTypeReference(const DIE &Entry);
+
+private:
+ /// Adds \param Str to the hash and includes a NULL byte.
+ void addString(StringRef Str);
+
+ /// Collects the attributes of DIE \param Die into the \param Attrs
+ /// structure.
+ void collectAttributes(const DIE &Die, DIEAttrs &Attrs);
+
+ /// Hashes the attributes in \param Attrs in order.
+ void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
+
+ /// Hashes the data in a block like DIEValue, e.g. DW_FORM_block or
+ /// DW_FORM_exprloc.
+ void hashBlockData(const DIE::const_value_range &Values);
+
+ /// Hashes the contents pointed to in the .debug_loc section.
+ void hashLocList(const DIELocList &LocList);
+
+ /// Hashes an individual attribute.
+ void hashAttribute(const DIEValue &Value, dwarf::Tag Tag);
+
+ /// Hashes an attribute that refers to another DIE.
+ void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+ const DIE &Entry);
+
+ /// Hashes a reference to a named type in such a way that is
+ /// independent of whether that type is described by a declaration or a
+ /// definition.
+ void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
+ StringRef Name);
+
+ /// Hashes a reference to a previously referenced type DIE.
+ void hashRepeatedTypeReference(dwarf::Attribute Attribute,
+ unsigned DieNumber);
+
+ void hashNestedType(const DIE &Die, StringRef Name);
+
+private:
+ MD5 Hash;
+ AsmPrinter *AP;
+ DwarfCompileUnit *CU;
+ DenseMap<const DIE *, unsigned> Numbering;
+};
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
new file mode 100644
index 000000000000..c872d0dd2dfa
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
@@ -0,0 +1,55 @@
+#ifndef HANDLE_DIE_HASH_ATTR
+#error "Missing macro definition of HANDLE_DIE_HASH_ATTR"
+#endif
+
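+// Consumers define HANDLE_DIE_HASH_ATTR(NAME) before including this file; for
+// example, DIEHash.h uses it to declare one DIEValue member per hashed
+// attribute:
+//   #define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME;
+//   #include "DIEHashAttributes.def"
+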
+HANDLE_DIE_HASH_ATTR(DW_AT_name)
+HANDLE_DIE_HASH_ATTR(DW_AT_accessibility)
+HANDLE_DIE_HASH_ATTR(DW_AT_address_class)
+HANDLE_DIE_HASH_ATTR(DW_AT_allocated)
+HANDLE_DIE_HASH_ATTR(DW_AT_artificial)
+HANDLE_DIE_HASH_ATTR(DW_AT_associated)
+HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_size)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride)
+HANDLE_DIE_HASH_ATTR(DW_AT_byte_size)
+HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride)
+HANDLE_DIE_HASH_ATTR(DW_AT_const_expr)
+HANDLE_DIE_HASH_ATTR(DW_AT_const_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_containing_type)
+HANDLE_DIE_HASH_ATTR(DW_AT_count)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale)
+HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign)
+HANDLE_DIE_HASH_ATTR(DW_AT_default_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_digit_count)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr_list)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_encoding)
+HANDLE_DIE_HASH_ATTR(DW_AT_enum_class)
+HANDLE_DIE_HASH_ATTR(DW_AT_endianity)
+HANDLE_DIE_HASH_ATTR(DW_AT_explicit)
+HANDLE_DIE_HASH_ATTR(DW_AT_is_optional)
+HANDLE_DIE_HASH_ATTR(DW_AT_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound)
+HANDLE_DIE_HASH_ATTR(DW_AT_mutable)
+HANDLE_DIE_HASH_ATTR(DW_AT_ordering)
+HANDLE_DIE_HASH_ATTR(DW_AT_picture_string)
+HANDLE_DIE_HASH_ATTR(DW_AT_prototyped)
+HANDLE_DIE_HASH_ATTR(DW_AT_small)
+HANDLE_DIE_HASH_ATTR(DW_AT_segment)
+HANDLE_DIE_HASH_ATTR(DW_AT_string_length)
+HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled)
+HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound)
+HANDLE_DIE_HASH_ATTR(DW_AT_use_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8)
+HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter)
+HANDLE_DIE_HASH_ATTR(DW_AT_virtuality)
+HANDLE_DIE_HASH_ATTR(DW_AT_visibility)
+HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_type)
+HANDLE_DIE_HASH_ATTR(DW_AT_linkage_name)
+#undef HANDLE_DIE_HASH_ATTR
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
new file mode 100644
index 000000000000..55a0afcf7a33
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -0,0 +1,602 @@
+//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <map>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+namespace {
+using EntryIndex = DbgValueHistoryMap::EntryIndex;
+}
+
+void InstructionOrdering::initialize(const MachineFunction &MF) {
+ // We give meta instructions the same ordinal as the preceding instruction
+ // because this class is written for the task of comparing positions of
+ // variable location ranges against scope ranges. To reflect what we'll see
+ // in the binary, when we look at location ranges we must consider all
+ // DBG_VALUEs between two real instructions at the same position. And a
+ // scope range which ends on a meta instruction should be considered to end
+ // at the last seen real instruction. E.g.
+ //
+ // 1 instruction p Both the variable location for x and for y start
+ // 1 DBG_VALUE for "x" after instruction p so we give them all the same
+ // 1 DBG_VALUE for "y" number. If a scope range ends at DBG_VALUE for "y",
+ // 2 instruction q we should treat it as ending after instruction p
+ // because it will be the last real instruction in the
+ // range. DBG_VALUEs at or after this position for
+ // variables declared in the scope will have no effect.
+ clear();
+ unsigned Position = 0;
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ InstNumberMap[&MI] = MI.isMetaInstruction() ? Position : ++Position;
+}
+
+bool InstructionOrdering::isBefore(const MachineInstr *A,
+ const MachineInstr *B) const {
+ assert(A->getParent() && B->getParent() && "Operands must have a parent");
+ assert(A->getMF() == B->getMF() &&
+ "Operands must be in the same MachineFunction");
+ return InstNumberMap.lookup(A) < InstNumberMap.lookup(B);
+}
+
+bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
+ const MachineInstr &MI,
+ EntryIndex &NewIndex) {
+ // Instruction range should start with a DBG_VALUE instruction for the
+ // variable.
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ auto &Entries = VarEntries[Var];
+ if (!Entries.empty() && Entries.back().isDbgValue() &&
+ !Entries.back().isClosed() &&
+ Entries.back().getInstr()->isEquivalentDbgInstr(MI)) {
+ LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << Entries.back().getInstr() << "\t" << MI
+ << "\n");
+ return false;
+ }
+ Entries.emplace_back(&MI, Entry::DbgValue);
+ NewIndex = Entries.size() - 1;
+ return true;
+}
+
+EntryIndex DbgValueHistoryMap::startClobber(InlinedEntity Var,
+ const MachineInstr &MI) {
+ auto &Entries = VarEntries[Var];
+ // If an instruction clobbers multiple registers that the variable is
+ // described by, then we may have already created a clobbering instruction.
+ if (Entries.back().isClobber() && Entries.back().getInstr() == &MI)
+ return Entries.size() - 1;
+ Entries.emplace_back(&MI, Entry::Clobber);
+ return Entries.size() - 1;
+}
+
+void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
+ // For now, instruction ranges are not allowed to cross basic block
+ // boundaries.
+ assert(isDbgValue() && "Setting end index for non-debug value");
+ assert(!isClosed() && "End index has already been set");
+ EndIndex = Index;
+}
+
+/// Check if the instruction range [StartMI, EndMI] intersects any instruction
+/// range in Ranges. EndMI can be nullptr to indicate that the range is
+/// unbounded. Assumes Ranges is ordered and disjoint. Returns an iterator to
+/// the first intersecting scope range if one exists, otherwise std::nullopt.
+static std::optional<ArrayRef<InsnRange>::iterator>
+intersects(const MachineInstr *StartMI, const MachineInstr *EndMI,
+ const ArrayRef<InsnRange> &Ranges,
+ const InstructionOrdering &Ordering) {
+ for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();
+ RangesI != RangesE; ++RangesI) {
+ if (EndMI && Ordering.isBefore(EndMI, RangesI->first))
+ return std::nullopt;
+ if (EndMI && !Ordering.isBefore(RangesI->second, EndMI))
+ return RangesI;
+ if (Ordering.isBefore(StartMI, RangesI->second))
+ return RangesI;
+ }
+ return std::nullopt;
+}
+
+void DbgValueHistoryMap::trimLocationRanges(
+ const MachineFunction &MF, LexicalScopes &LScopes,
+ const InstructionOrdering &Ordering) {
+ // The indices of the entries we're going to remove for each variable.
+ SmallVector<EntryIndex, 4> ToRemove;
+ // Entry reference count for each variable. Clobbers left with no references
+ // will be removed.
+ SmallVector<int, 4> ReferenceCount;
+ // Entries reference other entries by index. Offsets is used to remap these
+ // references if any entries are removed.
+ SmallVector<size_t, 4> Offsets;
+
+ LLVM_DEBUG(dbgs() << "Trimming location ranges for function '" << MF.getName()
+ << "'\n");
+
+ for (auto &Record : VarEntries) {
+ auto &HistoryMapEntries = Record.second;
+ if (HistoryMapEntries.empty())
+ continue;
+
+ InlinedEntity Entity = Record.first;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Entity.first);
+
+ LexicalScope *Scope = nullptr;
+ if (const DILocation *InlinedAt = Entity.second) {
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), InlinedAt);
+ } else {
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
+ // Ignore variables for non-inlined function level scopes. The scope
+ // ranges (from scope->getRanges()) will not include any instructions
+ // before the first one with a debug-location, which could cause us to
+ // incorrectly drop a location. We could introduce special casing for
+ // these variables, but it doesn't seem worth it because no out-of-scope
+ // locations have been observed for variables declared in function level
+ // scopes.
+ if (Scope &&
+ (Scope->getScopeNode() == Scope->getScopeNode()->getSubprogram()) &&
+ (Scope->getScopeNode() == LocalVar->getScope()))
+ continue;
+ }
+
+ // If there is no scope for the variable then something has probably gone
+ // wrong.
+ if (!Scope)
+ continue;
+
+ ToRemove.clear();
+ // Zero the reference counts.
+ ReferenceCount.assign(HistoryMapEntries.size(), 0);
+ // Index of the DBG_VALUE which marks the start of the current location
+ // range.
+ EntryIndex StartIndex = 0;
+ ArrayRef<InsnRange> ScopeRanges(Scope->getRanges());
+ for (auto EI = HistoryMapEntries.begin(), EE = HistoryMapEntries.end();
+ EI != EE; ++EI, ++StartIndex) {
+ // Only DBG_VALUEs can open location ranges so skip anything else.
+ if (!EI->isDbgValue())
+ continue;
+
+ // Index of the entry which closes this range.
+ EntryIndex EndIndex = EI->getEndIndex();
+ // If this range is closed bump the reference count of the closing entry.
+ if (EndIndex != NoEntry)
+ ReferenceCount[EndIndex] += 1;
+ // Skip this location range if the opening entry is still referenced. It
+ // may close a location range which intersects a scope range.
+ // TODO: We could be 'smarter' and trim these kinds of ranges such that
+ // they do not leak out of the scope ranges if they partially overlap.
+ if (ReferenceCount[StartIndex] > 0)
+ continue;
+
+ const MachineInstr *StartMI = EI->getInstr();
+ const MachineInstr *EndMI = EndIndex != NoEntry
+ ? HistoryMapEntries[EndIndex].getInstr()
+ : nullptr;
+ // Check if the location range [StartMI, EndMI] intersects with any scope
+ // range for the variable.
+ if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) {
+ // Adjust ScopeRanges to exclude ranges which subsequent location ranges
+ // cannot possibly intersect.
+ ScopeRanges = ArrayRef<InsnRange>(*R, ScopeRanges.end());
+ } else {
+ // If the location range does not intersect any scope range then the
+ // DBG_VALUE which opened this location range is useless; mark it for
+ // removal.
+ ToRemove.push_back(StartIndex);
+ // Because we'll be removing this entry we need to update the reference
+ // count of the closing entry, if one exists.
+ if (EndIndex != NoEntry)
+ ReferenceCount[EndIndex] -= 1;
+ LLVM_DEBUG(dbgs() << "Dropping value outside scope range of variable: ";
+ StartMI->print(llvm::dbgs()););
+ }
+ }
+
+ // If there is nothing to remove then jump to next variable.
+ if (ToRemove.empty())
+ continue;
+
+ // Mark clobbers that will no longer close any location ranges for removal.
+ for (size_t i = 0; i < HistoryMapEntries.size(); ++i)
+ if (ReferenceCount[i] <= 0 && HistoryMapEntries[i].isClobber())
+ ToRemove.push_back(i);
+
+ llvm::sort(ToRemove);
+
+ // Build an offset map so we can update the EndIndex of the remaining
+ // entries.
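+ // E.g. with five entries and ToRemove = {1, 3}, Offsets becomes
+ // {0, 1, 1, 2, 2}, so a surviving EndIndex of 4 is later remapped to 2.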
+ // Zero the offsets.
+ Offsets.assign(HistoryMapEntries.size(), 0);
+ size_t CurOffset = 0;
+ auto ToRemoveItr = ToRemove.begin();
+ for (size_t EntryIdx = *ToRemoveItr; EntryIdx < HistoryMapEntries.size();
+ ++EntryIdx) {
+ // Check if this is an entry which will be removed.
+ if (ToRemoveItr != ToRemove.end() && *ToRemoveItr == EntryIdx) {
+ ++ToRemoveItr;
+ ++CurOffset;
+ }
+ Offsets[EntryIdx] = CurOffset;
+ }
+
+ // Update the EndIndex of the entries to account for those which will be
+ // removed.
+ for (auto &Entry : HistoryMapEntries)
+ if (Entry.isClosed())
+ Entry.EndIndex -= Offsets[Entry.EndIndex];
+
+ // Now actually remove the entries. Iterate backwards so that our remaining
+ // ToRemove indices are valid after each erase.
+ for (EntryIndex Idx : llvm::reverse(ToRemove))
+ HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
+ LLVM_DEBUG(llvm::dbgs() << "New HistoryMap('" << LocalVar->getName()
+ << "') size: " << HistoryMapEntries.size() << "\n");
+ }
+}
+
+bool DbgValueHistoryMap::hasNonEmptyLocation(const Entries &Entries) const {
+ for (const auto &Entry : Entries) {
+ if (!Entry.isDbgValue())
+ continue;
+
+ const MachineInstr *MI = Entry.getInstr();
+ assert(MI->isDebugValue());
+ // A DBG_VALUE $noreg is an empty variable location.
+ if (MI->isUndefDebugValue())
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
+ assert(MI.isDebugLabel() && "not a DBG_LABEL");
+ LabelInstr[Label] = &MI;
+}
+
+namespace {
+
+// Maps physreg numbers to the variables they describe.
+using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
+
+// Keeps track of the debug value entries that are currently live for each
+// inlined entity. As the history map entries are stored in a SmallVector, they
+// may be moved at insertion of new entries, so store indices rather than
+// pointers.
+using DbgValueEntriesMap = std::map<InlinedEntity, SmallSet<EntryIndex, 1>>;
+
+} // end anonymous namespace
+
+// Claim that @Var is not described by @RegNo anymore.
+static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedEntity Var) {
+ const auto &I = RegVars.find(RegNo);
+ assert(RegNo != 0U && I != RegVars.end());
+ auto &VarSet = I->second;
+ const auto &VarPos = llvm::find(VarSet, Var);
+ assert(VarPos != VarSet.end());
+ VarSet.erase(VarPos);
+ // Don't keep empty sets in a map to keep it as small as possible.
+ if (VarSet.empty())
+ RegVars.erase(I);
+}
+
+// Claim that @Var is now described by @RegNo.
+static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedEntity Var) {
+ assert(RegNo != 0U);
+ auto &VarSet = RegVars[RegNo];
+ assert(!is_contained(VarSet, Var));
+ VarSet.push_back(Var);
+}
+
+/// Create a clobbering entry and end all open debug value entries
+/// for \p Var that are described by \p RegNo using that entry. Inserts into \p
+/// FellowRegisters the set of Registers that were also used to describe \p Var
+/// alongside \p RegNo.
+static void clobberRegEntries(InlinedEntity Var, unsigned RegNo,
+ const MachineInstr &ClobberingInstr,
+ DbgValueEntriesMap &LiveEntries,
+ DbgValueHistoryMap &HistMap,
+ SmallVectorImpl<Register> &FellowRegisters) {
+ EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr);
+ // Close all entries whose values are described by the register.
+ SmallVector<EntryIndex, 4> IndicesToErase;
+ // If a given register appears in a live DBG_VALUE_LIST for Var alongside the
+ // clobbered register, and never appears in a live DBG_VALUE* for Var without
+ // the clobbered register, then it is no longer linked to the variable.
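+ // E.g. (illustrative registers) if $rbx only ever appears in live
+ // DBG_VALUE_LISTs together with the clobbered $rax, $rbx is reported as a
+ // fellow register so the caller can unlink it as well.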
+ SmallSet<Register, 4> MaybeRemovedRegisters;
+ SmallSet<Register, 4> KeepRegisters;
+ for (auto Index : LiveEntries[Var]) {
+ auto &Entry = HistMap.getEntry(Var, Index);
+ assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+ if (Entry.getInstr()->isDebugEntryValue())
+ continue;
+ if (Entry.getInstr()->hasDebugOperandForReg(RegNo)) {
+ IndicesToErase.push_back(Index);
+ Entry.endEntry(ClobberIndex);
+ for (const auto &MO : Entry.getInstr()->debug_operands())
+ if (MO.isReg() && MO.getReg() && MO.getReg() != RegNo)
+ MaybeRemovedRegisters.insert(MO.getReg());
+ } else {
+ for (const auto &MO : Entry.getInstr()->debug_operands())
+ if (MO.isReg() && MO.getReg())
+ KeepRegisters.insert(MO.getReg());
+ }
+ }
+
+ for (Register Reg : MaybeRemovedRegisters)
+ if (!KeepRegisters.contains(Reg))
+ FellowRegisters.push_back(Reg);
+
+ // Drop all entries that have ended.
+ for (auto Index : IndicesToErase)
+ LiveEntries[Var].erase(Index);
+}
+
+/// Add a new debug value for \p Var. Closes all overlapping debug values.
+static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
+ RegDescribedVarsMap &RegVars,
+ DbgValueEntriesMap &LiveEntries,
+ DbgValueHistoryMap &HistMap) {
+ EntryIndex NewIndex;
+ if (HistMap.startDbgValue(Var, DV, NewIndex)) {
+ SmallDenseMap<unsigned, bool, 4> TrackedRegs;
+
+ // If we have created a new debug value entry, close all preceding
+ // live entries that overlap.
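+ // E.g. a new DBG_VALUE for fragment [0, 32) closes a live entry for
+ // fragment [16, 48) but leaves one for [32, 64) open.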
+ SmallVector<EntryIndex, 4> IndicesToErase;
+ const DIExpression *DIExpr = DV.getDebugExpression();
+ for (auto Index : LiveEntries[Var]) {
+ auto &Entry = HistMap.getEntry(Var, Index);
+ assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+ const MachineInstr &DV = *Entry.getInstr();
+ bool Overlaps = DIExpr->fragmentsOverlap(DV.getDebugExpression());
+ if (Overlaps) {
+ IndicesToErase.push_back(Index);
+ Entry.endEntry(NewIndex);
+ }
+ if (!DV.isDebugEntryValue())
+ for (const MachineOperand &Op : DV.debug_operands())
+ if (Op.isReg() && Op.getReg())
+ TrackedRegs[Op.getReg()] |= !Overlaps;
+ }
+
+ // If the new debug value is described by a register, add tracking of
+ // that register if it is not already tracked.
+ if (!DV.isDebugEntryValue()) {
+ for (const MachineOperand &Op : DV.debug_operands()) {
+ if (Op.isReg() && Op.getReg()) {
+ Register NewReg = Op.getReg();
+ if (!TrackedRegs.count(NewReg))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ LiveEntries[Var].insert(NewIndex);
+ TrackedRegs[NewReg] = true;
+ }
+ }
+ }
+
+ // Drop tracking of registers that are no longer used.
+ for (auto I : TrackedRegs)
+ if (!I.second)
+ dropRegDescribedVar(RegVars, I.first, Var);
+
+ // Drop all entries that have ended, and mark the new entry as live.
+ for (auto Index : IndicesToErase)
+ LiveEntries[Var].erase(Index);
+ LiveEntries[Var].insert(NewIndex);
+ }
+}
+
+// Terminate the location ranges for the variables described by the register
+// at @I by inserting @ClobberingInstr into their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
+ RegDescribedVarsMap::iterator I,
+ DbgValueHistoryMap &HistMap,
+ DbgValueEntriesMap &LiveEntries,
+ const MachineInstr &ClobberingInstr) {
+ // Iterate over all variables described by this register and add this
+ // instruction to their history, clobbering it. All registers that also
+ // describe the clobbered variables (i.e. in variadic debug values) will have
+ // those Variables removed from their DescribedVars.
+ for (const auto &Var : I->second) {
+ SmallVector<Register, 4> FellowRegisters;
+ clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap,
+ FellowRegisters);
+ for (Register RegNo : FellowRegisters)
+ dropRegDescribedVar(RegVars, RegNo, Var);
+ }
+ RegVars.erase(I);
+}
+
+// Terminate the location ranges for the variables described by register
+// @RegNo by inserting @ClobberingInstr into their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ DbgValueHistoryMap &HistMap,
+ DbgValueEntriesMap &LiveEntries,
+ const MachineInstr &ClobberingInstr) {
+ const auto &I = RegVars.find(RegNo);
+ if (I == RegVars.end())
+ return;
+ clobberRegisterUses(RegVars, I, HistMap, LiveEntries, ClobberingInstr);
+}
+
+void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &DbgValues,
+ DbgLabelInstrMap &DbgLabels) {
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FrameReg = TRI->getFrameRegister(*MF);
+ RegDescribedVarsMap RegVars;
+ DbgValueEntriesMap LiveEntries;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MI.isDebugValue()) {
+ assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
+ // Use the base variable (without any DW_OP_piece expressions)
+ // as the index into the history. The full variable, including the
+ // piece expressions, is attached to the MI.
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ handleNewDebugValue(Var, MI, RegVars, LiveEntries, DbgValues);
+ } else if (MI.isDebugLabel()) {
+ assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
+ const DILabel *RawLabel = MI.getDebugLabel();
+ assert(RawLabel->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ // When collecting debug information for labels, no MCSymbol is generated
+ // for them, so we keep the MachineInstr in DbgLabels in order to query
+ // the MCSymbol afterward.
+ InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
+ DbgLabels.addInstr(L, MI);
+ }
+
+ // Meta instructions have no output, do not change any values, and so
+ // can be safely ignored.
+ if (MI.isMetaInstruction())
+ continue;
+
+ // Not a DBG_VALUE instruction. It may clobber registers which describe
+ // some variables.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Ignore call instructions that claim to clobber SP. The AArch64
+ // backend does this for aggregate function arguments.
+ if (MI.isCall() && MO.getReg() == SP)
+ continue;
+ // If this is a virtual register, only clobber it since it doesn't
+ // have aliases.
+ if (MO.getReg().isVirtual())
+ clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
+ MI);
+ // If this is a register def operand, it may end a debug value
+ // range. Ignore frame-register defs in the epilogue and prologue,
+ // we expect debuggers to understand that stack-locations are
+ // invalid outside of the function body.
+ else if (MO.getReg() != FrameReg ||
+ (!MI.getFlag(MachineInstr::FrameDestroy) &&
+ !MI.getFlag(MachineInstr::FrameSetup))) {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ clobberRegisterUses(RegVars, *AI, DbgValues, LiveEntries, MI);
+ }
+ } else if (MO.isRegMask()) {
+ // If this is a register mask operand, clobber all debug values in
+ // non-CSRs.
+ SmallVector<unsigned, 32> RegsToClobber;
+ // Don't consider SP to be clobbered by register masks.
+ for (auto It : RegVars) {
+ unsigned int Reg = It.first;
+ if (Reg != SP && Register::isPhysicalRegister(Reg) &&
+ MO.clobbersPhysReg(Reg))
+ RegsToClobber.push_back(Reg);
+ }
+
+ for (unsigned Reg : RegsToClobber) {
+ clobberRegisterUses(RegVars, Reg, DbgValues, LiveEntries, MI);
+ }
+ }
+ } // End MO loop.
+ } // End instr loop.
+
+ // Make sure locations for all variables are valid only until the end of
+ // the basic block (unless it's the last basic block, in which case let
+ // their liveness run off to the end of the function).
+ if (!MBB.empty() && &MBB != &MF->back()) {
+ // Iterate over all variables that have open debug values.
+ for (auto &Pair : LiveEntries) {
+ if (Pair.second.empty())
+ continue;
+
+ // Create a clobbering entry.
+ EntryIndex ClobIdx = DbgValues.startClobber(Pair.first, MBB.back());
+
+ // End all entries.
+ for (EntryIndex Idx : Pair.second) {
+ DbgValueHistoryMap::Entry &Ent = DbgValues.getEntry(Pair.first, Idx);
+ assert(Ent.isDbgValue() && !Ent.isClosed());
+ Ent.endEntry(ClobIdx);
+ }
+ }
+
+ LiveEntries.clear();
+ RegVars.clear();
+ }
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DbgValueHistoryMap::dump(StringRef FuncName) const {
+ dbgs() << "DbgValueHistoryMap('" << FuncName << "'):\n";
+ for (const auto &VarRangePair : *this) {
+ const InlinedEntity &Var = VarRangePair.first;
+ const Entries &Entries = VarRangePair.second;
+
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
+ const DILocation *Location = Var.second;
+
+ dbgs() << " - " << LocalVar->getName() << " at ";
+
+ if (Location)
+ dbgs() << Location->getFilename() << ":" << Location->getLine() << ":"
+ << Location->getColumn();
+ else
+ dbgs() << "<unknown location>";
+
+ dbgs() << " --\n";
+
+ for (const auto &E : enumerate(Entries)) {
+ const auto &Entry = E.value();
+ dbgs() << " Entry[" << E.index() << "]: ";
+ if (Entry.isDbgValue())
+ dbgs() << "Debug value\n";
+ else
+ dbgs() << "Clobber\n";
+ dbgs() << " Instr: " << *Entry.getInstr();
+ if (Entry.isDbgValue()) {
+ if (Entry.getEndIndex() == NoEntry)
+ dbgs() << " - Valid until end of function\n";
+ else
+ dbgs() << " - Closed by Entry[" << Entry.getEndIndex() << "]\n";
+ }
+ dbgs() << "\n";
+ }
+ }
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
new file mode 100644
index 000000000000..eb2d992c7e75
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -0,0 +1,427 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for different debug information format backends.
+// LLVM currently supports DWARF and CodeView.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+/// If true, we drop variable location ranges which exist entirely outside the
+/// variable's lexical scope instruction ranges.
+static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true));
+
+std::optional<DbgVariableLocation>
+DbgVariableLocation::extractFromMachineInstruction(
+ const MachineInstr &Instruction) {
+ DbgVariableLocation Location;
+ // Variables calculated from multiple locations can't be represented here.
+ if (Instruction.getNumDebugOperands() != 1)
+ return std::nullopt;
+ if (!Instruction.getDebugOperand(0).isReg())
+ return std::nullopt;
+ Location.Register = Instruction.getDebugOperand(0).getReg();
+ Location.FragmentInfo.reset();
+ // We only handle expressions generated by DIExpression::appendOffset,
+ // which doesn't require a full stack machine.
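+ // E.g. the expression (DW_OP_plus_uconst 8, DW_OP_deref) folds the offset 8
+ // into LoadChain = {8} and resets Offset to 0.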
+ int64_t Offset = 0;
+ const DIExpression *DIExpr = Instruction.getDebugExpression();
+ auto Op = DIExpr->expr_op_begin();
+ // We can handle a DBG_VALUE_LIST iff it has exactly one location operand that
+ // appears exactly once at the start of the expression.
+ if (Instruction.isDebugValueList()) {
+ if (Instruction.getNumDebugOperands() == 1 &&
+ Op->getOp() == dwarf::DW_OP_LLVM_arg)
+ ++Op;
+ else
+ return std::nullopt;
+ }
+ while (Op != DIExpr->expr_op_end()) {
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_constu: {
+ int Value = Op->getArg(0);
+ ++Op;
+ if (Op != DIExpr->expr_op_end()) {
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_minus:
+ Offset -= Value;
+ break;
+ case dwarf::DW_OP_plus:
+ Offset += Value;
+ break;
+ default:
+ continue;
+ }
+ }
+ } break;
+ case dwarf::DW_OP_plus_uconst:
+ Offset += Op->getArg(0);
+ break;
+ case dwarf::DW_OP_LLVM_fragment:
+ Location.FragmentInfo = {Op->getArg(1), Op->getArg(0)};
+ break;
+ case dwarf::DW_OP_deref:
+ Location.LoadChain.push_back(Offset);
+ Offset = 0;
+ break;
+ default:
+ return std::nullopt;
+ }
+ ++Op;
+ }
+
+ // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE
+ // instruction.
+ // FIXME: Replace these with DIExpression.
+ if (Instruction.isIndirectDebugValue())
+ Location.LoadChain.push_back(Offset);
+
+ return Location;
+}
+
+DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+
+void DebugHandlerBase::beginModule(Module *M) {
+ if (M->debug_compile_units().empty())
+ Asm = nullptr;
+}
+
+// Each LexicalScope has a first and a last instruction marking the beginning
+// and end of the scope, respectively. Create an inverse map that records which
+// scopes start (and end) at each instruction. One instruction may start (or
+// end) multiple scopes. Ignore scopes that are not reachable.
+void DebugHandlerBase::identifyScopeMarkers() {
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
+ while (!WorkList.empty()) {
+ LexicalScope *S = WorkList.pop_back_val();
+
+ const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
+ if (!Children.empty())
+ WorkList.append(Children.begin(), Children.end());
+
+ if (S->isAbstractScope())
+ continue;
+
+ for (const InsnRange &R : S->getRanges()) {
+ assert(R.first && "InsnRange does not have first instruction!");
+ assert(R.second && "InsnRange does not have second instruction!");
+ requestLabelBeforeInsn(R.first);
+ requestLabelAfterInsn(R.second);
+ }
+ }
+}
+
+// Return Label preceding the instruction.
+MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+// Return Label immediately following the instruction.
+MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+/// If this type is derived from a base type then return base type size.
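+/// For example, for a member declared as 'typedef const int T' this walks
+/// through the typedef and const wrappers and returns the size of 'int'.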
+uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
+ assert(Ty);
+ const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
+ if (!DDTy)
+ return Ty->getSizeInBits();
+
+ unsigned Tag = DDTy->getTag();
+
+ if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+ Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type &&
+ Tag != dwarf::DW_TAG_immutable_type)
+ return DDTy->getSizeInBits();
+
+ DIType *BaseType = DDTy->getBaseType();
+
+ if (!BaseType)
+ return 0;
+
+ // If this is a derived type, recurse into the base type, unless it's a
+ // reference, in which case the size is just the size of the field. Pointer
+ // types have no need of this since they're a different kind of qualification
+ // on the type.
+ if (BaseType->getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return Ty->getSizeInBits();
+
+ return getBaseTypeSize(BaseType);
+}
+
+bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
+ if (isa<DIStringType>(Ty)) {
+ // Some transformations (e.g. instcombine) may decide to turn a Fortran
+ // character object into an integer, and later ones (e.g. SROA) may
+ // further inject a constant integer in a llvm.dbg.value call to track
+ // the object's value. Here we trust the transformations are doing the
+ // right thing, and treat the constant as unsigned to preserve that value
+ // (i.e. avoid sign extension).
+ return true;
+ }
+
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (!(Ty = CTy->getBaseType()))
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ return false;
+ } else
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
+ }
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ // Encode pointer constants as unsigned bytes. This is used at least for
+ // null pointer constant emission.
+ // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
+ // here, but accept them for now due to a bug in SROA producing bogus
+ // dbg.values.
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return true;
+ assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
+ T == dwarf::DW_TAG_volatile_type ||
+ T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type ||
+ T == dwarf::DW_TAG_immutable_type);
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isUnsignedDIType(DTy->getBaseType());
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ unsigned Encoding = BTy->getEncoding();
+ assert((Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_signed ||
+ Encoding == dwarf::DW_ATE_signed_char ||
+ Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
+ Encoding == dwarf::DW_ATE_boolean ||
+ Encoding == dwarf::DW_ATE_complex_float ||
+ (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty->getName() == "decltype(nullptr)")) &&
+ "Unsupported encoding");
+ return Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty->getTag() == dwarf::DW_TAG_unspecified_type;
+}
+
+static bool hasDebugInfo(const MachineModuleInfo *MMI,
+ const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo())
+ return false;
+ auto *SP = MF->getFunction().getSubprogram();
+ if (!SP)
+ return false;
+ assert(SP->getUnit());
+ auto EK = SP->getUnit()->getEmissionKind();
+ if (EK == DICompileUnit::NoDebug)
+ return false;
+ return true;
+}
+
+void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
+ PrevInstBB = nullptr;
+
+ if (!Asm || !hasDebugInfo(MMI, MF)) {
+ skippedNonDebugFunction();
+ return;
+ }
+
+ // Grab the lexical scopes for the function, if we don't have any of those
+ // then we're not going to be able to do anything.
+ LScopes.initialize(*MF);
+ if (LScopes.empty()) {
+ beginFunctionImpl(MF);
+ return;
+ }
+
+ // Make sure that each lexical scope will have a begin/end label.
+ identifyScopeMarkers();
+
+ // Calculate history for local variables.
+ assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
+ assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
+ calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
+ DbgValues, DbgLabels);
+ InstOrdering.initialize(*MF);
+ if (TrimVarLocs)
+ DbgValues.trimLocationRanges(*MF, LScopes, InstOrdering);
+ LLVM_DEBUG(DbgValues.dump(MF->getName()));
+
+ // Request labels for the full history.
+ for (const auto &I : DbgValues) {
+ const auto &Entries = I.second;
+ if (Entries.empty())
+ continue;
+
+ auto IsDescribedByReg = [](const MachineInstr *MI) {
+ return any_of(MI->debug_operands(),
+ [](auto &MO) { return MO.isReg() && MO.getReg(); });
+ };
+
+ // The first mention of a function argument gets the CurrentFnBegin label,
+ // so arguments are visible when breaking at function entry.
+ //
+ // We do not change the label for values that are described by registers,
+ // as that could place them above their defining instructions. We should
+ // ideally not change the labels for constant debug values either, since
+ // doing that violates the ranges that are calculated in the history map.
+ // However, we currently do not emit debug values for constant arguments
+ // directly at the start of the function, so this code is still useful.
+ const DILocalVariable *DIVar =
+ Entries.front().getInstr()->getDebugVariable();
+ if (DIVar->isParameter() &&
+ getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
+ if (!IsDescribedByReg(Entries.front().getInstr()))
+ LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
+ if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
+ // Mark all non-overlapping initial fragments.
+ for (const auto *I = Entries.begin(); I != Entries.end(); ++I) {
+ if (!I->isDbgValue())
+ continue;
+ const DIExpression *Fragment = I->getInstr()->getDebugExpression();
+ if (std::any_of(Entries.begin(), I,
+ [&](DbgValueHistoryMap::Entry Pred) {
+ return Pred.isDbgValue() &&
+ Fragment->fragmentsOverlap(
+ Pred.getInstr()->getDebugExpression());
+ }))
+ break;
+ // The code that generates location lists for DWARF assumes that the
+ // entries' start labels are monotonically increasing, and since we
+ // don't change the label for fragments that are described by
+ // registers, we must bail out when encountering such a fragment.
+ if (IsDescribedByReg(I->getInstr()))
+ break;
+ LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin();
+ }
+ }
+ }
+
+ for (const auto &Entry : Entries) {
+ if (Entry.isDbgValue())
+ requestLabelBeforeInsn(Entry.getInstr());
+ else
+ requestLabelAfterInsn(Entry.getInstr());
+ }
+ }
+
+ // Ensure there is a symbol before DBG_LABEL.
+ for (const auto &I : DbgLabels) {
+ const MachineInstr *MI = I.second;
+ requestLabelBeforeInsn(MI);
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = Asm->getFunctionBegin();
+ beginFunctionImpl(MF);
+}
+
+void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ assert(CurMI == nullptr);
+ CurMI = MI;
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->emitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+void DebugHandlerBase::endInstruction() {
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ assert(CurMI != nullptr);
+ // Don't create a new label after DBG_VALUE and other instructions that don't
+ // generate code.
+ if (!CurMI->isMetaInstruction()) {
+ PrevLabel = nullptr;
+ PrevInstBB = CurMI->getParent();
+ }
+
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsAfterInsn.find(CurMI);
+
+ // No label needed or label already assigned.
+ if (I == LabelsAfterInsn.end() || I->second) {
+ CurMI = nullptr;
+ return;
+ }
+
+ // We need a label after this instruction. With basic block sections, just
+ // use the end symbol of the section if this is the last instruction of the
+ // section. This reduces the need for an additional label and also helps
+ // merging ranges.
+ if (CurMI->getParent()->isEndSection() && CurMI->getNextNode() == nullptr) {
+ PrevLabel = CurMI->getParent()->getEndSymbol();
+ } else if (!PrevLabel) {
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->emitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+ CurMI = nullptr;
+}
+
+void DebugHandlerBase::endFunction(const MachineFunction *MF) {
+ if (Asm && hasDebugInfo(MMI, MF))
+ endFunctionImpl(MF);
+ DbgValues.clear();
+ DbgLabels.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ InstOrdering.clear();
+}
+
+void DebugHandlerBase::beginBasicBlockSection(const MachineBasicBlock &MBB) {
+ EpilogBeginBlock = nullptr;
+ if (!MBB.isEntryBlock())
+ PrevLabel = MBB.getSymbol();
+}
+
+void DebugHandlerBase::endBasicBlockSection(const MachineBasicBlock &MBB) {
+ PrevLabel = nullptr;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
new file mode 100644
index 000000000000..726aba18bb80
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -0,0 +1,290 @@
+//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+
+#include "DebugLocStream.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+class AsmPrinter;
+
+/// This struct describes a target-specific location.
+struct TargetIndexLocation {
+ int Index;
+ int Offset;
+
+ TargetIndexLocation() = default;
+ TargetIndexLocation(unsigned Idx, int64_t Offset)
+ : Index(Idx), Offset(Offset) {}
+
+ bool operator==(const TargetIndexLocation &Other) const {
+ return Index == Other.Index && Offset == Other.Offset;
+ }
+};
+
+/// A single location or constant within a variable location description, with
+/// either a single entry (with an optional DIExpression) used for a DBG_VALUE,
+/// or a list of entries used for a DBG_VALUE_LIST.
+class DbgValueLocEntry {
+
+ /// Type of entry that this represents.
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt,
+ E_TargetIndexLocation
+ };
+ enum EntryType EntryKind;
+
+ /// Either a constant,
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constant;
+
+ union {
+ /// Or a location in the machine frame.
+ MachineLocation Loc;
+ /// Or a location from target specific location.
+ TargetIndexLocation TIL;
+ };
+
+public:
+ DbgValueLocEntry(int64_t i) : EntryKind(E_Integer) { Constant.Int = i; }
+ DbgValueLocEntry(const ConstantFP *CFP) : EntryKind(E_ConstantFP) {
+ Constant.CFP = CFP;
+ }
+ DbgValueLocEntry(const ConstantInt *CIP) : EntryKind(E_ConstantInt) {
+ Constant.CIP = CIP;
+ }
+ DbgValueLocEntry(MachineLocation Loc) : EntryKind(E_Location), Loc(Loc) {}
+ DbgValueLocEntry(TargetIndexLocation Loc)
+ : EntryKind(E_TargetIndexLocation), TIL(Loc) {}
+
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isIndirectLocation() const {
+ return EntryKind == E_Location && Loc.isIndirect();
+ }
+ bool isTargetIndexLocation() const {
+ return EntryKind == E_TargetIndexLocation;
+ }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() const { return Constant.Int; }
+ const ConstantFP *getConstantFP() const { return Constant.CFP; }
+ const ConstantInt *getConstantInt() const { return Constant.CIP; }
+ MachineLocation getLoc() const { return Loc; }
+ TargetIndexLocation getTargetIndexLocation() const { return TIL; }
+ friend bool operator==(const DbgValueLocEntry &, const DbgValueLocEntry &);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ if (isLocation()) {
+ llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
+ if (Loc.isIndirect())
+ llvm::dbgs() << "+0";
+ llvm::dbgs() << "} ";
+ } else if (isConstantInt())
+ Constant.CIP->dump();
+ else if (isConstantFP())
+ Constant.CFP->dump();
+ }
+#endif
+};
+
+/// The location of a single variable, composed of an expression and 0 or more
+/// DbgValueLocEntries.
+class DbgValueLoc {
+ /// Any complex address location expression for this DbgValueLoc.
+ const DIExpression *Expression;
+
+ SmallVector<DbgValueLocEntry, 2> ValueLocEntries;
+
+ bool IsVariadic;
+
+public:
+ DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs)
+ : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
+ IsVariadic(true) {}
+
+ DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs,
+ bool IsVariadic)
+ : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
+ IsVariadic(IsVariadic) {
+#ifndef NDEBUG
+ assert(Expr->isValid() ||
+ !any_of(Locs, [](auto LE) { return LE.isLocation(); }));
+ if (!IsVariadic) {
+ assert(ValueLocEntries.size() == 1);
+ }
+#endif
+ }
+
+ DbgValueLoc(const DIExpression *Expr, DbgValueLocEntry Loc)
+ : Expression(Expr), ValueLocEntries(1, Loc), IsVariadic(false) {
+ assert(((Expr && Expr->isValid()) || !Loc.isLocation()) &&
+ "DBG_VALUE with a machine location must have a valid expression.");
+ }
+
+ bool isFragment() const { return getExpression()->isFragment(); }
+ bool isEntryVal() const { return getExpression()->isEntryValue(); }
+ bool isVariadic() const { return IsVariadic; }
+ bool isEquivalent(const DbgValueLoc &Other) const {
+ // Cannot be equivalent with different numbers of entries.
+ if (ValueLocEntries.size() != Other.ValueLocEntries.size())
+ return false;
+ bool ThisIsIndirect =
+ !IsVariadic && ValueLocEntries[0].isIndirectLocation();
+ bool OtherIsIndirect =
+ !Other.IsVariadic && Other.ValueLocEntries[0].isIndirectLocation();
+ // Check equivalence of DIExpressions + Directness together.
+ if (!DIExpression::isEqualExpression(Expression, ThisIsIndirect,
+ Other.Expression, OtherIsIndirect))
+ return false;
+ // Indirectness should have been accounted for in the above check, so just
+ // compare register values directly here.
+ if (ThisIsIndirect || OtherIsIndirect) {
+ DbgValueLocEntry ThisOp = ValueLocEntries[0];
+ DbgValueLocEntry OtherOp = Other.ValueLocEntries[0];
+ return ThisOp.isLocation() && OtherOp.isLocation() &&
+ ThisOp.getLoc().getReg() == OtherOp.getLoc().getReg();
+ }
+ // If neither are indirect, then just compare the loc entries directly.
+ return ValueLocEntries == Other.ValueLocEntries;
+ }
+ const DIExpression *getExpression() const { return Expression; }
+ ArrayRef<DbgValueLocEntry> getLocEntries() const { return ValueLocEntries; }
+ friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
+ friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ for (const DbgValueLocEntry &DV : ValueLocEntries)
+ DV.dump();
+ if (Expression)
+ Expression->dump();
+ }
+#endif
+};
+
+/// This struct describes location entries emitted in the .debug_loc
+/// section.
+class DebugLocEntry {
+ /// Begin and end symbols for the address range that this location is valid.
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+
+ /// A nonempty list of locations/constants belonging to this entry,
+ /// sorted by offset.
+ SmallVector<DbgValueLoc, 1> Values;
+
+public:
+ /// Create a location list entry for the range [\p Begin, \p End).
+ ///
+ /// \param Vals One or more values describing (parts of) the variable.
+ DebugLocEntry(const MCSymbol *Begin, const MCSymbol *End,
+ ArrayRef<DbgValueLoc> Vals)
+ : Begin(Begin), End(End) {
+ addValues(Vals);
+ }
+
+ /// Attempt to merge this DebugLocEntry with Next and return
+ /// true if the merge was successful. Entries can be merged if they
+ /// share the same Loc/Constant and if Next immediately follows this
+ /// Entry.
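+ /// E.g. the ranges [A, B) and [B, C) merge into [A, C) when their value
+ /// lists are equivalent.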
+ bool MergeRanges(const DebugLocEntry &Next) {
+ // If this and Next are describing the same variable, merge them.
+ if (End != Next.Begin)
+ return false;
+ if (Values.size() != Next.Values.size())
+ return false;
+ for (unsigned EntryIdx = 0; EntryIdx < Values.size(); ++EntryIdx)
+ if (!Values[EntryIdx].isEquivalent(Next.Values[EntryIdx]))
+ return false;
+ End = Next.End;
+ return true;
+ }
+
+ const MCSymbol *getBeginSym() const { return Begin; }
+ const MCSymbol *getEndSym() const { return End; }
+ ArrayRef<DbgValueLoc> getValues() const { return Values; }
+ void addValues(ArrayRef<DbgValueLoc> Vals) {
+ Values.append(Vals.begin(), Vals.end());
+ sortUniqueValues();
+ assert((Values.size() == 1 || all_of(Values, [](DbgValueLoc V) {
+ return V.isFragment();
+ })) && "must either have a single value or multiple pieces");
+ }
+
+ // Sort the pieces by offset.
+ // Remove any duplicate entries by dropping all but the first.
+ void sortUniqueValues() {
+ // Values is either one item that does not have a fragment, or many items
+ // that all do. There is no need to sort in the former case, and skipping the
+ // sort also prevents operator< being called on a non-fragment item when
+ // _GLIBCXX_DEBUG is defined.
+ if (Values.size() == 1)
+ return;
+ llvm::sort(Values);
+ Values.erase(std::unique(Values.begin(), Values.end(),
+ [](const DbgValueLoc &A, const DbgValueLoc &B) {
+ return A.getExpression() == B.getExpression();
+ }),
+ Values.end());
+ }
+
+ /// Lower this entry into a DWARF expression.
+ void finalize(const AsmPrinter &AP,
+ DebugLocStream::ListBuilder &List,
+ const DIBasicType *BT,
+ DwarfCompileUnit &TheCU);
+};
+
+/// Compare two DbgValueLocEntries for equality.
+inline bool operator==(const DbgValueLocEntry &A, const DbgValueLocEntry &B) {
+ if (A.EntryKind != B.EntryKind)
+ return false;
+
+ switch (A.EntryKind) {
+ case DbgValueLocEntry::E_Location:
+ return A.Loc == B.Loc;
+ case DbgValueLocEntry::E_TargetIndexLocation:
+ return A.TIL == B.TIL;
+ case DbgValueLocEntry::E_Integer:
+ return A.Constant.Int == B.Constant.Int;
+ case DbgValueLocEntry::E_ConstantFP:
+ return A.Constant.CFP == B.Constant.CFP;
+ case DbgValueLocEntry::E_ConstantInt:
+ return A.Constant.CIP == B.Constant.CIP;
+ }
+ llvm_unreachable("unhandled EntryKind");
+}
+
+/// Compare two DbgValueLocs for equality.
+inline bool operator==(const DbgValueLoc &A, const DbgValueLoc &B) {
+ return A.ValueLocEntries == B.ValueLocEntries &&
+ A.Expression == B.Expression && A.IsVariadic == B.IsVariadic;
+}
+
+/// Compare two fragments based on their offset.
+inline bool operator<(const DbgValueLoc &A,
+ const DbgValueLoc &B) {
+ return A.getExpression()->getFragmentInfo()->OffsetInBits <
+ B.getExpression()->getFragmentInfo()->OffsetInBits;
+}
+
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
new file mode 100644
index 000000000000..8c6109880afc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -0,0 +1,47 @@
+//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DebugLocStream.h"
+#include "DwarfDebug.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+using namespace llvm;
+
+bool DebugLocStream::finalizeList(AsmPrinter &Asm) {
+ if (Lists.back().EntryOffset == Entries.size()) {
+ // Empty list. Delete it.
+ Lists.pop_back();
+ return false;
+ }
+
+ // Real list. Generate a label for it.
+ Lists.back().Label = Asm.createTempSymbol("debug_loc");
+ return true;
+}
+
+void DebugLocStream::finalizeEntry() {
+ if (Entries.back().ByteOffset != DWARFBytes.size())
+ return;
+
+ // The last entry was empty. Delete it.
+ Comments.erase(Comments.begin() + Entries.back().CommentOffset,
+ Comments.end());
+ Entries.pop_back();
+
+ assert(Lists.back().EntryOffset <= Entries.size() &&
+ "Popped off more entries than are in the list");
+}
+
+DebugLocStream::ListBuilder::~ListBuilder() {
+ if (!Locs.finalizeList(Asm))
+ return;
+ V.initializeDbgValue(&MI);
+ V.setDebugLocListIndex(ListIndex);
+ if (TagOffset)
+ V.setDebugLocListTagOffset(*TagOffset);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
new file mode 100644
index 000000000000..a96bdd034918
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -0,0 +1,200 @@
+//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+
+#include "ByteStreamer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class DbgVariable;
+class DwarfCompileUnit;
+class MachineInstr;
+class MCSymbol;
+
+/// Byte stream of .debug_loc entries.
+///
+/// Stores a unified stream of .debug_loc entries. There's \a List for each
+/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry.
+///
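+/// Lists and entries are appended through the RAII \a ListBuilder and
+/// \a EntryBuilder helpers declared below.
+///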
+/// FIXME: Do we need all these temp symbols?
+/// FIXME: Why not output directly to the output stream?
+class DebugLocStream {
+public:
+ struct List {
+ DwarfCompileUnit *CU;
+ MCSymbol *Label = nullptr;
+ size_t EntryOffset;
+ List(DwarfCompileUnit *CU, size_t EntryOffset)
+ : CU(CU), EntryOffset(EntryOffset) {}
+ };
+ struct Entry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ size_t ByteOffset;
+ size_t CommentOffset;
+ };
+
+private:
+ SmallVector<List, 4> Lists;
+ SmallVector<Entry, 32> Entries;
+ SmallString<256> DWARFBytes;
+ std::vector<std::string> Comments;
+ MCSymbol *Sym = nullptr;
+
+ /// Only verbose textual output needs comments. This will be set to
+ /// true for that case, and false otherwise.
+ bool GenerateComments;
+
+public:
+ DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) { }
+ size_t getNumLists() const { return Lists.size(); }
+ const List &getList(size_t LI) const { return Lists[LI]; }
+ ArrayRef<List> getLists() const { return Lists; }
+ MCSymbol *getSym() const {
+ return Sym;
+ }
+ void setSym(MCSymbol *Sym) {
+ this->Sym = Sym;
+ }
+
+ class ListBuilder;
+ class EntryBuilder;
+
+private:
+ /// Start a new .debug_loc entry list.
+ ///
+ /// Start a new .debug_loc entry list. Return the new list's index so it can
+ /// be retrieved later via \a getList().
+ ///
+ /// Until the next call, \a startEntry() will add entries to this list.
+ size_t startList(DwarfCompileUnit *CU) {
+ size_t LI = Lists.size();
+ Lists.emplace_back(CU, Entries.size());
+ return LI;
+ }
+
+ /// Finalize a .debug_loc entry list.
+ ///
+ /// If there are no entries in this list, delete it outright. Otherwise,
+ /// create a label with \a Asm.
+ ///
+ /// \return false iff the list is deleted.
+ bool finalizeList(AsmPrinter &Asm);
+
+ /// Start a new .debug_loc entry.
+ ///
+ /// Until the next call, bytes added to the stream will be added to this
+ /// entry.
+ void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) {
+ Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()});
+ }
+
+ /// Finalize a .debug_loc entry, deleting if it's empty.
+ void finalizeEntry();
+
+public:
+ BufferByteStreamer getStreamer() {
+ return BufferByteStreamer(DWARFBytes, Comments, GenerateComments);
+ }
+
+ ArrayRef<Entry> getEntries(const List &L) const {
+ size_t LI = getIndex(L);
+ return ArrayRef(Entries).slice(Lists[LI].EntryOffset, getNumEntries(LI));
+ }
+
+ ArrayRef<char> getBytes(const Entry &E) const {
+ size_t EI = getIndex(E);
+ return ArrayRef(DWARFBytes.begin(), DWARFBytes.end())
+ .slice(Entries[EI].ByteOffset, getNumBytes(EI));
+ }
+ ArrayRef<std::string> getComments(const Entry &E) const {
+ size_t EI = getIndex(E);
+ return ArrayRef(Comments).slice(Entries[EI].CommentOffset,
+ getNumComments(EI));
+ }
+
+private:
+ size_t getIndex(const List &L) const {
+ assert(&Lists.front() <= &L && &L <= &Lists.back() &&
+ "Expected valid list");
+ return &L - &Lists.front();
+ }
+ size_t getIndex(const Entry &E) const {
+ assert(&Entries.front() <= &E && &E <= &Entries.back() &&
+ "Expected valid entry");
+ return &E - &Entries.front();
+ }
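+  // Extent helpers: an element's length is the distance to the next element's
+  // recorded offset; the final element extends to the end of its container.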
+ size_t getNumEntries(size_t LI) const {
+ if (LI + 1 == Lists.size())
+ return Entries.size() - Lists[LI].EntryOffset;
+ return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset;
+ }
+ size_t getNumBytes(size_t EI) const {
+ if (EI + 1 == Entries.size())
+ return DWARFBytes.size() - Entries[EI].ByteOffset;
+ return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset;
+ }
+ size_t getNumComments(size_t EI) const {
+ if (EI + 1 == Entries.size())
+ return Comments.size() - Entries[EI].CommentOffset;
+ return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset;
+ }
+};
+
+/// Builder for DebugLocStream lists.
+class DebugLocStream::ListBuilder {
+ DebugLocStream &Locs;
+ AsmPrinter &Asm;
+ DbgVariable &V;
+ const MachineInstr &MI;
+ size_t ListIndex;
+ std::optional<uint8_t> TagOffset;
+
+public:
+ ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
+ DbgVariable &V, const MachineInstr &MI)
+ : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)),
+ TagOffset(std::nullopt) {}
+
+ void setTagOffset(uint8_t TO) {
+ TagOffset = TO;
+ }
+
+ /// Finalize the list.
+ ///
+ /// If the list is empty, delete it. Otherwise, finalize it by creating a
+ /// temp symbol in \a Asm and setting up the \a DbgVariable.
+ ~ListBuilder();
+
+ DebugLocStream &getLocs() { return Locs; }
+};
+
+/// Builder for DebugLocStream entries.
+class DebugLocStream::EntryBuilder {
+ DebugLocStream &Locs;
+
+public:
+ EntryBuilder(ListBuilder &List, const MCSymbol *Begin, const MCSymbol *End)
+ : Locs(List.getLocs()) {
+ Locs.startEntry(Begin, End);
+ }
+
+ /// Finalize the entry, deleting it if it's empty.
+ ~EntryBuilder() { Locs.finalizeEntry(); }
+
+ BufferByteStreamer getStreamer() { return Locs.getStreamer(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..10c844ddb14a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,151 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A) : EHStreamer(A) {}
+
+DwarfCFIException::~DwarfCFIException() = default;
+
+void DwarfCFIException::addPersonality(const GlobalValue *Personality) {
+ if (!llvm::is_contained(Personalities, Personality))
+ Personalities.push_back(Personality);
+}
+
+/// endModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::endModule() {
+ // SjLj uses this pass and it doesn't need this info.
+ if (!Asm->MAI->usesCFIForEH())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
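+  // The 0x80 bit is DW_EH_PE_indirect; if the personality encoding is not
+  // indirect, there is no reference table to emit.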
+ if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
+ return;
+
+ // Emit indirect reference table for all used personality functions
+ for (const GlobalValue *Personality : Personalities) {
+ MCSymbol *Sym = Asm->getSymbol(Personality);
+ TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
+ }
+ Personalities.clear();
+}
+
+void DwarfCFIException::beginFunction(const MachineFunction *MF) {
+ shouldEmitPersonality = shouldEmitLSDA = false;
+ const Function &F = MF->getFunction();
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MF->getLandingPads().empty();
+
+ // See if we need frame move info.
+ bool shouldEmitMoves =
+ Asm->getFunctionCFISectionType(*MF) != AsmPrinter::CFISection::None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const GlobalValue *Per = nullptr;
+ if (F.hasPersonalityFn())
+ Per = dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+
+ // Emit a personality function even when there are no landing pads
+ forceEmitPersonality =
+ // ...if a personality function is explicitly specified
+ F.hasPersonalityFn() &&
+ // ... and it's not known to be a noop in the absence of invokes
+ !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ // ... and we're not explicitly asked not to emit it
+ F.needsUnwindTableEntry();
+
+ shouldEmitPersonality =
+ (forceEmitPersonality ||
+ (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) &&
+ Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo();
+ if (MAI.getExceptionHandlingType() != ExceptionHandling::None)
+ shouldEmitCFI =
+ MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
+ else
+ shouldEmitCFI = Asm->usesCFIWithoutEH() && shouldEmitMoves;
+}
+
+void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) {
+ if (!shouldEmitCFI)
+ return;
+
+ if (!hasEmittedCFISections) {
+ AsmPrinter::CFISection CFISecType = Asm->getModuleCFISectionType();
+ // If we don't say anything it implies `.cfi_sections .eh_frame`, so we
+ // chose not to be verbose in that case. And with `ForceDwarfFrameSection`,
+ // we should always emit .debug_frame.
+ if (CFISecType == AsmPrinter::CFISection::Debug ||
+ Asm->TM.Options.ForceDwarfFrameSection)
+ Asm->OutStreamer->emitCFISections(
+ CFISecType == AsmPrinter::CFISection::EH, true);
+ hasEmittedCFISections = true;
+ }
+
+ Asm->OutStreamer->emitCFIStartProc(/*IsSimple=*/false);
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ auto &F = MBB.getParent()->getFunction();
+ auto *P = dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+ assert(P && "Expected personality function");
+ // Record the personality function.
+ addPersonality(P);
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI);
+ Asm->OutStreamer->emitCFIPersonality(Sym, PerEncoding);
+
+ // Provide LSDA information.
+ if (shouldEmitLSDA)
+ Asm->OutStreamer->emitCFILsda(Asm->getMBBExceptionSym(MBB),
+ TLOF.getLSDAEncoding());
+}
+
+void DwarfCFIException::endBasicBlockSection(const MachineBasicBlock &MBB) {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->emitCFIEndProc();
+}
+
+/// endFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::endFunction(const MachineFunction *MF) {
+ if (!shouldEmitPersonality)
+ return;
+
+ emitExceptionTable();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 000000000000..58ed21379d29
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,1708 @@
+//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfCompileUnit.h"
+#include "AddressPool.h"
+#include "DwarfExpression.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iterator>
+#include <optional>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
+
+ // According to DWARF Debugging Information Format Version 5,
+ // 3.1.2 Skeleton Compilation Unit Entries:
+ // "When generating a split DWARF object file (see Section 7.3.2
+ // on page 187), the compilation unit in the .debug_info section
+ // is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit"
+ if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton)
+ return dwarf::DW_TAG_skeleton_unit;
+
+ return dwarf::DW_TAG_compile_unit;
+}
+
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
+ AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU, UnitKind Kind)
+ : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) {
+ insertDIE(Node, &getUnitDie());
+ MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
+}
+
+/// addLabelAddress - Add a DWARF label attribute's data and value, using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+ if ((Skeleton || !DD->useSplitDwarf()) && Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ // Don't use the address pool in non-fission or in the skeleton unit itself.
+ if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
+ return addLocalLabelAddress(Die, Attribute, Label);
+
+ bool UseAddrOffsetFormOrExpressions =
+ DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
+
+ const MCSymbol *Base = nullptr;
+ if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
+ Base = DD->getSectionLabel(&Label->getSection());
+
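+  // Without a distinct base symbol, refer to the label through the address
+  // pool (DW_FORM_addrx for DWARFv5, DW_FORM_GNU_addr_index before that).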
+ if (!Base || Base == Label) {
+ unsigned idx = DD->getAddressPool().getIndex(Label);
+ addAttribute(Die, Attribute,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
+ : dwarf::DW_FORM_GNU_addr_index,
+ DIEInteger(idx));
+ return;
+ }
+
+ // Could be extended to work with DWARFv4 Split DWARF if that's important for
+  // someone. In that case DW_FORM_data<N> would be used.
+ assert(DD->getDwarfVersion() >= 5 &&
+ "Addr+offset expressions are only valuable when using debug_addr (to "
+ "reduce relocations) available in DWARFv5 or higher");
+ if (DD->useAddrOffsetExpressions()) {
+ auto *Loc = new (DIEValueAllocator) DIEBlock();
+ addPoolOpAddress(*Loc, Label);
+ addBlock(Die, Attribute, dwarf::DW_FORM_exprloc, Loc);
+ } else
+ addAttribute(Die, Attribute, dwarf::DW_FORM_LLVM_addrx_offset,
+ new (DIEValueAllocator) DIEAddrOffset(
+ DD->getAddressPool().getIndex(Base), Label, Base));
+}
+
+void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
+ dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+ if (Label)
+ addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
+ else
+ addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
+}
+
+unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
+ // If we print assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+
+ // FIXME: add a better feature test than hasRawTextSupport. Even better,
+ // extend .file to support this.
+ unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
+ if (!File)
+ return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", std::nullopt,
+ std::nullopt, CUID);
+
+ if (LastFile != File) {
+ LastFile = File;
+ LastFileID = Asm->OutStreamer->emitDwarfFileDirective(
+ 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+ File->getSource(), CUID);
+ }
+ return LastFileID;
+}
+
+DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
+ const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(GV))
+ return Die;
+
+ assert(GV);
+
+ auto *GVContext = GV->getScope();
+ const DIType *GTy = GV->getType();
+
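+  // A variable in a Fortran common block gets a DW_TAG_common_block DIE as
+  // its context.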
+ auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
+ DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
+ : getOrCreateContextDIE(GVContext);
+
+ // Add to map.
+ DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
+ DIScope *DeclContext;
+ if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
+ DeclContext = SDMDecl->getScope();
+ assert(SDMDecl->isStaticMember() && "Expected static member decl");
+ assert(GV->isDefinition());
+ // We need the declaration DIE that is in the static member's class.
+ DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
+ addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
+ // If the global variable's type is different from the one in the class
+ // member type, assume that it's more specific and also emit it.
+ if (GTy != SDMDecl->getBaseType())
+ addType(*VariableDIE, GTy);
+ } else {
+ DeclContext = GV->getScope();
+ // Add name and type.
+ StringRef DisplayName = GV->getDisplayName();
+ if (!DisplayName.empty())
+ addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
+ if (GTy)
+ addType(*VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV->isLocalToUnit())
+ addFlag(*VariableDIE, dwarf::DW_AT_external);
+
+ // Add line number info.
+ addSourceLine(*VariableDIE, GV);
+ }
+
+ if (!GV->isDefinition())
+ addFlag(*VariableDIE, dwarf::DW_AT_declaration);
+ else
+ addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+
+ addAnnotation(*VariableDIE, GV->getAnnotations());
+
+ if (uint32_t AlignInBytes = GV->getAlignInBytes())
+ addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
+ if (MDTuple *TP = GV->getTemplateParams())
+ addTemplateParams(*VariableDIE, DINodeArray(TP));
+
+ // Add location.
+ addLocationAttribute(VariableDIE, GV, GlobalExprs);
+
+ return VariableDIE;
+}
+
+void DwarfCompileUnit::addLocationAttribute(
+ DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
+ bool addToAccelTable = false;
+ DIELoc *Loc = nullptr;
+ std::optional<unsigned> NVPTXAddressSpace;
+ std::unique_ptr<DIEDwarfExpression> DwarfExpr;
+ for (const auto &GE : GlobalExprs) {
+ const GlobalVariable *Global = GE.Var;
+ const DIExpression *Expr = GE.Expr;
+
+ // For compatibility with DWARF 3 and earlier,
+ // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) or
+ // DW_AT_location(DW_OP_consts, X, DW_OP_stack_value) becomes
+ // DW_AT_const_value(X).
+ if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
+ addToAccelTable = true;
+ addConstantValue(
+ *VariableDIE,
+ DIExpression::SignedOrUnsignedConstant::UnsignedConstant ==
+ *Expr->isConstant(),
+ Expr->getElement(1));
+ break;
+ }
+
+ // We cannot describe the location of dllimport'd variables: the
+ // computation of their address requires loads from the IAT.
+ if (Global && Global->hasDLLImportStorageClass())
+ continue;
+
+ // Nothing to describe without address or constant.
+ if (!Global && (!Expr || !Expr->isConstant()))
+ continue;
+
+ if (Global && Global->isThreadLocal() &&
+ !Asm->getObjFileLowering().supportDebugThreadLocalLocation())
+ continue;
+
+ if (!Loc) {
+ addToAccelTable = true;
+ Loc = new (DIEValueAllocator) DIELoc;
+ DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ }
+
+ if (Expr) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ // sequence for the NVPTX + gdb target.
+ unsigned LocalNVPTXAddressSpace;
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ const DIExpression *NewExpr =
+ DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+ if (NewExpr != Expr) {
+ Expr = NewExpr;
+ NVPTXAddressSpace = LocalNVPTXAddressSpace;
+ }
+ }
+ DwarfExpr->addFragmentOffset(Expr);
+ }
+
+ if (Global) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ // 16-bit platforms like MSP430 and AVR take this path, so sink this
+ // assert to platforms that use it.
+ auto GetPointerSizedFormAndOp = [this]() {
+ unsigned PointerSize = Asm->MAI->getCodePointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ struct FormAndOp {
+ dwarf::Form Form;
+ dwarf::LocationAtom Op;
+ };
+ return PointerSize == 4
+ ? FormAndOp{dwarf::DW_FORM_data4, dwarf::DW_OP_const4u}
+ : FormAndOp{dwarf::DW_FORM_data8, dwarf::DW_OP_const8u};
+ };
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.getTargetTriple().isWasm()) {
+ // FIXME This is not guaranteed, but in practice, in static linking,
+ // if present, __tls_base's index is 1. This doesn't hold for dynamic
+ // linking, so TLS variables used in dynamic linking won't have
+ // correct debug info for now. See
+ // https://github.com/llvm/llvm-project/blob/19afbfe33156d211fa959dadeea46cd17b9c723c/lld/wasm/Driver.cpp#L786-L823
+ addWasmRelocBaseGlobal(Loc, "__tls_base", 1);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ } else if (Asm->TM.useEmulatedTLS()) {
+ // TODO: add debug info for emulated thread local mode.
+ } else {
+ // FIXME: Make this work with -gsplit-dwarf.
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ auto FormAndOp = GetPointerSizedFormAndOp();
+            // 1) Start with a const4u/const8u of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, FormAndOp.Form,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
+ }
+ } else if (Asm->TM.getTargetTriple().isWasm() &&
+ Asm->TM.getRelocationModel() == Reloc::PIC_) {
+ // FIXME This is not guaranteed, but in practice, if present,
+ // __memory_base's index is 1. See
+ // https://github.com/llvm/llvm-project/blob/19afbfe33156d211fa959dadeea46cd17b9c723c/lld/wasm/Driver.cpp#L786-L823
+ addWasmRelocBaseGlobal(Loc, "__memory_base", 1);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ } else if ((Asm->TM.getRelocationModel() == Reloc::RWPI ||
+ Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) &&
+ !Asm->getObjFileLowering()
+ .getKindForGlobal(Global, Asm->TM)
+ .isReadOnly()) {
+ auto FormAndOp = GetPointerSizedFormAndOp();
+ // Constant
+ addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op);
+ // Relocation offset
+ addExpr(*Loc, FormAndOp.Form,
+ Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym));
+ // Base register
+ Register BaseReg = Asm->getObjFileLowering().getStaticBase();
+ BaseReg = Asm->TM.getMCRegisterInfo()->getDwarfRegNum(BaseReg, false);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + BaseReg);
+ // Offset from base register
+ addSInt(*Loc, dwarf::DW_FORM_sdata, 0);
+ // Operation
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ }
+ }
+ // Global variables attached to symbols are memory locations.
+ // It would be better if this were unconditional, but malformed input that
+ // mixes non-fragments and fragments for the same variable is too expensive
+ // to detect in the verifier.
+ if (DwarfExpr->isUnknownLocation())
+ DwarfExpr->setMemoryLocationKind();
+ DwarfExpr->addExpression(Expr);
+ }
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ const unsigned NVPTX_ADDR_global_space = 5;
+ addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAddressSpace.value_or(NVPTX_ADDR_global_space));
+ }
+ if (Loc)
+ addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
+
+ if (DD->useAllLinkageNames())
+ addLinkageName(*VariableDIE, GV->getLinkageName());
+
+ if (addToAccelTable) {
+ DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
+ DD->useAllLinkageNames())
+ DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
+ }
+}
+
+DIE *DwarfCompileUnit::getOrCreateCommonBlock(
+ const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
+ // Check for pre-existence.
+ if (DIE *NDie = getDIE(CB))
+ return NDie;
+ DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
+ DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
+ StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
+ addString(NDie, dwarf::DW_AT_name, Name);
+ addGlobalName(Name, NDie, CB->getScope());
+ if (CB->getFile())
+ addSourceLine(NDie, CB->getLineNo(), CB->getFile());
+ if (DIGlobalVariable *V = CB->getDecl())
+ getCU().addLocationAttribute(&NDie, V, GlobalExprs);
+ return &NDie;
+}
+
+void DwarfCompileUnit::addRange(RangeSpan Range) {
+ DD->insertSectionLabel(Range.Begin);
+
+ auto *PrevCU = DD->getPrevCU();
+ bool SameAsPrevCU = this == PrevCU;
+ DD->setPrevCU(this);
+  // If we have no current ranges, just add the range and return. Otherwise,
+  // check the current section and CU against the previous section and CU we
+  // emitted into and that the subprogram was contained within. If these are
+  // the same, extend our current range; otherwise add this as a new range.
+ if (CURanges.empty() || !SameAsPrevCU ||
+ (&CURanges.back().End->getSection() !=
+ &Range.End->getSection())) {
+ // Before a new range is added, always terminate the prior line table.
+ if (PrevCU)
+ DD->terminateLineTable(PrevCU);
+ CURanges.push_back(Range);
+ return;
+ }
+
+ CURanges.back().End = Range.End;
+}
+
+void DwarfCompileUnit::initStmtList() {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (DD->useSectionsAsReferences()) {
+ LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol();
+ } else {
+ LineTableStartSym =
+ Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
+ }
+
+  // DW_AT_stmt_list is an offset into the line number information for this
+  // compile unit in the debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
+ TLOF.getDwarfLineSection()->getBeginSymbol());
+}
+
+void DwarfCompileUnit::applyStmtList(DIE &D) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(D, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ TLOF.getDwarfLineSection()->getBeginSymbol());
+}
+
+void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
+ const MCSymbol *End) {
+ assert(Begin && "Begin label should not be null!");
+ assert(End && "End label should not be null!");
+ assert(Begin->isDefined() && "Invalid starting label");
+ assert(End->isDefined() && "Invalid end label");
+
+ addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
+ if (DD->getDwarfVersion() < 4)
+ addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+ else
+ addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
+ DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
+ auto *ContextCU = static_cast<DwarfCompileUnit *>(SPDie->getUnit());
+ return ContextCU->updateSubprogramScopeDIEImpl(SP, SPDie);
+}
+
+// Add info for Wasm-global-based relocation.
+// 'GlobalIndex' is used for split dwarf, which currently relies on a few
+// assumptions that are not guaranteed in a formal way but work in practice.
+void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
+ uint64_t GlobalIndex) {
+ // FIXME: duplicated from Target/WebAssembly/WebAssembly.h
+ // don't want to depend on target specific headers in this code?
+ const unsigned TI_GLOBAL_RELOC = 3;
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ auto *Sym = cast<MCSymbolWasm>(Asm->GetExternalSymbolSymbol(GlobalName));
+ // FIXME: this repeats what WebAssemblyMCInstLower::
+ // GetExternalSymbolSymbol does, since if there's no code that
+ // refers to this symbol, we have to set it here.
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ Sym->setGlobalType(wasm::WasmGlobalType{
+ static_cast<uint8_t>(PointerSize == 4 ? wasm::WASM_TYPE_I32
+ : wasm::WASM_TYPE_I64),
+ true});
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
+ if (!isDwoUnit()) {
+ addLabel(*Loc, dwarf::DW_FORM_data4, Sym);
+ } else {
+ // FIXME: when writing dwo, we need to avoid relocations. Probably
+ // the "right" solution is to treat globals the way func and data
+ // symbols are (with entries in .debug_addr).
+ // For now we hardcode the indices in the callsites. Global indices are not
+ // fixed, but in practice a few are fixed; for example, __stack_pointer is
+ // always index 0.
+ addUInt(*Loc, dwarf::DW_FORM_data4, GlobalIndex);
+ }
+}
+
+DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
+ DIE *SPDie) {
+ SmallVector<RangeSpan, 2> BB_List;
+  // If basic block sections are on, ranges for each basic block section have
+  // to be emitted separately.
+ for (const auto &R : Asm->MBBSectionRanges)
+ BB_List.push_back({R.second.BeginLabel, R.second.EndLabel});
+
+ attachRangesOrLowHighPC(*SPDie, BB_List);
+
+ if (DD->useAppleExtensionAttributes() &&
+ !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
+ *DD->getCurrentFunction()))
+ addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
+
+ // Only include DW_AT_frame_base in full debug info
+ if (!includeMinimalInlineScopes()) {
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ TargetFrameLowering::DwarfFrameBase FrameBase =
+ TFI->getDwarfFrameBase(*Asm->MF);
+ switch (FrameBase.Kind) {
+ case TargetFrameLowering::DwarfFrameBase::Register: {
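+      // Only a physical register can be described as a DWARF machine
+      // location; otherwise DW_AT_frame_base is simply omitted.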
+ if (Register::isPhysicalRegister(FrameBase.Location.Reg)) {
+ MachineLocation Location(FrameBase.Location.Reg);
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+ break;
+ }
+ case TargetFrameLowering::DwarfFrameBase::CFA: {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
+ if (FrameBase.Location.Offset != 0) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_consts);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.Offset);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ }
+ addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
+ break;
+ }
+ case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: {
+ // FIXME: duplicated from Target/WebAssembly/WebAssembly.h
+ const unsigned TI_GLOBAL_RELOC = 3;
+ if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
+ // These need to be relocatable.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
+ // For now, since we only ever use index 0, this should work as-is.
+ addWasmRelocBaseGlobal(Loc, "__stack_pointer",
+ FrameBase.Location.WasmLoc.Index);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
+ } else {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DIExpressionCursor Cursor({});
+ DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind,
+ FrameBase.Location.WasmLoc.Index);
+ DwarfExpr.addExpression(std::move(Cursor));
+ addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize());
+ }
+ break;
+ }
+ }
+ }
+
+  // Add the name to the name table; we do this here because we're guaranteed
+  // to have concrete versions of our DW_TAG_subprogram nodes.
+ DD->addSubprogramNames(*CUNode, SP, *SPDie);
+
+ return *SPDie;
+}
+
+// Construct a DIE for this scope.
+void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope,
+ DIE &ParentScopeDIE) {
+ if (!Scope || !Scope->getScopeNode())
+ return;
+
+ auto *DS = Scope->getScopeNode();
+
+ assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) &&
+ "Only handle inlined subprograms here, use "
+ "constructSubprogramScopeDIE for non-inlined "
+ "subprograms");
+
+ // Emit inlined subprograms.
+ if (Scope->getParent() && isa<DISubprogram>(DS)) {
+ DIE *ScopeDIE = constructInlinedScopeDIE(Scope, ParentScopeDIE);
+ assert(ScopeDIE && "Scope DIE should not be null.");
+ createAndAddScopeChildren(Scope, *ScopeDIE);
+ return;
+ }
+
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ // Emit lexical blocks.
+ DIE *ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
+
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
+}
+
+void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
+ SmallVector<RangeSpan, 2> Range) {
+
+ HasRangeLists = true;
+
+ // Add the range list to the set of ranges to be emitted.
+ auto IndexAndList =
+ (DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU)
+ ->addRange(*(Skeleton ? Skeleton : this), std::move(Range));
+
+ uint32_t Index = IndexAndList.first;
+ auto &List = *IndexAndList.second;
+
+ // Under fission, ranges are specified by constant offsets relative to the
+ // CU's DW_AT_GNU_ranges_base.
+ // FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
+ // fission until we support the forms using the .debug_addr section
+ // (DW_RLE_startx_endx etc.).
+ if (DD->getDwarfVersion() >= 5)
+ addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
+ else {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
+ if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
+ RangeSectionSym);
+ else
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
+ RangeSectionSym);
+ }
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
+ assert(!Ranges.empty());
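+  // Prefer a single low_pc/high_pc pair when the ranges section is not used,
+  // or when there is one contiguous range that either is not forced into a
+  // range list or starts at its section's begin label.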
+ if (!DD->useRangesSection() ||
+ (Ranges.size() == 1 &&
+ (!DD->alwaysUseRanges(*this) ||
+ DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
+ Ranges.front().Begin))) {
+ const RangeSpan &Front = Ranges.front();
+ const RangeSpan &Back = Ranges.back();
+ attachLowHighPC(Die, Front.Begin, Back.End);
+ } else
+ addScopeRangeList(Die, std::move(Ranges));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
+ SmallVector<RangeSpan, 2> List;
+ List.reserve(Ranges.size());
+ for (const InsnRange &R : Ranges) {
+ auto *BeginLabel = DD->getLabelBeforeInsn(R.first);
+ auto *EndLabel = DD->getLabelAfterInsn(R.second);
+
+ const auto *BeginMBB = R.first->getParent();
+ const auto *EndMBB = R.second->getParent();
+
+ const auto *MBB = BeginMBB;
+    // Basic block sections allow basic block subsets to be placed in unique
+ // sections. For each section, the begin and end label must be added to the
+ // list. If there is more than one range, debug ranges must be used.
+ // Otherwise, low/high PC can be used.
+ // FIXME: Debug Info Emission depends on block order and this assumes that
+ // the order of blocks will be frozen beyond this point.
+ do {
+ if (MBB->sameSection(EndMBB) || MBB->isEndSection()) {
+ auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()];
+ List.push_back(
+ {MBB->sameSection(BeginMBB) ? BeginLabel
+ : MBBSectionRange.BeginLabel,
+ MBB->sameSection(EndMBB) ? EndLabel : MBBSectionRange.EndLabel});
+ }
+ if (MBB->sameSection(EndMBB))
+ break;
+ MBB = MBB->getNextNode();
+ } while (true);
+ }
+ attachRangesOrLowHighPC(Die, std::move(List));
+}
+
+DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
+ DIE &ParentScopeDIE) {
+ assert(Scope->getScopeNode());
+ auto *DS = Scope->getScopeNode();
+ auto *InlinedSP = getDISubprogram(DS);
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP];
+ assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
+
+ auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
+ ParentScopeDIE.addChild(ScopeDIE);
+ addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ // Add the call site information to the DIE.
+ const DILocation *IA = Scope->getInlinedAt();
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_file, std::nullopt,
+ getOrCreateSourceID(IA->getFile()));
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, std::nullopt, IA->getLine());
+ if (IA->getColumn())
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_column, std::nullopt, IA->getColumn());
+ if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
+ addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, std::nullopt,
+ IA->getDiscriminator());
+
+  // Add the name to the name table; we do this here because we're guaranteed
+  // to have concrete versions of our DW_TAG_inlined_subroutine nodes.
+ DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
+
+ return ScopeDIE;
+}
+
+// Construct a new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
+ if (DD->isLexicalScopeDIENull(Scope))
+ return nullptr;
+ const auto *DS = Scope->getScopeNode();
+
+ auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope()) {
+ assert(!getAbstractScopeDIEs().count(DS) &&
+ "Abstract DIE for this scope exists!");
+ getAbstractScopeDIEs()[DS] = ScopeDIE;
+ return ScopeDIE;
+ }
+ if (!Scope->getInlinedAt()) {
+ assert(!LexicalBlockDIEs.count(DS) &&
+ "Concrete out-of-line DIE for this scope exists!");
+ LexicalBlockDIEs[DS] = ScopeDIE;
+ }
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ return ScopeDIE;
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
+ auto D = constructVariableDIEImpl(DV, Abstract);
+ DV.setDIE(*D);
+ return D;
+}
+
+DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
+ const LexicalScope &Scope) {
+ auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
+ insertDIE(DL.getLabel(), LabelDie);
+ DL.setDIE(*LabelDie);
+
+ if (Scope.isAbstractScope())
+ applyLabelAttributes(DL, *LabelDie);
+
+ return LabelDie;
+}
+
+DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract) {
+ // Define variable debug information entry.
+ auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
+ insertDIE(DV.getVariable(), VariableDie);
+
+ if (Abstract) {
+ applyVariableAttributes(DV, *VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
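+  // A debug location list index takes precedence: the variable's location
+  // varies over its lifetime and is described by a list emitted separately.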
+ unsigned Index = DV.getDebugLocListIndex();
+ if (Index != ~0U) {
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Index);
+ auto TagOffset = DV.getDebugLocListTagOffset();
+ if (TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *TagOffset);
+ return VariableDie;
+ }
+
+ // Check if variable has a single location description.
+ if (auto *DVal = DV.getValueLoc()) {
+ if (!DVal->isVariadic()) {
+ const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
+ if (Entry->isLocation()) {
+ addVariableAddress(DV, *VariableDie, Entry->getLoc());
+ } else if (Entry->isInt()) {
+ auto *Expr = DV.getSingleExpression();
+ if (Expr && Expr->getNumElements()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addFragmentOffset(Expr);
+ DwarfExpr.addUnsignedConstant(Entry->getInt());
+ DwarfExpr.addExpression(Expr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
+ dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
+ } else
+ addConstantValue(*VariableDie, Entry->getInt(), DV.getType());
+ } else if (Entry->isConstantFP()) {
+ addConstantFPValue(*VariableDie, Entry->getConstantFP());
+ } else if (Entry->isConstantInt()) {
+ addConstantValue(*VariableDie, Entry->getConstantInt(), DV.getType());
+ } else if (Entry->isTargetIndexLocation()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(DV.getVariable()->getType()));
+ DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ }
+ return VariableDie;
+ }
+ // If any of the location entries are registers with the value 0, then the
+ // location is undefined.
+ if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) {
+ return Entry.isLocation() && !Entry.getLoc().getReg();
+ }))
+ return VariableDie;
+ const DIExpression *Expr = DV.getSingleExpression();
+ assert(Expr && "Variadic Debug Value must have an Expression.");
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DwarfExpr.addFragmentOffset(Expr);
+ DIExpressionCursor Cursor(Expr);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+
+ auto AddEntry = [&](const DbgValueLocEntry &Entry,
+ DIExpressionCursor &Cursor) {
+ if (Entry.isLocation()) {
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
+ Entry.getLoc().getReg()))
+ return false;
+ } else if (Entry.isInt()) {
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addUnsignedConstant(Entry.getInt());
+ } else if (Entry.isConstantFP()) {
+ // DwarfExpression does not support arguments wider than 64 bits
+ // (see PR52584).
+ // TODO: Consider chunking expressions containing overly wide
+ // arguments into separate pointer-sized fragment expressions.
+ APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
+ if (RawBytes.getBitWidth() > 64)
+ return false;
+ DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
+ } else if (Entry.isConstantInt()) {
+ APInt RawBytes = Entry.getConstantInt()->getValue();
+ if (RawBytes.getBitWidth() > 64)
+ return false;
+ DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
+ } else if (Entry.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Entry.getTargetIndexLocation();
+      // TODO: TargetIndexLocation is target-independent. Currently only the
+ // WebAssembly-specific encoding is supported.
+ assert(Asm->TM.getTargetTriple().isWasm());
+ DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ } else {
+ llvm_unreachable("Unsupported Entry type.");
+ }
+ return true;
+ };
+
+ if (!DwarfExpr.addExpression(
+ std::move(Cursor),
+ [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
+ return AddEntry(DVal->getLocEntries()[Idx], Cursor);
+ }))
+ return VariableDie;
+
+ // Now attach the location information to the DIE.
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
+
+ return VariableDie;
+ }
+
+ // .. else use frame index.
+ if (!DV.hasFrameIndexExprs())
+ return VariableDie;
+
+ std::optional<unsigned> NVPTXAddressSpace;
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ for (const auto &Fragment : DV.getFrameIndexExprs()) {
+ Register FrameReg;
+ const DIExpression *Expr = Fragment.Expr;
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ StackOffset Offset =
+ TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
+ DwarfExpr.addFragmentOffset(Expr);
+
+ auto *TRI = Asm->MF->getSubtarget().getRegisterInfo();
+ SmallVector<uint64_t, 8> Ops;
+ TRI->getOffsetOpcodes(Offset, Ops);
+
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ // sequence for the NVPTX + gdb target.
+ unsigned LocalNVPTXAddressSpace;
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ const DIExpression *NewExpr =
+ DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+ if (NewExpr != Expr) {
+ Expr = NewExpr;
+ NVPTXAddressSpace = LocalNVPTXAddressSpace;
+ }
+ }
+ if (Expr)
+ Ops.append(Expr->elements_begin(), Expr->elements_end());
+ DIExpressionCursor Cursor(Ops);
+ DwarfExpr.setMemoryLocationKind();
+ if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
+ addOpAddress(*Loc, FrameSymbol);
+ else
+ DwarfExpr.addMachineRegExpression(
+ *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
+ DwarfExpr.addExpression(std::move(Cursor));
+ }
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ const unsigned NVPTX_ADDR_local_space = 6;
+ addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAddressSpace.value_or(NVPTX_ADDR_local_space));
+ }
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
+
+ return VariableDie;
+}
+
+DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
+ const LexicalScope &Scope,
+ DIE *&ObjectPointer) {
+ auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
+ if (DV.isObjectPointer())
+ ObjectPointer = Var;
+ return Var;
+}
+
+/// Return all DIVariables that appear in \p Var's array type: in its
+/// dataLocation, associated and allocated fields, and in the count, bound and
+/// stride expressions of its subranges.
+static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
+ SmallVector<const DIVariable *, 2> Result;
+ auto *Array = dyn_cast<DICompositeType>(Var->getType());
+ if (!Array || Array->getTag() != dwarf::DW_TAG_array_type)
+ return Result;
+ if (auto *DLVar = Array->getDataLocation())
+ Result.push_back(DLVar);
+ if (auto *AsVar = Array->getAssociated())
+ Result.push_back(AsVar);
+ if (auto *AlVar = Array->getAllocated())
+ Result.push_back(AlVar);
+ for (auto *El : Array->getElements()) {
+ if (auto *Subrange = dyn_cast<DISubrange>(El)) {
+ if (auto Count = Subrange->getCount())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(Count))
+ Result.push_back(Dependency);
+ if (auto LB = Subrange->getLowerBound())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(LB))
+ Result.push_back(Dependency);
+ if (auto UB = Subrange->getUpperBound())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(UB))
+ Result.push_back(Dependency);
+ if (auto ST = Subrange->getStride())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(ST))
+ Result.push_back(Dependency);
+ } else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
+ if (auto Count = GenericSubrange->getCount())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(Count))
+ Result.push_back(Dependency);
+ if (auto LB = GenericSubrange->getLowerBound())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(LB))
+ Result.push_back(Dependency);
+ if (auto UB = GenericSubrange->getUpperBound())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(UB))
+ Result.push_back(Dependency);
+ if (auto ST = GenericSubrange->getStride())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(ST))
+ Result.push_back(Dependency);
+ }
+ }
+ return Result;
+}
+
+/// Sort local variables so that variables appearing inside of helper
+/// expressions come first.
+static SmallVector<DbgVariable *, 8>
+sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
+ SmallVector<DbgVariable *, 8> Result;
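+  // Worklist of (variable, dependencies-already-pushed) pairs for an
+  // iterative DFS.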
+ SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList;
+ // Map back from a DIVariable to its containing DbgVariable.
+ SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar;
+ // Set of DbgVariables in Result.
+ SmallDenseSet<DbgVariable *, 8> Visited;
+ // For cycle detection.
+ SmallDenseSet<DbgVariable *, 8> Visiting;
+
+ // Initialize the worklist and the DIVariable lookup table.
+ for (auto *Var : reverse(Input)) {
+ DbgVar.insert({Var->getVariable(), Var});
+ WorkList.push_back({Var, 0});
+ }
+
+ // Perform a stable topological sort by doing a DFS.
+ while (!WorkList.empty()) {
+ auto Item = WorkList.back();
+ DbgVariable *Var = Item.getPointer();
+ bool visitedAllDependencies = Item.getInt();
+ WorkList.pop_back();
+
+ assert(Var);
+
+ // Already handled.
+ if (Visited.count(Var))
+ continue;
+
+ // Add to Result if all dependencies are visited.
+ if (visitedAllDependencies) {
+ Visited.insert(Var);
+ Result.push_back(Var);
+ continue;
+ }
+
+ // Detect cycles.
+ auto Res = Visiting.insert(Var);
+ if (!Res.second) {
+ assert(false && "dependency cycle in local variables");
+ return Result;
+ }
+
+ // Push dependencies and this node onto the worklist, so that this node is
+ // visited again after all of its dependencies are handled.
+ WorkList.push_back({Var, 1});
+ for (const auto *Dependency : dependencies(Var)) {
+ // Don't add dependency if it is in a different lexical scope or a global.
+ if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency))
+ if (DbgVariable *Var = DbgVar.lookup(Dep))
+ WorkList.push_back({Var, 0});
+ }
+ }
+ return Result;
+}
+
+DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope) {
+ DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+ auto *ContextCU = static_cast<DwarfCompileUnit *>(ScopeDIE.getUnit());
+
+ if (Scope) {
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ // Collect lexical scope children first.
+    // ObjectPointer might be a non-argument local variable if it's a
+ // block's synthetic this pointer.
+ if (DIE *ObjectPointer =
+ ContextCU->createAndAddScopeChildren(Scope, ScopeDIE))
+ ContextCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+ *ObjectPointer);
+ }
+
+ // If this is a variadic function, add an unspecified parameter.
+ DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
+
+ // If we have a single element of null, it is a function that returns void.
+  // If we have more than one element and the last one is null, it is a
+ // variadic function.
+ if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
+ !includeMinimalInlineScopes())
+ ScopeDIE.addChild(
+ DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
+
+ return ScopeDIE;
+}
+
+DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
+ DIE &ScopeDIE) {
+ DIE *ObjectPointer = nullptr;
+
+ // Emit function arguments (order is significant).
+ auto Vars = DU->getScopeVariables().lookup(Scope);
+ for (auto &DV : Vars.Args)
+ ScopeDIE.addChild(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
+
+ // Emit local variables.
+ auto Locals = sortLocalVars(Vars.Locals);
+ for (DbgVariable *DV : Locals)
+ ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ // Emit labels.
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ ScopeDIE.addChild(constructLabelDIE(*DL, *Scope));
+
+ // Track other local entities (skipped in gmlt-like data).
+ // This creates mapping between CU and a set of local declarations that
+ // should be emitted for subprograms in this CU.
+ if (!includeMinimalInlineScopes() && !Scope->getInlinedAt()) {
+ auto &LocalDecls = DD->getLocalDeclsForScope(Scope->getScopeNode());
+ DeferredLocalDecls.insert(LocalDecls.begin(), LocalDecls.end());
+ }
+
+ // Emit inner lexical scopes.
+ auto skipLexicalScope = [this](LexicalScope *S) -> bool {
+ if (isa<DISubprogram>(S->getScopeNode()))
+ return false;
+ auto Vars = DU->getScopeVariables().lookup(S);
+ if (!Vars.Args.empty() || !Vars.Locals.empty())
+ return false;
+ return includeMinimalInlineScopes() ||
+ DD->getLocalDeclsForScope(S->getScopeNode()).empty();
+ };
+ for (LexicalScope *LS : Scope->getChildren()) {
+ // If the lexical block doesn't have non-scope children, skip
+    // its emission and put its children directly into the parent scope.
+ if (skipLexicalScope(LS))
+ createAndAddScopeChildren(LS, ScopeDIE);
+ else
+ constructScopeDIE(LS, ScopeDIE);
+ }
+
+ return ObjectPointer;
+}
+
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+ LexicalScope *Scope) {
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
+ if (getAbstractScopeDIEs().count(SP))
+ return;
+
+ DIE *ContextDIE;
+ DwarfCompileUnit *ContextCU = this;
+
+ if (includeMinimalInlineScopes())
+ ContextDIE = &getUnitDie();
+ // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
+ // the important distinction that the debug node is not associated with the
+ // DIE (since the debug node will be associated with the concrete DIE, if
+ // any). It could be refactored to some common utility function.
+ else if (auto *SPDecl = SP->getDeclaration()) {
+ ContextDIE = &getUnitDie();
+ getOrCreateSubprogramDIE(SPDecl);
+ } else {
+ ContextDIE = getOrCreateContextDIE(SP->getScope());
+ // The scope may be shared with a subprogram that has already been
+ // constructed in another CU, in which case we need to construct this
+ // subprogram in the same CU.
+ ContextCU = DD->lookupCU(ContextDIE->getUnitDie());
+ }
+
+ // Passing null as the associated node because the abstract definition
+ // shouldn't be found by lookup.
+ DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
+ *ContextDIE, nullptr);
+
+ // Store the DIE before creating children.
+ ContextCU->getAbstractScopeDIEs()[SP] = &AbsDef;
+
+ ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef);
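+  // DWARF v5 can encode DW_INL_inlined via DW_FORM_implicit_const in the
+  // abbreviation; for v4 and earlier, let addSInt choose a suitable form.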
+ ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline,
+ DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>()
+ : dwarf::DW_FORM_implicit_const,
+ dwarf::DW_INL_inlined);
+ if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef))
+ ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+}
+
+bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
+ return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB();
+}
+
+dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
+ if (!useGNUAnalogForDwarf5Feature())
+ return Tag;
+ switch (Tag) {
+ case dwarf::DW_TAG_call_site:
+ return dwarf::DW_TAG_GNU_call_site;
+ case dwarf::DW_TAG_call_site_parameter:
+ return dwarf::DW_TAG_GNU_call_site_parameter;
+ default:
+ llvm_unreachable("DWARF5 tag with no GNU analog");
+ }
+}
+
+dwarf::Attribute
+DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
+ if (!useGNUAnalogForDwarf5Feature())
+ return Attr;
+ switch (Attr) {
+ case dwarf::DW_AT_call_all_calls:
+ return dwarf::DW_AT_GNU_all_call_sites;
+ case dwarf::DW_AT_call_target:
+ return dwarf::DW_AT_GNU_call_site_target;
+ case dwarf::DW_AT_call_origin:
+ return dwarf::DW_AT_abstract_origin;
+ case dwarf::DW_AT_call_return_pc:
+ return dwarf::DW_AT_low_pc;
+ case dwarf::DW_AT_call_value:
+ return dwarf::DW_AT_GNU_call_site_value;
+ case dwarf::DW_AT_call_tail_call:
+ return dwarf::DW_AT_GNU_tail_call;
+ default:
+ llvm_unreachable("DWARF5 attribute with no GNU analog");
+ }
+}
+
+dwarf::LocationAtom
+DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
+ if (!useGNUAnalogForDwarf5Feature())
+ return Loc;
+ switch (Loc) {
+ case dwarf::DW_OP_entry_value:
+ return dwarf::DW_OP_GNU_entry_value;
+ default:
+ llvm_unreachable("DWARF5 location atom with no GNU analog");
+ }
+}
+
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
+ const DISubprogram *CalleeSP,
+ bool IsTail,
+ const MCSymbol *PCAddr,
+ const MCSymbol *CallAddr,
+ unsigned CallReg) {
+ // Insert a call site entry DIE within ScopeDIE.
+ DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
+ ScopeDIE, nullptr);
+
+ if (CallReg) {
+ // Indirect call.
+ addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
+ MachineLocation(CallReg));
+ } else {
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
+ *CalleeDIE);
+ }
+
+ if (IsTail) {
+ // Attach DW_AT_call_tail_call to tail calls for standards compliance.
+ addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call));
+
+ // Attach the address of the branch instruction to allow the debugger to
+ // show where the tail call occurred. This attribute has no GNU analog.
+ //
+ // GDB works backwards from non-standard usage of DW_AT_low_pc (in DWARF4
+ // mode -- equivalently, in DWARF5 mode, DW_AT_call_return_pc) at tail-call
+ // site entries to figure out the PC of tail-calling branch instructions.
+ // This means it doesn't need the compiler to emit DW_AT_call_pc, so we
+ // don't emit it here.
+ //
+ // There's no need to tie non-GDB debuggers to this non-standardness, as it
+ // adds unnecessary complexity to the debugger. For non-GDB debuggers, emit
+ // the standard DW_AT_call_pc info.
+ if (!useGNUAnalogForDwarf5Feature())
+ addLabelAddress(CallSiteDIE, dwarf::DW_AT_call_pc, CallAddr);
+ }
+
+ // Attach the return PC to allow the debugger to disambiguate call paths
+ // from one function to another.
+ //
+ // The return PC is only really needed when the call /isn't/ a tail call, but
+ // GDB expects it in DWARF4 mode, even for tail calls (see the comment above
+ // the DW_AT_call_pc emission logic for an explanation).
+ if (!IsTail || useGNUAnalogForDwarf5Feature()) {
+ assert(PCAddr && "Missing return PC information for a call");
+ addLabelAddress(CallSiteDIE,
+ getDwarf5OrGNUAttr(dwarf::DW_AT_call_return_pc), PCAddr);
+ }
+
+ return CallSiteDIE;
+}
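// Illustrative sketch, not from the upstream sources; callee/caller are
// hypothetical names. For a direct tail call like the one below,
// constructCallSiteEntryDIE() is invoked with CallReg == 0 (direct call) and
// IsTail == true, so the entry gets DW_AT_call_origin (DW_AT_abstract_origin
// under the GNU analogs) plus DW_AT_call_tail_call, and the DW_AT_call_pc vs.
// DW_AT_call_return_pc choice follows the GDB-tuning logic documented above.
// Whether call-site entries are emitted at all depends on the target's debug
// entry-value support and the optimization settings.
int callee(int X);
int caller(int X) {
  return callee(X + 1); // Commonly lowered to a tail call when optimizing.
}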
+
+void DwarfCompileUnit::constructCallSiteParmEntryDIEs(
+ DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) {
+ for (const auto &Param : Params) {
+ unsigned Register = Param.getRegister();
+ auto CallSiteDieParam =
+ DIE::get(DIEValueAllocator,
+ getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter));
+ insertDIE(CallSiteDieParam);
+ addAddress(*CallSiteDieParam, dwarf::DW_AT_location,
+ MachineLocation(Register));
+
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DwarfExpr.setCallSiteParamValueFlag();
+
+ DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr);
+
+ addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value),
+ DwarfExpr.finalize());
+
+ CallSiteDIE.addChild(CallSiteDieParam);
+ }
+}
+
+DIE *DwarfCompileUnit::constructImportedEntityDIE(
+ const DIImportedEntity *Module) {
+ DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
+ insertDIE(Module, IMDie);
+ DIE *EntityDie;
+ auto *Entity = Module->getEntity();
+ if (auto *NS = dyn_cast<DINamespace>(Entity))
+ EntityDie = getOrCreateNameSpace(NS);
+ else if (auto *M = dyn_cast<DIModule>(Entity))
+ EntityDie = getOrCreateModule(M);
+ else if (auto *SP = dyn_cast<DISubprogram>(Entity)) {
+ // If there is an abstract subprogram, refer to it. Note that this assumes
+ // that all the abstract subprograms have already been created (which is
+ // correct until imported entities get emitted in DwarfDebug::endModule()).
+ if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP))
+ EntityDie = AbsSPDie;
+ else
+ EntityDie = getOrCreateSubprogramDIE(SP);
+ } else if (auto *T = dyn_cast<DIType>(Entity))
+ EntityDie = getOrCreateTypeDIE(T);
+ else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
+ EntityDie = getOrCreateGlobalVariableDIE(GV, {});
+ else if (auto *IE = dyn_cast<DIImportedEntity>(Entity))
+ EntityDie = getOrCreateImportedEntityDIE(IE);
+ else
+ EntityDie = getDIE(Entity);
+ assert(EntityDie);
+ addSourceLine(*IMDie, Module->getLine(), Module->getFile());
+ addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
+ StringRef Name = Module->getName();
+ if (!Name.empty()) {
+ addString(*IMDie, dwarf::DW_AT_name, Name);
+
+ // FIXME: if consumers ever start caring about handling
+ // unnamed import declarations such as `using ::nullptr_t`
+ // or `using namespace std::ranges`, we could add the
+ // import declaration into the accelerator table with the
+ // name being the one of the entity being imported.
+ DD->addAccelNamespace(*CUNode, Name, *IMDie);
+ }
+
+ // This is for an imported module with renamed entities (such as variables
+ // and subprograms).
+ DINodeArray Elements = Module->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ IMDie->addChild(
+ constructImportedEntityDIE(cast<DIImportedEntity>(Element)));
+ }
+
+ return IMDie;
+}
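// Illustrative sketch, not from the upstream sources. The DIImportedEntity
// nodes handled above come from constructs like these: a using-directive is
// described as DW_TAG_imported_module, while a using-declaration and a
// namespace alias become DW_TAG_imported_declaration, with DW_AT_import
// pointing at the DIE of the namespace or entity created elsewhere in this
// unit.
namespace detail { int Value = 0; }
using namespace detail; // DW_TAG_imported_module
using detail::Value;    // DW_TAG_imported_declaration
namespace dt = detail;  // DW_TAG_imported_declaration with DW_AT_name ("dt")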
+
+DIE *DwarfCompileUnit::getOrCreateImportedEntityDIE(
+ const DIImportedEntity *IE) {
+
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(IE))
+ return Die;
+
+ DIE *ContextDIE = getOrCreateContextDIE(IE->getScope());
+ assert(ContextDIE && "Empty scope for the imported entity!");
+
+ DIE *IMDie = constructImportedEntityDIE(IE);
+ ContextDIE->addChild(IMDie);
+ return IMDie;
+}
+
+void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
+ DIE *D = getDIE(SP);
+ if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) {
+ if (D)
+ // If this subprogram has an abstract definition, reference that
+ addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
+ } else {
+ assert(D || includeMinimalInlineScopes());
+ if (D)
+ // And attach the attributes
+ applySubprogramAttributesToDefinition(SP, *D);
+ }
+}
+
+void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
+ DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity());
+
+ auto *Die = Entity->getDIE();
+ /// Label may be used to generate DW_AT_low_pc, so put it outside the
+ /// if/else block.
+ const DbgLabel *Label = nullptr;
+ if (AbsEntity && AbsEntity->getDIE()) {
+ addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE());
+ Label = dyn_cast<const DbgLabel>(Entity);
+ } else {
+ if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
+ applyVariableAttributes(*Var, *Die);
+ else if ((Label = dyn_cast<const DbgLabel>(Entity)))
+ applyLabelAttributes(*Label, *Die);
+ else
+ llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
+ }
+
+ if (Label)
+ if (const auto *Sym = Label->getSymbol())
+ addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
+}
+
+DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
+ auto &AbstractEntities = getAbstractEntities();
+ auto I = AbstractEntities.find(Node);
+ if (I != AbstractEntities.end())
+ return I->second.get();
+ return nullptr;
+}
+
+void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
+ LexicalScope *Scope) {
+ assert(Scope && Scope->isAbstractScope());
+ auto &Entity = getAbstractEntities()[Node];
+ if (isa<const DILocalVariable>(Node)) {
+ Entity = std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ nullptr /* IA */);
+ DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
+ } else if (isa<const DILabel>(Node)) {
+ Entity = std::make_unique<DbgLabel>(
+ cast<const DILabel>(Node), nullptr /* IA */);
+ DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
+ }
+}
+
+void DwarfCompileUnit::emitHeader(bool UseOffsets) {
+ // Don't bother labeling the .dwo unit, as its offset isn't used.
+ if (!Skeleton && !DD->useSectionsAsReferences()) {
+ LabelBegin = Asm->createTempSymbol("cu_begin");
+ Asm->OutStreamer->emitLabel(LabelBegin);
+ }
+
+ dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
+ : DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
+ : dwarf::DW_UT_compile;
+ DwarfUnit::emitCommonHeader(UseOffsets, UT);
+ if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile)
+ Asm->emitInt64(getDWOId());
+}
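// Illustrative sketch, not from the upstream sources; the struct is only a
// visualization of the encoding, not a type used by the emitter. With split
// DWARF and DWARF v5, emitHeader() produces a skeleton or split-compile unit
// header laid out roughly as below (DWARF v5, section 7.5.1.2); the trailing
// 8-byte dwo_id is the extra word that DwarfCompileUnit::getHeaderSize() adds
// on top of the base DwarfUnit header size.
#include <cstdint>
struct DWARFv5SkeletonOrSplitUnitHeaderSketch {
  uint32_t UnitLength;        // DWARF64 uses a 0xffffffff escape + 8-byte length.
  uint16_t Version;           // 5
  uint8_t UnitType;           // DW_UT_skeleton or DW_UT_split_compile
  uint8_t AddressSize;        // Size of a target address in bytes.
  uint32_t DebugAbbrevOffset; // 8 bytes under DWARF64.
  uint64_t DwoId;             // Correlates the skeleton with its .dwo unit.
};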
+
+bool DwarfCompileUnit::hasDwarfPubSections() const {
+ switch (CUNode->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::None:
+ return false;
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ case DICompileUnit::DebugNameTableKind::GNU:
+ return true;
+ case DICompileUnit::DebugNameTableKind::Apple:
+ return false;
+ case DICompileUnit::DebugNameTableKind::Default:
+ return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
+ !CUNode->isDebugDirectivesOnly() &&
+ DD->getAccelTableKind() != AccelTableKind::Apple &&
+ DD->getDwarfVersion() < 5;
+ }
+ llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
+}
+
+/// addGlobalName - Add a new global name to the compile unit.
+void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) {
+ if (!hasDwarfPubSections())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ GlobalNames[FullName] = &Die;
+}
+
+void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
+ const DIScope *Context) {
+ if (!hasDwarfPubSections())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ // Insert, allowing the entry to remain as-is if it's already present.
+ // This way the CU-level type DIE is preferred over the "can't describe this
+ // type as a unit offset because it's not really in the CU at all, it's only
+ // in a type unit" case.
+ GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
+/// Add a new global type to the unit.
+void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
+ if (!hasDwarfPubSections())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
+ GlobalTypes[FullName] = &Die;
+}
+
+void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
+ const DIScope *Context) {
+ if (!hasDwarfPubSections())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
+ // Insert, allowing the entry to remain as-is if it's already present.
+ // This way the CU-level type DIE is preferred over the "can't describe this
+ // type as a unit offset because it's not really in the CU at all, it's only
+ // in a type unit" case.
+ GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
+void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location) {
+ if (DV.hasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// Add an address attribute to a die based on the location provided.
+void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
+
+ DIExpressionCursor Cursor({});
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ DwarfExpr.addExpression(std::move(Cursor));
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, DwarfExpr.finalize());
+
+ if (DwarfExpr.TagOffset)
+ addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
+}
+
+/// Start with the address based on the location provided, and generate the
+/// DWARF information necessary to find the actual variable given the extra
+/// address information encoded in the DbgVariable, starting from the starting
+/// location. Add the DWARF information to the die.
+void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ const DIExpression *DIExpr = DV.getSingleExpression();
+ DwarfExpr.addFragmentOffset(DIExpr);
+ DwarfExpr.setLocation(Location, DIExpr);
+
+ DIExpressionCursor Cursor(DIExpr);
+
+ if (DIExpr->isEntryValue())
+ DwarfExpr.beginEntryValueExpression(Cursor);
+
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ DwarfExpr.addExpression(std::move(Cursor));
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, DwarfExpr.finalize());
+
+ if (DwarfExpr.TagOffset)
+ addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
+}
+
+/// Add a Dwarf loclistptr attribute data and value.
+void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
+ unsigned Index) {
+ dwarf::Form Form = (DD->getDwarfVersion() >= 5)
+ ? dwarf::DW_FORM_loclistx
+ : DD->getDwarfSectionOffsetForm();
+ addAttribute(Die, Attribute, Form, DIELocList(Index));
+}
+
+void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
+ StringRef Name = Var.getName();
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ const auto *DIVar = Var.getVariable();
+ if (DIVar) {
+ if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
+ addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+ addAnnotation(VariableDie, DIVar->getAnnotations());
+ }
+
+ addSourceLine(VariableDie, DIVar);
+ addType(VariableDie, Var.getType());
+ if (Var.isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
+void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
+ DIE &LabelDie) {
+ StringRef Name = Label.getName();
+ if (!Name.empty())
+ addString(LabelDie, dwarf::DW_AT_name, Name);
+ const auto *DILabel = Label.getLabel();
+ addSourceLine(LabelDie, DILabel);
+}
+
+/// Add a Dwarf expression attribute data and value.
+void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
+ const MCExpr *Expr) {
+ addAttribute(Die, (dwarf::Attribute)0, Form, DIEExpr(Expr));
+}
+
+void DwarfCompileUnit::applySubprogramAttributesToDefinition(
+ const DISubprogram *SP, DIE &SPDie) {
+ auto *SPDecl = SP->getDeclaration();
+ auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope();
+ applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
+ addGlobalName(SP->getName(), SPDie, Context);
+}
+
+bool DwarfCompileUnit::isDwoUnit() const {
+ return DD->useSplitDwarf() && Skeleton;
+}
+
+void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+ constructTypeDIE(D, CTy);
+}
+
+bool DwarfCompileUnit::includeMinimalInlineScopes() const {
+ return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
+ (DD->useSplitDwarf() && !Skeleton);
+}
+
+void DwarfCompileUnit::addAddrTableBase() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ MCSymbol *Label = DD->getAddressPool().getLabel();
+ addSectionLabel(getUnitDie(),
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
+ : dwarf::DW_AT_GNU_addr_base,
+ Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
+}
+
+void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
+ addAttribute(Die, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
+ new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
+}
+
+void DwarfCompileUnit::createBaseTypeDIEs() {
+ // Insert the base_type DIEs directly after the CU so that their offsets will
+ // fit in the fixed-size ULEB128 used inside the location expressions.
+ // Maintain order by iterating backwards and inserting at the front of the
+ // CU's child list.
+ for (auto &Btr : reverse(ExprRefedBaseTypes)) {
+ DIE &Die = getUnitDie().addChildFront(
+ DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type));
+ SmallString<32> Str;
+ addString(Die, dwarf::DW_AT_name,
+ Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
+ "_" + Twine(Btr.BitSize)).toStringRef(Str));
+ addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
+ // Round up to smallest number of bytes that contains this number of bits.
+ addUInt(Die, dwarf::DW_AT_byte_size, std::nullopt,
+ divideCeil(Btr.BitSize, 8));
+
+ Btr.Die = &Die;
+ }
+}
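// Illustrative sketch, not from the upstream sources. The base_type DIEs
// created above back DW_OP_convert / DW_OP_regval_type operands, whose DIE
// offsets are encoded as fixed-width ULEB128s and therefore must stay small;
// that is why they are inserted at the front of the CU's children. For a
// 32-bit signed conversion, llvm-dwarfdump output would look roughly like:
//
//   DW_TAG_compile_unit
//     DW_TAG_base_type
//       DW_AT_name      ("DW_ATE_signed_32")
//       DW_AT_encoding  (DW_ATE_signed)
//       DW_AT_byte_size (0x04)
//     ... remaining children of the compile unit ...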
+
+DIE *DwarfCompileUnit::getLexicalBlockDIE(const DILexicalBlock *LB) {
+ // Assume that if there is an abstract tree, all its DIEs are already emitted.
+ bool isAbstract = getAbstractScopeDIEs().count(LB->getSubprogram());
+ if (isAbstract && getAbstractScopeDIEs().count(LB))
+ return getAbstractScopeDIEs()[LB];
+ assert(!isAbstract && "Missed lexical block DIE in abstract tree!");
+
+ // Return a concrete DIE if it exists or nullptr otherwise.
+ return LexicalBlockDIEs.lookup(LB);
+}
+
+DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) {
+ if (isa_and_nonnull<DILocalScope>(Context)) {
+ if (auto *LFScope = dyn_cast<DILexicalBlockFile>(Context))
+ Context = LFScope->getNonLexicalBlockFileScope();
+ if (auto *LScope = dyn_cast<DILexicalBlock>(Context))
+ return getLexicalBlockDIE(LScope);
+
+ // Otherwise the context must be a DISubprogram.
+ auto *SPScope = cast<DISubprogram>(Context);
+ if (getAbstractScopeDIEs().count(SPScope))
+ return getAbstractScopeDIEs()[SPScope];
+ }
+ return DwarfUnit::getOrCreateContextDIE(Context);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 000000000000..6ef73ebd4f7f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,380 @@
+//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DwarfDebug.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DIELoc;
+class DIEValueList;
+class DwarfFile;
+class GlobalVariable;
+class MCExpr;
+class MCSymbol;
+class MDNode;
+
+enum class UnitKind { Skeleton, Full };
+
+class DwarfCompileUnit final : public DwarfUnit {
+ /// A numeric ID unique among all CUs in the module
+ unsigned UniqueID;
+ bool HasRangeLists = false;
+
+ /// The start of the unit line section; this is also
+ /// reused in applyStmtList.
+ MCSymbol *LineTableStartSym;
+
+ /// Skeleton unit associated with this unit.
+ DwarfCompileUnit *Skeleton = nullptr;
+
+ /// The start of the unit within its section.
+ MCSymbol *LabelBegin = nullptr;
+
+ /// The start of the unit macro info within macro section.
+ MCSymbol *MacroLabelBegin;
+
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ StringMap<const DIE *> GlobalNames;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ StringMap<const DIE *> GlobalTypes;
+
+ // List of ranges for a given compile unit.
+ SmallVector<RangeSpan, 2> CURanges;
+
+ // The base address of this unit, if any. Used for relative references in
+ // ranges/locs.
+ const MCSymbol *BaseAddress = nullptr;
+
+ using MDNodeSetVector =
+ SetVector<const MDNode *, SmallVector<const MDNode *, 4>,
+ SmallPtrSet<const MDNode *, 4>>;
+
+ // List of entities (either static locals, types or imports) that
+ // belong to subprograms within this CU.
+ MDNodeSetVector DeferredLocalDecls;
+
+ // List of concrete lexical block scopes that belong to subprograms within
+ // this CU.
+ DenseMap<const DILocalScope *, DIE *> LexicalBlockDIEs;
+
+ // List of abstract local scopes (either DISubprogram or DILexicalBlock).
+ DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
+
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
+
+ /// DWO ID for correlating skeleton and split units.
+ uint64_t DWOId = 0;
+
+ const DIFile *LastFile = nullptr;
+ unsigned LastFileID;
+
+ /// Construct a DIE for the given DbgVariable without initializing the
+ /// DbgVariable's DIE reference.
+ DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
+
+ bool isDwoUnit() const override;
+
+ DenseMap<const DILocalScope *, DIE *> &getAbstractScopeDIEs() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return AbstractLocalScopeDIEs;
+ return DU->getAbstractScopeDIEs();
+ }
+
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return AbstractEntities;
+ return DU->getAbstractEntities();
+ }
+
+ void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+
+ /// Add info for Wasm-global-based relocation.
+ void addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
+ uint64_t GlobalIndex);
+
+public:
+ DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU,
+ UnitKind Kind = UnitKind::Full);
+
+ bool hasRangeLists() const { return HasRangeLists; }
+ unsigned getUniqueID() const { return UniqueID; }
+
+ DwarfCompileUnit *getSkeleton() const {
+ return Skeleton;
+ }
+
+ bool includeMinimalInlineScopes() const;
+
+ void initStmtList();
+
+ /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
+ void applyStmtList(DIE &D);
+
+ /// Get line table start symbol for this unit.
+ MCSymbol *getLineTableStartSym() const { return LineTableStartSym; }
+
+ /// A pair of GlobalVariable and DIExpression.
+ struct GlobalExpr {
+ const GlobalVariable *Var;
+ const DIExpression *Expr;
+ };
+
+ struct BaseTypeRef {
+ BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) :
+ BitSize(BitSize), Encoding(Encoding) {}
+ unsigned BitSize;
+ dwarf::TypeKind Encoding;
+ DIE *Die = nullptr;
+ };
+
+ std::vector<BaseTypeRef> ExprRefedBaseTypes;
+
+ /// Get or create global variable DIE.
+ DIE *
+ getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
+ DIE *getOrCreateCommonBlock(const DICommonBlock *CB,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
+ void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addLocalLabelAddress - Add a dwarf label attribute data and value using
+ /// DW_FORM_addr only.
+ void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ DwarfCompileUnit &getCU() override { return *this; }
+
+ unsigned getOrCreateSourceID(const DIFile *File) override;
+
+ /// addRange - Add an address range to the list of ranges for this unit.
+ void addRange(RangeSpan Range);
+
+ void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
+
+ /// Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
+ DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
+ DIE &updateSubprogramScopeDIEImpl(const DISubprogram *SP, DIE *SPDie);
+
+ void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
+
+ /// A helper function to construct a RangeSpanList for a given
+ /// lexical scope.
+ void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
+
+ void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges);
+
+ void attachRangesOrLowHighPC(DIE &D,
+ const SmallVectorImpl<InsnRange> &Ranges);
+
+ /// This scope represents an inlined body of a function. Construct a
+ /// DIE to represent this concrete inlined copy of the function.
+ DIE *constructInlinedScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
+
+ /// Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(LexicalScope *Scope);
+
+ /// Get a DIE for the given DILexicalBlock.
+ /// Note that this function assumes that the DIE has already been created;
+ /// it is an error if it has not.
+ DIE *getLexicalBlockDIE(const DILexicalBlock *LB);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
+
+ DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
+ DIE *&ObjectPointer);
+
+ /// Construct a DIE for the given DbgLabel.
+ DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
+
+ void createBaseTypeDIEs();
+
+ /// Construct a DIE for a given scope.
+ /// This instance of 'getOrCreateContextDIE()' can handle DILocalScope.
+ DIE *getOrCreateContextDIE(const DIScope *Ty) override;
+
+ /// Construct a DIE for this subprogram scope.
+ DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope);
+
+ DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
+
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+
+ /// Whether to use the GNU analog for a DWARF5 tag, attribute, or location
+ /// atom. Only applicable when emitting otherwise DWARF4-compliant debug info.
+ bool useGNUAnalogForDwarf5Feature() const;
+
+ /// This takes a DWARF 5 tag and returns it or a GNU analog.
+ dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const;
+
+ /// This takes a DWARF 5 attribute and returns it or a GNU analog.
+ dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const;
+
+ /// This takes a DWARF 5 location atom and either returns it or a GNU analog.
+ dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
+
+ /// Construct a call site entry DIE describing a call within \p Scope to a
+ /// callee described by \p CalleeSP.
+ /// \p IsTail specifies whether the call is a tail call.
+ /// \p PCAddr points to the PC value after the call instruction.
+ /// \p CallAddr points to the PC value at the call instruction (or is null).
+ /// \p CallReg is a register location for an indirect call. For direct calls
+ /// the \p CallReg is set to 0.
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
+ const MCSymbol *CallAddr, unsigned CallReg);
+ /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
+ /// were collected by the \ref collectCallSiteParameters.
+ /// Note: The order of parameters does not matter, since debuggers recognize
+ /// call site parameters by the DW_AT_location attribute.
+ void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
+ SmallVector<DbgCallSiteParam, 4> &Params);
+
+ /// Get or create a DIE for an imported entity.
+ DIE *getOrCreateImportedEntityDIE(const DIImportedEntity *IE);
+ DIE *constructImportedEntityDIE(const DIImportedEntity *IE);
+
+ void finishSubprogramDefinition(const DISubprogram *SP);
+ void finishEntityDefinition(const DbgEntity *Entity);
+
+ /// Find abstract variable associated with Var.
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ DbgEntity *getExistingAbstractEntity(const DINode *Node);
+ void createAbstractEntity(const DINode *Node, LexicalScope *Scope);
+
+ /// Set the skeleton unit associated with this unit.
+ void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
+
+ unsigned getHeaderSize() const override {
+ // DWARF v5 added the DWO ID to the header for split/skeleton units.
+ unsigned DWOIdSize =
+ DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t)
+ : 0;
+ return DwarfUnit::getHeaderSize() + DWOIdSize;
+ }
+ unsigned getLength() {
+ return Asm->getUnitLengthFieldByteSize() + // Length field
+ getHeaderSize() + getUnitDie().getSize();
+ }
+
+ void emitHeader(bool UseOffsets) override;
+
+ /// Add the DW_AT_addr_base attribute to the unit DIE.
+ void addAddrTableBase();
+
+ MCSymbol *getLabelBegin() const {
+ assert(LabelBegin && "LabelBegin is not initialized");
+ return LabelBegin;
+ }
+
+ MCSymbol *getMacroLabelBegin() const {
+ return MacroLabelBegin;
+ }
+
+ /// Add a new global name to the compile unit.
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+
+ /// Add a new global name present in a type unit to this compile unit.
+ void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
+
+ /// Add a new global type to the compile unit.
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
+
+ /// Add a new global type present in a type unit to this compile unit.
+ void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
+
+ const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// Add DW_AT_location attribute for a DbgVariable based on provided
+ /// MachineLocation.
+ void addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location);
+ /// Add an address attribute to a die based on the location provided.
+ void addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Start with the address based on the location provided, and generate the
+ /// DWARF information necessary to find the actual variable (navigating the
+ /// extra location information encoded in the type) based on the starting
+ /// location. Add the DWARF information to the die.
+ void addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Add a Dwarf loclistptr attribute data and value.
+ void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
+ void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+
+ /// Add a Dwarf expression attribute data and value.
+ void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+
+ void applySubprogramAttributesToDefinition(const DISubprogram *SP,
+ DIE &SPDie);
+
+ void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie);
+
+ /// getRanges - Get the list of ranges for this unit.
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
+ SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); }
+
+ void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
+ const MCSymbol *getBaseAddress() const { return BaseAddress; }
+
+ uint64_t getDWOId() const { return DWOId; }
+ void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
+
+ bool hasDwarfPubSections() const;
+
+ void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
+
+ MDNodeSetVector &getDeferredLocalDecls() { return DeferredLocalDecls; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 000000000000..1ae17ec9b874
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,3638 @@
+//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfDebug.h"
+#include "ByteStreamer.h"
+#include "DIEHash.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfExpression.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <optional>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+STATISTIC(NumCSParams, "Number of dbg call site params created");
+
+static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
+ "use-dwarf-ranges-base-address-specifier", cl::Hidden,
+ cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
+
+static cl::opt<bool> GenerateARangeSection("generate-arange-section",
+ cl::Hidden,
+ cl::desc("Generate dwarf aranges"),
+ cl::init(false));
+
+static cl::opt<bool>
+ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
+ cl::desc("Generate DWARF4 type units."),
+ cl::init(false));
+
+static cl::opt<bool> SplitDwarfCrossCuReferences(
+ "split-dwarf-cross-cu-references", cl::Hidden,
+ cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
+
+enum DefaultOnOff { Default, Enable, Disable };
+
+static cl::opt<DefaultOnOff> UnknownLocations(
+ "use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::values(clEnumVal(Default, "At top of block or after label"),
+ clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
+ cl::init(Default));
+
+static cl::opt<AccelTableKind> AccelTables(
+ "accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."),
+ cl::values(clEnumValN(AccelTableKind::Default, "Default",
+ "Default for platform"),
+ clEnumValN(AccelTableKind::None, "Disable", "Disabled."),
+ clEnumValN(AccelTableKind::Apple, "Apple", "Apple"),
+ clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")),
+ cl::init(AccelTableKind::Default));
+
+static cl::opt<DefaultOnOff>
+DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
+ cl::desc("Use inlined strings rather than string section."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled")),
+ cl::init(Default));
+
+static cl::opt<bool>
+ NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
+ cl::desc("Disable emission .debug_ranges section."),
+ cl::init(false));
+
+static cl::opt<DefaultOnOff> DwarfSectionsAsReferences(
+ "dwarf-sections-as-references", cl::Hidden,
+ cl::desc("Use sections+offset as references rather than labels."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
+ cl::init(Default));
+
+static cl::opt<bool>
+ UseGNUDebugMacro("use-gnu-debug-macro", cl::Hidden,
+ cl::desc("Emit the GNU .debug_macro format with DWARF <5"),
+ cl::init(false));
+
+static cl::opt<DefaultOnOff> DwarfOpConvert(
+ "dwarf-op-convert", cl::Hidden,
+ cl::desc("Enable use of the DWARFv5 DW_OP_convert operator"),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
+ cl::init(Default));
+
+enum LinkageNameOption {
+ DefaultLinkageNames,
+ AllLinkageNames,
+ AbstractLinkageNames
+};
+
+static cl::opt<LinkageNameOption>
+ DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+ cl::desc("Which DWARF linkage-name attributes to emit."),
+ cl::values(clEnumValN(DefaultLinkageNames, "Default",
+ "Default for platform"),
+ clEnumValN(AllLinkageNames, "All", "All"),
+ clEnumValN(AbstractLinkageNames, "Abstract",
+ "Abstract subprograms")),
+ cl::init(DefaultLinkageNames));
+
+static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
+ "minimize-addr-in-v5", cl::Hidden,
+ cl::desc("Always use DW_AT_ranges in DWARFv5 whenever it could allow more "
+ "address pool entry sharing to reduce relocations/object size"),
+ cl::values(clEnumValN(DwarfDebug::MinimizeAddrInV5::Default, "Default",
+ "Default address minimization strategy"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
+ "Use rnglists for contiguous ranges if that allows "
+ "using a pre-existing base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Expressions,
+ "Expressions",
+ "Use exprloc addrx+offset expressions for any "
+ "address with a prior base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Form, "Form",
+ "Use addrx+offset extension form for any address "
+ "with a prior base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
+ "Stuff")),
+ cl::init(DwarfDebug::MinimizeAddrInV5::Default));
+
+static constexpr unsigned ULEB128PadSize = 4;
+
+void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
+ getActiveStreamer().emitInt8(
+ Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
+ : dwarf::OperationEncodingString(Op));
+}
+
+void DebugLocDwarfExpression::emitSigned(int64_t Value) {
+ getActiveStreamer().emitSLEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
+ getActiveStreamer().emitULEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::emitData1(uint8_t Value) {
+ getActiveStreamer().emitInt8(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+ assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx won't fit");
+ getActiveStreamer().emitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+}
+
+bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
+ llvm::Register MachineReg) {
+ // This information is not available while emitting .debug_loc entries.
+ return false;
+}
+
+void DebugLocDwarfExpression::enableTemporaryBuffer() {
+ assert(!IsBuffering && "Already buffering?");
+ if (!TmpBuf)
+ TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments);
+ IsBuffering = true;
+}
+
+void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
+
+unsigned DebugLocDwarfExpression::getTemporaryBufferSize() {
+ return TmpBuf ? TmpBuf->Bytes.size() : 0;
+}
+
+void DebugLocDwarfExpression::commitTemporaryBuffer() {
+ if (!TmpBuf)
+ return;
+ for (auto Byte : enumerate(TmpBuf->Bytes)) {
+ const char *Comment = (Byte.index() < TmpBuf->Comments.size())
+ ? TmpBuf->Comments[Byte.index()].c_str()
+ : "";
+ OutBS.emitInt8(Byte.value(), Comment);
+ }
+ TmpBuf->Bytes.clear();
+ TmpBuf->Comments.clear();
+}
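// Illustrative note, not from the upstream sources. The temporary-buffer
// hooks above exist mainly for DW_OP_entry_value (or its GNU analog): that
// operator is followed by a ULEB128 byte count and then a nested expression,
// so the nested operation bytes are staged in TmpBuf first, their size is
// read back via getTemporaryBufferSize() to emit the count, and
// commitTemporaryBuffer() then flushes the staged bytes to the output
// streamer.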
+
+const DIType *DbgVariable::getType() const {
+ return getVariable()->getType();
+}
+
+/// Get .debug_loc entry for the instruction range starting at MI.
+static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
+ const DIExpression *Expr = MI->getDebugExpression();
+ const bool IsVariadic = MI->isDebugValueList();
+ assert(MI->getNumOperands() >= 3);
+ SmallVector<DbgValueLocEntry, 4> DbgValueLocEntries;
+ for (const MachineOperand &Op : MI->debug_operands()) {
+ if (Op.isReg()) {
+ MachineLocation MLoc(Op.getReg(),
+ MI->isNonListDebugValue() && MI->isDebugOffsetImm());
+ DbgValueLocEntries.push_back(DbgValueLocEntry(MLoc));
+ } else if (Op.isTargetIndex()) {
+ DbgValueLocEntries.push_back(
+ DbgValueLocEntry(TargetIndexLocation(Op.getIndex(), Op.getOffset())));
+ } else if (Op.isImm())
+ DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getImm()));
+ else if (Op.isFPImm())
+ DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getFPImm()));
+ else if (Op.isCImm())
+ DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getCImm()));
+ else
+ llvm_unreachable("Unexpected debug operand in DBG_VALUE* instruction!");
+ }
+ return DbgValueLoc(Expr, DbgValueLocEntries, IsVariadic);
+}
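// Illustrative sketch, not from the upstream sources; metadata operands are
// shown symbolically rather than as real numbered nodes. A single-location
// MIR instruction such as
//   DBG_VALUE $edi, $noreg, !var, !DIExpression(), debug-location !loc
// yields one register DbgValueLocEntry with IsVariadic == false, while a
// variadic one such as
//   DBG_VALUE_LIST !var, !DIExpression(DW_OP_LLVM_arg, 0), $edi, debug-location !loc
// yields a DbgValueLoc whose entries come from the trailing debug operands
// and which has IsVariadic == true.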
+
+void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
+ assert(FrameIndexExprs.empty() && "Already initialized?");
+ assert(!ValueLoc.get() && "Already initialized?");
+
+ assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+ "Wrong inlined-at");
+
+ ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+ if (auto *E = DbgValue->getDebugExpression())
+ if (E->getNumElements())
+ FrameIndexExprs.push_back({0, E});
+}
+
+ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+ if (FrameIndexExprs.size() == 1)
+ return FrameIndexExprs;
+
+ assert(llvm::all_of(FrameIndexExprs,
+ [](const FrameIndexExpr &A) {
+ return A.Expr->isFragment();
+ }) &&
+ "multiple FI expressions without DW_OP_LLVM_fragment");
+ llvm::sort(FrameIndexExprs,
+ [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
+ return A.Expr->getFragmentInfo()->OffsetInBits <
+ B.Expr->getFragmentInfo()->OffsetInBits;
+ });
+
+ return FrameIndexExprs;
+}
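// Illustrative sketch, not from the upstream sources. A 64-bit variable
// spilled as two 32-bit halves would carry two frame-index expressions such
// as
//   !DIExpression(DW_OP_LLVM_fragment, 0, 32)   // bits [0, 32)  in one slot
//   !DIExpression(DW_OP_LLVM_fragment, 32, 32)  // bits [32, 64) in another
// and getFrameIndexExprs() returns them sorted by fragment offset so that
// callers emit the fragments in ascending bit order.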
+
+void DbgVariable::addMMIEntry(const DbgVariable &V) {
+ assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
+ assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
+ assert(V.getVariable() == getVariable() && "conflicting variable");
+ assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
+
+ assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
+ assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
+
+ // FIXME: This logic should not be necessary anymore, as we now have proper
+ // deduplication. However, without it, we currently run into the assertion
+ // below, which means that we are likely dealing with broken input, i.e. two
+ // non-fragment entries for the same variable at different frame indices.
+ if (FrameIndexExprs.size()) {
+ auto *Expr = FrameIndexExprs.back().Expr;
+ if (!Expr || !Expr->isFragment())
+ return;
+ }
+
+ for (const auto &FIE : V.FrameIndexExprs)
+ // Ignore duplicate entries.
+ if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) {
+ return FIE.FI == Other.FI && FIE.Expr == Other.Expr;
+ }))
+ FrameIndexExprs.push_back(FIE);
+
+ assert((FrameIndexExprs.size() == 1 ||
+ llvm::all_of(FrameIndexExprs,
+ [](FrameIndexExpr &FIE) {
+ return FIE.Expr && FIE.Expr->isFragment();
+ })) &&
+ "conflicting locations for variable");
+}
+
+static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
+ bool GenerateTypeUnits,
+ DebuggerKind Tuning,
+ const Triple &TT) {
+ // Honor an explicit request.
+ if (AccelTables != AccelTableKind::Default)
+ return AccelTables;
+
+ // Accelerator tables with type units are currently not supported.
+ if (GenerateTypeUnits)
+ return AccelTableKind::None;
+
+ // Accelerator tables get emitted if targeting DWARF v5 or LLDB. DWARF v5
+ // always implies debug_names. For lower standard versions we use Apple
+ // accelerator tables on Apple platforms and debug_names elsewhere.
+ if (DwarfVersion >= 5)
+ return AccelTableKind::Dwarf;
+ if (Tuning == DebuggerKind::LLDB)
+ return TT.isOSBinFormatMachO() ? AccelTableKind::Apple
+ : AccelTableKind::Dwarf;
+ return AccelTableKind::None;
+}
+
+DwarfDebug::DwarfDebug(AsmPrinter *A)
+ : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
+ InfoHolder(A, "info_string", DIEValueAllocator),
+ SkeletonHolder(A, "skel_string", DIEValueAllocator),
+ IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
+ const Triple &TT = Asm->TM.getTargetTriple();
+
+ // Make sure we know our "debugger tuning". The target option takes
+ // precedence; fall back to triple-based defaults.
+ if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
+ DebuggerTuning = Asm->TM.Options.DebuggerTuning;
+ else if (IsDarwin)
+ DebuggerTuning = DebuggerKind::LLDB;
+ else if (TT.isPS())
+ DebuggerTuning = DebuggerKind::SCE;
+ else if (TT.isOSAIX())
+ DebuggerTuning = DebuggerKind::DBX;
+ else
+ DebuggerTuning = DebuggerKind::GDB;
+
+ if (DwarfInlinedStrings == Default)
+ UseInlineStrings = TT.isNVPTX() || tuneForDBX();
+ else
+ UseInlineStrings = DwarfInlinedStrings == Enable;
+
+ UseLocSection = !TT.isNVPTX();
+
+ HasAppleExtensionAttributes = tuneForLLDB();
+
+ // Handle split DWARF.
+ HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty();
+
+ // SCE defaults to linkage names only for abstract subprograms.
+ if (DwarfLinkageNames == DefaultLinkageNames)
+ UseAllLinkageNames = !tuneForSCE();
+ else
+ UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
+
+ unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
+ unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
+ : MMI->getModule()->getDwarfVersion();
+ // Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2.
+ DwarfVersion =
+ TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
+
+ bool Dwarf64 = DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
+ TT.isArch64Bit(); // DWARF64 requires 64-bit relocations.
+
+ // Support DWARF64
+ // 1: For ELF when requested.
+ // 2: For XCOFF64: the AIX assembler will fill in debug section lengths
+ // according to the DWARF64 format for 64-bit assembly, so we must use
+ // DWARF64 in the compiler too for 64-bit mode.
+ Dwarf64 &=
+ ((Asm->TM.Options.MCOptions.Dwarf64 || MMI->getModule()->isDwarf64()) &&
+ TT.isOSBinFormatELF()) ||
+ TT.isOSBinFormatXCOFF();
+
+ if (!Dwarf64 && TT.isArch64Bit() && TT.isOSBinFormatXCOFF())
+ report_fatal_error("XCOFF requires DWARF64 for 64-bit mode!");
+
+ UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
+
+ // Use sections as references. Force for NVPTX.
+ if (DwarfSectionsAsReferences == Default)
+ UseSectionsAsReferences = TT.isNVPTX();
+ else
+ UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
+
+ // Don't generate type units for unsupported object file formats.
+ GenerateTypeUnits = (A->TM.getTargetTriple().isOSBinFormatELF() ||
+ A->TM.getTargetTriple().isOSBinFormatWasm()) &&
+ GenerateDwarfTypeUnits;
+
+ TheAccelTableKind = computeAccelTableKind(
+ DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
+
+ // Work around a GDB bug. GDB doesn't support the standard opcode;
+ // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
+ // is defined as of DWARF 3.
+ // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
+ UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
+
+ UseDWARF2Bitfields = DwarfVersion < 4;
+
+ // The DWARF v5 string offsets table has (possibly shared) contributions
+ // from each compile and type unit, each preceded by a header. The string
+ // offsets table used by the pre-DWARF v5 split-DWARF implementation is
+ // monolithic and has no header.
+ UseSegmentedStringOffsetsTable = DwarfVersion >= 5;
+
+ // Emit call-site-param debug info for GDB and LLDB, if the target supports
+ // the debug entry values feature. It can also be enabled explicitly.
+ EmitDebugEntryValues = Asm->TM.Options.ShouldEmitDebugEntryValues();
+
+ // It is unclear if the GCC .debug_macro extension is well-specified
+ // for split DWARF. For now, do not allow LLVM to emit it.
+ UseDebugMacroSection =
+ DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf());
+ if (DwarfOpConvert == Default)
+ EnableOpConvert = !((tuneForGDB() && useSplitDwarf()) || (tuneForLLDB() && !TT.isOSBinFormatMachO()));
+ else
+ EnableOpConvert = (DwarfOpConvert == Enable);
+
+ // Split DWARF would benefit object size significantly by trading reductions
+ // in address pool usage for slightly increased range list encodings.
+ if (DwarfVersion >= 5)
+ MinimizeAddr = MinimizeAddrInV5Option;
+
+ Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
+ Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
+ : dwarf::DWARF32);
+}
+
+// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
+DwarfDebug::~DwarfDebug() = default;
+
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name))
+ return false;
+
+ return Name.contains(") ");
+}
+
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
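// Illustrative sketch, not from the upstream sources; the function name is
// hypothetical, and the snippet only needs llvm/ADT/StringRef.h. For a
// debug-info name such as "+[NSString(MyCategory) myMethod:]" the helpers
// above slice out the class, the class+category prefix, and the selector, as
// the assertions show.
#include "llvm/ADT/StringRef.h"
#include <cassert>
inline void objcNameSplitExample() {
  llvm::StringRef In = "+[NSString(MyCategory) myMethod:]";
  llvm::StringRef Class = In.slice(In.find('[') + 1, In.find('('));
  llvm::StringRef Category = In.slice(In.find('[') + 1, In.find(' '));
  llvm::StringRef Method = In.slice(In.find(' ') + 1, In.find(']'));
  assert(Class == "NSString");
  assert(Category == "NSString(MyCategory)");
  assert(Method == "myMethod:");
  (void)Class; (void)Category; (void)Method;
}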
+
+// Add the various names to the Dwarf accelerator table names.
+void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
+ const DISubprogram *SP, DIE &Die) {
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
+ if (!SP->isDefinition())
+ return;
+
+ if (SP->getName() != "")
+ addAccelName(CU, SP->getName(), Die);
+
+ // If the linkage name is different from the name, output it into the name
+ // table as well. Only do that if we are going to actually emit that name.
+ if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
+ (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP)))
+ addAccelName(CU, SP->getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP->getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP->getName(), Class, Category);
+ addAccelObjC(CU, Class, Die);
+ if (Category != "")
+ addAccelObjC(CU, Category, Die);
+ // Also add the base method name to the name table.
+ addAccelName(CU, getObjCMethodName(SP->getName()), Die);
+ }
+}
+
+/// Check whether we should create a DIE for the given Scope; return true
+/// if we don't create a DIE (the corresponding DIE is null).
+bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
+ if (Scope->isAbstractScope())
+ return false;
+
+ // We don't create a DIE if there is no Range.
+ const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return true;
+
+ if (Ranges.size() > 1)
+ return false;
+
+ // We don't create a DIE if we have a single Range and the end label
+ // is null.
+ return !getLabelAfterInsn(Ranges.front().second);
+}
+
+template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
+ F(CU);
+ if (auto *SkelCU = CU.getSkeleton())
+ if (CU.getCUNode()->getSplitDebugInlining())
+ F(*SkelCU);
+}
+
+bool DwarfDebug::shareAcrossDWOCUs() const {
+ return SplitDwarfCrossCuReferences;
+}
+
+void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
+ LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(Scope->isAbstractScope());
+ assert(!Scope->getInlinedAt());
+
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
+
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining())
+ // Avoid building the original CU if it won't be used
+ SrcCU.constructAbstractSubprogramScopeDIE(Scope);
+ else {
+ auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ if (auto *SkelCU = CU.getSkeleton()) {
+ (shareAcrossDWOCUs() ? CU : SrcCU)
+ .constructAbstractSubprogramScopeDIE(Scope);
+ if (CU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructAbstractSubprogramScopeDIE(Scope);
+ } else
+ CU.constructAbstractSubprogramScopeDIE(Scope);
+ }
+}
+
+/// Represents a parameter whose call site value can be described by applying a
+/// debug expression to a register in the forwarded register worklist.
+struct FwdRegParamInfo {
+ /// The described parameter register.
+ unsigned ParamReg;
+
+ /// Debug expression that has been built up when walking through the
+ /// instruction chain that produces the parameter's value.
+ const DIExpression *Expr;
+};
+
+/// Register worklist for finding call site values.
+using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>;
+/// Container for the set of registers known to be clobbered on the path to a
+/// call site.
+using ClobberedRegSet = SmallSet<Register, 16>;
+
+/// Append the expression \p Addition to \p Original and return the result.
+static const DIExpression *combineDIExpressions(const DIExpression *Original,
+ const DIExpression *Addition) {
+ std::vector<uint64_t> Elts = Addition->getElements().vec();
+ // Avoid multiple DW_OP_stack_values.
+ if (Original->isImplicit() && Addition->isImplicit())
+ erase_value(Elts, dwarf::DW_OP_stack_value);
+ const DIExpression *CombinedExpr =
+ (Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
+ return CombinedExpr;
+}
+
+/// Emit call site parameter entries that are described by the given value and
+/// debug expression.
+template <typename ValT>
+static void finishCallSiteParams(ValT Val, const DIExpression *Expr,
+ ArrayRef<FwdRegParamInfo> DescribedParams,
+ ParamSet &Params) {
+ for (auto Param : DescribedParams) {
+ bool ShouldCombineExpressions = Expr && Param.Expr->getNumElements() > 0;
+
+ // TODO: Entry value operations can currently not be combined with any
+ // other expressions, so we can't emit call site entries in those cases.
+ if (ShouldCombineExpressions && Expr->isEntryValue())
+ continue;
+
+ // If a parameter's call site value is produced by a chain of
+ // instructions we may have already created an expression for the
+ // parameter when walking through the instructions. Append that to the
+ // base expression.
+ const DIExpression *CombinedExpr =
+ ShouldCombineExpressions ? combineDIExpressions(Expr, Param.Expr)
+ : Expr;
+ assert((!CombinedExpr || CombinedExpr->isValid()) &&
+ "Combined debug expression is invalid");
+
+ DbgValueLoc DbgLocVal(CombinedExpr, DbgValueLocEntry(Val));
+ DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal);
+ Params.push_back(CSParm);
+ ++NumCSParams;
+ }
+}
+
+/// Add \p Reg to the worklist, if it's not already present, and mark that the
+/// given parameter registers' values can (potentially) be described using
+/// that register and a debug expression.
+static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg,
+ const DIExpression *Expr,
+ ArrayRef<FwdRegParamInfo> ParamsToAdd) {
+ auto I = Worklist.insert({Reg, {}});
+ auto &ParamsForFwdReg = I.first->second;
+ for (auto Param : ParamsToAdd) {
+ assert(none_of(ParamsForFwdReg,
+ [Param](const FwdRegParamInfo &D) {
+ return D.ParamReg == Param.ParamReg;
+ }) &&
+ "Same parameter described twice by forwarding reg");
+
+ // If a parameter's call site value is produced by a chain of
+ // instructions we may have already created an expression for the
+ // parameter when walking through the instructions. Append that to the
+ // new expression.
+ const DIExpression *CombinedExpr = combineDIExpressions(Expr, Param.Expr);
+ ParamsForFwdReg.push_back({Param.ParamReg, CombinedExpr});
+ }
+}
+
+/// Interpret values loaded into registers by \p CurMI.
+static void interpretValues(const MachineInstr *CurMI,
+ FwdRegWorklist &ForwardedRegWorklist,
+ ParamSet &Params,
+ ClobberedRegSet &ClobberedRegUnits) {
+
+ const MachineFunction *MF = CurMI->getMF();
+ const DIExpression *EmptyExpr =
+ DIExpression::get(MF->getFunction().getContext(), {});
+ const auto &TRI = *MF->getSubtarget().getRegisterInfo();
+ const auto &TII = *MF->getSubtarget().getInstrInfo();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+
+ // If an instruction defines more than one item in the worklist, we may run
+ // into situations where a worklist register's value is (potentially)
+ // described by the previous value of another register that is also defined
+ // by that instruction.
+ //
+ // This can, for example, occur in cases like this:
+ //
+ // $r1 = mov 123
+ // $r0, $r1 = mvrr $r1, 456
+ // call @foo, $r0, $r1
+ //
+ // When describing $r1's value for the mvrr instruction, we need to make sure
+ // that we don't finalize an entry value for $r0, as that is dependent on the
+ // previous value of $r1 (123 rather than 456).
+ //
+ // In order to not have to distinguish between those cases when finalizing
+ // entry values, we simply postpone adding new parameter registers to the
+ // worklist, by first keeping them in this temporary container until the
+ // instruction has been handled.
+ FwdRegWorklist TmpWorklistItems;
+
+ // If the MI is an instruction defining one or more parameters' forwarding
+ // registers, add those defines.
+ ClobberedRegSet NewClobberedRegUnits;
+ auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
+ SmallSetVector<unsigned, 4> &Defs) {
+ if (MI.isDebugInstr())
+ return;
+
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (MO.getReg().isPhysical()) {
+ for (auto &FwdReg : ForwardedRegWorklist)
+ if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
+ Defs.insert(FwdReg.first);
+ for (MCRegUnit Unit : TRI.regunits(MO.getReg()))
+ NewClobberedRegUnits.insert(Unit);
+ }
+ }
+ };
+
+ // Set of worklist registers that are defined by this instruction.
+ SmallSetVector<unsigned, 4> FwdRegDefs;
+
+ getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs);
+ if (FwdRegDefs.empty()) {
+ // Any definitions by this instruction will clobber earlier reg movements.
+ ClobberedRegUnits.insert(NewClobberedRegUnits.begin(),
+ NewClobberedRegUnits.end());
+ return;
+ }
+
+ // It's possible that we find a copy from a non-volatile register to the param
+ // register, which is clobbered in the meantime. Test for clobbered reg unit
+ // overlaps before completing.
+ auto IsRegClobberedInMeantime = [&](Register Reg) -> bool {
+ for (auto &RegUnit : ClobberedRegUnits)
+ if (TRI.hasRegUnit(Reg, RegUnit))
+ return true;
+ return false;
+ };
+
+ for (auto ParamFwdReg : FwdRegDefs) {
+ if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) {
+ if (ParamValue->first.isImm()) {
+ int64_t Val = ParamValue->first.getImm();
+ finishCallSiteParams(Val, ParamValue->second,
+ ForwardedRegWorklist[ParamFwdReg], Params);
+ } else if (ParamValue->first.isReg()) {
+ Register RegLoc = ParamValue->first.getReg();
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ Register FP = TRI.getFrameRegister(*MF);
+ bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+ if (!IsRegClobberedInMeantime(RegLoc) &&
+ (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP)) {
+ MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP);
+ finishCallSiteParams(MLoc, ParamValue->second,
+ ForwardedRegWorklist[ParamFwdReg], Params);
+ } else {
+ // ParamFwdReg was described by the non-callee saved register
+ // RegLoc. Mark that the call site values for the parameters are
+ // dependent on that register instead of ParamFwdReg. Since RegLoc
+ // may be a register that will be handled in this iteration, we
+ // postpone adding the items to the worklist, and instead keep them
+ // in a temporary container.
+ addToFwdRegWorklist(TmpWorklistItems, RegLoc, ParamValue->second,
+ ForwardedRegWorklist[ParamFwdReg]);
+ }
+ }
+ }
+ }
+
+ // Remove all registers that this instruction defines from the worklist.
+ for (auto ParamFwdReg : FwdRegDefs)
+ ForwardedRegWorklist.erase(ParamFwdReg);
+
+ // Any definitions by this instruction will clobber earlier reg movements.
+ ClobberedRegUnits.insert(NewClobberedRegUnits.begin(),
+ NewClobberedRegUnits.end());
+
+ // Now that we are done handling this instruction, add items from the
+ // temporary worklist to the real one.
+ for (auto &New : TmpWorklistItems)
+ addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second);
+ TmpWorklistItems.clear();
+}
+
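+ /// Interpret a single instruction \p CurMI while walking backwards from a
+ /// call, updating \p ForwardedRegWorklist and \p Params. Returns false when
+ /// the walk should stop: another call is reached or the worklist is empty.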
+static bool interpretNextInstr(const MachineInstr *CurMI,
+ FwdRegWorklist &ForwardedRegWorklist,
+ ParamSet &Params,
+ ClobberedRegSet &ClobberedRegUnits) {
+ // Skip bundle headers.
+ if (CurMI->isBundle())
+ return true;
+
+ // Stop if the next instruction is a call, since we cannot interpret the
+ // parameters' forwarding registers across it, or if we have already
+ // finished interpreting all parameters (the worklist is empty).
+ if (CurMI->isCall())
+ return false;
+
+ if (ForwardedRegWorklist.empty())
+ return false;
+
+ // Avoid NOP description.
+ if (CurMI->getNumOperands() == 0)
+ return true;
+
+ interpretValues(CurMI, ForwardedRegWorklist, Params, ClobberedRegUnits);
+
+ return true;
+}
+
+/// Try to interpret values loaded into registers that forward parameters
+/// for \p CallMI. Store parameters with interpreted value into \p Params.
+static void collectCallSiteParameters(const MachineInstr *CallMI,
+ ParamSet &Params) {
+ const MachineFunction *MF = CallMI->getMF();
+ const auto &CalleesMap = MF->getCallSitesInfo();
+ auto CallFwdRegsInfo = CalleesMap.find(CallMI);
+
+ // There is no information for the call instruction.
+ if (CallFwdRegsInfo == CalleesMap.end())
+ return;
+
+ const MachineBasicBlock *MBB = CallMI->getParent();
+
+ // Skip the call instruction.
+ auto I = std::next(CallMI->getReverseIterator());
+
+ FwdRegWorklist ForwardedRegWorklist;
+
+ const DIExpression *EmptyExpr =
+ DIExpression::get(MF->getFunction().getContext(), {});
+
+ // Add all the forwarding registers into the ForwardedRegWorklist.
+ for (const auto &ArgReg : CallFwdRegsInfo->second) {
+ bool InsertedReg =
+ ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}})
+ .second;
+ assert(InsertedReg && "Single register used to forward two arguments?");
+ (void)InsertedReg;
+ }
+
+ // Do not emit CSInfo for undef forwarding registers.
+ for (const auto &MO : CallMI->uses())
+ if (MO.isReg() && MO.isUndef())
+ ForwardedRegWorklist.erase(MO.getReg());
+
+ // We erase from the ForwardedRegWorklist those forwarding registers for
+ // which we successfully describe a loaded value using describeLoadedValue().
+ // For the arguments remaining in the worklist, whose loaded values could not
+ // be described, we try to generate an entry value expression for their call
+ // site value description, if the call is within the entry MBB.
+ // TODO: Handle situations when call site parameter value can be described
+ // as the entry value within basic blocks other than the first one.
+ bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
+
+ // Search for values loaded into the forwarding registers inside the call
+ // delay slot.
+ ClobberedRegSet ClobberedRegUnits;
+ if (CallMI->hasDelaySlot()) {
+ auto Suc = std::next(CallMI->getIterator());
+ // Only one-instruction delay slot is supported.
+ auto BundleEnd = llvm::getBundleEnd(CallMI->getIterator());
+ (void)BundleEnd;
+ assert(std::next(Suc) == BundleEnd &&
+ "More than one instruction in call delay slot");
+ // Try to interpret value loaded by instruction.
+ if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params, ClobberedRegUnits))
+ return;
+ }
+
+ // Search for values loaded into the forwarding registers.
+ for (; I != MBB->rend(); ++I) {
+ // Try to interpret values loaded by instruction.
+ if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params, ClobberedRegUnits))
+ return;
+ }
+
+ // Emit the call site parameter's value as an entry value.
+ if (ShouldTryEmitEntryVals) {
+ // Create an expression where the register's entry value is used.
+ DIExpression *EntryExpr = DIExpression::get(
+ MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
+ for (auto &RegEntry : ForwardedRegWorklist) {
+ MachineLocation MLoc(RegEntry.first);
+ finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params);
+ }
+ }
+}
+
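+ // Emit DW_TAG_call_site entries (or their GNU analogs) for every call and
+ // tail call in \p MF, attaching them to the subprogram's \p ScopeDIE in
+ // \p CU, optionally followed by call site parameter entries.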
+void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
+ DwarfCompileUnit &CU, DIE &ScopeDIE,
+ const MachineFunction &MF) {
+ // Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if
+ // the subprogram is required to have one.
+ if (!SP.areAllCallsDescribed() || !SP.isDefinition())
+ return;
+
+ // Use DW_AT_call_all_calls to express that call site entries are present
+ // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
+ // because one of its requirements is not met: call site entries for
+ // optimized-out calls are elided.
+ CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls));
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "TargetInstrInfo not found: cannot label tail calls");
+
+ // Delay slot support check.
+ auto delaySlotSupported = [&](const MachineInstr &MI) {
+ if (!MI.isBundledWithSucc())
+ return false;
+ auto Suc = std::next(MI.getIterator());
+ auto CallInstrBundle = getBundleStart(MI.getIterator());
+ (void)CallInstrBundle;
+ auto DelaySlotBundle = getBundleStart(Suc);
+ (void)DelaySlotBundle;
+ // Ensure that the label after the call follows the delay slot instruction.
+ // Ex. CALL_INSTRUCTION {
+ // DELAY_SLOT_INSTRUCTION }
+ // LABEL_AFTER_CALL
+ assert(getLabelAfterInsn(&*CallInstrBundle) ==
+ getLabelAfterInsn(&*DelaySlotBundle) &&
+ "Call and its successor instruction don't have same label after.");
+ return true;
+ };
+
+ // Emit call site entries for each call or tail call in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB.instrs()) {
+ // Bundles with a call in them will pass the isCall() test below but do not
+ // have callee operand information, so skip them here. The iterator will
+ // eventually reach the call MI.
+ if (MI.isBundle())
+ continue;
+
+ // Skip instructions which aren't calls. Both calls and tail-calling jump
+ // instructions (e.g. TAILJMPd64) are classified correctly here.
+ if (!MI.isCandidateForCallSiteEntry())
+ continue;
+
+ // Skip instructions marked as frame setup, as they are not interesting to
+ // the user.
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ continue;
+
+ // If the call has a delay slot, bail out unless the bundled delay-slot form
+ // is supported.
+ if (MI.hasDelaySlot() && !delaySlotSupported(MI))
+ return;
+
+ // If this is a direct call, find the callee's subprogram.
+ // In the case of an indirect call find the register that holds
+ // the callee.
+ const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
+ if (!CalleeOp.isGlobal() &&
+ (!CalleeOp.isReg() || !CalleeOp.getReg().isPhysical()))
+ continue;
+
+ unsigned CallReg = 0;
+ const DISubprogram *CalleeSP = nullptr;
+ const Function *CalleeDecl = nullptr;
+ if (CalleeOp.isReg()) {
+ CallReg = CalleeOp.getReg();
+ if (!CallReg)
+ continue;
+ } else {
+ CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
+ if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ continue;
+ CalleeSP = CalleeDecl->getSubprogram();
+ }
+
+ // TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
+
+ bool IsTail = TII->isTailCall(MI);
+
+ // If MI is in a bundle, the label was created after the bundle since
+ // EmitFunctionBody iterates over top-level MIs. Get that top-level MI
+ // to search for that label below.
+ const MachineInstr *TopLevelCallMI =
+ MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI;
+
+ // For non-tail calls, the return PC is needed to disambiguate paths in
+ // the call graph which could lead to some target function. For tail
+ // calls, no return PC information is needed, unless tuning for GDB in
+ // DWARF4 mode in which case we fake a return PC for compatibility.
+ const MCSymbol *PCAddr =
+ (!IsTail || CU.useGNUAnalogForDwarf5Feature())
+ ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
+ : nullptr;
+
+ // For tail calls, it's necessary to record the address of the branch
+ // instruction so that the debugger can show where the tail call occurred.
+ const MCSymbol *CallAddr =
+ IsTail ? getLabelBeforeInsn(TopLevelCallMI) : nullptr;
+
+ assert((IsTail || PCAddr) && "Non-tail call without return PC");
+
+ LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
+ << (CalleeDecl ? CalleeDecl->getName()
+ : StringRef(MF.getSubtarget()
+ .getRegisterInfo()
+ ->getName(CallReg)))
+ << (IsTail ? " [IsTail]" : "") << "\n");
+
+ DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
+ ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg);
+
+ // Optionally emit call-site-param debug info.
+ if (emitDebugEntryValues()) {
+ ParamSet Params;
+ // Try to interpret values of call site parameters.
+ collectCallSiteParameters(&MI, Params);
+ CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params);
+ }
+ }
+ }
+}
+
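+ // Add the DW_AT_GNU_pubnames flag to \p D when the unit \p U emits
+ // GNU-style pubnames/pubtypes sections.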
+void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
+ if (!U.hasDwarfPubSections())
+ return;
+
+ U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
+}
+
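+ // Populate the unit DIE of \p NewCU with the attributes derived from
+ // \p DIUnit: producer, language, name, sysroot/SDK, statement list,
+ // compilation directory, Apple extensions, and DWO id/name when present.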
+void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU) {
+ DIE &Die = NewCU.getUnitDie();
+ StringRef FN = DIUnit->getFilename();
+
+ StringRef Producer = DIUnit->getProducer();
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty() && !useAppleExtensionAttributes()) {
+ std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
+ NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
+ } else
+ NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
+
+ NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit->getSourceLanguage());
+ NewCU.addString(Die, dwarf::DW_AT_name, FN);
+ StringRef SysRoot = DIUnit->getSysRoot();
+ if (!SysRoot.empty())
+ NewCU.addString(Die, dwarf::DW_AT_LLVM_sysroot, SysRoot);
+ StringRef SDK = DIUnit->getSDK();
+ if (!SDK.empty())
+ NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK);
+
+ if (!useSplitDwarf()) {
+ // Add DW_AT_str_offsets_base to the unit DIE, except for split units.
+ if (useSegmentedStringOffsetsTable())
+ NewCU.addStringOffsetsStart();
+
+ NewCU.initStmtList();
+
+ // If we're using split dwarf the compilation dir is going to be in the
+ // skeleton CU and so we don't need to duplicate it here.
+ if (!CompilationDir.empty())
+ NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ addGnuPubAttributes(NewCU, Die);
+ }
+
+ if (useAppleExtensionAttributes()) {
+ if (DIUnit->isOptimized())
+ NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty())
+ NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+
+ if (unsigned RVer = DIUnit->getRuntimeVersion())
+ NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+ }
+
+ if (DIUnit->getDWOId()) {
+ // This CU is either a clang module DWO or a skeleton CU.
+ NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
+ DIUnit->getDWOId());
+ if (!DIUnit->getSplitDebugFilename().empty()) {
+ // This is a prefabricated skeleton CU.
+ dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_dwo_name
+ : dwarf::DW_AT_GNU_dwo_name;
+ NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename());
+ }
+ }
+}
+
+ // Create a new DwarfCompileUnit for the given metadata node with tag
+ // DW_TAG_compile_unit.
+DwarfCompileUnit &
+DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ if (auto *CU = CUMap.lookup(DIUnit))
+ return *CU;
+
+ if (useSplitDwarf() &&
+ !shareAcrossDWOCUs() &&
+ (!DIUnit->getSplitDebugInlining() ||
+ DIUnit->getEmissionKind() == DICompileUnit::FullDebug) &&
+ !CUMap.empty()) {
+ return *CUMap.begin()->second;
+ }
+ CompilationDir = DIUnit->getDirectory();
+
+ auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
+ InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ InfoHolder.addUnit(std::move(OwnedUnit));
+
+ // LTO with assembly output shares a single line table amongst multiple CUs.
+ // To avoid the compilation directory being ambiguous, let the line table
+ // explicitly describe the directory of all files, never relying on the
+ // compilation directory.
+ if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
+ Asm->OutStreamer->emitDwarfFile0Directive(
+ CompilationDir, DIUnit->getFilename(), getMD5AsBytes(DIUnit->getFile()),
+ DIUnit->getSource(), NewCU.getUniqueID());
+
+ if (useSplitDwarf()) {
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ } else {
+ finishUnitAttributes(DIUnit, NewCU);
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ }
+
+ CUMap.insert({DIUnit, &NewCU});
+ CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
+ return NewCU;
+}
+
+/// Sort and unique GVEs by comparing their fragment offset.
+static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
+sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
+ llvm::sort(
+ GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
+ // Sort order: first null exprs, then exprs without fragment
+ // info, then sort by fragment offset in bits.
+ // FIXME: Come up with a more comprehensive comparator so
+ // the sorting isn't non-deterministic, and so the following
+ // std::unique call works correctly.
+ if (!A.Expr || !B.Expr)
+ return !!B.Expr;
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (!FragmentA || !FragmentB)
+ return !!FragmentB;
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ });
+ GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
+ [](DwarfCompileUnit::GlobalExpr A,
+ DwarfCompileUnit::GlobalExpr B) {
+ return A.Expr == B.Expr;
+ }),
+ GVEs.end());
+ return GVEs;
+}
+
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
+void DwarfDebug::beginModule(Module *M) {
+ DebugHandlerBase::beginModule(M);
+
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
+ M->debug_compile_units_end());
+ assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
+ assert(MMI->hasDebugInfo() &&
+ "DebugInfoAvailabilty unexpectedly not initialized");
+ SingleCU = NumDebugCUs == 1;
+ DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
+ GVMap;
+ for (const GlobalVariable &Global : M->globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ Global.getDebugInfo(GVs);
+ for (auto *GVE : GVs)
+ GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
+ }
+
+ // Create the symbol that designates the start of the unit's contribution
+ // to the string offsets table. In a split DWARF scenario, only the skeleton
+ // unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol).
+ if (useSegmentedStringOffsetsTable())
+ (useSplitDwarf() ? SkeletonHolder : InfoHolder)
+ .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
+
+ // Create the symbols that designate the start of the DWARF v5 range list
+ // and location list tables. They are located past the table headers.
+ if (getDwarfVersion() >= 5) {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_table_base"));
+
+ if (useSplitDwarf())
+ InfoHolder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_dwo_table_base"));
+ }
+
+ // Create the symbol that points to the first entry following the debug
+ // address table (.debug_addr) header.
+ AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
+ DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
+
+ for (DICompileUnit *CUNode : M->debug_compile_units()) {
+ if (CUNode->getImportedEntities().empty() &&
+ CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
+ CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
+ continue;
+
+ DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
+
+ // Global Variables.
+ for (auto *GVE : CUNode->getGlobalVariables()) {
+ // Don't bother adding DIGlobalVariableExpressions listed in the CU if we
+ // already know about the variable and it isn't adding a constant
+ // expression.
+ auto &GVMapEntry = GVMap[GVE->getVariable()];
+ auto *Expr = GVE->getExpression();
+ if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
+ GVMapEntry.push_back({nullptr, Expr});
+ }
+
+ DenseSet<DIGlobalVariable *> Processed;
+ for (auto *GVE : CUNode->getGlobalVariables()) {
+ DIGlobalVariable *GV = GVE->getVariable();
+ if (Processed.insert(GV).second)
+ CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
+ }
+
+ for (auto *Ty : CUNode->getEnumTypes())
+ CU.getOrCreateTypeDIE(cast<DIType>(Ty));
+
+ for (auto *Ty : CUNode->getRetainedTypes()) {
+ // The retained types array by design contains pointers to
+ // MDNodes rather than DIRefs. Unique them here.
+ if (DIType *RT = dyn_cast<DIType>(Ty))
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
+ }
+ }
+}
+
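+ // Finish the DIE definitions of all concrete variables and labels collected
+ // for this module by dispatching to their owning compile units.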
+void DwarfDebug::finishEntityDefinitions() {
+ for (const auto &Entity : ConcreteEntities) {
+ DIE *Die = Entity->getDIE();
+ assert(Die);
+ // FIXME: Consider the time-space tradeoff of just storing the unit pointer
+ // in the ConcreteEntities list, rather than looking it up again here.
+ // DIE::getUnit isn't simple - it walks parent pointers, etc.
+ DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie());
+ assert(Unit);
+ Unit->finishEntityDefinition(Entity.get());
+ }
+}
+
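+ // Finish subprogram DIE definitions for all processed subprograms, in both
+ // the main and skeleton units when split DWARF is in use.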
+void DwarfDebug::finishSubprogramDefinitions() {
+ for (const DISubprogram *SP : ProcessedSPNodes) {
+ assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug);
+ forBothCUs(
+ getOrCreateDwarfCompileUnit(SP->getUnit()),
+ [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); });
+ }
+}
+
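+ // Perform per-unit finalization after all DIEs have been created: attach
+ // unit-level attributes (DWO ids, ranges, table bases, macros) and compute
+ // DIE offsets and sizes.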
+void DwarfDebug::finalizeModuleInfo() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ finishSubprogramDefinitions();
+
+ finishEntityDefinitions();
+
+ // Include the DWO file name in the hash if there's more than one CU.
+ // This handles ThinLTO's situation where imported CUs may very easily be
+ // duplicates, with the same CU partially imported into another ThinLTO unit.
+ StringRef DWOName;
+ if (CUMap.size() > 1)
+ DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
+
+ bool HasEmittedSplitCU = false;
+
+ // Handle anything that needs to be done on a per-unit basis after
+ // all other generation.
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
+ // Emit DW_AT_containing_type attribute to connect types with their
+ // vtable holding type.
+ TheCU.constructContainingTypeDIEs();
+
+ // Add CU specific attributes if we need to add any.
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then add the dwo id to it.
+ auto *SkCU = TheCU.getSkeleton();
+
+ bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
+
+ if (HasSplitUnit) {
+ (void)HasEmittedSplitCU;
+ assert((shareAcrossDWOCUs() || !HasEmittedSplitCU) &&
+ "Multiple CUs emitted into a single dwo file");
+ HasEmittedSplitCU = true;
+ dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_dwo_name
+ : dwarf::DW_AT_GNU_dwo_name;
+ finishUnitAttributes(TheCU.getCUNode(), TheCU);
+ TheCU.addString(TheCU.getUnitDie(), attrDWOName,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
+ SkCU->addString(SkCU->getUnitDie(), attrDWOName,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
+ // Emit a unique identifier for this CU.
+ uint64_t ID =
+ DIEHash(Asm, &TheCU).computeCUSignature(DWOName, TheCU.getUnitDie());
+ if (getDwarfVersion() >= 5) {
+ TheCU.setDWOId(ID);
+ SkCU->setDWOId(ID);
+ } else {
+ TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ }
+
+ if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) {
+ const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
+ Sym, Sym);
+ }
+ } else if (SkCU) {
+ finishUnitAttributes(SkCU->getCUNode(), *SkCU);
+ }
+
+ // If we have code split among multiple sections or non-contiguous
+ // ranges of code then emit a DW_AT_ranges attribute on the unit that will
+ // remain in the .o file, otherwise add a DW_AT_low_pc.
+ // FIXME: We should use ranges to allow reordering of code, a la
+ // .subsections_via_symbols in Mach-O. This would mean turning on
+ // ranges for all subprogram DIEs for Mach-O.
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+
+ if (unsigned NumRanges = TheCU.getRanges().size()) {
+ if (NumRanges > 1 && useRangesSection())
+ // A DW_AT_low_pc attribute may also be specified in combination with
+ // DW_AT_ranges to specify the default base address for use in
+ // location lists (see Section 2.6.2) and range lists (see Section
+ // 2.17.3).
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ else
+ U.setBaseAddress(TheCU.getRanges().front().Begin);
+ U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
+ }
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if ((HasSplitUnit || getDwarfVersion() >= 5) && !AddrPool.isEmpty())
+ U.addAddrTableBase();
+
+ if (getDwarfVersion() >= 5) {
+ if (U.hasRangeLists())
+ U.addRnglistsBase();
+
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ }
+ }
+
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ // If the compile unit has macros, emit the DW_AT_macro_info/DW_AT_macros
+ // attribute.
+ if (CUNode->getMacros()) {
+ if (UseDebugMacroSection) {
+ if (useSplitDwarf())
+ TheCU.addSectionDelta(
+ TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(),
+ TLOF.getDwarfMacroDWOSection()->getBeginSymbol());
+ else {
+ dwarf::Attribute MacrosAttr = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_macros
+ : dwarf::DW_AT_GNU_macros;
+ U.addSectionLabel(U.getUnitDie(), MacrosAttr, U.getMacroLabelBegin(),
+ TLOF.getDwarfMacroSection()->getBeginSymbol());
+ }
+ } else {
+ if (useSplitDwarf())
+ TheCU.addSectionDelta(
+ TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
+ else
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ }
+ }
+ }
+
+ // Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
+ for (auto *CUNode : MMI->getModule()->debug_compile_units())
+ if (CUNode->getDWOId())
+ getOrCreateDwarfCompileUnit(CUNode);
+
+ // Compute DIE offsets and sizes.
+ InfoHolder.computeSizeAndOffsets();
+ if (useSplitDwarf())
+ SkeletonHolder.computeSizeAndOffsets();
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
+ PrevCU = nullptr;
+ assert(CurFn == nullptr);
+ assert(CurMI == nullptr);
+
+ for (const auto &P : CUMap) {
+ const auto *CUNode = cast<DICompileUnit>(P.first);
+ DwarfCompileUnit *CU = &*P.second;
+
+ // Emit imported entities.
+ for (auto *IE : CUNode->getImportedEntities()) {
+ assert(!isa_and_nonnull<DILocalScope>(IE->getScope()) &&
+ "Unexpected function-local entity in 'imports' CU field.");
+ CU->getOrCreateImportedEntityDIE(IE);
+ }
+ for (const auto *D : CU->getDeferredLocalDecls()) {
+ if (auto *IE = dyn_cast<DIImportedEntity>(D))
+ CU->getOrCreateImportedEntityDIE(IE);
+ else
+ llvm_unreachable("Unexpected local retained node!");
+ }
+
+ // Emit base types.
+ CU->createBaseTypeDIEs();
+ }
+
+ // If we aren't actually generating debug info (beginModule checks this,
+ // conditionalized on the presence of the llvm.dbg.cu metadata node), there
+ // is nothing more to do.
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ if (useSplitDwarf())
+ // Emit debug_loc.dwo/debug_loclists.dwo section.
+ emitDebugLocDWO();
+ else
+ // Emit debug_loc/debug_loclists section.
+ emitDebugLoc();
+
+ // Emit the corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
+ // Emit info into a debug aranges section.
+ if (GenerateARangeSection)
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ if (useSplitDwarf())
+ // Emit info into a debug macinfo.dwo section.
+ emitDebugMacinfoDWO();
+ else
+ // Emit info into a debug macinfo/macro section.
+ emitDebugMacinfo();
+
+ emitDebugStr();
+
+ if (useSplitDwarf()) {
+ emitDebugStrDWO();
+ emitDebugInfoDWO();
+ emitDebugAbbrevDWO();
+ emitDebugLineDWO();
+ emitDebugRangesDWO();
+ }
+
+ emitDebugAddr();
+
+ // Emit info into the dwarf accelerator table sections.
+ switch (getAccelTableKind()) {
+ case AccelTableKind::Apple:
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ break;
+ case AccelTableKind::Dwarf:
+ emitAccelDebugNames();
+ break;
+ case AccelTableKind::None:
+ break;
+ case AccelTableKind::Default:
+ llvm_unreachable("Default should have already been resolved.");
+ }
+
+ // Emit the pubnames and pubtypes sections if requested.
+ emitDebugPubSections();
+
+ // clean up.
+ // FIXME: AbstractVariables.clear();
+}
+
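+ // Create an abstract entity for \p Node in \p CU if one does not exist yet
+ // and \p ScopeNode corresponds to a known abstract scope.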
+void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node, const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
+ return;
+
+ if (LexicalScope *Scope =
+ LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
+ CU.createAbstractEntity(Node, Scope);
+}
+
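+ /// Return the local scope that the retained node \p N (a local variable,
+ /// label, or imported entity) belongs to.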
+static const DILocalScope *getRetainedNodeScope(const MDNode *N) {
+ const DIScope *S;
+ if (const auto *LV = dyn_cast<DILocalVariable>(N))
+ S = LV->getScope();
+ else if (const auto *L = dyn_cast<DILabel>(N))
+ S = L->getScope();
+ else if (const auto *IE = dyn_cast<DIImportedEntity>(N))
+ S = IE->getScope();
+ else
+ llvm_unreachable("Unexpected retained node!");
+
+ // Ensure the scope is not a DILexicalBlockFile.
+ return cast<DILocalScope>(S)->getNonLexicalBlockFileScope();
+}
+
+// Collect variable information from side table maintained by MF.
+void DwarfDebug::collectVariableInfoFromMFTable(
+ DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
+ SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
+ LLVM_DEBUG(dbgs() << "DwarfDebug: collecting variables from MF side table\n");
+ for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
+ if (!VI.Var)
+ continue;
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt());
+ Processed.insert(Var);
+ LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
+
+ // If variable scope is not found then skip this variable.
+ if (!Scope) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << VI.Var->getName()
+ << ", no variable scope found\n");
+ continue;
+ }
+
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
+ auto RegVar = std::make_unique<DbgVariable>(
+ cast<DILocalVariable>(Var.first), Var.second);
+ if (VI.inStackSlot())
+ RegVar->initializeMMI(VI.Expr, VI.getStackSlot());
+ else {
+ MachineLocation MLoc(VI.getEntryValueRegister(), /*IsIndirect*/ true);
+ auto LocEntry = DbgValueLocEntry(MLoc);
+ RegVar->initializeDbgValue(DbgValueLoc(VI.Expr, LocEntry));
+ }
+ LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
+ << "\n");
+
+ if (DbgVariable *DbgVar = MFVars.lookup(Var)) {
+ if (DbgVar->getValueLoc())
+ LLVM_DEBUG(dbgs() << "Dropping repeated entry value debug info for "
+ "variable "
+ << VI.Var->getName() << "\n");
+ else
+ DbgVar->addMMIEntry(*RegVar);
+ } else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
+ MFVars.insert({Var, RegVar.get()});
+ ConcreteEntities.push_back(std::move(RegVar));
+ }
+ }
+}
+
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+ const MachineInstr *DbgValue,
+ const MachineInstr *RangeEnd,
+ const InstructionOrdering &Ordering) {
+ assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+ auto MBB = DbgValue->getParent();
+ auto DL = DbgValue->getDebugLoc();
+ auto *LScope = LScopes.findLexicalScope(DL);
+ // Scope doesn't exist; this is a dead DBG_VALUE.
+ if (!LScope)
+ return false;
+ auto &LSRange = LScope->getRanges();
+ if (LSRange.size() == 0)
+ return false;
+
+ const MachineInstr *LScopeBegin = LSRange.front().first;
+ // If the scope starts before the DBG_VALUE then we may have a negative
+ // result. Otherwise the location is live coming into the scope and we
+ // can skip the following checks.
+ if (!Ordering.isBefore(DbgValue, LScopeBegin)) {
+ // Exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
+ return false;
+
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
+ return false;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
+ return false;
+ }
+ }
+
+ // If the range of the DBG_VALUE is open-ended, report success.
+ if (!RangeEnd)
+ return true;
+
+ // Single, constant DBG_VALUEs in the prologue are promoted to be live
+ // throughout the function. This is a hack, presumably for DWARF v2 and not
+ // necessarily correct. It would be much better to use a dbg.declare instead
+ // if we know the constant is live throughout the scope.
+ if (MBB->pred_empty() &&
+ all_of(DbgValue->debug_operands(),
+ [](const MachineOperand &Op) { return Op.isImm(); }))
+ return true;
+
+ // Test if the location terminates before the end of the scope.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (Ordering.isBefore(RangeEnd, LScopeEnd))
+ return false;
+
+ // There's a single location which starts at the scope start, and ends at or
+ // after the scope end.
+ return true;
+}
+
+/// Build the location list for all DBG_VALUEs in the function that
+/// describe the same variable. The resulting DebugLocEntries will have
+ /// strictly monotonically increasing begin addresses and will never
+ /// overlap. If the resulting list has only one entry that is valid
+ /// throughout the variable's scope, return true.
+//
+// See the definition of DbgValueHistoryMap::Entry for an explanation of the
+// different kinds of history map entries. One thing to be aware of is that if
+// a debug value is ended by another entry (rather than being valid until the
+// end of the function), that entry's instruction may or may not be included in
+// the range, depending on if the entry is a clobbering entry (it has an
+// instruction that clobbers one or more preceding locations), or if it is an
+// (overlapping) debug value entry. This distinction can be seen in the example
+// below. The first debug value is ended by the clobbering entry 2, and the
+// second and third debug values are ended by the overlapping debug value entry
+// 4.
+//
+// Input:
+//
+// History map entries [type, end index, mi]
+//
+// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)]
+// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)]
+// 2 | | [Clobber, $reg0 = [...], -, -]
+// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)]
+// 4 [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)]
+//
+// Output [start, end) [Value...]:
+//
+// [0-1) [(reg0, fragment 0, 32)]
+// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
+// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
+// [4-) [(@g, fragment 0, 96)]
+bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries) {
+ using OpenRange =
+ std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
+ SmallVector<OpenRange, 4> OpenRanges;
+ bool isSafeForSingleLocation = true;
+ const MachineInstr *StartDebugMI = nullptr;
+ const MachineInstr *EndMI = nullptr;
+
+ for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
+ const MachineInstr *Instr = EI->getInstr();
+
+ // Remove all values that are no longer live.
+ size_t Index = std::distance(EB, EI);
+ erase_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
+
+ // If we are dealing with a clobbering entry, this iteration will result in
+ // a location list entry starting after the clobbering instruction.
+ const MCSymbol *StartLabel =
+ EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr);
+ assert(StartLabel &&
+ "Forgot label before/after instruction starting a range!");
+
+ const MCSymbol *EndLabel;
+ if (std::next(EI) == Entries.end()) {
+ const MachineBasicBlock &EndMBB = Asm->MF->back();
+ EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel;
+ if (EI->isClobber())
+ EndMI = EI->getInstr();
+ } else if (std::next(EI)->isClobber())
+ EndLabel = getLabelAfterInsn(std::next(EI)->getInstr());
+ else
+ EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr());
+ assert(EndLabel && "Forgot label after instruction ending a range!");
+
+ if (EI->isDbgValue())
+ LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n");
+
+ // If this history map entry has a debug value, add that to the list of
+ // open ranges and check if its location is valid for a single value
+ // location.
+ if (EI->isDbgValue()) {
+ // Do not add undef debug values, as they are redundant information in
+ // the location list entries. An undef debug value results in an empty
+ // location description. If there are any non-undef fragments then padding
+ // pieces with empty location descriptions will automatically be inserted,
+ // and if all fragments are undef then the whole location list entry is
+ // redundant.
+ if (!Instr->isUndefDebugValue()) {
+ auto Value = getDebugLocValue(Instr);
+ OpenRanges.emplace_back(EI->getEndIndex(), Value);
+
+ // TODO: Add support for single value fragment locations.
+ if (Instr->getDebugExpression()->isFragment())
+ isSafeForSingleLocation = false;
+
+ if (!StartDebugMI)
+ StartDebugMI = Instr;
+ } else {
+ isSafeForSingleLocation = false;
+ }
+ }
+
+ // Location list entries with empty location descriptions are redundant
+ // information in DWARF, so do not emit those.
+ if (OpenRanges.empty())
+ continue;
+
+ // Omit entries with empty ranges as they do not have any effect in DWARF.
+ if (StartLabel == EndLabel) {
+ LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
+ continue;
+ }
+
+ SmallVector<DbgValueLoc, 4> Values;
+ for (auto &R : OpenRanges)
+ Values.push_back(R.second);
+
+ // With basic block sections, it is possible that the StartLabel and the
+ // Instr are not in the same section. This happens when the StartLabel is
+ // the function begin label and the dbg value appears in a basic block
+ // that is not the entry. In this case, the range needs to be split to
+ // span each individual section in the range from StartLabel to EndLabel.
+ if (Asm->MF->hasBBSections() && StartLabel == Asm->getFunctionBegin() &&
+ !Instr->getParent()->sameSection(&Asm->MF->front())) {
+ const MCSymbol *BeginSectionLabel = StartLabel;
+
+ for (const MachineBasicBlock &MBB : *Asm->MF) {
+ if (MBB.isBeginSection() && &MBB != &Asm->MF->front())
+ BeginSectionLabel = MBB.getSymbol();
+
+ if (MBB.sameSection(Instr->getParent())) {
+ DebugLoc.emplace_back(BeginSectionLabel, EndLabel, Values);
+ break;
+ }
+ if (MBB.isEndSection())
+ DebugLoc.emplace_back(BeginSectionLabel, MBB.getEndSymbol(), Values);
+ }
+ } else {
+ DebugLoc.emplace_back(StartLabel, EndLabel, Values);
+ }
+
+ // Attempt to coalesce the ranges of two otherwise identical
+ // DebugLocEntries.
+ auto CurEntry = DebugLoc.rbegin();
+ LLVM_DEBUG({
+ dbgs() << CurEntry->getValues().size() << " Values:\n";
+ for (auto &Value : CurEntry->getValues())
+ Value.dump();
+ dbgs() << "-----\n";
+ });
+
+ auto PrevEntry = std::next(CurEntry);
+ if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
+ DebugLoc.pop_back();
+ }
+
+ if (!isSafeForSingleLocation ||
+ !validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering()))
+ return false;
+
+ if (DebugLoc.size() == 1)
+ return true;
+
+ if (!Asm->MF->hasBBSections())
+ return false;
+
+ // Check here to see if the location list can be merged into a single range.
+ // If not, we must keep the split location lists per section. This does
+ // exactly what MergeRanges does without sections; we don't actually merge
+ // the ranges, as the split ranges must be kept intact if they cannot be
+ // collapsed into a single range.
+ const MachineBasicBlock *RangeMBB = nullptr;
+ if (DebugLoc[0].getBeginSym() == Asm->getFunctionBegin())
+ RangeMBB = &Asm->MF->front();
+ else
+ RangeMBB = Entries.begin()->getInstr()->getParent();
+ auto *CurEntry = DebugLoc.begin();
+ auto *NextEntry = std::next(CurEntry);
+ while (NextEntry != DebugLoc.end()) {
+ // Get the last machine basic block of this section.
+ while (!RangeMBB->isEndSection())
+ RangeMBB = RangeMBB->getNextNode();
+ if (!RangeMBB->getNextNode())
+ return false;
+ // CurEntry should end the current section and NextEntry should start
+ // the next section and the Values must match for these two ranges to be
+ // merged.
+ if (CurEntry->getEndSym() != RangeMBB->getEndSymbol() ||
+ NextEntry->getBeginSym() != RangeMBB->getNextNode()->getSymbol() ||
+ CurEntry->getValues() != NextEntry->getValues())
+ return false;
+ RangeMBB = RangeMBB->getNextNode();
+ CurEntry = NextEntry;
+ NextEntry = std::next(CurEntry);
+ }
+ return true;
+}
+
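+ // Create a concrete DbgVariable or DbgLabel for \p Node in \p Scope, ensure
+ // its abstract counterpart exists in \p TheCU if the scope requires one, and
+ // register the new entity with the info holder.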
+DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym) {
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
+ if (isa<const DILocalVariable>(Node)) {
+ ConcreteEntities.push_back(
+ std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ Location));
+ InfoHolder.addScopeVariable(&Scope,
+ cast<DbgVariable>(ConcreteEntities.back().get()));
+ } else if (isa<const DILabel>(Node)) {
+ ConcreteEntities.push_back(
+ std::make_unique<DbgLabel>(cast<const DILabel>(Node),
+ Location, Sym));
+ InfoHolder.addScopeLabel(&Scope,
+ cast<DbgLabel>(ConcreteEntities.back().get()));
+ }
+ return ConcreteEntities.back().get();
+}
+
+// Find variables for each lexical scope.
+void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
+ const DISubprogram *SP,
+ DenseSet<InlinedEntity> &Processed) {
+ // Grab the variable info that was squirreled away in the MMI side-table.
+ collectVariableInfoFromMFTable(TheCU, Processed);
+
+ for (const auto &I : DbgValues) {
+ InlinedEntity IV = I.first;
+ if (Processed.count(IV))
+ continue;
+
+ // Instruction ranges, specifying where IV is accessible.
+ const auto &HistoryMapEntries = I.second;
+
+ // Try to find any non-empty variable location. Do not create a concrete
+ // entity if there are no locations.
+ if (!DbgValues.hasNonEmptyLocation(HistoryMapEntries))
+ continue;
+
+ LexicalScope *Scope = nullptr;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first);
+ if (const DILocation *IA = IV.second)
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA);
+ else
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(IV);
+ DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
+ *Scope, LocalVar, IV.second));
+
+ const MachineInstr *MInsn = HistoryMapEntries.front().getInstr();
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+
+ // Check if there is a single DBG_VALUE, valid throughout the var's scope.
+ // If the history map contains a single debug value, there may be an
+ // additional entry which clobbers the debug value.
+ size_t HistSize = HistoryMapEntries.size();
+ bool SingleValueWithClobber =
+ HistSize == 2 && HistoryMapEntries[1].isClobber();
+ if (HistSize == 1 || SingleValueWithClobber) {
+ const auto *End =
+ SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
+ if (validThroughout(LScopes, MInsn, End, getInstOrdering())) {
+ RegVar->initializeDbgValue(MInsn);
+ continue;
+ }
+ }
+
+ // Do not emit location lists if the .debug_loc section is disabled.
+ if (!useLocSection())
+ continue;
+
+ // Handle multiple DBG_VALUE instructions describing one variable.
+ DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
+
+ // Build the location list for this variable.
+ SmallVector<DebugLocEntry, 8> Entries;
+ bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
+
+ // Check whether buildLocationList managed to merge all locations to one
+ // that is valid throughout the variable's scope. If so, produce a single
+ // value location.
+ if (isValidSingleLocation) {
+ RegVar->initializeDbgValue(Entries[0].getValues()[0]);
+ continue;
+ }
+
+ // If the variable has a DIBasicType, extract it. Basic types cannot have
+ // unique identifiers, so don't bother resolving the type with the
+ // identifier map.
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(LocalVar->getType()));
+
+ // Finalize the entry by lowering it into a DWARF bytestream.
+ for (auto &Entry : Entries)
+ Entry.finalize(*Asm, List, BT, TheCU);
+ }
+
+ // For each InlinedEntity collected from DBG_LABEL instructions, convert it
+ // to a DWARF-related DbgLabel.
+ for (const auto &I : DbgLabels) {
+ InlinedEntity IL = I.first;
+ const MachineInstr *MI = I.second;
+ if (MI == nullptr)
+ continue;
+
+ LexicalScope *Scope = nullptr;
+ const DILabel *Label = cast<DILabel>(IL.first);
+ // The scope could have an extra lexical block file.
+ const DILocalScope *LocalScope =
+ Label->getScope()->getNonLexicalBlockFileScope();
+ // Get the inlined DILocation if this is an inlined label.
+ if (const DILocation *IA = IL.second)
+ Scope = LScopes.findInlinedScope(LocalScope, IA);
+ else
+ Scope = LScopes.findLexicalScope(LocalScope);
+ // If label scope is not found then skip this label.
+ if (!Scope)
+ continue;
+
+ Processed.insert(IL);
+ // At this point, the temporary label is created.
+ // Save the temporary label in the DbgLabel entity so the actual address can
+ // be retrieved when generating the DWARF DIE.
+ MCSymbol *Sym = getLabelBeforeInsn(MI);
+ createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
+ }
+
+ // Collect info for retained nodes.
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ const auto *LS = getRetainedNodeScope(DN);
+ if (isa<DILocalVariable>(DN) || isa<DILabel>(DN)) {
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+ LexicalScope *LexS = LScopes.findLexicalScope(LS);
+ if (LexS)
+ createConcreteEntity(TheCU, *LexS, DN, nullptr);
+ } else {
+ LocalDeclsPerLS[LS].insert(DN);
+ }
+ }
+}
+
+// Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ const MachineFunction &MF = *MI->getMF();
+ const auto *SP = MF.getFunction().getSubprogram();
+ bool NoDebug =
+ !SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug;
+
+ // Delay slot support check.
+ auto delaySlotSupported = [](const MachineInstr &MI) {
+ if (!MI.isBundledWithSucc())
+ return false;
+ auto Suc = std::next(MI.getIterator());
+ (void)Suc;
+ // Ensure that the delay slot instruction is the successor of the call
+ // instruction.
+ // Ex. CALL_INSTRUCTION {
+ // DELAY_SLOT_INSTRUCTION }
+ assert(Suc->isBundledWithPred() &&
+ "Call bundle instructions are out of order");
+ return true;
+ };
+
+ // When describing calls, we need a label for the call instruction.
+ if (!NoDebug && SP->areAllCallsDescribed() &&
+ MI->isCandidateForCallSiteEntry(MachineInstr::AnyInBundle) &&
+ (!MI->hasDelaySlot() || delaySlotSupported(*MI))) {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool IsTail = TII->isTailCall(*MI);
+ // For tail calls, we need the address of the branch instruction for
+ // DW_AT_call_pc.
+ if (IsTail)
+ requestLabelBeforeInsn(MI);
+ // For non-tail calls, we need the return address for the call for
+ // DW_AT_call_return_pc. Under GDB tuning, this information is needed for
+ // tail calls as well.
+ requestLabelAfterInsn(MI);
+ }
+
+ DebugHandlerBase::beginInstruction(MI);
+ if (!CurMI)
+ return;
+
+ if (NoDebug)
+ return;
+
+ // Check if source location changes, but ignore DBG_VALUE and CFI locations.
+ // If the instruction is part of the function frame setup code, do not emit
+ // any line record, as there is no correspondence with any user code.
+ if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))
+ return;
+ const DebugLoc &DL = MI->getDebugLoc();
+ unsigned Flags = 0;
+
+ if (MI->getFlag(MachineInstr::FrameDestroy) && DL) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ if (MBB && (MBB != EpilogBeginBlock)) {
+ // First time FrameDestroy has been seen in this basic block
+ EpilogBeginBlock = MBB;
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ }
+ }
+
+ // When we emit a line-0 record, we don't update PrevInstLoc; so look at
+ // the last line number actually emitted, to see if it was line 0.
+ unsigned LastAsmLine =
+ Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+
+ bool PrevInstInSameSection =
+ (!PrevInstBB ||
+ PrevInstBB->getSectionIDNum() == MI->getParent()->getSectionIDNum());
+ if (DL == PrevInstLoc && PrevInstInSameSection) {
+ // If we have an ongoing unspecified location, nothing to do here.
+ if (!DL)
+ return;
+ // We have an explicit location, same as the previous location.
+ // But we might be coming back to it after a line 0 record.
+ if ((LastAsmLine == 0 && DL.getLine() != 0) || Flags) {
+ // Reinstate the source location but not marked as a statement.
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ }
+ return;
+ }
+
+ if (!DL) {
+ // We have an unspecified location, which might want to be line 0.
+ // If we have already emitted a line-0 record, don't repeat it.
+ if (LastAsmLine == 0)
+ return;
+ // If user said Don't Do That, don't do that.
+ if (UnknownLocations == Disable)
+ return;
+ // See if we have a reason to emit a line-0 record now.
+ // Reasons to emit a line-0 record include:
+ // - User asked for it (UnknownLocations).
+ // - Instruction has a label, so it's referenced from somewhere else,
+ // possibly debug information; we want it to have a source location.
+ // - Instruction is at the top of a block; we don't want to inherit the
+ // location from the physically previous (maybe unrelated) block.
+ if (UnknownLocations == Enable || PrevLabel ||
+ (PrevInstBB && PrevInstBB != MI->getParent())) {
+ // Preserve the file and column numbers, if we can, to save space in
+ // the encoded line table.
+ // Do not update PrevInstLoc, it remembers the last non-0 line.
+ const MDNode *Scope = nullptr;
+ unsigned Column = 0;
+ if (PrevInstLoc) {
+ Scope = PrevInstLoc.getScope();
+ Column = PrevInstLoc.getCol();
+ }
+ recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
+ }
+ return;
+ }
+
+ // We have an explicit location, different from the previous location.
+ // Don't repeat a line-0 record, but otherwise emit the new location.
+ // (The new location might be an explicit line 0, which we do emit.)
+ if (DL.getLine() == 0 && LastAsmLine == 0)
+ return;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
+ PrologEndLoc = DebugLoc();
+ }
+ // If the line changed, we call that a new statement; unless we went to
+ // line 0 and came back, in which case it is not a new statement.
+ unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
+ if (DL.getLine() && DL.getLine() != OldLine)
+ Flags |= DWARF2_FLAG_IS_STMT;
+
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+
+ // If we're not at line 0, remember this location.
+ if (DL.getLine())
+ PrevInstLoc = DL;
+}
+
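+ /// Find the location that should receive the prologue_end flag: the first
+ /// non-frame-setup location after the frame setup, preferring a non-zero
+ /// line. Also report whether the prologue is empty.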
+static std::pair<DebugLoc, bool> findPrologueEndLoc(const MachineFunction *MF) {
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ DebugLoc LineZeroLoc;
+ const Function &F = MF->getFunction();
+
+ // Some instructions may be inserted into the prologue after this function
+ // runs, so the prologue must be kept (treated as non-empty) in those cases.
+ bool IsEmptyPrologue =
+ !(F.hasPrologueData() || F.getMetadata(LLVMContext::MD_func_sanitize));
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isMetaInstruction()) {
+ if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The
+ // prologue_end marks the first breakpoint in the function after the
+ // frame setup, and a compiler-generated line 0 location is not a
+ // meaningful breakpoint. If none is found, return the first
+ // location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return std::make_pair(MI.getDebugLoc(), IsEmptyPrologue);
+
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ IsEmptyPrologue = false;
+ }
+ }
+ }
+ return std::make_pair(LineZeroLoc, IsEmptyPrologue);
+}
+
+ /// Register a source line with debug info by emitting a .loc directive for
+ /// the given line, column, scope, and flags.
+static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
+ const MDNode *S, unsigned Flags, unsigned CUID,
+ uint16_t DwarfVersion,
+ ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) {
+ StringRef Fn;
+ unsigned FileNo = 1;
+ unsigned Discriminator = 0;
+ if (auto *Scope = cast_or_null<DIScope>(S)) {
+ Fn = Scope->getFilename();
+ if (Line != 0 && DwarfVersion >= 4)
+ if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
+ Discriminator = LBF->getDiscriminator();
+
+ FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
+ .getOrCreateSourceID(Scope->getFile());
+ }
+ Asm.OutStreamer->emitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
+ Discriminator, Fn);
+}
+
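+ // Emit the function's scope line as the initial .loc directive unless the
+ // prologue is empty, and return the location where prologue_end will be set.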
+DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
+ unsigned CUID) {
+ std::pair<DebugLoc, bool> PrologEnd = findPrologueEndLoc(&MF);
+ DebugLoc PrologEndLoc = PrologEnd.first;
+ bool IsEmptyPrologue = PrologEnd.second;
+
+ // Get beginning of function.
+ if (PrologEndLoc) {
+ // If the prologue is empty, there is no need to generate a scope line for
+ // the function.
+ if (IsEmptyPrologue)
+ return PrologEndLoc;
+
+ // Ensure the compile unit is created if the function is called before
+ // beginFunction().
+ (void)getOrCreateDwarfCompileUnit(
+ MF.getFunction().getSubprogram()->getUnit());
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
+ ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT,
+ CUID, getDwarfVersion(), getUnits());
+ return PrologEndLoc;
+ }
+ return DebugLoc();
+}
+
+// Gather pre-function debug information. Assumes it is called immediately
+// after the function entry point has been emitted.
+void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
+ CurFn = MF;
+
+ auto *SP = MF->getFunction().getSubprogram();
+ assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
+ if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
+ return;
+
+ DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(
+ getDwarfCompileUnitIDForLineTable(CU));
+
+ // Record beginning of function.
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+}
+
+unsigned
+DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {
+ // Set DwarfCompileUnitID in MCContext to the compile unit this function
+ // belongs to so that we add to the correct per-CU line table in the
+ // non-asm case.
+ if (Asm->OutStreamer->hasRawTextSupport())
+ // Use a single line table if we are generating assembly.
+ return 0;
+ else
+ return CU.getUniqueID();
+}
+
+void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
+ const auto &CURanges = CU->getRanges();
+ auto &LineTable = Asm->OutStreamer->getContext().getMCDwarfLineTable(
+ getDwarfCompileUnitIDForLineTable(*CU));
+ // Add the last range label for the given CU.
+ LineTable.getMCLineSections().addEndEntry(
+ const_cast<MCSymbol *>(CURanges.back().End));
+}
+
+void DwarfDebug::skippedNonDebugFunction() {
+ // If we don't have a subprogram for this function then there will be a hole
+ // in the range information. Keep note of this by clearing PrevCU, after
+ // first terminating its pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
+ PrevCU = nullptr;
+ CurFn = nullptr;
+}
+
+// Gather and emit post-function debug information.
+void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
+ const DISubprogram *SP = MF->getFunction().getSubprogram();
+
+ assert(CurFn == MF &&
+ "endFunction should be called with the same function as beginFunction");
+
+ // Set DwarfCompileUnitID in MCContext back to its default value.
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ assert(!FnScope || SP == FnScope->getScopeNode());
+ DwarfCompileUnit &TheCU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
+
+ DenseSet<InlinedEntity> Processed;
+ collectEntityInfo(TheCU, SP, Processed);
+
+ // Add the range of this function to the list of ranges for the CU.
+ // With basic block sections, add ranges for all basic block sections.
+ for (const auto &R : Asm->MBBSectionRanges)
+ TheCU.addRange({R.second.BeginLabel, R.second.EndLabel});
+
+ // Under -gmlt, skip building the subprogram if there are no inlined
+ // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
+ // is still needed as we need its source location.
+ if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
+ TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
+ LScopes.getAbstractScopesList().empty() && !IsDarwin) {
+ for (const auto &R : Asm->MBBSectionRanges)
+ addArangeLabel(SymbolCU(&TheCU, R.second.BeginLabel));
+
+ assert(InfoHolder.getScopeVariables().empty());
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
+
+#ifndef NDEBUG
+ size_t NumAbstractSubprograms = LScopes.getAbstractScopesList().size();
+#endif
+ for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
+ const auto *SP = cast<DISubprogram>(AScope->getScopeNode());
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ const auto *LS = getRetainedNodeScope(DN);
+ // Ensure LexicalScope is created for the scope of this node.
+ auto *LexS = LScopes.getOrCreateAbstractScope(LS);
+ assert(LexS && "Expected the LexicalScope to be created.");
+ if (isa<DILocalVariable>(DN) || isa<DILabel>(DN)) {
+ // Collect info for variables/labels that were optimized out.
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second ||
+ TheCU.getExistingAbstractEntity(DN))
+ continue;
+ TheCU.createAbstractEntity(DN, LexS);
+ } else {
+ // Remember the node if this is a local declaration.
+ LocalDeclsPerLS[LS].insert(DN);
+ }
+ assert(
+ LScopes.getAbstractScopesList().size() == NumAbstractSubprograms &&
+ "getOrCreateAbstractScope() inserted an abstract subprogram scope");
+ }
+ constructAbstractSubprogramScopeDIE(TheCU, AScope);
+ }
+
+ ProcessedSPNodes.insert(SP);
+ DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope);
+ if (auto *SkelCU = TheCU.getSkeleton())
+ if (!LScopes.getAbstractScopesList().empty() &&
+ TheCU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructSubprogramScopeDIE(SP, FnScope);
+
+ // Construct call site entries.
+ constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
+
+ // Clear debug info
+ // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
+ // DbgVariables except those that are also in AbstractVariables (since they
+ // can be used cross-function)
+ InfoHolder.getScopeVariables().clear();
+ InfoHolder.getScopeLabels().clear();
+ LocalDeclsPerLS.clear();
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+}
+
+// Register a source line with debug info, emitting the line-table entry that
+// provides correspondence to the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ ::recordSourceLine(*Asm, Line, Col, S, Flags,
+ Asm->OutStreamer->getContext().getDwarfCompileUnitID(),
+ getDwarfVersion(), getUnits());
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitUnits(/* UseOffsets */ false);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitStringOffsetsTableHeader() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.getStringPool().emitStringOffsetsTableHeader(
+ *Asm, Asm->getObjFileLowering().getDwarfStrOffSection(),
+ Holder.getStringOffsetsStartSym());
+}
+
+template <typename AccelTableT>
+void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,
+ StringRef TableName) {
+ Asm->OutStreamer->switchSection(Section);
+
+ // Emit the full data.
+ emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol());
+}
+
+void DwarfDebug::emitAccelDebugNames() {
+ // Don't emit anything if we have no compilation units to index.
+ if (getUnits().empty())
+ return;
+
+ emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
+}
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+ emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
+ "Names");
+}
+
+// Emit objective C classes and categories into a hashed accelerator table
+// section.
+void DwarfDebug::emitAccelObjC() {
+ emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
+ "ObjC");
+}
+
+// Emit namespace dies into a hashed accelerator table.
+void DwarfDebug::emitAccelNamespaces() {
+ emitAccel(AccelNamespace,
+ Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
+ "namespac");
+}
+
+// Emit type dies into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
+ "types");
+}
+
+// Public name handling.
+// The format for the various pubnames:
+//
+// dwarf pubnames - offset/name pairs where the offset is the offset into the CU
+// for the DIE that is named.
+//
+// gnu pubnames - offset/index value/name tuples where the offset is the offset
+// into the CU and the index value is computed according to the type of value
+// for the DIE that is named.
+//
+// For type units the offset is the offset of the skeleton DIE. For split dwarf
+// it's the offset within the debug_info/debug_types dwo section, however, the
+// reference in the pubname header doesn't change.
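+//
+// For illustration, a single GNU-style pubnames entry as produced by
+// emitDebugPubSection below consists of (values hypothetical):
+//   an offset-sized DIE offset within the CU,
+//   a one-byte kind/linkage descriptor (e.g. "FUNCTION, EXTERNAL"),
+//   and the NUL-terminated name string;
+// the non-GNU form omits the descriptor byte, and the table ends with a zero
+// offset ("End Mark").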
+
+/// computeIndexValue - Compute the gdb index value for the DIE and CU.
+static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
+ const DIE *Die) {
+ // Entities that ended up only in a Type Unit reference the CU instead (since
+ // the pub entry has offsets within the CU there's no real offset that can be
+ // provided anyway). As it happens all such entities (namespaces and types,
+ // types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out
+ // not to be true it would be necessary to persist this information from the
+ // point at which the entry is added to the index data structure - since by
+ // the time the index is built from that, the original type/namespace DIE in a
+ // type unit has already been destroyed so it can't be queried for properties
+ // like tag, etc.
+ if (Die->getTag() == dwarf::DW_TAG_compile_unit)
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
+ dwarf::GIEL_EXTERNAL);
+ dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
+
+ // We could have a specification DIE that has most of our knowledge;
+ // look for that now.
+ if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) {
+ DIE &SpecDIE = SpecVal.getDIEEntry().getEntry();
+ if (SpecDIE.findAttribute(dwarf::DW_AT_external))
+ Linkage = dwarf::GIEL_EXTERNAL;
+ } else if (Die->findAttribute(dwarf::DW_AT_external))
+ Linkage = dwarf::GIEL_EXTERNAL;
+
+ switch (Die->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ return dwarf::PubIndexEntryDescriptor(
+ dwarf::GIEK_TYPE,
+ dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
+ ? dwarf::GIEL_EXTERNAL
+ : dwarf::GIEL_STATIC);
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_subrange_type:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
+ case dwarf::DW_TAG_namespace:
+ return dwarf::GIEK_TYPE;
+ case dwarf::DW_TAG_subprogram:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
+ case dwarf::DW_TAG_variable:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
+ case dwarf::DW_TAG_enumerator:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
+ dwarf::GIEL_STATIC);
+ default:
+ return dwarf::GIEK_NONE;
+ }
+}
+
+/// emitDebugPubSections - Emit visible names and types into debug pubnames and
+/// pubtypes sections.
+void DwarfDebug::emitDebugPubSections() {
+ for (const auto &NU : CUMap) {
+ DwarfCompileUnit *TheU = NU.second;
+ if (!TheU->hasDwarfPubSections())
+ continue;
+
+ bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::GNU;
+
+ Asm->OutStreamer->switchSection(
+ GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+ : Asm->getObjFileLowering().getDwarfPubNamesSection());
+ emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
+
+ Asm->OutStreamer->switchSection(
+ GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
+ : Asm->getObjFileLowering().getDwarfPubTypesSection());
+ emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
+ }
+}
+
+void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) {
+ if (useSectionsAsReferences())
+ Asm->emitDwarfOffset(CU.getSection()->getBeginSymbol(),
+ CU.getDebugSectionOffset());
+ else
+ Asm->emitDwarfSymbolReference(CU.getLabelBegin());
+}
+
+void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
+ DwarfCompileUnit *TheU,
+ const StringMap<const DIE *> &Globals) {
+ if (auto *Skeleton = TheU->getSkeleton())
+ TheU = Skeleton;
+
+ // Emit the header.
+ MCSymbol *EndLabel = Asm->emitDwarfUnitLength(
+ "pub" + Name, "Length of Public " + Name + " Info");
+
+ Asm->OutStreamer->AddComment("DWARF Version");
+ Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
+
+ Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
+ emitSectionReference(*TheU);
+
+ Asm->OutStreamer->AddComment("Compilation Unit Length");
+ Asm->emitDwarfLengthOrOffset(TheU->getLength());
+
+ // Emit the pubnames for this compilation unit.
+ SmallVector<std::pair<StringRef, const DIE *>, 0> Vec;
+ for (const auto &GI : Globals)
+ Vec.emplace_back(GI.first(), GI.second);
+ llvm::sort(Vec, [](auto &A, auto &B) {
+ return A.second->getOffset() < B.second->getOffset();
+ });
+ for (const auto &[Name, Entity] : Vec) {
+ Asm->OutStreamer->AddComment("DIE offset");
+ Asm->emitDwarfLengthOrOffset(Entity->getOffset());
+
+ if (GnuStyle) {
+ dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
+ Asm->OutStreamer->AddComment(
+ Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) +
+ ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Asm->emitInt8(Desc.toBits());
+ }
+
+ Asm->OutStreamer->AddComment("External Name");
+ Asm->OutStreamer->emitBytes(StringRef(Name.data(), Name.size() + 1));
+ }
+
+ Asm->OutStreamer->AddComment("End Mark");
+ Asm->emitDwarfLengthOrOffset(0);
+ Asm->OutStreamer->emitLabel(EndLabel);
+}
+
+/// Emit null-terminated strings into a debug str section.
+void DwarfDebug::emitDebugStr() {
+ MCSection *StringOffsetsSection = nullptr;
+ if (useSegmentedStringOffsetsTable()) {
+ emitStringOffsetsTableHeader();
+ StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection();
+ }
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(),
+ StringOffsetsSection, /* UseRelativeOffsets = */ true);
+}
+
+void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU) {
+ auto &&Comments = DebugLocs.getComments(Entry);
+ auto Comment = Comments.begin();
+ auto End = Comments.end();
+
+ // The expressions are inserted into a byte stream rather early (see
+ // DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that
+ // need to reference a base_type DIE the offset of that DIE is not yet known.
+ // To deal with this we instead insert a placeholder early and then extract
+ // it here and replace it with the real reference.
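+ // For example, a DW_OP_convert whose operand was recorded as a placeholder
+ // index is re-emitted below as the ULEB128 offset of the corresponding
+ // base_type DIE once that offset is known.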
+ unsigned PtrSize = Asm->MAI->getCodePointerSize();
+ DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
+ DebugLocs.getBytes(Entry).size()),
+ Asm->getDataLayout().isLittleEndian(), PtrSize);
+ DWARFExpression Expr(Data, PtrSize, Asm->OutContext.getDwarfFormat());
+
+ using Encoding = DWARFExpression::Operation::Encoding;
+ uint64_t Offset = 0;
+ for (const auto &Op : Expr) {
+ assert(Op.getCode() != dwarf::DW_OP_const_type &&
+ "3 operand ops not yet supported");
+ assert(!Op.getSubCode() && "SubOps not yet supported");
+ Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
+ Offset++;
+ for (unsigned I = 0; I < Op.getDescription().Op.size(); ++I) {
+ if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
+ unsigned Length =
+ Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die);
+ // Make sure comments stay aligned.
+ for (unsigned J = 0; J < Length; ++J)
+ if (Comment != End)
+ Comment++;
+ } else {
+ for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
+ Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
+ }
+ Offset = Op.getOperandEndOffset(I);
+ }
+ assert(Offset == Op.getEndOffset());
+ }
+}
+
+void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ const DbgValueLoc &Value,
+ DwarfExpression &DwarfExpr) {
+ auto *DIExpr = Value.getExpression();
+ DIExpressionCursor ExprCursor(DIExpr);
+ DwarfExpr.addFragmentOffset(DIExpr);
+
+ // If the DIExpr is an Entry Value, we want to follow the same code path
+ // regardless of whether the DBG_VALUE is variadic or not.
+ if (DIExpr && DIExpr->isEntryValue()) {
+ // Entry values can only be a single register with no additional DIExpr,
+ // so just add it directly.
+ assert(Value.getLocEntries().size() == 1);
+ assert(Value.getLocEntries()[0].isLocation());
+ MachineLocation Location = Value.getLocEntries()[0].getLoc();
+ DwarfExpr.setLocation(Location, DIExpr);
+
+ DwarfExpr.beginEntryValueExpression(ExprCursor);
+
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, ExprCursor, Location.getReg()))
+ return;
+ return DwarfExpr.addExpression(std::move(ExprCursor));
+ }
+
+ // Regular entry.
+ auto EmitValueLocEntry = [&DwarfExpr, &BT,
+ &AP](const DbgValueLocEntry &Entry,
+ DIExpressionCursor &Cursor) -> bool {
+ if (Entry.isInt()) {
+ if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
+ BT->getEncoding() == dwarf::DW_ATE_signed_char))
+ DwarfExpr.addSignedConstant(Entry.getInt());
+ else
+ DwarfExpr.addUnsignedConstant(Entry.getInt());
+ } else if (Entry.isLocation()) {
+ MachineLocation Location = Entry.getLoc();
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
+
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return false;
+ } else if (Entry.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Entry.getTargetIndexLocation();
+ // TODO: TargetIndexLocation is target-independent. Currently only the
+ // WebAssembly-specific encoding is supported.
+ assert(AP.TM.getTargetTriple().isWasm());
+ DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ } else if (Entry.isConstantFP()) {
+ if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
+ !Cursor) {
+ DwarfExpr.addConstantFP(Entry.getConstantFP()->getValueAPF(), AP);
+ } else if (Entry.getConstantFP()
+ ->getValueAPF()
+ .bitcastToAPInt()
+ .getBitWidth() <= 64 /*bits*/) {
+ DwarfExpr.addUnsignedConstant(
+ Entry.getConstantFP()->getValueAPF().bitcastToAPInt());
+ } else {
+ LLVM_DEBUG(
+ dbgs() << "Skipped DwarfExpression creation for ConstantFP of size "
+ << Entry.getConstantFP()
+ ->getValueAPF()
+ .bitcastToAPInt()
+ .getBitWidth()
+ << " bits\n");
+ return false;
+ }
+ }
+ return true;
+ };
+
+ if (!Value.isVariadic()) {
+ if (!EmitValueLocEntry(Value.getLocEntries()[0], ExprCursor))
+ return;
+ DwarfExpr.addExpression(std::move(ExprCursor));
+ return;
+ }
+
+ // If any of the location entries are registers with the value 0, then the
+ // location is undefined.
+ if (any_of(Value.getLocEntries(), [](const DbgValueLocEntry &Entry) {
+ return Entry.isLocation() && !Entry.getLoc().getReg();
+ }))
+ return;
+
+ DwarfExpr.addExpression(
+ std::move(ExprCursor),
+ [EmitValueLocEntry, &Value](unsigned Idx,
+ DIExpressionCursor &Cursor) -> bool {
+ return EmitValueLocEntry(Value.getLocEntries()[Idx], Cursor);
+ });
+}
+
+void DebugLocEntry::finalize(const AsmPrinter &AP,
+ DebugLocStream::ListBuilder &List,
+ const DIBasicType *BT,
+ DwarfCompileUnit &TheCU) {
+ assert(!Values.empty() &&
+ "location list entries without values are redundant");
+ assert(Begin != End && "unexpected location list entry with empty range");
+ DebugLocStream::EntryBuilder Entry(List, Begin, End);
+ BufferByteStreamer Streamer = Entry.getStreamer();
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU);
+ const DbgValueLoc &Value = Values[0];
+ if (Value.isFragment()) {
+ // Emit all fragments that belong to the same variable and range.
+ assert(llvm::all_of(Values, [](DbgValueLoc P) {
+ return P.isFragment();
+ }) && "all values are expected to be fragments");
+ assert(llvm::is_sorted(Values) && "fragments are expected to be sorted");
+
+ for (const auto &Fragment : Values)
+ DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
+
+ } else {
+ assert(Values.size() == 1 && "only fragments may have >1 value");
+ DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
+ }
+ DwarfExpr.finalize();
+ if (DwarfExpr.TagOffset)
+ List.setTagOffset(*DwarfExpr.TagOffset);
+}
+
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU) {
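+ // In the DWARF v5 path the caller has already emitted the DW_LLE_* opcode
+ // and its operands; this function then emits the expression size (ULEB128
+ // for v5, a 2-byte value otherwise) followed by the raw expression bytes.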
+ // Emit the size.
+ Asm->OutStreamer->AddComment("Loc expr size");
+ if (getDwarfVersion() >= 5)
+ Asm->emitULEB128(DebugLocs.getBytes(Entry).size());
+ else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
+ Asm->emitInt16(DebugLocs.getBytes(Entry).size());
+ else {
+ // The entry is too big to fit into 16 bits; drop it, as there is nothing
+ // we can do.
+ Asm->emitInt16(0);
+ return;
+ }
+ // Emit the entry.
+ APByteStreamer Streamer(*Asm);
+ emitDebugLocEntry(Streamer, Entry, CU);
+}
+
+// Emit the header of a DWARF 5 range list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
+
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(Holder.getRangeLists().size());
+ Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym());
+
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
+ Asm->getDwarfOffsetByteSize());
+
+ return TableEnd;
+}
+
+// Emit the header of a DWARF 5 locations list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
+ const DwarfDebug &DD) {
+ MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
+
+ const auto &DebugLocs = DD.getDebugLocs();
+
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(DebugLocs.getLists().size());
+ Asm->OutStreamer->emitLabel(DebugLocs.getSym());
+
+ for (const auto &List : DebugLocs.getLists())
+ Asm->emitLabelDifference(List.Label, DebugLocs.getSym(),
+ Asm->getDwarfOffsetByteSize());
+
+ return TableEnd;
+}
+
+template <typename Ranges, typename PayloadEmitter>
+static void emitRangeList(
+ DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R,
+ const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair,
+ unsigned StartxLength, unsigned EndOfList,
+ StringRef (*StringifyEnum)(unsigned),
+ bool ShouldUseBaseAddress,
+ PayloadEmitter EmitPayload) {
+
+ auto Size = Asm->MAI->getCodePointerSize();
+ bool UseDwarf5 = DD.getDwarfVersion() >= 5;
+
+ // Emit our symbol so we can find the beginning of the range.
+ Asm->OutStreamer->emitLabel(Sym);
+
+ // Gather all the ranges that apply to the same section so they can share
+ // a base address entry.
+ MapVector<const MCSection *, std::vector<decltype(&*R.begin())>> SectionRanges;
+
+ for (const auto &Range : R)
+ SectionRanges[&Range.Begin->getSection()].push_back(&Range);
+
+ const MCSymbol *CUBase = CU.getBaseAddress();
+ bool BaseIsSet = false;
+ for (const auto &P : SectionRanges) {
+ auto *Base = CUBase;
+ if (!Base && ShouldUseBaseAddress) {
+ const MCSymbol *Begin = P.second.front()->Begin;
+ const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection());
+ if (!UseDwarf5) {
+ Base = NewBase;
+ BaseIsSet = true;
+ Asm->OutStreamer->emitIntValue(-1, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->emitSymbolValue(Base, Size);
+ } else if (NewBase != Begin || P.second.size() > 1) {
+ // Only use a base address if
+ // * the existing pool address doesn't match (NewBase != Begin)
+ // * or, there's more than one entry to share the base address
+ Base = NewBase;
+ BaseIsSet = true;
+ Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx));
+ Asm->emitInt8(BaseAddressx);
+ Asm->OutStreamer->AddComment(" base address index");
+ Asm->emitULEB128(DD.getAddressPool().getIndex(Base));
+ }
+ } else if (BaseIsSet && !UseDwarf5) {
+ BaseIsSet = false;
+ assert(!Base);
+ Asm->OutStreamer->emitIntValue(-1, Size);
+ Asm->OutStreamer->emitIntValue(0, Size);
+ }
+
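+ // For DWARF v5 the emitted shapes are (opcodes are the DW_RLE_*/DW_LLE_*
+ // values passed in by the caller): a BaseAddressx entry carrying an address
+ // pool index when a base is shared (emitted above), then per range either
+ //   OffsetPair   <begin - base> <end - base>   (ULEB128 offsets), or
+ //   StartxLength <address pool index> <length> when no base is used.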
+ for (const auto *RS : P.second) {
+ const MCSymbol *Begin = RS->Begin;
+ const MCSymbol *End = RS->End;
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
+ if (Base) {
+ if (UseDwarf5) {
+ // Emit offset_pair when we have a base.
+ Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair));
+ Asm->emitInt8(OffsetPair);
+ Asm->OutStreamer->AddComment(" starting offset");
+ Asm->emitLabelDifferenceAsULEB128(Begin, Base);
+ Asm->OutStreamer->AddComment(" ending offset");
+ Asm->emitLabelDifferenceAsULEB128(End, Base);
+ } else {
+ Asm->emitLabelDifference(Begin, Base, Size);
+ Asm->emitLabelDifference(End, Base, Size);
+ }
+ } else if (UseDwarf5) {
+ Asm->OutStreamer->AddComment(StringifyEnum(StartxLength));
+ Asm->emitInt8(StartxLength);
+ Asm->OutStreamer->AddComment(" start index");
+ Asm->emitULEB128(DD.getAddressPool().getIndex(Begin));
+ Asm->OutStreamer->AddComment(" length");
+ Asm->emitLabelDifferenceAsULEB128(End, Begin);
+ } else {
+ Asm->OutStreamer->emitSymbolValue(Begin, Size);
+ Asm->OutStreamer->emitSymbolValue(End, Size);
+ }
+ EmitPayload(*RS);
+ }
+ }
+
+ if (UseDwarf5) {
+ Asm->OutStreamer->AddComment(StringifyEnum(EndOfList));
+ Asm->emitInt8(EndOfList);
+ } else {
+ // Terminate the list with two 0 values.
+ Asm->OutStreamer->emitIntValue(0, Size);
+ Asm->OutStreamer->emitIntValue(0, Size);
+ }
+}
+
+// Handles emission of both debug_loclists and debug_loclists.dwo.
+static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) {
+ emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List),
+ *List.CU, dwarf::DW_LLE_base_addressx,
+ dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length,
+ dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString,
+ /* ShouldUseBaseAddress */ true,
+ [&](const DebugLocStream::Entry &E) {
+ DD.emitDebugLocEntryLocation(E, List.CU);
+ });
+}
+
+void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
+ if (DebugLocs.getLists().empty())
+ return;
+
+ Asm->OutStreamer->switchSection(Sec);
+
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5)
+ TableEnd = emitLoclistsTableHeader(Asm, *this);
+
+ for (const auto &List : DebugLocs.getLists())
+ emitLocList(*this, Asm, List);
+
+ if (TableEnd)
+ Asm->OutStreamer->emitLabel(TableEnd);
+}
+
+// Emit locations into the .debug_loc/.debug_loclists section.
+void DwarfDebug::emitDebugLoc() {
+ emitDebugLocImpl(
+ getDwarfVersion() >= 5
+ ? Asm->getObjFileLowering().getDwarfLoclistsSection()
+ : Asm->getObjFileLowering().getDwarfLocSection());
+}
+
+// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section.
+void DwarfDebug::emitDebugLocDWO() {
+ if (getDwarfVersion() >= 5) {
+ emitDebugLocImpl(
+ Asm->getObjFileLowering().getDwarfLoclistsDWOSection());
+
+ return;
+ }
+
+ for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->switchSection(
+ Asm->getObjFileLowering().getDwarfLocDWOSection());
+ Asm->OutStreamer->emitLabel(List.Label);
+
+ for (const auto &Entry : DebugLocs.getEntries(List)) {
+ // GDB only supports startx_length in pre-standard split-DWARF.
+ // (in v5 standard loclists, it currently* /only/ supports base_address +
+ // offset_pair, so the implementations can't really share much since they
+ // need to use different representations)
+ // * as of October 2018, at least
+ //
+ // In v5 (see emitLocList), this uses SectionLabels to reuse existing
+ // addresses in the address pool to minimize object size/relocations.
+ Asm->emitInt8(dwarf::DW_LLE_startx_length);
+ unsigned idx = AddrPool.getIndex(Entry.Begin);
+ Asm->emitULEB128(idx);
+ // The pre-standard encoding also differs slightly: the length is emitted
+ // here as a fixed 4-byte value, whereas it is a ULEB128 in DWARFv5
+ // loclists.
+ Asm->emitLabelDifference(Entry.End, Entry.Begin, 4);
+ emitDebugLocEntryLocation(Entry, List.CU);
+ }
+ Asm->emitInt8(dwarf::DW_LLE_end_of_list);
+ }
+}
+
+struct ArangeSpan {
+ const MCSymbol *Start, *End;
+};
+
+// Emit a debug aranges section, containing a CU lookup for any
+// address we can tie back to a CU.
+void DwarfDebug::emitDebugARanges() {
+ // Provides a unique id per text section.
+ MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
+
+ // Filter labels by section.
+ for (const SymbolCU &SCU : ArangeLabels) {
+ if (SCU.Sym->isInSection()) {
+ // Make a note of this symbol and its section.
+ MCSection *Section = &SCU.Sym->getSection();
+ if (!Section->getKind().isMetadata())
+ SectionMap[Section].push_back(SCU);
+ } else {
+ // Some symbols (e.g. common/bss on mach-o) can have no section but still
+ // appear in the output. This is unfortunate, as we rely on sections to
+ // build arange spans; we can manage without, but it is awkward.
+ SectionMap[nullptr].push_back(SCU);
+ }
+ }
+
+ DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
+
+ for (auto &I : SectionMap) {
+ MCSection *Section = I.first;
+ SmallVector<SymbolCU, 8> &List = I.second;
+ if (List.empty())
+ continue;
+
+ // If we have no section (e.g. common), just write out
+ // individual spans for each symbol.
+ if (!Section) {
+ for (const SymbolCU &Cur : List) {
+ ArangeSpan Span;
+ Span.Start = Cur.Sym;
+ Span.End = nullptr;
+ assert(Cur.CU);
+ Spans[Cur.CU].push_back(Span);
+ }
+ continue;
+ }
+
+ // Sort the symbols by offset within the section.
+ llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
+ unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0;
+
+ // Symbols with no order assigned should be placed at the end.
+ // (e.g. section end labels)
+ if (IA == 0)
+ return false;
+ if (IB == 0)
+ return true;
+ return IA < IB;
+ });
+
+ // Insert a final terminator.
+ List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
+
+ // Build spans between each label.
+ const MCSymbol *StartSym = List[0].Sym;
+ for (size_t n = 1, e = List.size(); n < e; n++) {
+ const SymbolCU &Prev = List[n - 1];
+ const SymbolCU &Cur = List[n];
+
+ // Try and build the longest span we can within the same CU.
+ if (Cur.CU != Prev.CU) {
+ ArangeSpan Span;
+ Span.Start = StartSym;
+ Span.End = Cur.Sym;
+ assert(Prev.CU);
+ Spans[Prev.CU].push_back(Span);
+ StartSym = Cur.Sym;
+ }
+ }
+ }
+
+ // Start the dwarf aranges section.
+ Asm->OutStreamer->switchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+
+ unsigned PtrSize = Asm->MAI->getCodePointerSize();
+
+ // Build a list of CUs used.
+ std::vector<DwarfCompileUnit *> CUs;
+ for (const auto &it : Spans) {
+ DwarfCompileUnit *CU = it.first;
+ CUs.push_back(CU);
+ }
+
+ // Sort the CU list (again, to ensure consistent output order).
+ llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
+
+ // Emit an arange table for each CU we used.
+ for (DwarfCompileUnit *CU : CUs) {
+ std::vector<ArangeSpan> &List = Spans[CU];
+
+ // Describe the skeleton CU's offset and length, not the dwo file's.
+ if (auto *Skel = CU->getSkeleton())
+ CU = Skel;
+
+ // Emit size of content not including length itself.
+ unsigned ContentSize =
+ sizeof(int16_t) + // DWARF ARange version number
+ Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info
+ // section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int8_t); // Segment Size (in bytes)
+
+ unsigned TupleSize = PtrSize * 2;
+
+ // Section 7.20 of the DWARF spec requires the table to be aligned to a
+ // tuple boundary.
+ unsigned Padding = offsetToAlignment(
+ Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize));
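+ // Worked example (assuming DWARF32 and 8-byte pointers): the header is
+ // 4 (unit length) + 2 (version) + 4 (CU offset) + 1 + 1 = 12 bytes and
+ // TupleSize is 16, so Padding is 4, placing the first address/length tuple
+ // on a 16-byte boundary.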
+
+ ContentSize += Padding;
+ ContentSize += (List.size() + 1) * TupleSize;
+
+ // For each compile unit, write the list of spans it covers.
+ Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set");
+ Asm->OutStreamer->AddComment("DWARF Arange version number");
+ Asm->emitInt16(dwarf::DW_ARANGES_VERSION);
+ Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
+ emitSectionReference(*CU);
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->emitInt8(PtrSize);
+ Asm->OutStreamer->AddComment("Segment Size (in bytes)");
+ Asm->emitInt8(0);
+
+ Asm->OutStreamer->emitFill(Padding, 0xff);
+
+ for (const ArangeSpan &Span : List) {
+ Asm->emitLabelReference(Span.Start, PtrSize);
+
+ // Calculate the size as being from the span start to its end.
+ //
+ // If the size is zero, then round it up to one byte. The DWARF
+ // specification requires that entries in this table have nonzero
+ // lengths.
+ auto SizeRef = SymSize.find(Span.Start);
+ if ((SizeRef == SymSize.end() || SizeRef->second != 0) && Span.End) {
+ Asm->emitLabelDifference(Span.End, Span.Start, PtrSize);
+ } else {
+ // For symbols without an end marker (e.g. common), we
+ // write a single arange entry containing just that one symbol.
+ uint64_t Size;
+ if (SizeRef == SymSize.end() || SizeRef->second == 0)
+ Size = 1;
+ else
+ Size = SizeRef->second;
+
+ Asm->OutStreamer->emitIntValue(Size, PtrSize);
+ }
+ }
+
+ Asm->OutStreamer->AddComment("ARange terminator");
+ Asm->OutStreamer->emitIntValue(0, PtrSize);
+ Asm->OutStreamer->emitIntValue(0, PtrSize);
+ }
+}
+
+/// Emit a single range list. We handle both DWARF v5 and earlier.
+static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
+ const RangeSpanList &List) {
+ emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU,
+ dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
+ dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
+ llvm::dwarf::RangeListEncodingString,
+ List.CU->getCUNode()->getRangesBaseAddress() ||
+ DD.getDwarfVersion() >= 5,
+ [](auto) {});
+}
+
+void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) {
+ if (Holder.getRangeLists().empty())
+ return;
+
+ assert(useRangesSection());
+ assert(!CUMap.empty());
+ assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return !Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
+
+ Asm->OutStreamer->switchSection(Section);
+
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5)
+ TableEnd = emitRnglistsTableHeader(Asm, Holder);
+
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ emitRangeList(*this, Asm, List);
+
+ if (TableEnd)
+ Asm->OutStreamer->emitLabel(TableEnd);
+}
+
+/// Emit address ranges into the .debug_ranges section or into the DWARF v5
+/// .debug_rnglists section.
+void DwarfDebug::emitDebugRanges() {
+ const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ emitDebugRangesImpl(Holder,
+ getDwarfVersion() >= 5
+ ? Asm->getObjFileLowering().getDwarfRnglistsSection()
+ : Asm->getObjFileLowering().getDwarfRangesSection());
+}
+
+void DwarfDebug::emitDebugRangesDWO() {
+ emitDebugRangesImpl(InfoHolder,
+ Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
+}
+
+/// Emit the header of a DWARF 5 macro section, or the GNU extension for
+/// DWARF 4.
+static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD,
+ const DwarfCompileUnit &CU, uint16_t DwarfVersion) {
+ enum HeaderFlagMask {
+#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
+ };
+ Asm->OutStreamer->AddComment("Macro information version");
+ Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4);
+ // We emit the line offset flag unconditionally here, since the line offset
+ // is almost always present.
+ if (Asm->isDwarf64()) {
+ Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present");
+ Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET);
+ } else {
+ Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
+ Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET);
+ }
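+ // At this point the 2-byte version and 1-byte flags have been emitted; the
+ // remainder of the header is the offset-sized debug_line offset below.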
+ Asm->OutStreamer->AddComment("debug_line_offset");
+ if (DD.useSplitDwarf())
+ Asm->emitDwarfLengthOrOffset(0);
+ else
+ Asm->emitDwarfSymbolReference(CU.getLineTableStartSym());
+}
+
+void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
+ for (auto *MN : Nodes) {
+ if (auto *M = dyn_cast<DIMacro>(MN))
+ emitMacro(*M);
+ else if (auto *F = dyn_cast<DIMacroFile>(MN))
+ emitMacroFile(*F, U);
+ else
+ llvm_unreachable("Unexpected DI type!");
+ }
+}
+
+void DwarfDebug::emitMacro(DIMacro &M) {
+ StringRef Name = M.getName();
+ StringRef Value = M.getValue();
+
+ // There should be one space between the macro name and the macro value in
+ // define entries. In undef entries, only the macro name is emitted.
+ std::string Str = Value.empty() ? Name.str() : (Name + " " + Value).str();
+
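+ // In the DWARF v5 strx forms emitted below, each define/undef entry is the
+ // opcode, a ULEB128 line number, and a ULEB128 index of Str in the string
+ // offsets table; the older forms emit a string offset or the inline string
+ // instead.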
+ if (UseDebugMacroSection) {
+ if (getDwarfVersion() >= 5) {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_define_strx
+ : dwarf::DW_MACRO_undef_strx;
+ Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->emitULEB128(
+ InfoHolder.getStringPool().getIndexedEntry(*Asm, Str).getIndex());
+ } else {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_GNU_define_indirect
+ : dwarf::DW_MACRO_GNU_undef_indirect;
+ Asm->OutStreamer->AddComment(dwarf::GnuMacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->emitDwarfSymbolReference(
+ InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol());
+ }
+ } else {
+ Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType()));
+ Asm->emitULEB128(M.getMacinfoType());
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->OutStreamer->emitBytes(Str);
+ Asm->emitInt8('\0');
+ }
+}
+
+void DwarfDebug::emitMacroFileImpl(
+ DIMacroFile &MF, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
+ StringRef (*MacroFormToString)(unsigned Form)) {
+
+ Asm->OutStreamer->AddComment(MacroFormToString(StartFile));
+ Asm->emitULEB128(StartFile);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(MF.getLine());
+ Asm->OutStreamer->AddComment("File Number");
+ DIFile &F = *MF.getFile();
+ if (useSplitDwarf())
+ Asm->emitULEB128(getDwoLineTable(U)->getFile(
+ F.getDirectory(), F.getFilename(), getMD5AsBytes(&F),
+ Asm->OutContext.getDwarfVersion(), F.getSource()));
+ else
+ Asm->emitULEB128(U.getOrCreateSourceID(&F));
+ handleMacroNodes(MF.getElements(), U);
+ Asm->OutStreamer->AddComment(MacroFormToString(EndFile));
+ Asm->emitULEB128(EndFile);
+}
+
+void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
+ // DWARFv5 macro and DWARFv4 macinfo share some common encodings; for
+ // readability/uniformity, we emit those explicitly.
+ assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
+ if (UseDebugMacroSection)
+ emitMacroFileImpl(
+ F, U, dwarf::DW_MACRO_start_file, dwarf::DW_MACRO_end_file,
+ (getDwarfVersion() >= 5) ? dwarf::MacroString : dwarf::GnuMacroString);
+ else
+ emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file,
+ dwarf::DW_MACINFO_end_file, dwarf::MacinfoString);
+}
+
+void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
+ auto *SkCU = TheCU.getSkeleton();
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ DIMacroNodeArray Macros = CUNode->getMacros();
+ if (Macros.empty())
+ continue;
+ Asm->OutStreamer->switchSection(Section);
+ Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
+ if (UseDebugMacroSection)
+ emitMacroHeader(Asm, *this, U, getDwarfVersion());
+ handleMacroNodes(Macros, U);
+ Asm->OutStreamer->AddComment("End Of Macro List Mark");
+ Asm->emitInt8(0);
+ }
+}
+
+/// Emit macros into a debug macinfo/macro section.
+void DwarfDebug::emitDebugMacinfo() {
+ auto &ObjLower = Asm->getObjFileLowering();
+ emitDebugMacinfoImpl(UseDebugMacroSection
+ ? ObjLower.getDwarfMacroSection()
+ : ObjLower.getDwarfMacinfoSection());
+}
+
+void DwarfDebug::emitDebugMacinfoDWO() {
+ auto &ObjLower = Asm->getObjFileLowering();
+ emitDebugMacinfoImpl(UseDebugMacroSection
+ ? ObjLower.getDwarfMacroDWOSection()
+ : ObjLower.getDwarfMacinfoDWOSection());
+}
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+ std::unique_ptr<DwarfCompileUnit> NewU) {
+
+ if (!CompilationDir.empty())
+ NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ addGnuPubAttributes(*NewU, Die);
+
+ SkeletonHolder.addUnit(std::move(NewU));
+}
+
+DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
+
+ auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
+ CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder,
+ UnitKind::Skeleton);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
+
+ NewCU.initStmtList();
+
+ if (useSegmentedStringOffsetsTable())
+ NewCU.addStringOffsetsStart();
+
+ initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
+
+ return NewCU;
+}
+
+// Emit the .debug_info.dwo section for separated dwarf. This contains the
+// compile units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ // Don't emit relocations into the dwo file.
+ InfoHolder.emitUnits(/* UseOffsets */ true);
+}
+
+// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
+// abbreviations for the .debug_info.dwo section.
+void DwarfDebug::emitDebugAbbrevDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
+}
+
+void DwarfDebug::emitDebugLineDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ SplitTypeUnitFileTable.Emit(
+ *Asm->OutStreamer, MCDwarfLineTableParams(),
+ Asm->getObjFileLowering().getDwarfLineDWOSection());
+}
+
+void DwarfDebug::emitStringOffsetsTableHeaderDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ InfoHolder.getStringPool().emitStringOffsetsTableHeader(
+ *Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(),
+ InfoHolder.getStringOffsetsStartSym());
+}
+
+// Emit the .debug_str.dwo section for separated dwarf. This contains the
+// string section and is identical in format to traditional .debug_str
+// sections.
+void DwarfDebug::emitDebugStrDWO() {
+ if (useSegmentedStringOffsetsTable())
+ emitStringOffsetsTableHeaderDWO();
+ assert(useSplitDwarf() && "No split dwarf?");
+ MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
+ InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
+ OffSec, /* UseRelativeOffsets = */ false);
+}
+
+// Emit address pool.
+void DwarfDebug::emitDebugAddr() {
+ AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
+}
+
+MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
+ if (!useSplitDwarf())
+ return nullptr;
+ const DICompileUnit *DIUnit = CU.getCUNode();
+ SplitTypeUnitFileTable.maybeSetRootFile(
+ DIUnit->getDirectory(), DIUnit->getFilename(),
+ getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
+ return &SplitTypeUnitFileTable;
+}
+
+uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
+ MD5 Hash;
+ Hash.update(Identifier);
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+ return Result.high();
+}
+
+void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
+ StringRef Identifier, DIE &RefDie,
+ const DICompositeType *CTy) {
+ // Fast path: if we're building some type units and one has already used the
+ // address pool, we know we're going to throw away all this work anyway, so
+ // don't bother building dependent types.
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
+ return;
+
+ auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
+ if (!Ins.second) {
+ CU.addDIETypeSignature(RefDie, Ins.first->second);
+ return;
+ }
+
+ bool TopLevelType = TypeUnitsUnderConstruction.empty();
+ AddrPool.resetUsedFlag();
+
+ auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+ getDwoLineTable(CU));
+ DwarfTypeUnit &NewTU = *OwnedUnit;
+ DIE &UnitDie = NewTU.getUnitDie();
+ TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
+
+ NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ CU.getLanguage());
+
+ uint64_t Signature = makeTypeSignature(Identifier);
+ NewTU.setTypeSignature(Signature);
+ Ins.first->second = Signature;
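+ // This 64-bit signature is also what references to the type unit use (via
+ // addDIETypeSignature below), and it is written into the type unit header.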
+
+ if (useSplitDwarf()) {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesDWOSection()
+ : Asm->getObjFileLowering().getDwarfInfoDWOSection();
+ NewTU.setSection(Section);
+ } else {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesSection(Signature)
+ : Asm->getObjFileLowering().getDwarfInfoSection(Signature);
+ NewTU.setSection(Section);
+ // Non-split type units reuse the compile unit's line table.
+ CU.applyStmtList(UnitDie);
+ }
+
+ // Add DW_AT_str_offsets_base to the type unit DIE, but not for split type
+ // units.
+ if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
+ NewTU.addStringOffsetsStart();
+
+ NewTU.setType(NewTU.createTypeDIE(CTy));
+
+ if (TopLevelType) {
+ auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
+ TypeUnitsUnderConstruction.clear();
+
+ // Types referencing entries in the address table cannot be placed in type
+ // units.
+ if (AddrPool.hasBeenUsed()) {
+
+ // Remove all the types built while building this type.
+ // This is pessimistic as some of these types might not be dependent on
+ // the type that used an address.
+ for (const auto &TU : TypeUnitsToAdd)
+ TypeSignatures.erase(TU.second);
+
+ // Construct this type in the CU directly.
+ // This is inefficient because all the dependent types will be rebuilt
+ // from scratch, including building them in type units, discovering that
+ // they depend on addresses, throwing them out and rebuilding them.
+ CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
+ return;
+ }
+
+ // If the type wasn't dependent on fission addresses, finish adding the type
+ // and all its dependent types.
+ for (auto &TU : TypeUnitsToAdd) {
+ InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
+ InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
+ }
+ }
+ CU.addDIETypeSignature(RefDie, Signature);
+}
+
+// Add the Name along with its companion DIE to the appropriate accelerator
+// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
+// AccelTableKind::Apple, we use the table we got as an argument). If
+// accelerator tables are disabled, this function does nothing.
+template <typename DataT>
+void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
+ AccelTable<DataT> &AppleAccel, StringRef Name,
+ const DIE &Die) {
+ if (getAccelTableKind() == AccelTableKind::None || Name.empty())
+ return;
+
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
+ return;
+
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
+
+ switch (getAccelTableKind()) {
+ case AccelTableKind::Apple:
+ AppleAccel.addName(Ref, Die);
+ break;
+ case AccelTableKind::Dwarf:
+ AccelDebugNames.addName(Ref, Die);
+ break;
+ case AccelTableKind::Default:
+ llvm_unreachable("Default should have already been resolved.");
+ case AccelTableKind::None:
+ llvm_unreachable("None handled above");
+ }
+}
+
+void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNames, Name, Die);
+}
+
+void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ // ObjC names go only into the Apple accelerator tables.
+ if (getAccelTableKind() == AccelTableKind::Apple)
+ addAccelNameImpl(CU, AccelObjC, Name, Die);
+}
+
+void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNamespace, Name, Die);
+}
+
+void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die, char Flags) {
+ addAccelNameImpl(CU, AccelTypes, Name, Die);
+}
+
+uint16_t DwarfDebug::getDwarfVersion() const {
+ return Asm->OutStreamer->getContext().getDwarfVersion();
+}
+
+dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
+ if (Asm->getDwarfVersion() >= 4)
+ return dwarf::Form::DW_FORM_sec_offset;
+ assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) &&
+ "DWARF64 is not defined prior to DWARFv3");
+ return Asm->isDwarf64() ? dwarf::Form::DW_FORM_data8
+ : dwarf::Form::DW_FORM_data4;
+}
+
+const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
+ return SectionLabels.lookup(S);
+}
+
+void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
+ if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
+ if (useSplitDwarf() || getDwarfVersion() >= 5)
+ AddrPool.getIndex(S);
+}
+
+std::optional<MD5::MD5Result>
+DwarfDebug::getMD5AsBytes(const DIFile *File) const {
+ assert(File);
+ if (getDwarfVersion() < 5)
+ return std::nullopt;
+ std::optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
+ if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
+ return std::nullopt;
+
+ // Convert the string checksum to an MD5Result for the streamer.
+ // The verifier validates the checksum so we assume it's okay.
+ // An MD5 checksum is 16 bytes.
+ std::string ChecksumString = fromHex(Checksum->Value);
+ MD5::MD5Result CKMem;
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data());
+ return CKMem;
+}
+
+bool DwarfDebug::alwaysUseRanges(const DwarfCompileUnit &CU) const {
+ if (MinimizeAddr == MinimizeAddrInV5::Ranges)
+ return true;
+ if (MinimizeAddr != MinimizeAddrInV5::Default)
+ return false;
+ if (useSplitDwarf())
+ return true;
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 000000000000..1af4b643eb17
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,849 @@
+//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+
+#include "AddressPool.h"
+#include "DebugLocEntry.h"
+#include "DebugLocStream.h"
+#include "DwarfFile.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class AsmPrinter;
+class ByteStreamer;
+class DIE;
+class DwarfCompileUnit;
+class DwarfExpression;
+class DwarfTypeUnit;
+class DwarfUnit;
+class LexicalScope;
+class MachineFunction;
+class MCSection;
+class MCSymbol;
+class Module;
+
+//===----------------------------------------------------------------------===//
+/// This class is defined as the common parent of DbgVariable and DbgLabel
+/// so that it can leverage polymorphism to share common code between
+/// DbgVariable and DbgLabel.
+class DbgEntity {
+public:
+ enum DbgEntityKind {
+ DbgVariableKind,
+ DbgLabelKind
+ };
+
+private:
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ const DbgEntityKind SubclassID;
+
+public:
+ DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+ virtual ~DbgEntity() = default;
+
+ /// Accessors.
+ /// @{
+ const DINode *getEntity() const { return Entity; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+ DIE *getDIE() const { return TheDIE; }
+ DbgEntityKind getDbgEntityID() const { return SubclassID; }
+ /// @}
+
+ void setDIE(DIE &D) { TheDIE = &D; }
+
+ static bool classof(const DbgEntity *N) {
+ switch (N->getDbgEntityID()) {
+ case DbgVariableKind:
+ case DbgLabelKind:
+ return true;
+ }
+ llvm_unreachable("Invalid DbgEntityKind");
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// This class is used to track local variable information.
+///
+/// Variables can be created from allocas, in which case they're generated from
+/// the MMI table. Such variables can have multiple expressions and frame
+/// indices.
+///
+/// Variables can be created from \c DBG_VALUE instructions. Those whose
+/// location changes over time use \a DebugLocListIndex, while those with a
+/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
+///
+/// Variables that have been optimized out use none of these fields.
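+///
+/// A rough usage sketch (names here are illustrative only):
+/// \code
+///   DbgVariable Var(DIVar, InlinedAt);
+///   Var.initializeMMI(Expr, FrameIndex);   // or initializeDbgValue(...)
+/// \endcode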
+class DbgVariable : public DbgEntity {
+ /// Index of the entry list in DebugLocs.
+ unsigned DebugLocListIndex = ~0u;
+ /// DW_OP_LLVM_tag_offset value from DebugLocs.
+ std::optional<uint8_t> DebugLocListTagOffset;
+
+ /// Single value location description.
+ std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
+
+ struct FrameIndexExpr {
+ int FI;
+ const DIExpression *Expr;
+ };
+ mutable SmallVector<FrameIndexExpr, 1>
+ FrameIndexExprs; /// Frame index + expression.
+
+public:
+ /// Construct a DbgVariable.
+ ///
+ /// Creates a variable without any DW_AT_location. Call \a initializeMMI()
+ /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
+ DbgVariable(const DILocalVariable *V, const DILocation *IA)
+ : DbgEntity(V, IA, DbgVariableKind) {}
+
+ /// Initialize from the MMI table.
+ void initializeMMI(const DIExpression *E, int FI) {
+ assert(FrameIndexExprs.empty() && "Already initialized?");
+ assert(!ValueLoc.get() && "Already initialized?");
+
+ assert((!E || E->isValid()) && "Expected valid expression");
+ assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
+
+ FrameIndexExprs.push_back({FI, E});
+ }
+
+ /// Initialize the variable's location.
+ void initializeDbgValue(DbgValueLoc Value) {
+ assert(FrameIndexExprs.empty() && "Already initialized?");
+ assert(!ValueLoc && "Already initialized?");
+ assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
+
+ ValueLoc = std::make_unique<DbgValueLoc>(Value);
+ if (auto *E = ValueLoc->getExpression())
+ if (E->getNumElements())
+ FrameIndexExprs.push_back({0, E});
+ }
+
+ /// Initialize from a DBG_VALUE instruction.
+ void initializeDbgValue(const MachineInstr *DbgValue);
+
+ // Accessors.
+ const DILocalVariable *getVariable() const {
+ return cast<DILocalVariable>(getEntity());
+ }
+
+ const DIExpression *getSingleExpression() const {
+ assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
+ return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
+ }
+
+ void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
+ unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+ void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
+ std::optional<uint8_t> getDebugLocListTagOffset() const {
+ return DebugLocListTagOffset;
+ }
+ StringRef getName() const { return getVariable()->getName(); }
+ const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
+ /// Get the FI entries, sorted by fragment offset.
+ ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
+ bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
+ void addMMIEntry(const DbgVariable &V);
+
+ // Translate tag to proper Dwarf tag.
+ dwarf::Tag getTag() const {
+ // FIXME: Why don't we just infer this tag and store it all along?
+ if (getVariable()->isParameter())
+ return dwarf::DW_TAG_formal_parameter;
+
+ return dwarf::DW_TAG_variable;
+ }
+
+ /// Return true if DbgVariable is artificial.
+ bool isArtificial() const {
+ if (getVariable()->isArtificial())
+ return true;
+ if (getType()->isArtificial())
+ return true;
+ return false;
+ }
+
+ bool isObjectPointer() const {
+ if (getVariable()->isObjectPointer())
+ return true;
+ if (getType()->isObjectPointer())
+ return true;
+ return false;
+ }
+
+ bool hasComplexAddress() const {
+ assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
+ assert((FrameIndexExprs.empty() ||
+ (FrameIndexExprs.size() == 1 &&
+ FrameIndexExprs[0].Expr->getNumElements())) &&
+ "Invalid Expr for DBG_VALUE");
+ return !FrameIndexExprs.empty();
+ }
+
+ const DIType *getType() const;
+
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgVariableKind;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// This class is used to track label information.
+///
+/// Labels are collected from \c DBG_LABEL instructions.
+class DbgLabel : public DbgEntity {
+ const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction.
+
+public:
+ /// We need MCSymbol information to generate DW_AT_low_pc.
+ DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr)
+ : DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {}
+
+ /// Accessors.
+ /// @{
+ const DILabel *getLabel() const { return cast<DILabel>(getEntity()); }
+ const MCSymbol *getSymbol() const { return Sym; }
+
+ StringRef getName() const { return getLabel()->getName(); }
+ /// @}
+
+ /// Translate tag to proper Dwarf tag.
+ dwarf::Tag getTag() const {
+ return dwarf::DW_TAG_label;
+ }
+
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgLabelKind;
+ }
+};
+
+/// Used for tracking debug info about call site parameters.
+class DbgCallSiteParam {
+private:
+ unsigned Register; ///< Parameter register at the callee entry point.
+ DbgValueLoc Value; ///< Corresponding location for the parameter value at
+ ///< the call site.
+public:
+ DbgCallSiteParam(unsigned Reg, DbgValueLoc Val)
+ : Register(Reg), Value(Val) {
+ assert(Reg && "Parameter register cannot be undef");
+ }
+
+ unsigned getRegister() const { return Register; }
+ DbgValueLoc getValue() const { return Value; }
+};
+
+/// Collection used for storing debug call site parameters.
+using ParamSet = SmallVector<DbgCallSiteParam, 4>;
+
+/// Helper used to pair up a symbol and its DWARF compile unit.
+struct SymbolCU {
+ SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+
+ const MCSymbol *Sym;
+ DwarfCompileUnit *CU;
+};
+
+/// The kind of accelerator tables we should emit.
+enum class AccelTableKind {
+ Default, ///< Platform default.
+ None, ///< None.
+ Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
+ Dwarf, ///< DWARF v5 .debug_names.
+};
+
+/// Collects and handles dwarf debug information.
+class DwarfDebug : public DebugHandlerBase {
+ /// All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ /// Maps MDNode with its corresponding DwarfCompileUnit.
+ MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
+
+ /// Maps a CU DIE with its corresponding DwarfCompileUnit.
+ DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
+
+ /// List of all labels used in aranges generation.
+ std::vector<SymbolCU> ArangeLabels;
+
+ /// Size of each symbol emitted (for those symbols that have a specific size).
+ DenseMap<const MCSymbol *, uint64_t> SymSize;
+
+ /// Collection of concrete variables/labels.
+ SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities;
+
+ /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
+ /// can refer to them in spite of insertions into this list.
+ DebugLocStream DebugLocs;
+
+ /// This is a collection of subprogram MDNodes that are processed to
+ /// create DIEs.
+ SmallSetVector<const DISubprogram *, 16> ProcessedSPNodes;
+
+ /// Map function-local imported entities to their parent local scope
+ /// (either DILexicalBlock or DISubprogram) for a processed function
+ /// (including inlined subprograms).
+ using MDNodeSet = SetVector<const MDNode *, SmallVector<const MDNode *, 2>,
+ SmallPtrSet<const MDNode *, 2>>;
+ DenseMap<const DILocalScope *, MDNodeSet> LocalDeclsPerLS;
+
+ /// If nonnull, stores the current machine function we're processing.
+ const MachineFunction *CurFn = nullptr;
+
+ /// If nonnull, stores the CU in which the previous subprogram was contained.
+ const DwarfCompileUnit *PrevCU = nullptr;
+
+ /// As an optimization, there is no need to emit an entry in the directory
+ /// table for the same directory as DW_AT_comp_dir.
+ StringRef CompilationDir;
+
+ /// Holder for the file specific debug information.
+ DwarfFile InfoHolder;
+
+ /// Holders for the various debug information flags that we might need to
+ /// have exposed. See accessor functions below for description.
+
+ /// Map from MDNodes for user-defined types to their type signatures. Also
+ /// used to keep track of which types we have emitted type units for.
+ DenseMap<const MDNode *, uint64_t> TypeSignatures;
+
+ DenseMap<const MCSection *, const MCSymbol *> SectionLabels;
+
+ SmallVector<
+ std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
+ TypeUnitsUnderConstruction;
+
+ /// Whether to use the GNU TLS opcode (instead of the standard opcode).
+ bool UseGNUTLSOpcode;
+
+ /// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format).
+ bool UseDWARF2Bitfields;
+
+ /// Whether to emit all linkage names, or just abstract subprograms.
+ bool UseAllLinkageNames;
+
+ /// Use inlined strings.
+ bool UseInlineStrings = false;
+
+ /// Allow emission of .debug_ranges section.
+ bool UseRangesSection = true;
+
+ /// True if the sections themselves must be used as references, rather than
+ /// creating temporary symbols inside DWARF sections.
+ bool UseSectionsAsReferences = false;
+
+ /// Allow emission of the .debug_loc section.
+ bool UseLocSection = true;
+
+ /// Generate DWARF v4 type units.
+ bool GenerateTypeUnits;
+
+ /// Emit a .debug_macro section instead of .debug_macinfo.
+ bool UseDebugMacroSection;
+
+ /// Whether to use DW_OP_convert; it is avoided for consumers with known
+ /// incompatibilities.
+ bool EnableOpConvert;
+
+public:
+ enum class MinimizeAddrInV5 {
+ Default,
+ Disabled,
+ Ranges,
+ Expressions,
+ Form,
+ };
+
+private:
+ /// Force the use of DW_AT_ranges even for single-entry range lists.
+ MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled;
+
+ /// DWARF5 Experimental Options
+ /// @{
+ AccelTableKind TheAccelTableKind;
+ bool HasAppleExtensionAttributes;
+ bool HasSplitDwarf;
+
+ /// Whether to generate the DWARF v5 string offsets table.
+ /// It consists of a series of contributions, each preceded by a header.
+ /// The pre-DWARF v5 string offsets table for split dwarf is, in contrast,
+ /// a monolithic sequence of string offsets.
+ bool UseSegmentedStringOffsetsTable;
+
+ /// Enable production of call site parameters needed to print the debug entry
+ /// values. Useful for testing purposes when a debugger does not support the
+ /// feature yet.
+ bool EmitDebugEntryValues;
+
+ /// Separated Dwarf Variables
+ /// In general these will all be for bits that are left in the
+ /// original object file, rather than things that are meant
+ /// to be in the .dwo sections.
+
+ /// Holder for the skeleton information.
+ DwarfFile SkeletonHolder;
+
+ /// Store file names for type units under fission in a line table
+ /// header that will be emitted into debug_line.dwo.
+ // FIXME: replace this with a map from comp_dir to table so that we
+ // can emit multiple tables during LTO each of which uses directory
+ // 0, referencing the comp_dir of all the type units that use it.
+ MCDwarfDwoLineTable SplitTypeUnitFileTable;
+ /// @}
+
+ /// True iff there is only a single CU in this module.
+ bool SingleCU;
+ bool IsDarwin;
+
+ /// Map for tracking Fortran deferred CHARACTER lengths.
+ DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
+
+ AddressPool AddrPool;
+
+ /// Accelerator tables.
+ AccelTable<DWARF5AccelTableData> AccelDebugNames;
+ AccelTable<AppleAccelTableOffsetData> AccelNames;
+ AccelTable<AppleAccelTableOffsetData> AccelObjC;
+ AccelTable<AppleAccelTableOffsetData> AccelNamespace;
+ AccelTable<AppleAccelTableTypeData> AccelTypes;
+
+ /// Identify a debugger for "tuning" the debug info.
+ ///
+ /// The "tuning" should be used to set defaults for individual feature flags
+ /// in DwarfDebug; if a given feature has a more specific command-line option,
+ /// that option should take precedence over the tuning.
+ DebuggerKind DebuggerTuning = DebuggerKind::Default;
+
+ MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
+
+ const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
+ return InfoHolder.getUnits();
+ }
+
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+
+ void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
+
+ DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym = nullptr);
+
+ /// Construct a DIE for this abstract scope.
+ void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+
+ /// Construct DIEs for call site entries describing the calls in \p MF.
+ void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
+ DIE &ScopeDIE, const MachineFunction &MF);
+
+ template <typename DataT>
+ void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
+ StringRef Name, const DIE &Die);
+
+ void finishEntityDefinitions();
+
+ void finishSubprogramDefinitions();
+
+ /// Finish off debug information after all functions have been
+ /// processed.
+ void finalizeModuleInfo();
+
+ /// Emit the debug info section.
+ void emitDebugInfo();
+
+ /// Emit the abbreviation section.
+ void emitAbbreviations();
+
+ /// Emit the string offsets table header.
+ void emitStringOffsetsTableHeader();
+
+ /// Emit a specified accelerator table.
+ template <typename AccelTableT>
+ void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName);
+
+ /// Emit DWARF v5 accelerator table.
+ void emitAccelDebugNames();
+
+ /// Emit visible names into a hashed accelerator table section.
+ void emitAccelNames();
+
+ /// Emit objective C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// Emit namespace dies into a hashed accelerator table.
+ void emitAccelNamespaces();
+
+ /// Emit type dies into a hashed accelerator table.
+ void emitAccelTypes();
+
+ /// Emit visible names and types into debug pubnames and pubtypes sections.
+ void emitDebugPubSections();
+
+ void emitDebugPubSection(bool GnuStyle, StringRef Name,
+ DwarfCompileUnit *TheU,
+ const StringMap<const DIE *> &Globals);
+
+ /// Emit null-terminated strings into a debug str section.
+ void emitDebugStr();
+
+ /// Emit variable locations into a debug loc section.
+ void emitDebugLoc();
+
+ /// Emit variable locations into a debug loc dwo section.
+ void emitDebugLocDWO();
+
+ void emitDebugLocImpl(MCSection *Sec);
+
+ /// Emit address ranges into a debug aranges section.
+ void emitDebugARanges();
+
+ /// Emit address ranges into a debug ranges section.
+ void emitDebugRanges();
+ void emitDebugRangesDWO();
+ void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section);
+
+ /// Emit macros into a debug macinfo section.
+ void emitDebugMacinfo();
+ /// Emit macros into a debug macinfo.dwo section.
+ void emitDebugMacinfoDWO();
+ void emitDebugMacinfoImpl(MCSection *Section);
+ void emitMacro(DIMacro &M);
+ void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
+ void emitMacroFileImpl(DIMacroFile &F, DwarfCompileUnit &U,
+ unsigned StartFile, unsigned EndFile,
+ StringRef (*MacroFormToString)(unsigned Form));
+ void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
+
+ /// DWARF 5 Experimental Split Dwarf Emitters
+
+ /// Initialize common features of skeleton units.
+ void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+ std::unique_ptr<DwarfCompileUnit> NewU);
+
+ /// Construct the split debug info compile unit for the debug info section.
+ /// In DWARF v5, the skeleton unit DIE may have the following attributes:
+ /// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc,
+ /// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base.
+ /// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name
+ /// is used instead of DW_AT_dwo_name, DW_AT_GNU_addr_base instead of
+ /// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base.
+ DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
+
+ /// Emit the debug info dwo section.
+ void emitDebugInfoDWO();
+
+ /// Emit the debug abbrev dwo section.
+ void emitDebugAbbrevDWO();
+
+ /// Emit the debug line dwo section.
+ void emitDebugLineDWO();
+
+ /// Emit the dwo string offsets table header.
+ void emitStringOffsetsTableHeaderDWO();
+
+ /// Emit the debug str dwo section.
+ void emitDebugStrDWO();
+
+ /// Emit DWO addresses.
+ void emitDebugAddr();
+
+ /// Flags to let the linker know we have emitted new style pubnames. Only
+ /// emit it here if we don't have a skeleton CU for split dwarf.
+ void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
+
+ /// Create new DwarfCompileUnit for the given metadata node with tag
+ /// DW_TAG_compile_unit.
+ DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
+ void finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU);
+
+ /// Register a source line with debug info. The label that is emitted
+ /// provides correspondence to the source line list.
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
+
+ /// Populate LexicalScope entries with variables' info.
+ void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+ DenseSet<InlinedEntity> &ProcessedVars);
+
+ /// Build the location list for all DBG_VALUEs in the function that
+ /// describe the same variable. Returns true if the resulting list has
+ /// only one entry that is valid for the entire variable's scope.
+ bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries);
+
+ /// Collect variable information from the side table maintained by MF.
+ void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
+ DenseSet<InlinedEntity> &P);
+
+ /// Emit the reference to the section.
+ void emitSectionReference(const DwarfCompileUnit &CU);
+
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function debug information.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+ /// Get Dwarf compile unit ID for line table.
+ unsigned getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU);
+
+ void skippedNonDebugFunction() override;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A);
+
+ ~DwarfDebug() override;
+
+ /// Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule(Module *M) override;
+
+ /// Emit all Dwarf sections that should come after the content.
+ void endModule() override;
+
+ /// Emits the initial debug location directive.
+ DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+
+ /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
+ static uint64_t makeTypeSignature(StringRef Identifier);
+
+ /// Add a DIE to the set of types that we're going to pull into
+ /// type units.
+ void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
+ DIE &Die, const DICompositeType *CTy);
+
+ /// Add a label so that arange data can be generated for it.
+ void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
+
+ /// For symbols that have a size designated (e.g. common symbols),
+ /// this tracks that size.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {
+ SymSize[Sym] = Size;
+ }
+
+ /// Returns whether we should emit all DW_AT_[MIPS_]linkage_name.
+ /// If not, we still might emit certain cases.
+ bool useAllLinkageNames() const { return UseAllLinkageNames; }
+
+ /// Returns whether to use DW_OP_GNU_push_tls_address instead of the
+ /// standard DW_OP_form_tls_address opcode.
+ bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
+
+ /// Returns whether to use the DWARF2 format for bitfields instead of the
+ /// DWARF4 format.
+ bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; }
+
+ /// Returns whether to use inline strings.
+ bool useInlineStrings() const { return UseInlineStrings; }
+
+ /// Returns whether ranges section should be emitted.
+ bool useRangesSection() const { return UseRangesSection; }
+
+ /// Returns whether range encodings should be used for single entry range
+ /// lists.
+ bool alwaysUseRanges(const DwarfCompileUnit &) const;
+
+ // Returns whether novel exprloc addrx+offset encodings should be used to
+ // reduce debug_addr size.
+ bool useAddrOffsetExpressions() const {
+ return MinimizeAddr == MinimizeAddrInV5::Expressions;
+ }
+
+ // Returns whether addrx+offset LLVM extension form should be used to reduce
+ // debug_addr size.
+ bool useAddrOffsetForm() const {
+ return MinimizeAddr == MinimizeAddrInV5::Form;
+ }
+
+ /// Returns whether to use sections as labels rather than temp symbols.
+ bool useSectionsAsReferences() const {
+ return UseSectionsAsReferences;
+ }
+
+ /// Returns whether .debug_loc section should be emitted.
+ bool useLocSection() const { return UseLocSection; }
+
+ /// Returns whether to generate DWARF v4 type units.
+ bool generateTypeUnits() const { return GenerateTypeUnits; }
+
+ // Experimental DWARF5 features.
+
+ /// Returns what kind (if any) of accelerator tables to emit.
+ AccelTableKind getAccelTableKind() const { return TheAccelTableKind; }
+
+ bool useAppleExtensionAttributes() const {
+ return HasAppleExtensionAttributes;
+ }
+
+ /// Returns whether the debug info is emitted using the split DWARF
+ /// (Fission/.dwo) scheme.
+ bool useSplitDwarf() const { return HasSplitDwarf; }
+
+ /// Returns whether to generate a string offsets table with (possibly shared)
+ /// contributions from each CU and type unit. This implies the use of
+ /// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that
+ /// DW_FORM_GNU_str_index is also an indirect reference, but it is used with
+ /// a pre-DWARF v5 implementation of split DWARF sections, which uses a
+ /// monolithic string offsets table.
+ bool useSegmentedStringOffsetsTable() const {
+ return UseSegmentedStringOffsetsTable;
+ }
+
+ bool emitDebugEntryValues() const {
+ return EmitDebugEntryValues;
+ }
+
+ bool useOpConvert() const {
+ return EnableOpConvert;
+ }
+
+ bool shareAcrossDWOCUs() const;
+
+ /// Returns the Dwarf Version.
+ uint16_t getDwarfVersion() const;
+
+ /// Returns a suitable DWARF form to represent a section offset, i.e.
+ /// * DW_FORM_sec_offset for DWARF version >= 4;
+ /// * DW_FORM_data8 for 64-bit DWARFv3;
+ /// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2.
+ dwarf::Form getDwarfSectionOffsetForm() const;
+
+ /// Returns the previous CU that was being updated
+ const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
+ void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
+
+ /// Terminate the line table by adding the last range label.
+ void terminateLineTable(const DwarfCompileUnit *CU);
+
+ /// Returns the entries for the .debug_loc section.
+ const DebugLocStream &getDebugLocs() const { return DebugLocs; }
+
+ /// Emit an entry for the debug loc section. This can be used to
+ /// handle an entry that's going to be emitted into the debug loc section.
+ void emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU);
+
+ /// Emit the location for a debug loc entry, including the size header.
+ void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU);
+
+ void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
+ DIE &Die);
+
+ AddressPool &getAddressPool() { return AddrPool; }
+
+ void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
+
+ void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
+
+ void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die);
+
+ void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
+ char Flags);
+
+ const MachineFunction *getCurrentFunction() const { return CurFn; }
+
+ /// A helper function to check whether the DIE for a given Scope is
+ /// going to be null.
+ bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+ /// Find the matching DwarfCompileUnit for the given CU DIE.
+ DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); }
+ const DwarfCompileUnit *lookupCU(const DIE *Die) const {
+ return CUDieMap.lookup(Die);
+ }
+
+ unsigned getStringTypeLoc(const DIStringType *ST) const {
+ return StringTypeLocMap.lookup(ST);
+ }
+
+ void addStringTypeLoc(const DIStringType *ST, unsigned Loc) {
+ assert(ST);
+ if (Loc)
+ StringTypeLocMap[ST] = Loc;
+ }
+
+ /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+ ///
+ /// Returns whether we are "tuning" for a given debugger.
+ /// @{
+ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+ bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+ bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ bool tuneForDBX() const { return DebuggerTuning == DebuggerKind::DBX; }
+ /// @}
+
+ const MCSymbol *getSectionLabel(const MCSection *S);
+ void insertSectionLabel(const MCSymbol *S);
+
+ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ const DbgValueLoc &Value,
+ DwarfExpression &DwarfExpr);
+
+ /// If the \p File has an MD5 checksum, return it as an MD5Result
+ /// allocated in the MCContext.
+ std::optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+
+ MDNodeSet &getLocalDeclsForScope(const DILocalScope *S) {
+ return LocalDeclsPerLS[S];
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 000000000000..c2c11c7bc14d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,110 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "EHStreamer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCDwarf.h"
+
+namespace llvm {
+class MachineFunction;
+class ARMTargetStreamer;
+
+class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public EHStreamer {
+ /// Per-function flag to indicate if .cfi_personality should be emitted.
+ bool shouldEmitPersonality = false;
+
+ /// Per-function flag to indicate if .cfi_personality must be emitted.
+ bool forceEmitPersonality = false;
+
+ /// Per-function flag to indicate if .cfi_lsda should be emitted.
+ bool shouldEmitLSDA = false;
+
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI = false;
+
+ /// Per-module flag to indicate if .cfi_section has been emitted.
+ bool hasEmittedCFISections = false;
+
+ /// Vector of all personality functions seen so far in the module.
+ std::vector<const GlobalValue *> Personalities;
+
+ void addPersonality(const GlobalValue *Personality);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ ~DwarfCFIException() override;
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes it is emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ void beginBasicBlockSection(const MachineBasicBlock &MBB) override;
+ void endBasicBlockSection(const MachineBasicBlock &MBB) override;
+};
+
+class LLVM_LIBRARY_VISIBILITY ARMException : public EHStreamer {
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI = false;
+
+ /// Per-module flag to indicate if .cfi_section has been emitted.
+ bool hasEmittedCFISections = false;
+
+ void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override;
+ ARMTargetStreamer &getTargetStreamer();
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ ~ARMException() override;
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override {}
+
+ /// Gather pre-function exception information. Assumes it is emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ void markFunctionEnd() override;
+};
+
+class LLVM_LIBRARY_VISIBILITY AIXException : public EHStreamer {
+ /// This is AIX's compat unwind section, which the unwinder uses to find
+ /// the location of the LSDA area and the personality routine.
+ void emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym);
+
+public:
+ AIXException(AsmPrinter *A);
+
+ void endModule() override {}
+ void beginFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override;
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
new file mode 100644
index 000000000000..7623b7fb7c5d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -0,0 +1,740 @@
+//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfExpression.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+void DwarfExpression::emitConstu(uint64_t Value) {
+ if (Value < 32)
+ emitOp(dwarf::DW_OP_lit0 + Value);
+ else if (Value == std::numeric_limits<uint64_t>::max()) {
+ // Only do this for 64-bit values as the DWARF expression stack uses
+ // target-address-size values.
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ } else {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
+ }
+}
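A minimal, self-contained sketch (not LLVM code; the helper name is invented) of the size trade-off emitConstu makes above: values below 32 encode as a single DW_OP_lit<n> byte, the all-ones value as the two-byte pair DW_OP_lit0 DW_OP_not, and everything else as DW_OP_constu followed by a ULEB128 operand.

#include <cstdint>
#include <cstdio>

// Byte count of the encoding the logic above would pick for Value.
unsigned encodedSize(uint64_t Value) {
  if (Value < 32)
    return 1;                                   // single DW_OP_lit<n> opcode
  if (Value == UINT64_MAX)
    return 2;                                   // DW_OP_lit0, DW_OP_not
  unsigned Bytes = 1;                           // DW_OP_constu opcode
  do { Value >>= 7; ++Bytes; } while (Value);   // ULEB128 operand bytes
  return Bytes;
}

int main() {
  std::printf("%u %u %u\n", encodedSize(5), encodedSize(1000),
              encodedSize(UINT64_MAX));         // prints: 1 3 2
}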
+
+void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert((isUnknownLocation() || isRegisterLocation()) &&
+ "location description already locked down");
+ LocationKind = Register;
+ if (DwarfReg < 32) {
+ emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ } else {
+ emitOp(dwarf::DW_OP_regx, Comment);
+ emitUnsigned(DwarfReg);
+ }
+}
+
+void DwarfExpression::addBReg(int DwarfReg, int Offset) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert(!isRegisterLocation() && "location description already locked down");
+ if (DwarfReg < 32) {
+ emitOp(dwarf::DW_OP_breg0 + DwarfReg);
+ } else {
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(DwarfReg);
+ }
+ emitSigned(Offset);
+}
+
+void DwarfExpression::addFBReg(int Offset) {
+ emitOp(dwarf::DW_OP_fbreg);
+ emitSigned(Offset);
+}
+
+void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ if (!SizeInBits)
+ return;
+
+ const unsigned SizeOfByte = 8;
+ if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
+ emitOp(dwarf::DW_OP_bit_piece);
+ emitUnsigned(SizeInBits);
+ emitUnsigned(OffsetInBits);
+ } else {
+ emitOp(dwarf::DW_OP_piece);
+ unsigned ByteSize = SizeInBits / SizeOfByte;
+ emitUnsigned(ByteSize);
+ }
+ this->OffsetInBits += SizeInBits;
+}
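A self-contained sketch of the opcode choice addOpPiece makes above; the opcode values come from the DWARF specification and the helper name is invented for illustration.

#include <cstdint>
#include <cstdio>

// DWARF opcode values (per the DWARF spec), for illustration only.
constexpr uint8_t DW_OP_piece = 0x93;
constexpr uint8_t DW_OP_bit_piece = 0x9d;

// A fragment that starts at a bit offset, or whose size is not a whole
// number of bytes, needs DW_OP_bit_piece; otherwise DW_OP_piece suffices.
uint8_t pickPieceOp(unsigned SizeInBits, unsigned OffsetInBits) {
  return (OffsetInBits > 0 || SizeInBits % 8) ? DW_OP_bit_piece : DW_OP_piece;
}

int main() {
  std::printf("%#x\n", pickPieceOp(32, 0)); // 0x93: 4 whole bytes at offset 0
  std::printf("%#x\n", pickPieceOp(12, 4)); // 0x9d: bit granularity required
}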
+
+void DwarfExpression::addShr(unsigned ShiftBy) {
+ emitConstu(ShiftBy);
+ emitOp(dwarf::DW_OP_shr);
+}
+
+void DwarfExpression::addAnd(unsigned Mask) {
+ emitConstu(Mask);
+ emitOp(dwarf::DW_OP_and);
+}
+
+bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
+ llvm::Register MachineReg,
+ unsigned MaxSize) {
+ if (!MachineReg.isPhysical()) {
+ if (isFrameRegister(TRI, MachineReg)) {
+ DwarfRegs.push_back(Register::createRegister(-1, nullptr));
+ return true;
+ }
+ return false;
+ }
+
+ int Reg = TRI.getDwarfRegNum(MachineReg, false);
+
+ // If this is a valid register number, emit it.
+ if (Reg >= 0) {
+ DwarfRegs.push_back(Register::createRegister(Reg, nullptr));
+ return true;
+ }
+
+ // Walk up the super-register chain until we find a valid number.
+ // For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0.
+ for (MCPhysReg SR : TRI.superregs(MachineReg)) {
+ Reg = TRI.getDwarfRegNum(SR, false);
+ if (Reg >= 0) {
+ unsigned Idx = TRI.getSubRegIndex(SR, MachineReg);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
+ DwarfRegs.push_back(Register::createRegister(Reg, "super-register"));
+ // Use a DW_OP_bit_piece to describe the sub-register.
+ setSubRegisterPiece(Size, RegOffset);
+ return true;
+ }
+ }
+
+ // Otherwise, attempt to find a covering set of sub-register numbers.
+ // For example, Q0 on ARM is a composition of D0+D1.
+ unsigned CurPos = 0;
+ // The size of the register in bits.
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg);
+ unsigned RegSize = TRI.getRegSizeInBits(*RC);
+ // Keep track of the bits in the register we already emitted, so we
+ // can avoid emitting redundant aliasing subregs. Because this is
+ // just doing a greedy scan of all subregisters, it is possible that
+ // this doesn't find a combination of subregisters that fully cover
+ // the register (even though one may exist).
+ SmallBitVector Coverage(RegSize, false);
+ for (MCPhysReg SR : TRI.subregs(MachineReg)) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, SR);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned Offset = TRI.getSubRegIdxOffset(Idx);
+ Reg = TRI.getDwarfRegNum(SR, false);
+ if (Reg < 0)
+ continue;
+
+ // Used to build the intersection between the bits we already
+ // emitted and the bits covered by this subregister.
+ SmallBitVector CurSubReg(RegSize, false);
+ CurSubReg.set(Offset, Offset + Size);
+
+ // If this sub-register has a DWARF number and we haven't covered
+ // its range, and its range covers the value, emit a DWARF piece for it.
+ if (Offset < MaxSize && CurSubReg.test(Coverage)) {
+ // Emit a piece for any gap in the coverage.
+ if (Offset > CurPos)
+ DwarfRegs.push_back(Register::createSubRegister(
+ -1, Offset - CurPos, "no DWARF register encoding"));
+ if (Offset == 0 && Size >= MaxSize)
+ DwarfRegs.push_back(Register::createRegister(Reg, "sub-register"));
+ else
+ DwarfRegs.push_back(Register::createSubRegister(
+ Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"));
+ }
+ // Mark it as emitted.
+ Coverage.set(Offset, Offset + Size);
+ CurPos = Offset + Size;
+ }
+ // Failed to find any DWARF encoding.
+ if (CurPos == 0)
+ return false;
+ // Found a partial or complete DWARF encoding.
+ if (CurPos < RegSize)
+ DwarfRegs.push_back(Register::createSubRegister(
+ -1, RegSize - CurPos, "no DWARF register encoding"));
+ return true;
+}
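A toy, self-contained sketch of the greedy sub-register scan above. The sub-register table and DWARF numbers are invented; the real code queries TargetRegisterInfo and uses SmallBitVector for the coverage mask.

#include <cstdio>
#include <vector>

struct SubReg { int DwarfNum; unsigned Offset, Size; }; // hypothetical data

int main() {
  const unsigned RegSize = 128; // a 128-bit register built from two halves
  // Candidate sub-registers in scan order; the third adds no new bits.
  std::vector<SubReg> Subs = {{256, 0, 64}, {257, 64, 64}, {300, 0, 128}};
  std::vector<bool> Covered(RegSize, false);
  unsigned CurPos = 0;
  for (const SubReg &S : Subs) {
    bool AddsNewBits = false;
    for (unsigned B = S.Offset; B < S.Offset + S.Size; ++B)
      AddsNewBits |= !Covered[B];
    if (AddsNewBits) {
      if (S.Offset > CurPos) // gap with no DWARF encoding
        std::printf("piece (undefined): %u bits\n", S.Offset - CurPos);
      std::printf("reg %d: piece %u bits\n", S.DwarfNum, S.Size);
    }
    for (unsigned B = S.Offset; B < S.Offset + S.Size; ++B)
      Covered[B] = true; // mark as covered, even when skipped
    CurPos = S.Offset + S.Size;
  }
}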
+
+void DwarfExpression::addStackValue() {
+ if (DwarfVersion >= 4)
+ emitOp(dwarf::DW_OP_stack_value);
+}
+
+void DwarfExpression::addSignedConstant(int64_t Value) {
+ assert(isImplicitLocation() || isUnknownLocation());
+ LocationKind = Implicit;
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Value);
+}
+
+void DwarfExpression::addUnsignedConstant(uint64_t Value) {
+ assert(isImplicitLocation() || isUnknownLocation());
+ LocationKind = Implicit;
+ emitConstu(Value);
+}
+
+void DwarfExpression::addUnsignedConstant(const APInt &Value) {
+ assert(isImplicitLocation() || isUnknownLocation());
+ LocationKind = Implicit;
+
+ unsigned Size = Value.getBitWidth();
+ const uint64_t *Data = Value.getRawData();
+
+ // Chop it up into 64-bit pieces, because that's the maximum that
+ // addUnsignedConstant takes.
+ unsigned Offset = 0;
+ while (Offset < Size) {
+ addUnsignedConstant(*Data++);
+ if (Offset == 0 && Size <= 64)
+ break;
+ addStackValue();
+ addOpPiece(std::min(Size - Offset, 64u), Offset);
+ Offset += 64;
+ }
+}
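A self-contained sketch of the chopping loop above, with a plain word array standing in for APInt::getRawData(): once the value is wider than 64 bits, each word gets its own constant / stack-value / piece triple. The 128-bit constant is invented.

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical 128-bit constant, least-significant word first.
  const uint64_t Words[] = {0x1122334455667788ULL, 0x99aabbccddeeff00ULL};
  const unsigned SizeInBits = 128;
  for (unsigned Offset = 0; Offset < SizeInBits; Offset += 64)
    std::printf("DW_OP_constu %#llx; DW_OP_stack_value; DW_OP_piece 8\n",
                (unsigned long long)Words[Offset / 64]);
}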
+
+void DwarfExpression::addConstantFP(const APFloat &APF, const AsmPrinter &AP) {
+ assert(isImplicitLocation() || isUnknownLocation());
+ APInt API = APF.bitcastToAPInt();
+ int NumBytes = API.getBitWidth() / 8;
+ if (NumBytes == 4 /*float*/ || NumBytes == 8 /*double*/) {
+ // FIXME: Add support for `long double`.
+ emitOp(dwarf::DW_OP_implicit_value);
+ emitUnsigned(NumBytes /*Size of the block in bytes*/);
+
+ // The loop below is emitting the value starting at least significant byte,
+ // so we need to perform a byte-swap to get the byte order correct in case
+ // of a big-endian target.
+ if (AP.getDataLayout().isBigEndian())
+ API = API.byteSwap();
+
+ for (int i = 0; i < NumBytes; ++i) {
+ emitData1(API.getZExtValue() & 0xFF);
+ API = API.lshr(8);
+ }
+
+ return;
+ }
+ LLVM_DEBUG(
+ dbgs() << "Skipped DW_OP_implicit_value creation for ConstantFP of size: "
+ << API.getBitWidth() << " bits\n");
+}
+
+bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
+ DIExpressionCursor &ExprCursor,
+ llvm::Register MachineReg,
+ unsigned FragmentOffsetInBits) {
+ auto Fragment = ExprCursor.getFragmentInfo();
+ if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) {
+ LocationKind = Unknown;
+ return false;
+ }
+
+ bool HasComplexExpression = false;
+ auto Op = ExprCursor.peek();
+ if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
+ HasComplexExpression = true;
+
+ // If the register can only be described by a complex expression (i.e.,
+ // multiple subregisters) it doesn't safely compose with another complex
+ // expression. For example, it is not possible to apply a DW_OP_deref
+ // operation to multiple DW_OP_pieces, since composite location descriptions
+ // do not push anything on the DWARF stack.
+ //
+ // DW_OP_entry_value operations can only hold a DWARF expression or a
+ // register location description, so we can't emit a single entry value
+ // covering a composite location description. In the future we may want to
+ // emit entry value operations for each register location in the composite
+ // location, but until that is supported do not emit anything.
+ if ((HasComplexExpression || IsEmittingEntryValue) && DwarfRegs.size() > 1) {
+ if (IsEmittingEntryValue)
+ cancelEntryValue();
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
+ }
+
+ // Handle simple register locations. If we are supposed to emit
+ // a call site parameter expression and if that expression is just a register
+ // location, emit it with addBReg and offset 0, because we should emit a DWARF
+ // expression representing a value, rather than a location.
+ if ((!isParameterValue() && !isMemoryLocation() && !HasComplexExpression) ||
+ isEntryValue()) {
+ auto FragmentInfo = ExprCursor.getFragmentInfo();
+ unsigned RegSize = 0;
+ for (auto &Reg : DwarfRegs) {
+ RegSize += Reg.SubRegSize;
+ if (Reg.DwarfRegNo >= 0)
+ addReg(Reg.DwarfRegNo, Reg.Comment);
+ if (FragmentInfo)
+ if (RegSize > FragmentInfo->SizeInBits)
+ // If the register is larger than the current fragment stop
+ // once the fragment is covered.
+ break;
+ addOpPiece(Reg.SubRegSize);
+ }
+
+ if (isEntryValue()) {
+ finalizeEntryValue();
+
+ if (!isIndirect() && !isParameterValue() && !HasComplexExpression &&
+ DwarfVersion >= 4)
+ emitOp(dwarf::DW_OP_stack_value);
+ }
+
+ DwarfRegs.clear();
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ auto NextOp = ExprCursor.peek();
+ if (SubRegisterSizeInBits && NextOp &&
+ (NextOp->getOp() != dwarf::DW_OP_LLVM_fragment))
+ maskSubRegister();
+ return true;
+ }
+
+ // Don't emit locations that cannot be expressed without DW_OP_stack_value.
+ if (DwarfVersion < 4)
+ if (any_of(ExprCursor, [](DIExpression::ExprOperand Op) -> bool {
+ return Op.getOp() == dwarf::DW_OP_stack_value;
+ })) {
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
+ }
+
+ // TODO: We should not give up here but the following code needs to be changed
+ // to deal with multiple (sub)registers first.
+ if (DwarfRegs.size() > 1) {
+ LLVM_DEBUG(dbgs() << "TODO: giving up on debug information due to "
+ "multi-register usage.\n");
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
+ }
+
+ auto Reg = DwarfRegs[0];
+ bool FBReg = isFrameRegister(TRI, MachineReg);
+ int SignedOffset = 0;
+ assert(!Reg.isSubRegister() && "full register expected");
+
+ // Pattern-match combinations for which more efficient representations exist.
+ // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset].
+ if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) {
+ uint64_t Offset = Op->getArg(0);
+ uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max());
+ if (Offset <= IntMax) {
+ SignedOffset = Offset;
+ ExprCursor.take();
+ }
+ }
+
+ // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]
+ // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset]
+ // If Reg is a subregister we need to mask it out before subtracting.
+ if (Op && Op->getOp() == dwarf::DW_OP_constu) {
+ uint64_t Offset = Op->getArg(0);
+ uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max());
+ auto N = ExprCursor.peekNext();
+ if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) {
+ SignedOffset = Offset;
+ ExprCursor.consume(2);
+ } else if (N && N->getOp() == dwarf::DW_OP_minus &&
+ !SubRegisterSizeInBits && Offset <= IntMax + 1) {
+ SignedOffset = -static_cast<int64_t>(Offset);
+ ExprCursor.consume(2);
+ }
+ }
+
+ if (FBReg)
+ addFBReg(SignedOffset);
+ else
+ addBReg(Reg.DwarfRegNo, SignedOffset);
+ DwarfRegs.clear();
+
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ auto NextOp = ExprCursor.peek();
+ if (SubRegisterSizeInBits && NextOp &&
+ (NextOp->getOp() != dwarf::DW_OP_LLVM_fragment))
+ maskSubRegister();
+
+ return true;
+}
+
+void DwarfExpression::setEntryValueFlags(const MachineLocation &Loc) {
+ LocationFlags |= EntryValue;
+ if (Loc.isIndirect())
+ LocationFlags |= Indirect;
+}
+
+void DwarfExpression::setLocation(const MachineLocation &Loc,
+ const DIExpression *DIExpr) {
+ if (Loc.isIndirect())
+ setMemoryLocationKind();
+
+ if (DIExpr->isEntryValue())
+ setEntryValueFlags(Loc);
+}
+
+void DwarfExpression::beginEntryValueExpression(
+ DIExpressionCursor &ExprCursor) {
+ auto Op = ExprCursor.take();
+ (void)Op;
+ assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value);
+ assert(!IsEmittingEntryValue && "Already emitting entry value?");
+ assert(Op->getArg(0) == 1 &&
+ "Can currently only emit entry values covering a single operation");
+
+ SavedLocationKind = LocationKind;
+ LocationKind = Register;
+ IsEmittingEntryValue = true;
+ enableTemporaryBuffer();
+}
+
+void DwarfExpression::finalizeEntryValue() {
+ assert(IsEmittingEntryValue && "Entry value not open?");
+ disableTemporaryBuffer();
+
+ emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value));
+
+ // Emit the entry value's size operand.
+ unsigned Size = getTemporaryBufferSize();
+ emitUnsigned(Size);
+
+ // Emit the entry value's DWARF block operand.
+ commitTemporaryBuffer();
+
+ LocationFlags &= ~EntryValue;
+ LocationKind = SavedLocationKind;
+ IsEmittingEntryValue = false;
+}
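A self-contained sketch of the size-prefixed block pattern that the temporary buffer supports above: bytes for the operand block are collected on the side so their count can be written before the bytes themselves. Opcode values follow DWARF 5; the byte layout is illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> Main, Temp;
  std::vector<uint8_t> *Out = &Temp;          // enableTemporaryBuffer()
  Out->push_back(0x55);                       // e.g. DW_OP_reg5
  Out = &Main;                                // disableTemporaryBuffer()
  Main.push_back(0xa3);                       // DW_OP_entry_value (DWARF 5)
  Main.push_back((uint8_t)Temp.size());       // block size (ULEB128; one byte here)
  Main.insert(Main.end(), Temp.begin(), Temp.end()); // commitTemporaryBuffer()
  for (uint8_t B : Main)
    std::printf("%#04x ", B);                 // 0xa3 0x01 0x55
  std::printf("\n");
}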
+
+void DwarfExpression::cancelEntryValue() {
+ assert(IsEmittingEntryValue && "Entry value not open?");
+ disableTemporaryBuffer();
+
+ // The temporary buffer can't be emptied, so for now just assert that nothing
+ // has been emitted to it.
+ assert(getTemporaryBufferSize() == 0 &&
+ "Began emitting entry value block before cancelling entry value");
+
+ LocationKind = SavedLocationKind;
+ IsEmittingEntryValue = false;
+}
+
+unsigned DwarfExpression::getOrCreateBaseType(unsigned BitSize,
+ dwarf::TypeKind Encoding) {
+ // Reuse the base_type if we already have one in this CU otherwise we
+ // create a new one.
+ unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
+ for (; I != E; ++I)
+ if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
+ CU.ExprRefedBaseTypes[I].Encoding == Encoding)
+ break;
+
+ if (I == E)
+ CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
+ return I;
+}
+
+/// Assuming a well-formed expression, match "DW_OP_deref*
+/// DW_OP_LLVM_fragment?".
+static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
+ while (ExprCursor) {
+ auto Op = ExprCursor.take();
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_deref:
+ case dwarf::DW_OP_LLVM_fragment:
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor) {
+ addExpression(std::move(ExprCursor),
+ [](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
+ llvm_unreachable("unhandled opcode found in expression");
+ });
+}
+
+bool DwarfExpression::addExpression(
+ DIExpressionCursor &&ExprCursor,
+ llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg) {
+ // Entry values can currently only cover the initial register location,
+ // and not any other parts of the following DWARF expression.
+ assert(!IsEmittingEntryValue && "Can't emit entry value around expression");
+
+ std::optional<DIExpression::ExprOperand> PrevConvertOp;
+
+ while (ExprCursor) {
+ auto Op = ExprCursor.take();
+ uint64_t OpNum = Op->getOp();
+
+ if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) {
+ emitOp(OpNum);
+ continue;
+ } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) {
+ addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0));
+ continue;
+ }
+
+ switch (OpNum) {
+ case dwarf::DW_OP_LLVM_arg:
+ if (!InsertArg(Op->getArg(0), ExprCursor)) {
+ LocationKind = Unknown;
+ return false;
+ }
+ break;
+ case dwarf::DW_OP_LLVM_fragment: {
+ unsigned SizeInBits = Op->getArg(1);
+ unsigned FragmentOffset = Op->getArg(0);
+ // The fragment offset must have already been adjusted by emitting an
+ // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base
+ // location.
+ assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
+ assert(SizeInBits >= OffsetInBits - FragmentOffset && "size underflow");
+
+ // If addMachineReg already emitted DW_OP_piece operations to represent
+ // a super-register by splicing together sub-registers, subtract the size
+ // of the pieces that was already emitted.
+ SizeInBits -= OffsetInBits - FragmentOffset;
+
+ // If addMachineReg requested a DW_OP_bit_piece to stencil out a
+ // sub-register that is smaller than the current fragment's size, use it.
+ if (SubRegisterSizeInBits)
+ SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
+
+ // Emit a DW_OP_stack_value for implicit location descriptions.
+ if (isImplicitLocation())
+ addStackValue();
+
+ // Emit the DW_OP_piece.
+ addOpPiece(SizeInBits, SubRegisterOffsetInBits);
+ setSubRegisterPiece(0, 0);
+ // Reset the location description kind.
+ LocationKind = Unknown;
+ return true;
+ }
+ case dwarf::DW_OP_plus_uconst:
+ assert(!isRegisterLocation());
+ emitOp(dwarf::DW_OP_plus_uconst);
+ emitUnsigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_minus:
+ case dwarf::DW_OP_mul:
+ case dwarf::DW_OP_div:
+ case dwarf::DW_OP_mod:
+ case dwarf::DW_OP_or:
+ case dwarf::DW_OP_and:
+ case dwarf::DW_OP_xor:
+ case dwarf::DW_OP_shl:
+ case dwarf::DW_OP_shr:
+ case dwarf::DW_OP_shra:
+ case dwarf::DW_OP_lit0:
+ case dwarf::DW_OP_not:
+ case dwarf::DW_OP_dup:
+ case dwarf::DW_OP_push_object_address:
+ case dwarf::DW_OP_over:
+ case dwarf::DW_OP_eq:
+ case dwarf::DW_OP_ne:
+ case dwarf::DW_OP_gt:
+ case dwarf::DW_OP_ge:
+ case dwarf::DW_OP_lt:
+ case dwarf::DW_OP_le:
+ emitOp(OpNum);
+ break;
+ case dwarf::DW_OP_deref:
+ assert(!isRegisterLocation());
+ if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
+ // Turning this into a memory location description makes the deref
+ // implicit.
+ LocationKind = Memory;
+ else
+ emitOp(dwarf::DW_OP_deref);
+ break;
+ case dwarf::DW_OP_constu:
+ assert(!isRegisterLocation());
+ emitConstu(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_consts:
+ assert(!isRegisterLocation());
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_LLVM_convert: {
+ unsigned BitSize = Op->getArg(0);
+ dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
+ if (DwarfVersion >= 5 && CU.getDwarfDebug().useOpConvert()) {
+ emitOp(dwarf::DW_OP_convert);
+ // If targeting a location list, simply emit the index into the raw
+ // byte stream as ULEB128; DwarfDebug::emitDebugLocEntry has been
+ // fitted with means to extract it later.
+ // If targeting an inlined DW_AT_location, insert a DIEBaseTypeRef
+ // (containing the index and a resolve mechanism during emit) into the
+ // DIE value list.
+ emitBaseTypeRef(getOrCreateBaseType(BitSize, Encoding));
+ } else {
+ if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) {
+ if (Encoding == dwarf::DW_ATE_signed)
+ emitLegacySExt(PrevConvertOp->getArg(0));
+ else if (Encoding == dwarf::DW_ATE_unsigned)
+ emitLegacyZExt(PrevConvertOp->getArg(0));
+ PrevConvertOp = std::nullopt;
+ } else {
+ PrevConvertOp = Op;
+ }
+ }
+ break;
+ }
+ case dwarf::DW_OP_stack_value:
+ LocationKind = Implicit;
+ break;
+ case dwarf::DW_OP_swap:
+ assert(!isRegisterLocation());
+ emitOp(dwarf::DW_OP_swap);
+ break;
+ case dwarf::DW_OP_xderef:
+ assert(!isRegisterLocation());
+ emitOp(dwarf::DW_OP_xderef);
+ break;
+ case dwarf::DW_OP_deref_size:
+ emitOp(dwarf::DW_OP_deref_size);
+ emitData1(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_LLVM_tag_offset:
+ TagOffset = Op->getArg(0);
+ break;
+ case dwarf::DW_OP_regx:
+ emitOp(dwarf::DW_OP_regx);
+ emitUnsigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_bregx:
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(Op->getArg(0));
+ emitSigned(Op->getArg(1));
+ break;
+ default:
+ llvm_unreachable("unhandled opcode found in expression");
+ }
+ }
+
+ if (isImplicitLocation() && !isParameterValue())
+ // Turn this into an implicit location description.
+ addStackValue();
+
+ return true;
+}
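The DW_OP_LLVM_convert case above mentions emitting the base-type index as ULEB128. Below is a self-contained encoder sketch of that variable-length format; LLVM provides encodeULEB128 in llvm/Support/LEB128.h, so this is purely illustrative.

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f; // low 7 bits of the value
    Value >>= 7;
    if (Value)
      Byte |= 0x80;              // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value);
  return Out;
}

int main() {
  for (uint8_t B : encodeULEB128(624485)) // classic example: e5 8e 26
    std::printf("%02x ", B);
  std::printf("\n");
}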
+
+/// Add masking operations to stencil out a subregister.
+void DwarfExpression::maskSubRegister() {
+ assert(SubRegisterSizeInBits && "no subregister was registered");
+ if (SubRegisterOffsetInBits > 0)
+ addShr(SubRegisterOffsetInBits);
+ uint64_t Mask = (1ULL << (uint64_t)SubRegisterSizeInBits) - 1ULL;
+ addAnd(Mask);
+}
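A self-contained sketch of what the DW_OP_shr / DW_OP_and pair emitted above computes on the DWARF stack; the register value is invented, and sub-register sizes of 64 bits or more are not handled by the sketch.

#include <cstdint>
#include <cstdio>

uint64_t extractSubReg(uint64_t FullReg, unsigned OffsetInBits,
                       unsigned SizeInBits) {
  if (OffsetInBits > 0)
    FullReg >>= OffsetInBits;                         // DW_OP_shr
  uint64_t Mask = (1ULL << (uint64_t)SizeInBits) - 1; // same mask as above
  return FullReg & Mask;                              // DW_OP_and
}

int main() {
  // A 16-bit sub-register at bit offset 16 of a 64-bit register value.
  std::printf("%#llx\n",
              (unsigned long long)extractSubReg(0xdeadbeefcafef00dULL, 16, 16));
  // prints 0xcafe
}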
+
+void DwarfExpression::finalize() {
+ assert(DwarfRegs.size() == 0 && "dwarf registers not emitted");
+ // Emit any outstanding DW_OP_piece operations to mask out subregisters.
+ if (SubRegisterSizeInBits == 0)
+ return;
+ // Don't emit a DW_OP_piece for a subregister at offset 0.
+ if (SubRegisterOffsetInBits == 0)
+ return;
+ addOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
+}
+
+void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
+ if (!Expr || !Expr->isFragment())
+ return;
+
+ uint64_t FragmentOffset = Expr->getFragmentInfo()->OffsetInBits;
+ assert(FragmentOffset >= OffsetInBits &&
+ "overlapping or duplicate fragments");
+ if (FragmentOffset > OffsetInBits)
+ addOpPiece(FragmentOffset - OffsetInBits);
+ OffsetInBits = FragmentOffset;
+}
+
+void DwarfExpression::emitLegacySExt(unsigned FromBits) {
+ // (((X >> (FromBits - 1)) * (~0)) << FromBits) | X
+ emitOp(dwarf::DW_OP_dup);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits - 1);
+ emitOp(dwarf::DW_OP_shr);
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ emitOp(dwarf::DW_OP_mul);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits);
+ emitOp(dwarf::DW_OP_shl);
+ emitOp(dwarf::DW_OP_or);
+}
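A self-contained check of the stack formula quoted in the comment above, evaluated in ordinary 64-bit arithmetic (the usual width of a DWARF stack element); the helper name is invented.

#include <cstdint>
#include <cstdio>

uint64_t legacySExt(uint64_t X, unsigned FromBits) {
  uint64_t SignBit = X >> (FromBits - 1); // DW_OP_dup / constu / shr
  uint64_t AllOnes = SignBit * ~0ULL;     // DW_OP_lit0 / not / mul
  return (AllOnes << FromBits) | X;       // DW_OP_constu / shl / or
}

int main() {
  std::printf("%lld\n", (long long)legacySExt(0xFFu, 8)); // -1
  std::printf("%lld\n", (long long)legacySExt(0x7Fu, 8)); // 127
}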
+
+void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
+ // Heuristic to decide the most efficient encoding.
+ // A ULEB can encode 7 1-bits per byte.
+ if (FromBits / 7 < 1+1+1+1+1) {
+ // (X & ((1 << FromBits) - 1))
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned((1ULL << FromBits) - 1);
+ } else {
+ // Note that the DWARF 4 stack consists of pointer-sized elements,
+ // so technically it doesn't make sense to shift left more than 64
+ // bits. We leave that for the consumer to decide though. LLDB for
+ // example uses APInt for the stack elements and can still deal
+ // with this.
+ emitOp(dwarf::DW_OP_lit1);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits);
+ emitOp(dwarf::DW_OP_shl);
+ emitOp(dwarf::DW_OP_lit1);
+ emitOp(dwarf::DW_OP_minus);
+ }
+ emitOp(dwarf::DW_OP_and);
+}
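A self-contained sketch of the trade-off behind the heuristic above: a ULEB128 byte carries 7 value bits, so the direct DW_OP_constu mask grows with FromBits while the shift-based sequence stays near six bytes; the FromBits / 7 < 5 test in the code is a rough cut of this comparison. Byte counts below ignore the trailing DW_OP_and, which both forms share.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Number of bytes a value occupies as ULEB128 (7 value bits per byte).
unsigned ulebLength(uint64_t Value) {
  unsigned Len = 0;
  do { Value >>= 7; ++Len; } while (Value);
  return Len;
}

int main() {
  for (unsigned FromBits : {8u, 16u, 32u, 48u}) {
    uint64_t Mask = (1ULL << FromBits) - 1;       // all FromBits < 64 here
    unsigned ConstuForm = 1 + ulebLength(Mask);   // DW_OP_constu <mask>
    unsigned ShiftForm =                          // lit1 constu <n> shl lit1 minus
        1 + 1 + ulebLength(FromBits) + 1 + 1 + 1;
    std::printf("FromBits=%u: constu form %u bytes, shift form %u bytes\n",
                FromBits, ConstuForm, ShiftForm);
  }
}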
+
+void DwarfExpression::addWasmLocation(unsigned Index, uint64_t Offset) {
+ emitOp(dwarf::DW_OP_WASM_location);
+ emitUnsigned(Index == 4/*TI_LOCAL_INDIRECT*/ ? 0/*TI_LOCAL*/ : Index);
+ emitUnsigned(Offset);
+ if (Index == 4 /*TI_LOCAL_INDIRECT*/) {
+ assert(LocationKind == Unknown);
+ LocationKind = Memory;
+ } else {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
new file mode 100644
index 000000000000..667a9efc6f6c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -0,0 +1,439 @@
+//===- llvm/CodeGen/DwarfExpression.h - Dwarf Expression Builder -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing DWARF expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+
+#include "ByteStreamer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <optional>
+
+namespace llvm {
+
+class AsmPrinter;
+class APInt;
+class DwarfCompileUnit;
+class DIELoc;
+class TargetRegisterInfo;
+class MachineLocation;
+
+/// Holds a DIExpression and keeps track of how many operands have been consumed
+/// so far.
+class DIExpressionCursor {
+ DIExpression::expr_op_iterator Start, End;
+
+public:
+ DIExpressionCursor(const DIExpression *Expr) {
+ if (!Expr) {
+ assert(Start == End);
+ return;
+ }
+ Start = Expr->expr_op_begin();
+ End = Expr->expr_op_end();
+ }
+
+ DIExpressionCursor(ArrayRef<uint64_t> Expr)
+ : Start(Expr.begin()), End(Expr.end()) {}
+
+ DIExpressionCursor(const DIExpressionCursor &) = default;
+
+ /// Consume one operation.
+ std::optional<DIExpression::ExprOperand> take() {
+ if (Start == End)
+ return std::nullopt;
+ return *(Start++);
+ }
+
+ /// Consume N operations.
+ void consume(unsigned N) { std::advance(Start, N); }
+
+ /// Return the current operation.
+ std::optional<DIExpression::ExprOperand> peek() const {
+ if (Start == End)
+ return std::nullopt;
+ return *(Start);
+ }
+
+ /// Return the next operation.
+ std::optional<DIExpression::ExprOperand> peekNext() const {
+ if (Start == End)
+ return std::nullopt;
+
+ auto Next = Start.getNext();
+ if (Next == End)
+ return std::nullopt;
+
+ return *Next;
+ }
+
+ /// Determine whether there are any operations left in this expression.
+ operator bool() const { return Start != End; }
+
+ DIExpression::expr_op_iterator begin() const { return Start; }
+ DIExpression::expr_op_iterator end() const { return End; }
+
+ /// Retrieve the fragment information, if any.
+ std::optional<DIExpression::FragmentInfo> getFragmentInfo() const {
+ return DIExpression::getFragmentInfo(Start, End);
+ }
+};
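A toy stand-in (plain integers rather than DIExpression operands, with an invented type name) illustrating the take/peek/peekNext contract of the cursor above.

#include <cstdio>
#include <iterator>
#include <optional>
#include <vector>

struct ToyCursor {
  std::vector<int>::const_iterator Start, End;

  std::optional<int> take() {           // consume one operation
    if (Start == End)
      return std::nullopt;
    return *Start++;
  }
  std::optional<int> peek() const {     // current operation, not consumed
    if (Start == End)
      return std::nullopt;
    return *Start;
  }
  std::optional<int> peekNext() const { // operation after the current one
    if (Start == End)
      return std::nullopt;
    auto Next = std::next(Start);
    if (Next == End)
      return std::nullopt;
    return *Next;
  }
  explicit operator bool() const { return Start != End; }
};

int main() {
  std::vector<int> Ops = {1, 2, 3};
  ToyCursor C{Ops.begin(), Ops.end()};
  while (C) {
    int Op = *C.take();
    std::printf("took %d, next is %d\n", Op, C.peek().value_or(-1));
  }
}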
+
+/// Base class containing the logic for constructing DWARF expressions
+/// independently of whether they are emitted into a DIE or into a .debug_loc
+/// entry.
+///
+/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size
+/// of a succeeding DWARF block before the latter is emitted to the output.
+/// To handle such cases, data can conditionally be emitted to a temporary
+/// buffer, which can later on be committed to the main output. The size of the
+/// temporary buffer is queryable, allowing for the size of the data to be
+/// emitted before the data is committed.
+class DwarfExpression {
+protected:
+ /// Holds information about all subregisters comprising a register location.
+ struct Register {
+ int DwarfRegNo;
+ unsigned SubRegSize;
+ const char *Comment;
+
+ /// Create a full register, no extra DW_OP_piece operators necessary.
+ static Register createRegister(int RegNo, const char *Comment) {
+ return {RegNo, 0, Comment};
+ }
+
+ /// Create a subregister that needs a DW_OP_piece operator with SizeInBits.
+ static Register createSubRegister(int RegNo, unsigned SizeInBits,
+ const char *Comment) {
+ return {RegNo, SizeInBits, Comment};
+ }
+
+ bool isSubRegister() const { return SubRegSize; }
+ };
+
+ /// Whether we are currently emitting an entry value operation.
+ bool IsEmittingEntryValue = false;
+
+ DwarfCompileUnit &CU;
+
+ /// The register location, if any.
+ SmallVector<Register, 2> DwarfRegs;
+
+ /// Current Fragment Offset in Bits.
+ uint64_t OffsetInBits = 0;
+
+ /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
+ unsigned SubRegisterSizeInBits : 16;
+ unsigned SubRegisterOffsetInBits : 16;
+
+ /// The kind of location description being produced.
+ enum { Unknown = 0, Register, Memory, Implicit };
+
+ /// Additional location flags which may be combined with any location kind.
+ /// Currently, entry values are not supported for the Memory location kind.
+ enum { EntryValue = 1 << 0, Indirect = 1 << 1, CallSiteParamValue = 1 << 2 };
+
+ unsigned LocationKind : 3;
+ unsigned SavedLocationKind : 3;
+ unsigned LocationFlags : 3;
+ unsigned DwarfVersion : 4;
+
+public:
+ /// Set the location (\p Loc) and \ref DIExpression (\p DIExpr) to describe.
+ void setLocation(const MachineLocation &Loc, const DIExpression *DIExpr);
+
+ bool isUnknownLocation() const { return LocationKind == Unknown; }
+
+ bool isMemoryLocation() const { return LocationKind == Memory; }
+
+ bool isRegisterLocation() const { return LocationKind == Register; }
+
+ bool isImplicitLocation() const { return LocationKind == Implicit; }
+
+ bool isEntryValue() const { return LocationFlags & EntryValue; }
+
+ bool isIndirect() const { return LocationFlags & Indirect; }
+
+ bool isParameterValue() { return LocationFlags & CallSiteParamValue; }
+
+ std::optional<uint8_t> TagOffset;
+
+protected:
+ /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
+ /// to represent a subregister.
+ void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ assert(SizeInBits < 65536 && OffsetInBits < 65536);
+ SubRegisterSizeInBits = SizeInBits;
+ SubRegisterOffsetInBits = OffsetInBits;
+ }
+
+ /// Add masking operations to stencil out a subregister.
+ void maskSubRegister();
+
+ /// Output a dwarf operand and an optional assembler comment.
+ virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+
+ /// Emit a raw signed value.
+ virtual void emitSigned(int64_t Value) = 0;
+
+ /// Emit a raw unsigned value.
+ virtual void emitUnsigned(uint64_t Value) = 0;
+
+ virtual void emitData1(uint8_t Value) = 0;
+
+ virtual void emitBaseTypeRef(uint64_t Idx) = 0;
+
+ /// Start emitting data to the temporary buffer. The data stored in the
+ /// temporary buffer can be committed to the main output using
+ /// commitTemporaryBuffer().
+ virtual void enableTemporaryBuffer() = 0;
+
+ /// Disable emission to the temporary buffer. This does not commit data
+ /// in the temporary buffer to the main output.
+ virtual void disableTemporaryBuffer() = 0;
+
+ /// Return the emitted size, in number of bytes, for the data stored in the
+ /// temporary buffer.
+ virtual unsigned getTemporaryBufferSize() = 0;
+
+ /// Commit the data stored in the temporary buffer to the main output.
+ virtual void commitTemporaryBuffer() = 0;
+
+ /// Emit a normalized unsigned constant.
+ void emitConstu(uint64_t Value);
+
+ /// Return whether the given machine register is the frame register in the
+ /// current function.
+ virtual bool isFrameRegister(const TargetRegisterInfo &TRI,
+ llvm::Register MachineReg) = 0;
+
+ /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
+ /// register location description.
+ void addReg(int DwarfReg, const char *Comment = nullptr);
+
+ /// Emit a DW_OP_breg operation.
+ void addBReg(int DwarfReg, int Offset);
+
+ /// Emit DW_OP_fbreg <Offset>.
+ void addFBReg(int Offset);
+
+ /// Emit a partial DWARF register operation.
+ ///
+ /// \param MachineReg The register number.
+ /// \param MaxSize If the register must be composed from
+ /// sub-registers this is an upper bound
+ /// for how many bits the emitted DW_OP_piece
+ /// may cover.
+ ///
+  /// If size and offset are zero, an operation for the entire register is
+  /// emitted. Some targets do not provide a DWARF register number for every
+  /// register; in that case, this function will attempt to emit a DWARF
+  /// register by emitting a fragment of a super-register or by piecing together
+  /// multiple subregisters that alias the register.
+ ///
+ /// \return false if no DWARF register exists for MachineReg.
+ bool addMachineReg(const TargetRegisterInfo &TRI, llvm::Register MachineReg,
+ unsigned MaxSize = ~1U);
+
+ /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
+ /// \param OffsetInBits This is an optional offset into the location that
+ /// is at the top of the DWARF stack.
+ void addOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+
+ /// Emit a shift-right dwarf operation.
+ void addShr(unsigned ShiftBy);
+
+ /// Emit a bitwise and dwarf operation.
+ void addAnd(unsigned Mask);
+
+ /// Emit a DW_OP_stack_value, if supported.
+ ///
+ /// The proper way to describe a constant value is DW_OP_constu <const>,
+ /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available
+ /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for
+  /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_constu
+  /// <const> actually describes a value at a constant address, not a constant
+ /// value. However, in the past there was no better way to describe a
+ /// constant value, so the producers and consumers started to rely on
+ /// heuristics to disambiguate the value vs. location status of the
+ /// expression. See PR21176 for more details.
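+  ///
+  /// For example, on DWARF 4 and later a constant value of 42 is typically
+  /// described as: DW_OP_constu 42, DW_OP_stack_value.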
+ void addStackValue();
+
+ /// Finalize an entry value by emitting its size operand, and committing the
+ /// DWARF block which has been emitted to the temporary buffer.
+ void finalizeEntryValue();
+
+ /// Cancel the emission of an entry value.
+ void cancelEntryValue();
+
+ ~DwarfExpression() = default;
+
+public:
+ DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU)
+ : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0),
+ LocationKind(Unknown), SavedLocationKind(Unknown),
+ LocationFlags(Unknown), DwarfVersion(DwarfVersion) {}
+
+ /// This needs to be called last to commit any pending changes.
+ void finalize();
+
+ /// Emit a signed constant.
+ void addSignedConstant(int64_t Value);
+
+ /// Emit an unsigned constant.
+ void addUnsignedConstant(uint64_t Value);
+
+ /// Emit an unsigned constant.
+ void addUnsignedConstant(const APInt &Value);
+
+  /// Emit a floating point constant.
+ void addConstantFP(const APFloat &Value, const AsmPrinter &AP);
+
+ /// Lock this down to become a memory location description.
+ void setMemoryLocationKind() {
+ assert(isUnknownLocation());
+ LocationKind = Memory;
+ }
+
+ /// Lock this down to become an entry value location.
+ void setEntryValueFlags(const MachineLocation &Loc);
+
+ /// Lock this down to become a call site parameter location.
+ void setCallSiteParamValueFlag() { LocationFlags |= CallSiteParamValue; }
+
+ /// Emit a machine register location. As an optimization this may also consume
+ /// the prefix of a DwarfExpression if a more efficient representation for
+ /// combining the register location and the first operation exists.
+ ///
+ /// \param FragmentOffsetInBits If this is one fragment out of a
+ /// fragmented
+ /// location, this is the offset of the
+ /// fragment inside the entire variable.
+ /// \return false if no DWARF register exists
+ /// for MachineReg.
+ bool addMachineRegExpression(const TargetRegisterInfo &TRI,
+ DIExpressionCursor &Expr,
+ llvm::Register MachineReg,
+ unsigned FragmentOffsetInBits = 0);
+
+ /// Begin emission of an entry value dwarf operation. The entry value's
+ /// first operand is the size of the DWARF block (its second operand),
+ /// which needs to be calculated at time of emission, so we don't emit
+ /// any operands here.
+ void beginEntryValueExpression(DIExpressionCursor &ExprCursor);
+
+ /// Return the index of a base type with the given properties and
+ /// create one if necessary.
+ unsigned getOrCreateBaseType(unsigned BitSize, dwarf::TypeKind Encoding);
+
+ /// Emit all remaining operations in the DIExpressionCursor. The
+ /// cursor must not contain any DW_OP_LLVM_arg operations.
+ void addExpression(DIExpressionCursor &&Expr);
+
+ /// Emit all remaining operations in the DIExpressionCursor.
+ /// DW_OP_LLVM_arg operations are resolved by calling (\p InsertArg).
+  ///
+ /// \return false if any call to (\p InsertArg) returns false.
+ bool addExpression(
+ DIExpressionCursor &&Expr,
+ llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg);
+
+ /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
+ /// the fragment described by \c Expr.
+ void addFragmentOffset(const DIExpression *Expr);
+
+ void emitLegacySExt(unsigned FromBits);
+ void emitLegacyZExt(unsigned FromBits);
+
+ /// Emit location information expressed via WebAssembly location + offset
+ /// The Index is an identifier for locals, globals or operand stack.
+ void addWasmLocation(unsigned Index, uint64_t Offset);
+};
+
+/// DwarfExpression implementation for .debug_loc entries.
+class DebugLocDwarfExpression final : public DwarfExpression {
+
+ struct TempBuffer {
+ SmallString<32> Bytes;
+ std::vector<std::string> Comments;
+ BufferByteStreamer BS;
+
+ TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {}
+ };
+
+ std::unique_ptr<TempBuffer> TmpBuf;
+ BufferByteStreamer &OutBS;
+ bool IsBuffering = false;
+
+ /// Return the byte streamer that currently is being emitted to.
+ ByteStreamer &getActiveStreamer() { return IsBuffering ? TmpBuf->BS : OutBS; }
+
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
+ void emitData1(uint8_t Value) override;
+ void emitBaseTypeRef(uint64_t Idx) override;
+
+ void enableTemporaryBuffer() override;
+ void disableTemporaryBuffer() override;
+ unsigned getTemporaryBufferSize() override;
+ void commitTemporaryBuffer() override;
+
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ llvm::Register MachineReg) override;
+
+public:
+ DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS,
+ DwarfCompileUnit &CU)
+ : DwarfExpression(DwarfVersion, CU), OutBS(BS) {}
+};
+
+/// DwarfExpression implementation for singular DW_AT_location.
+class DIEDwarfExpression final : public DwarfExpression {
+ const AsmPrinter &AP;
+ DIELoc &OutDIE;
+ DIELoc TmpDIE;
+ bool IsBuffering = false;
+
+ /// Return the DIE that currently is being emitted to.
+ DIELoc &getActiveDIE() { return IsBuffering ? TmpDIE : OutDIE; }
+
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
+ void emitData1(uint8_t Value) override;
+ void emitBaseTypeRef(uint64_t Idx) override;
+
+ void enableTemporaryBuffer() override;
+ void disableTemporaryBuffer() override;
+ unsigned getTemporaryBufferSize() override;
+ void commitTemporaryBuffer() override;
+
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ llvm::Register MachineReg) override;
+
+public:
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
+
+ DIELoc *finalize() {
+ DwarfExpression::finalize();
+ return &OutDIE;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
new file mode 100644
index 000000000000..3fe437a07c92
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -0,0 +1,132 @@
+//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfFile.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "DwarfUnit.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/MC/MCStreamer.h"
+#include <cstdint>
+
+using namespace llvm;
+
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+ : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {}
+
+void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) {
+ CUs.push_back(std::move(U));
+}
+
+// Emit each of the DWARF units to its assigned section, with the
+// abbreviations emitted separately via emitAbbrevs().
+void DwarfFile::emitUnits(bool UseOffsets) {
+ for (const auto &TheU : CUs)
+ emitUnit(TheU.get(), UseOffsets);
+}
+
+void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ return;
+
+ MCSection *S = TheU->getSection();
+
+ if (!S)
+ return;
+
+ // Skip CUs that ended up not being needed (split CUs that were abandoned
+ // because they added no information beyond the non-split CU)
+ if (TheU->getUnitDie().values().empty())
+ return;
+
+ Asm->OutStreamer->switchSection(S);
+ TheU->emitHeader(UseOffsets);
+ Asm->emitDwarfDIE(TheU->getUnitDie());
+
+ if (MCSymbol *EndLabel = TheU->getEndLabel())
+ Asm->OutStreamer->emitLabel(EndLabel);
+}
+
+// Compute the size and offset for each DIE.
+void DwarfFile::computeSizeAndOffsets() {
+ // Offset from the first CU in the debug info section is 0 initially.
+ uint64_t SecOffset = 0;
+
+ // Iterate over each compile unit and set the size and offsets for each
+ // DIE within each compile unit. All offsets are CU relative.
+ for (const auto &TheU : CUs) {
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ continue;
+
+ // Skip CUs that ended up not being needed (split CUs that were abandoned
+ // because they added no information beyond the non-split CU)
+ if (TheU->getUnitDie().values().empty())
+ return;
+
+ TheU->setDebugSectionOffset(SecOffset);
+ SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
+ }
+ if (SecOffset > UINT32_MAX && !Asm->isDwarf64())
+ report_fatal_error("The generated debug information is too large "
+ "for the 32-bit DWARF format.");
+}
+
+unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
+ // CU-relative offset is reset to 0 here.
+ unsigned Offset = Asm->getUnitLengthFieldByteSize() + // Length of Unit Info
+ TheU->getHeaderSize(); // Unit-specific headers
+
+ // The return value here is CU-relative, after laying out
+ // all of the CU DIE.
+ return computeSizeAndOffset(TheU->getUnitDie(), Offset);
+}
+
+// Compute the size and offset of a DIE. The offset is relative to start of the
+// CU. It returns the offset after laying out the DIE.
+unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
+ return Die.computeOffsetsAndAbbrevs(Asm->getDwarfFormParams(), Abbrevs,
+ Offset);
+}
+
+void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }
+
+// Emit strings into a string section.
+void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection,
+ bool UseRelativeOffsets) {
+ StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets);
+}
+
+bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ auto &ScopeVars = ScopeVariables[LS];
+ const DILocalVariable *DV = Var->getVariable();
+ if (unsigned ArgNum = DV->getArg()) {
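+    // Formal parameters are keyed by argument number; a second DbgVariable
+    // for the same argument is merged into the cached entry rather than added
+    // again, and the false return value tells the caller about the merge.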
+ auto Cached = ScopeVars.Args.find(ArgNum);
+ if (Cached == ScopeVars.Args.end())
+ ScopeVars.Args[ArgNum] = Var;
+ else {
+ Cached->second->addMMIEntry(*Var);
+ return false;
+ }
+ } else {
+ ScopeVars.Locals.push_back(Var);
+ }
+ return true;
+}
+
+void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
+ SmallVectorImpl<DbgLabel *> &Labels = ScopeLabels[LS];
+ Labels.push_back(Label);
+}
+
+std::pair<uint32_t, RangeSpanList *>
+DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
+ CURangeLists.push_back(
+ RangeSpanList{Asm->createTempSymbol("debug_ranges"), &CU, std::move(R)});
+ return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
new file mode 100644
index 000000000000..464f4f048016
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -0,0 +1,185 @@
+//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+
+#include "DwarfStringPool.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/Allocator.h"
+#include <map>
+#include <memory>
+#include <utility>
+
+namespace llvm {
+
+class AsmPrinter;
+class DbgEntity;
+class DbgVariable;
+class DbgLabel;
+class DINode;
+class DILocalScope;
+class DwarfCompileUnit;
+class DwarfUnit;
+class LexicalScope;
+class MCSection;
+class MDNode;
+
+// Data structure to hold a range for range lists.
+struct RangeSpan {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+};
+
+struct RangeSpanList {
+  // Label used to locate this particular span within the debug_range section.
+ MCSymbol *Label;
+ const DwarfCompileUnit *CU;
+ // List of ranges.
+ SmallVector<RangeSpan, 2> Ranges;
+};
+
+class DwarfFile {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ BumpPtrAllocator AbbrevAllocator;
+
+ // Used to uniquely define abbreviations.
+ DIEAbbrevSet Abbrevs;
+
+ // A pointer to all units in the section.
+ SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs;
+
+ DwarfStringPool StrPool;
+
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
+ /// DWARF v5: The symbol that designates the start of the contribution to
+ /// the string offsets table. The contribution is shared by all units.
+ MCSymbol *StringOffsetsStartSym = nullptr;
+
+ /// DWARF v5: The symbol that designates the base of the range list table.
+ /// The table is shared by all units.
+ MCSymbol *RnglistsTableBaseSym = nullptr;
+
+ /// The variables of a lexical scope.
+ struct ScopeVars {
+ /// We need to sort Args by ArgNo and check for duplicates. This could also
+ /// be implemented as a list or vector + std::lower_bound().
+ std::map<unsigned, DbgVariable *> Args;
+ SmallVector<DbgVariable *, 8> Locals;
+ };
+ /// Collection of DbgVariables of each lexical scope.
+ DenseMap<LexicalScope *, ScopeVars> ScopeVariables;
+
+ /// Collection of DbgLabels of each lexical scope.
+ using LabelList = SmallVector<DbgLabel *, 4>;
+ DenseMap<LexicalScope *, LabelList> ScopeLabels;
+
+ // Collection of abstract subprogram DIEs.
+ DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
+
+ /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
+ /// be shared across CUs, that is why we keep the map here instead
+ /// of in DwarfCompileUnit.
+ DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap;
+
+public:
+ DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+
+ const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
+ return CUs;
+ }
+
+ std::pair<uint32_t, RangeSpanList *> addRange(const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> R);
+
+ /// getRangeLists - Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return CURangeLists;
+ }
+
+ /// Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
+
+ /// Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// Compute the size and offset of all the DIEs in the given unit.
+ /// \returns The size of the root DIE.
+ unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU);
+
+ /// Add a unit to the list of CUs.
+ void addUnit(std::unique_ptr<DwarfCompileUnit> U);
+
+ /// Emit all of the units to the section listed with the given
+ /// abbreviation section.
+ void emitUnits(bool UseOffsets);
+
+ /// Emit the given unit to its section.
+ void emitUnit(DwarfUnit *TheU, bool UseOffsets);
+
+ /// Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(MCSection *);
+
+ /// Emit all of the strings to the section given. If OffsetSection is
+ /// non-null, emit a table of string offsets to it. If UseRelativeOffsets
+ /// is false, emit absolute offsets to the strings. Otherwise, emit
+ /// relocatable references to the strings if they are supported by the target.
+ void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr,
+ bool UseRelativeOffsets = false);
+
+ /// Returns the string pool.
+ DwarfStringPool &getStringPool() { return StrPool; }
+
+ MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; }
+ void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; }
+
+ MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
+ void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
+
+ /// \returns false if the variable was merged with a previous one.
+ bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+ void addScopeLabel(LexicalScope *LS, DbgLabel *Label);
+
+ DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() {
+ return ScopeVariables;
+ }
+
+ DenseMap<LexicalScope *, LabelList> &getScopeLabels() {
+ return ScopeLabels;
+ }
+
+ DenseMap<const DILocalScope *, DIE *> &getAbstractScopeDIEs() {
+ return AbstractLocalScopeDIEs;
+ }
+
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
+ return AbstractEntities;
+ }
+
+ void insertDIE(const MDNode *TypeMD, DIE *Die) {
+ DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
+ }
+
+ DIE *getDIE(const MDNode *TypeMD) {
+ return DITypeNodeToDieMap.lookup(TypeMD);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
new file mode 100644
index 000000000000..2292590b135e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -0,0 +1,128 @@
+//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfStringPool.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+
+DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
+ StringRef Prefix)
+ : Pool(A), Prefix(Prefix),
+ ShouldCreateSymbols(Asm.doesDwarfUseRelocationsAcrossSections()) {}
+
+StringMapEntry<DwarfStringPool::EntryTy> &
+DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
+ auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+ auto &Entry = I.first->second;
+ if (I.second) {
+ Entry.Index = EntryTy::NotIndexed;
+ Entry.Offset = NumBytes;
+ Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
+
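+    // Advance past the string and its terminating NUL so the next entry's
+    // Offset points at the start of the following string.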
+ NumBytes += Str.size() + 1;
+ }
+ return *I.first;
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ return EntryRef(MapEntry);
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ if (!MapEntry.getValue().isIndexed())
+ MapEntry.getValue().Index = NumIndexedStrings++;
+ return EntryRef(MapEntry);
+}
+
+void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
+ MCSection *Section,
+ MCSymbol *StartSym) {
+ if (getNumIndexedStrings() == 0)
+ return;
+ Asm.OutStreamer->switchSection(Section);
+ unsigned EntrySize = Asm.getDwarfOffsetByteSize();
+ // We are emitting the header for a contribution to the string offsets
+ // table. The header consists of an entry with the contribution's
+ // size (not including the size of the length field), the DWARF version and
+ // 2 bytes of padding.
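+  // For example, a 32-bit DWARF contribution (EntrySize == 4) with N indexed
+  // strings emits a length of N * 4 + 4 below; the extra 4 bytes cover the
+  // version and padding fields, which are counted by the length.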
+ Asm.emitDwarfUnitLength(getNumIndexedStrings() * EntrySize + 4,
+ "Length of String Offsets Set");
+ Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.emitInt16(0);
+ // Define the symbol that marks the start of the contribution. It is
+ // referenced by most unit headers via DW_AT_str_offsets_base.
+ // Split units do not use the attribute.
+ if (StartSym)
+ Asm.OutStreamer->emitLabel(StartSym);
+}
+
+void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
+ MCSection *OffsetSection, bool UseRelativeOffsets) {
+ if (Pool.empty())
+ return;
+
+ // Start the dwarf str section.
+ Asm.OutStreamer->switchSection(StrSection);
+
+ // Get all of the string pool entries and sort them by their offset.
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
+ Entries.reserve(Pool.size());
+
+ for (const auto &E : Pool)
+ Entries.push_back(&E);
+
+ llvm::sort(Entries, [](const StringMapEntry<EntryTy> *A,
+ const StringMapEntry<EntryTy> *B) {
+ return A->getValue().Offset < B->getValue().Offset;
+ });
+
+ for (const auto &Entry : Entries) {
+ assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
+ "Mismatch between setting and entry");
+
+ // Emit a label for reference from debug information entries.
+ if (ShouldCreateSymbols)
+ Asm.OutStreamer->emitLabel(Entry->getValue().Symbol);
+
+ // Emit the string itself with a terminating null byte.
+ Asm.OutStreamer->AddComment("string offset=" +
+ Twine(Entry->getValue().Offset));
+ Asm.OutStreamer->emitBytes(
+ StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1));
+ }
+
+ // If we've got an offset section go ahead and emit that now as well.
+ if (OffsetSection) {
+ // Now only take the indexed entries and put them in an array by their ID so
+ // we can emit them in order.
+ Entries.resize(NumIndexedStrings);
+ for (const auto &Entry : Pool) {
+ if (Entry.getValue().isIndexed())
+ Entries[Entry.getValue().Index] = &Entry;
+ }
+
+ Asm.OutStreamer->switchSection(OffsetSection);
+ unsigned size = Asm.getDwarfOffsetByteSize();
+ for (const auto &Entry : Entries)
+ if (UseRelativeOffsets)
+ Asm.emitDwarfStringOffset(Entry->getValue());
+ else
+ Asm.OutStreamer->emitIntValue(Entry->getValue().Offset, size);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
new file mode 100644
index 000000000000..79b5df89e338
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -0,0 +1,66 @@
+//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MCSection;
+class MCSymbol;
+
+// Collection of strings for this unit, along with assorted symbols: a
+// String->Symbol mapping for strings that are referenced indirectly.
+class DwarfStringPool {
+ using EntryTy = DwarfStringPoolEntry;
+
+ StringMap<EntryTy, BumpPtrAllocator &> Pool;
+ StringRef Prefix;
+ uint64_t NumBytes = 0;
+ unsigned NumIndexedStrings = 0;
+ bool ShouldCreateSymbols;
+
+ StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str);
+
+public:
+ using EntryRef = DwarfStringPoolEntryRef;
+
+ DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
+
+ void emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSection *OffsetSection,
+ MCSymbol *StartSym);
+
+ void emit(AsmPrinter &Asm, MCSection *StrSection,
+ MCSection *OffsetSection = nullptr,
+ bool UseRelativeOffsets = false);
+
+ bool empty() const { return Pool.empty(); }
+
+ unsigned size() const { return Pool.size(); }
+
+ unsigned getNumIndexedStrings() const { return NumIndexedStrings; }
+
+ /// Get a reference to an entry in the string pool.
+ EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
+
+ /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a
+ /// unique ID of this entry (e.g., for use in indexed forms like
+ /// DW_FORM_strx).
+ EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
new file mode 100644
index 000000000000..d30f0ef7af34
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -0,0 +1,1851 @@
+//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfUnit.h"
+#include "AddressPool.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfExpression.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
+ DwarfCompileUnit &CU, DIELoc &DIE)
+ : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {}
+
+void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op);
+}
+
+void DIEDwarfExpression::emitSigned(int64_t Value) {
+ CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value);
+}
+
+void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value);
+}
+
+void DIEDwarfExpression::emitData1(uint8_t Value) {
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value);
+}
+
+void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+ CU.addBaseTypeRef(getActiveDIE(), Idx);
+}
+
+void DIEDwarfExpression::enableTemporaryBuffer() {
+ assert(!IsBuffering && "Already buffering?");
+ IsBuffering = true;
+}
+
+void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
+
+unsigned DIEDwarfExpression::getTemporaryBufferSize() {
+ return TmpDIE.computeSize(AP.getDwarfFormParams());
+}
+
+void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
+
+bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
+ llvm::Register MachineReg) {
+ return MachineReg == TRI.getFrameRegister(*AP.MF);
+}
+
+DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
+ AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
+ : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU) {}
+
+DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU,
+ MCDwarfDwoLineTable *SplitLineTable)
+ : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
+ SplitLineTable(SplitLineTable) {
+}
+
+DwarfUnit::~DwarfUnit() {
+ for (DIEBlock *B : DIEBlocks)
+ B->~DIEBlock();
+ for (DIELoc *L : DIELocs)
+ L->~DIELoc();
+}
+
+int64_t DwarfUnit::getDefaultLowerBound() const {
+ switch (getLanguage()) {
+ default:
+ break;
+
+ // The languages below have valid values in all DWARF versions.
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C_plus_plus:
+ return 0;
+
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ return 1;
+
+ // The languages below have valid values only if the DWARF version >= 3.
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ if (DD->getDwarfVersion() >= 3)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Fortran95:
+ if (DD->getDwarfVersion() >= 3)
+ return 1;
+ break;
+
+ // Starting with DWARF v4, all defined languages have valid values.
+ case dwarf::DW_LANG_D:
+ case dwarf::DW_LANG_Java:
+ case dwarf::DW_LANG_Python:
+ case dwarf::DW_LANG_UPC:
+ if (DD->getDwarfVersion() >= 4)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Ada83:
+ case dwarf::DW_LANG_Ada95:
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LANG_Modula2:
+ case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LANG_PLI:
+ if (DD->getDwarfVersion() >= 4)
+ return 1;
+ break;
+
+ // The languages below are new in DWARF v5.
+ case dwarf::DW_LANG_BLISS:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
+ case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_RenderScript:
+ case dwarf::DW_LANG_Rust:
+ case dwarf::DW_LANG_Swift:
+ if (DD->getDwarfVersion() >= 5)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Modula3:
+ if (DD->getDwarfVersion() >= 5)
+ return 1;
+ break;
+ }
+
+ return -1;
+}
+
+/// Check whether the DIE for this MDNode can be shared across CUs.
+bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
+  // Combining type units and cross-CU DIE sharing is of limited value (since
+  // cross-CU DIE sharing is used in LTO, which already removes type redundancy
+  // at that level), but it may still be worth implementing for projects that
+  // build multiple independent libraries with LTO and then link them together.
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return false;
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !DD->generateTypeUnits();
+}
+
+DIE *DwarfUnit::getDIE(const DINode *D) const {
+ if (isShareableAcrossCUs(D))
+ return DU->getDIE(D);
+ return MDNodeToDieMap.lookup(D);
+}
+
+void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
+ if (isShareableAcrossCUs(Desc)) {
+ DU->insertDIE(Desc, D);
+ return;
+ }
+ MDNodeToDieMap.insert(std::make_pair(Desc, D));
+}
+
+void DwarfUnit::insertDIE(DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(nullptr, D));
+}
+
+void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
+ if (DD->getDwarfVersion() >= 4)
+ addAttribute(Die, Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1));
+ else
+ addAttribute(Die, Attribute, dwarf::DW_FORM_flag, DIEInteger(1));
+}
+
+void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ std::optional<dwarf::Form> Form, uint64_t Integer) {
+ if (!Form)
+ Form = DIEInteger::BestForm(false, Integer);
+ assert(Form != dwarf::DW_FORM_implicit_const &&
+ "DW_FORM_implicit_const is used only for signed integers");
+ addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
+}
+
+void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
+ uint64_t Integer) {
+ addUInt(Block, (dwarf::Attribute)0, Form, Integer);
+}
+
+void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ std::optional<dwarf::Form> Form, int64_t Integer) {
+ if (!Form)
+ Form = DIEInteger::BestForm(true, Integer);
+ addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
+}
+
+void DwarfUnit::addSInt(DIELoc &Die, std::optional<dwarf::Form> Form,
+ int64_t Integer) {
+ addSInt(Die, (dwarf::Attribute)0, Form, Integer);
+}
+
+void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
+ StringRef String) {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
+ if (DD->useInlineStrings()) {
+ addAttribute(Die, Attribute, dwarf::DW_FORM_string,
+ new (DIEValueAllocator)
+ DIEInlineString(String, DIEValueAllocator));
+ return;
+ }
+ dwarf::Form IxForm =
+ isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+
+ auto StringPoolEntry =
+ useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
+ ? DU->getStringPool().getIndexedEntry(*Asm, String)
+ : DU->getStringPool().getEntry(*Asm, String);
+
+ // For DWARF v5 and beyond, use the smallest strx? form possible.
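+  // Per the checks below: an index up to 0xff fits DW_FORM_strx1, up to
+  // 0xffff DW_FORM_strx2, up to 0xffffff DW_FORM_strx3, and anything larger
+  // falls back to DW_FORM_strx4.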
+ if (useSegmentedStringOffsetsTable()) {
+ IxForm = dwarf::DW_FORM_strx1;
+ unsigned Index = StringPoolEntry.getIndex();
+ if (Index > 0xffffff)
+ IxForm = dwarf::DW_FORM_strx4;
+ else if (Index > 0xffff)
+ IxForm = dwarf::DW_FORM_strx3;
+ else if (Index > 0xff)
+ IxForm = dwarf::DW_FORM_strx2;
+ }
+ addAttribute(Die, Attribute, IxForm, DIEString(StringPoolEntry));
+}
+
+void DwarfUnit::addLabel(DIEValueList &Die, dwarf::Attribute Attribute,
+ dwarf::Form Form, const MCSymbol *Label) {
+ addAttribute(Die, Attribute, Form, DIELabel(Label));
+}
+
+void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
+ addLabel(Die, (dwarf::Attribute)0, Form, Label);
+}
+
+void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
+ uint64_t Integer) {
+ addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer);
+}
+
+unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
+ if (!SplitLineTable)
+ return getCU().getOrCreateSourceID(File);
+ if (!UsedLineTable) {
+ UsedLineTable = true;
+ // This is a split type unit that needs a line table.
+ addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
+ }
+ return SplitLineTable->getFile(
+ File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+ Asm->OutContext.getDwarfVersion(), File->getSource());
+}
+
+void DwarfUnit::addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label) {
+ bool UseAddrOffsetFormOrExpressions =
+ DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
+
+ const MCSymbol *Base = nullptr;
+ if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
+ Base = DD->getSectionLabel(&Label->getSection());
+
+ uint32_t Index = DD->getAddressPool().getIndex(Base ? Base : Label);
+
+ if (DD->getDwarfVersion() >= 5) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
+ addUInt(Die, dwarf::DW_FORM_addrx, Index);
+ } else {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ addUInt(Die, dwarf::DW_FORM_GNU_addr_index, Index);
+ }
+
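+  // When a section base is used, the location computed below is effectively
+  // addrx(Base) + (Label - Base), so many labels within one section can share
+  // a single address-pool entry.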
+ if (Base && Base != Label) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_const4u);
+ addLabelDelta(Die, (dwarf::Attribute)0, Label, Base);
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ }
+}
+
+void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
+ if (DD->getDwarfVersion() >= 5) {
+ addPoolOpAddress(Die, Sym);
+ return;
+ }
+
+ if (DD->useSplitDwarf()) {
+ addPoolOpAddress(Die, Sym);
+ return;
+ }
+
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, dwarf::DW_FORM_addr, Sym);
+}
+
+void DwarfUnit::addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ addAttribute(Die, Attribute, dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
+ addDIEEntry(Die, Attribute, DIEEntry(Entry));
+}
+
+void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
+ // Flag the type unit reference as a declaration so that if it contains
+ // members (implicit special members, static data member definitions, member
+ // declarations for definitions in this CU, etc) consumers don't get confused
+ // and think this is a full definition.
+ addFlag(Die, dwarf::DW_AT_declaration);
+
+ addAttribute(Die, dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
+ DIEInteger(Signature));
+}
+
+void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
+ DIEEntry Entry) {
+ const DIEUnit *CU = Die.getUnit();
+ const DIEUnit *EntryCU = Entry.getEntry().getUnit();
+ if (!CU)
+    // We assume that Die belongs to this CU if it is not linked to any CU yet.
+ CU = getUnitDie().getUnit();
+ if (!EntryCU)
+ EntryCU = getUnitDie().getUnit();
+ assert(EntryCU == CU || !DD->useSplitDwarf() || DD->shareAcrossDWOCUs() ||
+ !static_cast<const DwarfUnit*>(CU)->isDwoUnit());
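+  // A reference within the same unit can use the compact, unit-relative
+  // DW_FORM_ref4; a reference into a different unit needs the section-relative
+  // DW_FORM_ref_addr.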
+ addAttribute(Die, Attribute,
+ EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ Entry);
+}
+
+DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) {
+ DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, Tag));
+ if (N)
+ insertDIE(N, &Die);
+ return Die;
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
+ Loc->computeSize(Asm->getDwarfFormParams());
+ DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
+ addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
+ DIEBlock *Block) {
+ Block->computeSize(Asm->getDwarfFormParams());
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ addAttribute(Die, Attribute, Form, Block);
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
+ DIEBlock *Block) {
+ addBlock(Die, Attribute, Block->BestForm(), Block);
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
+ if (Line == 0)
+ return;
+
+ unsigned FileID = getOrCreateSourceID(File);
+ addUInt(Die, dwarf::DW_AT_decl_file, std::nullopt, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, std::nullopt, Line);
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
+ assert(V);
+
+ addSourceLine(Die, V->getLine(), V->getFile());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
+ assert(G);
+
+ addSourceLine(Die, G->getLine(), G->getFile());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
+ assert(SP);
+
+ addSourceLine(Die, SP->getLine(), SP->getFile());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) {
+ assert(L);
+
+ addSourceLine(Die, L->getLine(), L->getFile());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
+ assert(Ty);
+
+ addSourceLine(Die, Ty->getLine(), Ty->getFile());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
+ assert(Ty);
+
+ addSourceLine(Die, Ty->getLine(), Ty->getFile());
+}
+
+void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
+ // Pass this down to addConstantValue as an unsigned bag of bits.
+ addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
+ const DIType *Ty) {
+ addConstantValue(Die, CI->getValue(), Ty);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
+ addConstantValue(Die, DD->isUnsignedDIType(Ty), Val);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
+  // FIXME: This is a bit conservative/simple - it always emits negative values
+  // sign-extended to 64 bits rather than minimizing the number of bytes.
+ addUInt(Die, dwarf::DW_AT_const_value,
+ Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
+ addConstantValue(Die, Val, DD->isUnsignedDIType(Ty));
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
+ unsigned CIBitWidth = Val.getBitWidth();
+ if (CIBitWidth <= 64) {
+ addConstantValue(Die, Unsigned,
+ Unsigned ? Val.getZExtValue() : Val.getSExtValue());
+ return;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
+
+ // Get the raw data form of the large APInt.
+ const uint64_t *Ptr64 = Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+
+ // Output the constant to DWARF one byte at a time.
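+  // The raw APInt words are 64 bits wide, so byte i comes from word i / 8,
+  // shifted right by 8 * (i % 8) bits; big-endian targets walk the bytes in
+  // reverse so the block matches the target's memory order.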
+ for (int i = 0; i < NumBytes; i++) {
+ uint8_t c;
+ if (LittleEndian)
+ c = Ptr64[i / 8] >> (8 * (i & 7));
+ else
+ c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
+ addUInt(*Block, dwarf::DW_FORM_data1, c);
+ }
+
+ addBlock(Die, dwarf::DW_AT_const_value, Block);
+}
+
+void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
+ if (!LinkageName.empty())
+ addString(Die,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::dropLLVMManglingEscape(LinkageName));
+}
+
+void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
+ // Add template parameters.
+ for (const auto *Element : TParams) {
+ if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element))
+ constructTemplateTypeParameterDIE(Buffer, TTP);
+ else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element))
+ constructTemplateValueParameterDIE(Buffer, TVP);
+ }
+}
+
+/// Add thrown types.
+void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
+ for (const auto *Ty : ThrownTypes) {
+ DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die);
+ addType(TT, cast<DIType>(Ty));
+ }
+}
+
+void DwarfUnit::addAccess(DIE &Die, DINode::DIFlags Flags) {
+ if ((Flags & DINode::FlagAccessibility) == DINode::FlagProtected)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPrivate)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPublic)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+}
+
+DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
+ if (!Context || isa<DIFile>(Context) || isa<DICompileUnit>(Context))
+ return &getUnitDie();
+ if (auto *T = dyn_cast<DIType>(Context))
+ return getOrCreateTypeDIE(T);
+ if (auto *NS = dyn_cast<DINamespace>(Context))
+ return getOrCreateNameSpace(NS);
+ if (auto *SP = dyn_cast<DISubprogram>(Context))
+ return getOrCreateSubprogramDIE(SP);
+ if (auto *M = dyn_cast<DIModule>(Context))
+ return getOrCreateModule(M);
+ return getDIE(Context);
+}
+
+DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+ auto *Context = Ty->getScope();
+ DIE *ContextDIE = getOrCreateContextDIE(Context);
+
+ if (DIE *TyDIE = getDIE(Ty))
+ return TyDIE;
+
+ // Create new type.
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
+
+ constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
+
+ updateAcceleratorTables(Context, Ty, TyDIE);
+ return &TyDIE;
+}
+
+DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
+ const DIType *Ty) {
+ // Create new type.
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
+
+ updateAcceleratorTables(Context, Ty, TyDIE);
+
+ if (auto *BT = dyn_cast<DIBasicType>(Ty))
+ constructTypeDIE(TyDIE, BT);
+ else if (auto *ST = dyn_cast<DIStringType>(Ty))
+ constructTypeDIE(TyDIE, ST);
+ else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+ constructTypeDIE(TyDIE, STy);
+ else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
+ (Ty->getRawName() || CTy->getRawIdentifier())) {
+ // Skip updating the accelerator tables since this is not the full type.
+ if (MDString *TypeId = CTy->getRawIdentifier())
+ DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
+ else
+ finishNonUnitTypeDIE(TyDIE, CTy);
+ return &TyDIE;
+ }
+ constructTypeDIE(TyDIE, CTy);
+ } else {
+ constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
+ }
+
+ return &TyDIE;
+}
+
+DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+ if (!TyNode)
+ return nullptr;
+
+ auto *Ty = cast<DIType>(TyNode);
+
+ // DW_TAG_restrict_type is not supported in DWARF2
+ if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
+ return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
+
+ // DW_TAG_atomic_type is not supported in DWARF < 5
+ if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
+ return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
+
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ auto *Context = Ty->getScope();
+ DIE *ContextDIE = getOrCreateContextDIE(Context);
+ assert(ContextDIE);
+
+ if (DIE *TyDIE = getDIE(Ty))
+ return TyDIE;
+
+ return static_cast<DwarfUnit *>(ContextDIE->getUnit())
+ ->createTypeDIE(Context, *ContextDIE, Ty);
+}
+
+void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
+ const DIType *Ty, const DIE &TyDIE) {
+ if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
+ bool IsImplementation = false;
+ if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
+      // A runtime language of 0 actually means C/C++, and any non-negative
+      // value is some version of Objective-C/C++.
+ IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
+ DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
+
+ if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+ isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
+ addGlobalType(Ty, TyDIE, Context);
+ }
+}
+
+void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
+ dwarf::Attribute Attribute) {
+ assert(Ty && "Trying to add a type that doesn't exist?");
+ addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
+}
+
+std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
+ if (!Context)
+ return "";
+
+ // FIXME: Decide whether to implement this for non-C++ languages.
+ if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
+ return "";
+
+ std::string CS;
+ SmallVector<const DIScope *, 1> Parents;
+ while (!isa<DICompileUnit>(Context)) {
+ Parents.push_back(Context);
+ if (const DIScope *S = Context->getScope())
+ Context = S;
+ else
+ // Structure, etc types will have a NULL context if they're at the top
+ // level.
+ break;
+ }
+
+ // Reverse iterate over our list to go from the outermost construct to the
+ // innermost.
+ for (const DIScope *Ctx : llvm::reverse(Parents)) {
+ StringRef Name = Ctx->getName();
+ if (Name.empty() && isa<DINamespace>(Ctx))
+ Name = "(anonymous namespace)";
+ if (!Name.empty()) {
+ CS += Name;
+ CS += "::";
+ }
+ }
+ return CS;
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
+ // Get core information.
+ StringRef Name = BTy->getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ // An unspecified type only has a name attribute.
+ if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
+ return;
+
+ if (BTy->getTag() != dwarf::DW_TAG_string_type)
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy->getEncoding());
+
+ uint64_t Size = BTy->getSizeInBits() >> 3;
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
+
+ if (BTy->isBigEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_big);
+ else if (BTy->isLittleEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_little);
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
+ // Get core information.
+ StringRef Name = STy->getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ if (DIVariable *Var = STy->getStringLength()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_string_length, *VarDIE);
+ } else if (DIExpression *Expr = STy->getStringLengthExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ // This is to describe the memory location of the
+ // length of a Fortran deferred length string, so
+ // lock it down as such.
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize());
+ } else {
+ uint64_t Size = STy->getSizeInBits() >> 3;
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
+ }
+
+ if (DIExpression *Expr = STy->getStringLocationExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ // This is to describe the memory location of the
+ // string, so lock it down as such.
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
+ }
+
+ if (STy->getEncoding()) {
+ // For eventual Unicode support.
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ STy->getEncoding());
+ }
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
+ // Get core information.
+ StringRef Name = DTy->getName();
+ uint64_t Size = DTy->getSizeInBits() >> 3;
+ uint16_t Tag = Buffer.getTag();
+
+ // Map to main type, void will not have a type.
+ const DIType *FromTy = DTy->getBaseType();
+ if (FromTy)
+ addType(Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ addAnnotation(Buffer, DTy->getAnnotations());
+
+  // If alignment is specified for a typedef, create and insert the
+  // DW_AT_alignment attribute in the DW_TAG_typedef DIE.
+ if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
+ uint32_t AlignInBytes = DTy->getAlignInBytes();
+ if (AlignInBytes > 0)
+ addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+ }
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type
+ && Tag != dwarf::DW_TAG_ptr_to_member_type
+ && Tag != dwarf::DW_TAG_reference_type
+ && Tag != dwarf::DW_TAG_rvalue_reference_type)
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
+
+ if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
+
+ addAccess(Buffer, DTy->getFlags());
+
+  // Add source line info if available and the type is not a forward declaration.
+ if (!DTy->isForwardDecl())
+ addSourceLine(Buffer, DTy);
+
+ // If DWARF address space value is other than None, add it. The IR
+ // verifier checks that DWARF address space only exists for pointer
+ // or reference types.
+ if (DTy->getDWARFAddressSpace())
+ addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
+ *DTy->getDWARFAddressSpace());
+}
+
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
+ for (unsigned i = 1, N = Args.size(); i < N; ++i) {
+ const DIType *Ty = Args[i];
+ if (!Ty) {
+ assert(i == N-1 && "Unspecified parameter must be the last argument");
+ createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
+ } else {
+ DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
+ addType(Arg, Ty);
+ if (Ty->isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
+ }
+ }
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
+ // Add return type. A void return won't have a type.
+ auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
+ if (Elements.size())
+ if (auto RTy = Elements[0])
+ addType(Buffer, RTy);
+
+ bool isPrototyped = true;
+ if (Elements.size() == 2 && !Elements[1])
+ isPrototyped = false;
+
+ constructSubprogramArguments(Buffer, Elements);
+
+ // Add prototype flag if we're dealing with a C language and the function has
+ // been prototyped.
+ if (isPrototyped && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
+ addFlag(Buffer, dwarf::DW_AT_prototyped);
+
+ // Add a DW_AT_calling_convention if this has an explicit convention.
+ if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal)
+ addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
+ CTy->getCC());
+
+ if (CTy->isLValueReference())
+ addFlag(Buffer, dwarf::DW_AT_reference);
+
+ if (CTy->isRValueReference())
+ addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
+}
+
+void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) {
+ if (!Annotations)
+ return;
+
+ for (const Metadata *Annotation : Annotations->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotation);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+ const auto &Value = MD->getOperand(1);
+
+ DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer);
+ addString(AnnotationDie, dwarf::DW_AT_name, Name->getString());
+ if (const auto *Data = dyn_cast<MDString>(Value))
+ addString(AnnotationDie, dwarf::DW_AT_const_value, Data->getString());
+ else if (const auto *Data = dyn_cast<ConstantAsMetadata>(Value))
+ addConstantValue(AnnotationDie, Data->getValue()->getUniqueInteger(),
+ /*Unsigned=*/true);
+ else
+ assert(false && "Unsupported annotation value type");
+ }
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ // Add name if not anonymous or intermediate type.
+ StringRef Name = CTy->getName();
+
+ uint64_t Size = CTy->getSizeInBits() >> 3;
+ uint16_t Tag = Buffer.getTag();
+
+ switch (Tag) {
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type:
+ constructEnumTypeDIE(Buffer, CTy);
+ break;
+ case dwarf::DW_TAG_variant_part:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_namelist: {
+ // Emit the discriminator for a variant part.
+ DIDerivedType *Discriminator = nullptr;
+ if (Tag == dwarf::DW_TAG_variant_part) {
+ Discriminator = CTy->getDiscriminator();
+ if (Discriminator) {
+ // DWARF says:
+ // If the variant part has a discriminant, the discriminant is
+ // represented by a separate debugging information entry which is
+ // a child of the variant part entry.
+ DIE &DiscMember = constructMemberDIE(Buffer, Discriminator);
+ addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember);
+ }
+ }
+
+ // Add template parameters to class, structure, and union types.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ addTemplateParams(Buffer, CTy->getTemplateParams());
+
+ // Add elements to structure type.
+ DINodeArray Elements = CTy->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<DISubprogram>(Element))
+ getOrCreateSubprogramDIE(SP);
+ else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_friend) {
+ DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
+ addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend);
+ } else if (DDTy->isStaticMember()) {
+ getOrCreateStaticMemberDIE(DDTy);
+ } else if (Tag == dwarf::DW_TAG_variant_part) {
+ // When emitting a variant part, wrap each member in
+ // DW_TAG_variant.
+ DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
+ if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
+ if (DD->isUnsignedDIType(Discriminator->getBaseType()))
+ addUInt(Variant, dwarf::DW_AT_discr_value, std::nullopt,
+ CI->getZExtValue());
+ else
+ addSInt(Variant, dwarf::DW_AT_discr_value, std::nullopt,
+ CI->getSExtValue());
+ }
+ constructMemberDIE(Variant, DDTy);
+ } else {
+ constructMemberDIE(Buffer, DDTy);
+ }
+ } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) {
+ DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
+ StringRef PropertyName = Property->getName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ if (Property->getType())
+ addType(ElemDie, Property->getType());
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property->getGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property->getSetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ if (unsigned PropertyAttributes = Property->getAttributes())
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, std::nullopt,
+ PropertyAttributes);
+ } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
+ if (Composite->getTag() == dwarf::DW_TAG_variant_part) {
+ DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
+ constructTypeDIE(VariantPart, Composite);
+ }
+ } else if (Tag == dwarf::DW_TAG_namelist) {
+ auto *Var = dyn_cast<DINode>(Element);
+ auto *VarDIE = getDIE(Var);
+ if (VarDIE) {
+ DIE &ItemDie = createAndAddDIE(dwarf::DW_TAG_namelist_item, Buffer);
+ addDIEEntry(ItemDie, dwarf::DW_AT_namelist_item, *VarDIE);
+ }
+ }
+ }
+
+ if (CTy->isAppleBlockExtension())
+ addFlag(Buffer, dwarf::DW_AT_APPLE_block);
+
+ if (CTy->getExportSymbols())
+ addFlag(Buffer, dwarf::DW_AT_export_symbols);
+
+ // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
+ // inside C++ composite types to point to the base class with the vtable.
+ // Rust uses DW_AT_containing_type to link a vtable to the type
+ // for which it was created.
+ if (auto *ContainingType = CTy->getVTableHolder())
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(ContainingType));
+
+ if (CTy->isObjcClassComplete())
+ addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
+
+ // Add the type's non-standard calling convention.
+ // DW_CC_pass_by_value/DW_CC_pass_by_reference are introduced in DWARF 5.
+ if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 5) {
+ uint8_t CC = 0;
+ if (CTy->isTypePassByValue())
+ CC = dwarf::DW_CC_pass_by_value;
+ else if (CTy->isTypePassByReference())
+ CC = dwarf::DW_CC_pass_by_reference;
+ if (CC)
+ addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
+ CC);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ addAnnotation(Buffer, CTy->getAnnotations());
+
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (derived types might be zero-sized.)
+ // Ignore the size if it's a non-enum forward decl.
+ // TODO: Do we care about size for enum forward declarations?
+ if (Size &&
+ (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type))
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
+ else if (!CTy->isForwardDecl())
+ // Add zero size if it is not a forward declaration.
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, 0);
+
+ // If we're a forward decl, say so.
+ if (CTy->isForwardDecl())
+ addFlag(Buffer, dwarf::DW_AT_declaration);
+
+ // Add accessibility info if available.
+ addAccess(Buffer, CTy->getFlags());
+
+ // Add source line info if available.
+ if (!CTy->isForwardDecl())
+ addSourceLine(Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy->getRuntimeLang();
+ if (RLang)
+ addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
+ RLang);
+
+ // Add align info if available.
+ if (uint32_t AlignInBytes = CTy->getAlignInBytes())
+ addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+ }
+}
+
+void DwarfUnit::constructTemplateTypeParameterDIE(
+ DIE &Buffer, const DITemplateTypeParameter *TP) {
+ DIE &ParamDIE =
+ createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
+ // Add the type if it exists, it could be void and therefore no type.
+ if (TP->getType())
+ addType(ParamDIE, TP->getType());
+ if (!TP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
+ if (TP->isDefault() && isCompatibleWithVersion(5))
+ addFlag(ParamDIE, dwarf::DW_AT_default_value);
+}
+
+void DwarfUnit::constructTemplateValueParameterDIE(
+ DIE &Buffer, const DITemplateValueParameter *VP) {
+ DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer);
+
+ // Add the type if there is one; template template parameters and template
+ // parameter packs will not have a type.
+ if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
+ addType(ParamDIE, VP->getType());
+ if (!VP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
+ if (VP->isDefault() && isCompatibleWithVersion(5))
+ addFlag(ParamDIE, dwarf::DW_AT_default_value);
+ if (Metadata *Val = VP->getValue()) {
+ if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
+ addConstantValue(ParamDIE, CI, VP->getType());
+ else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
+ // We cannot describe the location of dllimport'd entities: the
+ // computation of their address requires loads from the IAT.
+ if (!GV->hasDLLImportStorageClass()) {
+ // For declaration non-type template parameters (such as global values
+ // and functions), emit the address of the entity as the value.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ addOpAddress(*Loc, Asm->getSymbol(GV));
+ // Emit DW_OP_stack_value to use the address as the immediate value of
+ // the parameter, rather than a pointer to it.
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
+ }
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ assert(isa<MDString>(Val));
+ addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
+ cast<MDString>(Val)->getString());
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ addTemplateParams(ParamDIE, cast<MDTuple>(Val));
+ }
+ }
+}
+
+DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(NS->getScope());
+
+ if (DIE *NDie = getDIE(NS))
+ return NDie;
+ DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
+
+ StringRef Name = NS->getName();
+ if (!Name.empty())
+ addString(NDie, dwarf::DW_AT_name, NS->getName());
+ else
+ Name = "(anonymous namespace)";
+ DD->addAccelNamespace(*CUNode, Name, NDie);
+ addGlobalName(Name, NDie, NS->getScope());
+ if (NS->getExportSymbols())
+ addFlag(NDie, dwarf::DW_AT_export_symbols);
+ return &NDie;
+}
+
+DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(M->getScope());
+
+ if (DIE *MDie = getDIE(M))
+ return MDie;
+ DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M);
+
+ if (!M->getName().empty()) {
+ addString(MDie, dwarf::DW_AT_name, M->getName());
+ addGlobalName(M->getName(), MDie, M->getScope());
+ }
+ if (!M->getConfigurationMacros().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_config_macros,
+ M->getConfigurationMacros());
+ if (!M->getIncludePath().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
+ if (!M->getAPINotesFile().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile());
+ if (M->getFile())
+ addUInt(MDie, dwarf::DW_AT_decl_file, std::nullopt,
+ getOrCreateSourceID(M->getFile()));
+ if (M->getLineNo())
+ addUInt(MDie, dwarf::DW_AT_decl_line, std::nullopt, M->getLineNo());
+ if (M->getIsDecl())
+ addFlag(MDie, dwarf::DW_AT_declaration);
+
+ return &MDie;
+}
+
+DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE (as is the case for member function
+ // declarations).
+ DIE *ContextDIE =
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
+
+ if (DIE *SPDie = getDIE(SP))
+ return SPDie;
+
+ if (auto *SPDecl = SP->getDeclaration()) {
+ if (!Minimal) {
+ // Add subprogram definitions to the CU die directly.
+ ContextDIE = &getUnitDie();
+ // Build the decl now to ensure it precedes the definition.
+ getOrCreateSubprogramDIE(SPDecl);
+ }
+ }
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
+
+ // Stop here and fill this in later, depending on whether or not this
+ // subprogram turns out to have inlined instances.
+ if (SP->isDefinition())
+ return &SPDie;
+
+ static_cast<DwarfUnit *>(SPDie.getUnit())
+ ->applySubprogramAttributes(SP, SPDie);
+ return &SPDie;
+}
+
+bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
+ DIE &SPDie, bool Minimal) {
+ DIE *DeclDie = nullptr;
+ StringRef DeclLinkageName;
+ if (auto *SPDecl = SP->getDeclaration()) {
+ if (!Minimal) {
+ DITypeRefArray DeclArgs, DefinitionArgs;
+ DeclArgs = SPDecl->getType()->getTypeArray();
+ DefinitionArgs = SP->getType()->getTypeArray();
+
+ if (DeclArgs.size() && DefinitionArgs.size())
+ if (DefinitionArgs[0] != nullptr && DeclArgs[0] != DefinitionArgs[0])
+ addType(SPDie, DefinitionArgs[0]);
+
+ DeclDie = getDIE(SPDecl);
+ assert(DeclDie && "This DIE should've already been constructed when the "
+ "definition DIE was created in "
+ "getOrCreateSubprogramDIE");
+ // Look at the Decl's linkage name only if we emitted it.
+ if (DD->useAllLinkageNames())
+ DeclLinkageName = SPDecl->getLinkageName();
+ unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
+ unsigned DefID = getOrCreateSourceID(SP->getFile());
+ if (DeclID != DefID)
+ addUInt(SPDie, dwarf::DW_AT_decl_file, std::nullopt, DefID);
+
+ if (SP->getLine() != SPDecl->getLine())
+ addUInt(SPDie, dwarf::DW_AT_decl_line, std::nullopt, SP->getLine());
+ }
+ }
+
+ // Add function template parameters.
+ addTemplateParams(SPDie, SP->getTemplateParams());
+
+ // Add the linkage name if we have one and it isn't in the Decl.
+ StringRef LinkageName = SP->getLinkageName();
+ assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
+ LinkageName == DeclLinkageName) &&
+ "decl has a linkage name and it is different");
+ if (DeclLinkageName.empty() &&
+ // Always emit it for abstract subprograms.
+ (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP)))
+ addLinkageName(SPDie, LinkageName);
+
+ if (!DeclDie)
+ return false;
+
+ // Refer to the function declaration where all the other attributes will be
+ // found.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
+ return true;
+}
+
+void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
+ bool SkipSPAttributes) {
+ // If -fdebug-info-for-profiling is enabled, we need to emit the subprogram
+ // and its source location.
+ bool SkipSPSourceLocation = SkipSPAttributes &&
+ !CUNode->getDebugInfoForProfiling();
+ if (!SkipSPSourceLocation)
+ if (applySubprogramDefinitionAttributes(SP, SPDie, SkipSPAttributes))
+ return;
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP->getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, SP->getName());
+
+ addAnnotation(SPDie, SP->getAnnotations());
+
+ if (!SkipSPSourceLocation)
+ addSourceLine(SPDie, SP);
+
+ // Skip the rest of the attributes under -gmlt to save space.
+ if (SkipSPAttributes)
+ return;
+
+ // Add the prototype flag if we have a prototype and the language is
+ // C-like.
+ if (SP->isPrototyped() && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
+ addFlag(SPDie, dwarf::DW_AT_prototyped);
+
+ if (SP->isObjCDirect())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct);
+
+ unsigned CC = 0;
+ DITypeRefArray Args;
+ if (const DISubroutineType *SPTy = SP->getType()) {
+ Args = SPTy->getTypeArray();
+ CC = SPTy->getCC();
+ }
+
+ // Add a DW_AT_calling_convention if this has an explicit convention.
+ if (CC && CC != dwarf::DW_CC_normal)
+ addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC);
+
+ // Add a return type. A C/C++-style void return has no type, so nothing is
+ // added in that case.
+ if (Args.size())
+ if (auto Ty = Args[0])
+ addType(SPDie, Ty);
+
+ unsigned VK = SP->getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
+ if (SP->getVirtualIndex() != -1u) {
+ DIELoc *Block = getDIELoc();
+ addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
+ }
+ ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType()));
+ }
+
+ if (!SP->isDefinition()) {
+ addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments. We do not add arguments for a subprogram definition; they
+ // are handled while processing variables.
+ constructSubprogramArguments(SPDie, Args);
+ }
+
+ addThrownTypes(SPDie, SP->getThrownTypes());
+
+ if (SP->isArtificial())
+ addFlag(SPDie, dwarf::DW_AT_artificial);
+
+ if (!SP->isLocalToUnit())
+ addFlag(SPDie, dwarf::DW_AT_external);
+
+ if (DD->useAppleExtensionAttributes()) {
+ if (SP->isOptimized())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
+
+ if (unsigned isa = Asm->getISAEncoding())
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ if (SP->isLValueReference())
+ addFlag(SPDie, dwarf::DW_AT_reference);
+
+ if (SP->isRValueReference())
+ addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
+
+ if (SP->isNoReturn())
+ addFlag(SPDie, dwarf::DW_AT_noreturn);
+
+ addAccess(SPDie, SP->getFlags());
+
+ if (SP->isExplicit())
+ addFlag(SPDie, dwarf::DW_AT_explicit);
+
+ if (SP->isMainSubprogram())
+ addFlag(SPDie, dwarf::DW_AT_main_subprogram);
+ if (SP->isPure())
+ addFlag(SPDie, dwarf::DW_AT_pure);
+ if (SP->isElemental())
+ addFlag(SPDie, dwarf::DW_AT_elemental);
+ if (SP->isRecursive())
+ addFlag(SPDie, dwarf::DW_AT_recursive);
+
+ if (!SP->getTargetFuncName().empty())
+ addString(SPDie, dwarf::DW_AT_trampoline, SP->getTargetFuncName());
+
+ if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
+ addFlag(SPDie, dwarf::DW_AT_deleted);
+}
+
+void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
+ DIE *IndexTy) {
+ DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
+
+ // The LowerBound value defines the lower bound, which is typically zero for
+ // C/C++. The Count value is the number of elements. Values are 64 bit. If
+ // Count == -1 then the array is unbounded and we do not emit
+ // DW_AT_lower_bound and DW_AT_count attributes.
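+ // For example, for a C array declared as int a[10] the front end typically
+ // provides a constant count of 10 and no lower bound beyond the language
+ // default, so only DW_AT_count is emitted for the subrange below.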
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+
+ auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
+ DISubrange::BoundType Bound) -> void {
+ if (auto *BV = dyn_cast_if_present<DIVariable *>(Bound)) {
+ if (auto *VarDIE = getDIE(BV))
+ addDIEEntry(DW_Subrange, Attr, *VarDIE);
+ } else if (auto *BE = dyn_cast_if_present<DIExpression *>(Bound)) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(BE);
+ addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
+ } else if (auto *BI = dyn_cast_if_present<ConstantInt *>(Bound)) {
+ if (Attr == dwarf::DW_AT_count) {
+ if (BI->getSExtValue() != -1)
+ addUInt(DW_Subrange, Attr, std::nullopt, BI->getSExtValue());
+ } else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
+ BI->getSExtValue() != DefaultLowerBound)
+ addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue());
+ }
+ };
+
+ AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
+
+ AddBoundTypeEntry(dwarf::DW_AT_count, SR->getCount());
+
+ AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
+
+ AddBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
+}
+
+void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
+ const DIGenericSubrange *GSR,
+ DIE *IndexTy) {
+ DIE &DwGenericSubrange =
+ createAndAddDIE(dwarf::DW_TAG_generic_subrange, Buffer);
+ addDIEEntry(DwGenericSubrange, dwarf::DW_AT_type, *IndexTy);
+
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+
+ auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
+ DIGenericSubrange::BoundType Bound) -> void {
+ if (auto *BV = dyn_cast_if_present<DIVariable *>(Bound)) {
+ if (auto *VarDIE = getDIE(BV))
+ addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
+ } else if (auto *BE = dyn_cast_if_present<DIExpression *>(Bound)) {
+ if (BE->isConstant() &&
+ DIExpression::SignedOrUnsignedConstant::SignedConstant ==
+ *BE->isConstant()) {
+ if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
+ static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound)
+ addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata,
+ BE->getElement(1));
+ } else {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(BE);
+ addBlock(DwGenericSubrange, Attr, DwarfExpr.finalize());
+ }
+ }
+ };
+
+ AddBoundTypeEntry(dwarf::DW_AT_lower_bound, GSR->getLowerBound());
+ AddBoundTypeEntry(dwarf::DW_AT_count, GSR->getCount());
+ AddBoundTypeEntry(dwarf::DW_AT_upper_bound, GSR->getUpperBound());
+ AddBoundTypeEntry(dwarf::DW_AT_byte_stride, GSR->getStride());
+}
+
+DIE *DwarfUnit::getIndexTyDie() {
+ if (IndexTyDie)
+ return IndexTyDie;
+ // Construct an integer type to use for indexes.
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
+ StringRef Name = "__ARRAY_SIZE_TYPE__";
+ addString(*IndexTyDie, dwarf::DW_AT_name, Name);
+ addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, std::nullopt, sizeof(int64_t));
+ addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::getArrayIndexTypeEncoding(
+ (dwarf::SourceLanguage)getLanguage()));
+ DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
+ return IndexTyDie;
+}
+
+/// Returns true if the vector's size differs from the sum of sizes of elements
+/// the user specified. This can occur if the vector has been rounded up to
+/// fit memory alignment constraints.
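+/// For example, a vector of three 32-bit floats that the target rounds up to
+/// 128 bits reports ActualSize = 128 while NumVecElements * ElementSize = 96,
+/// so it is considered padded.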
+static bool hasVectorBeenPadded(const DICompositeType *CTy) {
+ assert(CTy && CTy->isVector() && "Composite type is not a vector");
+ const uint64_t ActualSize = CTy->getSizeInBits();
+
+ // Obtain the size of each element in the vector.
+ DIType *BaseTy = CTy->getBaseType();
+ assert(BaseTy && "Unknown vector element type.");
+ const uint64_t ElementSize = BaseTy->getSizeInBits();
+
+ // Locate the number of elements in the vector.
+ const DINodeArray Elements = CTy->getElements();
+ assert(Elements.size() == 1 &&
+ Elements[0]->getTag() == dwarf::DW_TAG_subrange_type &&
+ "Invalid vector element array, expected one element of type subrange");
+ const auto Subrange = cast<DISubrange>(Elements[0]);
+ const auto NumVecElements =
+ Subrange->getCount()
+ ? cast<ConstantInt *>(Subrange->getCount())->getSExtValue()
+ : 0;
+
+ // Ensure we found the element count and that the actual size is wide
+ // enough to contain the requested size.
+ assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size");
+ return ActualSize != (NumVecElements * ElementSize);
+}
+
+void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ if (CTy->isVector()) {
+ addFlag(Buffer, dwarf::DW_AT_GNU_vector);
+ if (hasVectorBeenPadded(CTy))
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt,
+ CTy->getSizeInBits() / CHAR_BIT);
+ }
+
+ if (DIVariable *Var = CTy->getDataLocation()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_data_location, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getDataLocationExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
+ }
+
+ if (DIVariable *Var = CTy->getAssociated()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_associated, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getAssociatedExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_associated, DwarfExpr.finalize());
+ }
+
+ if (DIVariable *Var = CTy->getAllocated()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_allocated, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getAllocatedExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_allocated, DwarfExpr.finalize());
+ }
+
+ if (auto *RankConst = CTy->getRankConst()) {
+ addSInt(Buffer, dwarf::DW_AT_rank, dwarf::DW_FORM_sdata,
+ RankConst->getSExtValue());
+ } else if (auto *RankExpr = CTy->getRankExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(RankExpr);
+ addBlock(Buffer, dwarf::DW_AT_rank, DwarfExpr.finalize());
+ }
+
+ // Emit the element type.
+ addType(Buffer, CTy->getBaseType());
+
+ // Get an anonymous type for index type.
+ // FIXME: This type should be passed down from the front end
+ // as different languages may have different sizes for indexes.
+ DIE *IdxTy = getIndexTyDie();
+
+ // Add subranges to array type.
+ DINodeArray Elements = CTy->getElements();
+ for (DINode *E : Elements) {
+ // FIXME: Should this really be such a loose cast?
+ if (auto *Element = dyn_cast_or_null<DINode>(E)) {
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
+ else if (Element->getTag() == dwarf::DW_TAG_generic_subrange)
+ constructGenericSubrangeDIE(Buffer, cast<DIGenericSubrange>(Element),
+ IdxTy);
+ }
+ }
+}
+
+void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ const DIType *DTy = CTy->getBaseType();
+ bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy);
+ if (DTy) {
+ if (DD->getDwarfVersion() >= 3)
+ addType(Buffer, DTy);
+ if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
+ addFlag(Buffer, dwarf::DW_AT_enum_class);
+ }
+
+ auto *Context = CTy->getScope();
+ bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+ isa<DINamespace>(Context) || isa<DICommonBlock>(Context);
+ DINodeArray Elements = CTy->getElements();
+
+ // Add enumerators to enumeration type.
+ for (const DINode *E : Elements) {
+ auto *Enum = dyn_cast_or_null<DIEnumerator>(E);
+ if (Enum) {
+ DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+ StringRef Name = Enum->getName();
+ addString(Enumerator, dwarf::DW_AT_name, Name);
+ addConstantValue(Enumerator, Enum->getValue(), IsUnsigned);
+ if (IndexEnumerators)
+ addGlobalName(Name, Enumerator, Context);
+ }
+ }
+}
+
+void DwarfUnit::constructContainingTypeDIEs() {
+ for (auto &P : ContainingTypeMap) {
+ DIE &SPDie = *P.first;
+ const DINode *D = P.second;
+ if (!D)
+ continue;
+ DIE *NDie = getDIE(D);
+ if (!NDie)
+ continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie);
+ }
+}
+
+DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
+ DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
+ StringRef Name = DT->getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, Name);
+
+ addAnnotation(MemberDie, DT->getAnnotations());
+
+ if (DIType *Resolved = DT->getBaseType())
+ addType(MemberDie, Resolved);
+
+ addSourceLine(MemberDie, DT);
+
+ if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the
+ // following expression to extract the appropriate offset from the vtable:
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
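+ //
+ // Read as DWARF stack operations: DW_OP_dup and DW_OP_deref leave
+ // [ObAddr, *ObAddr] (the vtable pointer); DW_OP_constu Offset and
+ // DW_OP_minus compute the slot holding the virtual base offset; the second
+ // DW_OP_deref loads that offset; DW_OP_plus adds it to ObAddr.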
+
+ DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc;
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits());
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
+ } else {
+ uint64_t Size = DT->getSizeInBits();
+ uint64_t FieldSize = DD->getBaseTypeSize(DT);
+ uint32_t AlignInBytes = DT->getAlignInBytes();
+ uint64_t OffsetInBytes;
+
+ bool IsBitfield = DT->isBitField();
+ if (IsBitfield) {
+ // Handle bitfields; assume bytes are 8 bits.
+ if (DD->useDWARF2Bitfields())
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size);
+
+ uint64_t Offset = DT->getOffsetInBits();
+ // We can't use DT->getAlignInBits() here: AlignInBits for member type
+ // is non-zero if and only if alignment was forced (e.g. _Alignas()),
+ // which can't be done with bitfields. Thus we use FieldSize here.
+ uint32_t AlignInBits = FieldSize;
+ uint32_t AlignMask = ~(AlignInBits - 1);
+ // The bits from the start of the storage unit to the start of the field.
+ uint64_t StartBitOffset = Offset - (Offset & AlignMask);
+ // The byte offset of the field's aligned storage unit inside the struct.
+ OffsetInBytes = (Offset - StartBitOffset) / 8;
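+ // Worked example with hypothetical values: a 3-bit field at Offset = 13 in
+ // a 32-bit storage unit gives AlignInBits = 32, StartBitOffset = 13 and
+ // OffsetInBytes = 0. The DWARF 4+ path emits DW_AT_data_bit_offset = 13,
+ // while the DWARF 2/3 path below emits DW_AT_bit_offset = 16 on a
+ // little-endian target (13 on big-endian).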
+
+ if (DD->useDWARF2Bitfields()) {
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getDataLayout().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, Offset);
+ OffsetInBytes = FieldOffset >> 3;
+ } else {
+ addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset);
+ }
+ } else {
+ // This is not a bitfield.
+ OffsetInBytes = DT->getOffsetInBits() / 8;
+ if (AlignInBytes)
+ addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+ }
+
+ if (DD->getDwarfVersion() <= 2) {
+ DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
+ addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
+ } else if (!IsBitfield || DD->useDWARF2Bitfields()) {
+ // In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are
+ // interpreted as location-list pointers. Interpreting constants as
+ // pointers is not expected, so we use DW_FORM_udata to encode the
+ // constants here.
+ if (DD->getDwarfVersion() == 3)
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location,
+ dwarf::DW_FORM_udata, OffsetInBytes);
+ else
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location, std::nullopt,
+ OffsetInBytes);
+ }
+ }
+
+ addAccess(MemberDie, DT->getFlags());
+
+ if (DT->isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ if (DINode *PNode = DT->getObjCProperty())
+ if (DIE *PDie = getDIE(PNode))
+ addAttribute(MemberDie, dwarf::DW_AT_APPLE_property,
+ dwarf::DW_FORM_ref4, DIEEntry(*PDie));
+
+ if (DT->isArtificial())
+ addFlag(MemberDie, dwarf::DW_AT_artificial);
+
+ return MemberDie;
+}
+
+DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
+ if (!DT)
+ return nullptr;
+
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(DT->getScope());
+ assert(dwarf::isType(ContextDIE->getTag()) &&
+ "Static member should belong to a type.");
+
+ if (DIE *StaticMemberDIE = getDIE(DT))
+ return StaticMemberDIE;
+
+ DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
+
+ const DIType *Ty = DT->getBaseType();
+
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
+ addType(StaticMemberDIE, Ty);
+ addSourceLine(StaticMemberDIE, DT);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+ // FIXME: We could omit private if the parent is a class_type, and
+ // public if the parent is something else.
+ addAccess(StaticMemberDIE, DT->getFlags());
+
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
+ addConstantValue(StaticMemberDIE, CI, Ty);
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
+ addConstantFPValue(StaticMemberDIE, CFP);
+
+ if (uint32_t AlignInBytes = DT->getAlignInBytes())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
+ return &StaticMemberDIE;
+}
+
+void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
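+ // For a 32-bit format DWARF v5 unit, the header emitted here is, e.g.:
+ // unit length (4 bytes), version = 5 (2 bytes), unit type (1 byte),
+ // address size (1 byte), offset into .debug_abbrev (4 bytes). For DWARF v4
+ // and earlier, the address size instead follows the abbreviation offset.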
+ // Emit the size of the content, not including the length field itself.
+ if (!DD->useSectionsAsReferences())
+ EndLabel = Asm->emitDwarfUnitLength(
+ isDwoUnit() ? "debug_info_dwo" : "debug_info", "Length of Unit");
+ else
+ Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(),
+ "Length of Unit");
+
+ Asm->OutStreamer->AddComment("DWARF version number");
+ unsigned Version = DD->getDwarfVersion();
+ Asm->emitInt16(Version);
+
+ // DWARF v5 reorders the address size and adds a unit type.
+ if (Version >= 5) {
+ Asm->OutStreamer->AddComment("DWARF Unit Type");
+ Asm->emitInt8(UT);
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
+ }
+
+ // We share one abbreviations table across all units so it's always at the
+ // start of the section. Use a relocatable offset where needed to ensure
+ // linking doesn't invalidate that offset.
+ Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (UseOffsets)
+ Asm->emitDwarfLengthOrOffset(0);
+ else
+ Asm->emitDwarfSymbolReference(
+ TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
+
+ if (Version <= 4) {
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
+ }
+}
+
+void DwarfTypeUnit::emitHeader(bool UseOffsets) {
+ DwarfUnit::emitCommonHeader(UseOffsets,
+ DD->useSplitDwarf() ? dwarf::DW_UT_split_type
+ : dwarf::DW_UT_type);
+ Asm->OutStreamer->AddComment("Type Signature");
+ Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature));
+ Asm->OutStreamer->AddComment("Type DIE Offset");
+ // In a skeleton type unit there is no type DIE so emit a zero offset.
+ Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0);
+}
+
+void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ addAttribute(Die, Attribute, DD->getDwarfSectionOffsetForm(),
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
+ if (Asm->doesDwarfUseRelocationsAcrossSections())
+ addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
+ else
+ addSectionDelta(Die, Attribute, Label, Sec);
+}
+
+bool DwarfTypeUnit::isDwoUnit() const {
+ // Since there are no skeleton type units, all type units are dwo type units
+ // when split DWARF is being used.
+ return DD->useSplitDwarf();
+}
+
+void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalNameForTypeUnit(Name, Context);
+}
+
+void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalTypeUnitType(Ty, Context);
+}
+
+const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
+ if (!Asm->doesDwarfUseRelocationsAcrossSections())
+ return nullptr;
+ if (isDwoUnit())
+ return nullptr;
+ return getSection()->getBeginSymbol();
+}
+
+void DwarfUnit::addStringOffsetsStart() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base,
+ DU->getStringOffsetsStartSym(),
+ TLOF.getDwarfStrOffSection()->getBeginSymbol());
+}
+
+void DwarfUnit::addRnglistsBase() {
+ assert(DD->getDwarfVersion() >= 5 &&
+ "DW_AT_rnglists_base requires DWARF version 5 or later");
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base,
+ DU->getRnglistsTableBaseSym(),
+ TLOF.getDwarfRnglistsSection()->getBeginSymbol());
+}
+
+void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+ DD->getAddressPool().resetUsedFlag(true);
+}
+
+bool DwarfUnit::isCompatibleWithVersion(uint16_t Version) const {
+ return !Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= Version;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
new file mode 100644
index 000000000000..8f17e94c2d1c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -0,0 +1,390 @@
+//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
+
+#include "DwarfDebug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Target/TargetMachine.h"
+#include <optional>
+#include <string>
+
+namespace llvm {
+
+class ConstantFP;
+class ConstantInt;
+class DwarfCompileUnit;
+class MCDwarfDwoLineTable;
+class MCSymbol;
+
+//===----------------------------------------------------------------------===//
+/// This dwarf writer support class manages information associated with a
+/// source file.
+class DwarfUnit : public DIEUnit {
+protected:
+ /// MDNode for the compile unit.
+ const DICompileUnit *CUNode;
+
+ // All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ /// Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// Emitted at the end of the CU and used to compute the CU Length field.
+ MCSymbol *EndLabel = nullptr;
+
+ // Holders for some common dwarf information.
+ DwarfDebug *DD;
+ DwarfFile *DU;
+
+ /// An anonymous type for index type. Owned by DIEUnit.
+ DIE *IndexTyDie = nullptr;
+
+ /// Tracks the mapping of unit level debug information variables to debug
+ /// information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+ /// A list of all the DIELocs in use.
+ std::vector<DIELoc *> DIELocs;
+
+ /// This map is used to keep track of subprogram DIEs that need
+ /// DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const DINode *> ContainingTypeMap;
+
+ DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU);
+
+ bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal);
+
+ bool isShareableAcrossCUs(const DINode *D) const;
+
+ template <typename T>
+ void addAttribute(DIEValueList &Die, dwarf::Attribute Attribute,
+ dwarf::Form Form, T &&Value) {
+ // For strict DWARF mode, only generate attributes available to current
+ // DWARF version.
+ // Attribute 0 is used when emitting form-encoded values in blocks, which
+ // don't have attributes (only forms) so we cannot detect their DWARF
+ // version compatibility here and assume they are compatible.
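+ // For example, under strict DWARF targeting version 4, an attempt to add
+ // DW_AT_noreturn (introduced in DWARF v5) is dropped here.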
+ if (Attribute != 0 && Asm->TM.Options.DebugStrictDwarf &&
+ DD->getDwarfVersion() < dwarf::AttributeVersion(Attribute))
+ return;
+
+ Die.addValue(DIEValueAllocator,
+ DIEValue(Attribute, Form, std::forward<T>(Value)));
+ }
+
+public:
+ // Accessors.
+ AsmPrinter* getAsmPrinter() const { return Asm; }
+ MCSymbol *getEndLabel() const { return EndLabel; }
+ uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
+ const DICompileUnit *getCUNode() const { return CUNode; }
+ DwarfDebug &getDwarfDebug() const { return *DD; }
+
+ /// Return true if this compile unit has something to write out.
+ bool hasContent() const { return getUnitDie().hasChildren(); }
+
+ /// Get string containing language specific context for a global name.
+ ///
+ /// Walks the metadata parent chain in a language specific manner (using the
+ /// compile unit language) and returns it as a string. This is done at the
+ /// metadata level because DIEs may not currently have been added to the
+ /// parent context and walking the DIEs looking for names is more expensive
+ /// than walking the metadata.
+ std::string getParentContextString(const DIScope *Context) const;
+
+ /// Add a new global name to the compile unit.
+ virtual void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) = 0;
+
+ /// Add a new global type to the compile unit.
+ virtual void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) = 0;
+
+ /// Returns the DIE map slot for the specified debug variable.
+ ///
+ /// We delegate the request to DwarfDebug when the MDNode can be part of the
+ /// type system, since DIEs for the type system can be shared across CUs and
+ /// the mappings are kept in DwarfDebug.
+ DIE *getDIE(const DINode *D) const;
+
+ /// Returns a fresh newly allocated DIELoc.
+ DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; }
+
+ /// Insert DIE into the map.
+ ///
+ /// We delegate the request to DwarfDebug when the MDNode can be part of the
+ /// type system, since DIEs for the type system can be shared across CUs and
+ /// the mappings are kept in DwarfDebug.
+ void insertDIE(const DINode *Desc, DIE *D);
+
+ void insertDIE(DIE *D);
+
+ /// Add a flag that is true to the DIE.
+ void addFlag(DIE &Die, dwarf::Attribute Attribute);
+
+ /// Add an unsigned integer attribute data and value.
+ void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ std::optional<dwarf::Form> Form, uint64_t Integer);
+
+ void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
+
+ /// Add a signed integer attribute data and value.
+ void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ std::optional<dwarf::Form> Form, int64_t Integer);
+
+ void addSInt(DIELoc &Die, std::optional<dwarf::Form> Form, int64_t Integer);
+
+ /// Add a string attribute data and value.
+ ///
+ /// We always emit a reference to the string pool instead of immediate
+ /// strings so that DIEs have more predictable sizes. In the case of split
+ /// dwarf we emit an index into another table which gets us the static offset
+ /// into the string table.
+ void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
+
+ /// Add a Dwarf label attribute data and value.
+ void addLabel(DIEValueList &Die, dwarf::Attribute Attribute, dwarf::Form Form,
+ const MCSymbol *Label);
+
+ void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
+
+ /// Add an offset into a section attribute data and value.
+ void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
+
+ /// Add a dwarf op address data and value using the form given and an
+ /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ void addOpAddress(DIELoc &Die, const MCSymbol *Sym);
+ void addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label);
+
+ /// Add a label delta attribute data and value.
+ void addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// Add a DIE attribute data and value.
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
+
+ /// Add a DIE attribute data and value.
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
+
+ /// Add a type's DW_AT_signature and set the declaration flag.
+ void addDIETypeSignature(DIE &Die, uint64_t Signature);
+
+ /// Add block data.
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc);
+
+ /// Add block data.
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
+ DIEBlock *Block);
+
+ /// Add location information to specified debug information entry.
+ void addSourceLine(DIE &Die, unsigned Line, const DIFile *File);
+ void addSourceLine(DIE &Die, const DILocalVariable *V);
+ void addSourceLine(DIE &Die, const DIGlobalVariable *G);
+ void addSourceLine(DIE &Die, const DISubprogram *SP);
+ void addSourceLine(DIE &Die, const DILabel *L);
+ void addSourceLine(DIE &Die, const DIType *Ty);
+ void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
+
+ /// Add constant value entry in variable DIE.
+ void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
+ void addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty);
+ void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
+
+ /// Add constant value entry in variable DIE.
+ void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
+
+ /// Add a linkage name, if it isn't empty.
+ void addLinkageName(DIE &Die, StringRef LinkageName);
+
+ /// Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DINodeArray TParams);
+
+ /// Add thrown types.
+ void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
+
+ /// Add the accessibility attribute.
+ void addAccess(DIE &Die, DINode::DIFlags Flags);
+
+ /// Add a new type attribute to the specified entity.
+ ///
+ /// This takes an attribute parameter because DW_AT_friend attributes are
+ /// also type references.
+ void addType(DIE &Entity, const DIType *Ty,
+ dwarf::Attribute Attribute = dwarf::DW_AT_type);
+
+ DIE *getOrCreateNameSpace(const DINamespace *NS);
+ DIE *getOrCreateModule(const DIModule *M);
+ DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
+
+ void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
+ bool SkipSPAttributes = false);
+
+ /// Creates type DIE with specific context.
+ DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty);
+
+ /// Find existing DIE or create new DIE for the given type.
+ virtual DIE *getOrCreateTypeDIE(const MDNode *TyNode);
+
+ /// Get context owner's DIE.
+ virtual DIE *getOrCreateContextDIE(const DIScope *Context);
+
+ /// Construct DIEs for types that contain vtables.
+ void constructContainingTypeDIEs();
+
+ /// Construct function argument DIEs.
+ void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args);
+
+ /// Create a DIE with the given Tag, add the DIE to its parent, and
+ /// call insertDIE if N is not null.
+ DIE &createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N = nullptr);
+
+ bool useSegmentedStringOffsetsTable() const {
+ return DD->useSegmentedStringOffsetsTable();
+ }
+
+ /// Compute the size of a header for this unit, not including the initial
+ /// length field.
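+ /// For example, a 32-bit format DWARF v5 unit contributes 2 + 4 + 1 + 1 = 8
+ /// bytes here; DwarfTypeUnit::getHeaderSize adds the type signature and the
+ /// type DIE offset on top of that.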
+ virtual unsigned getHeaderSize() const {
+ return sizeof(int16_t) + // DWARF version number
+ Asm->getDwarfOffsetByteSize() + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ (DD->getDwarfVersion() >= 5 ? sizeof(int8_t)
+ : 0); // DWARF v5 unit type
+ }
+
+ /// Emit the header for this unit, not including the initial length field.
+ virtual void emitHeader(bool UseOffsets) = 0;
+
+ /// Add the DW_AT_str_offsets_base attribute to the unit DIE.
+ void addStringOffsetsStart();
+
+ /// Add the DW_AT_rnglists_base attribute to the unit DIE.
+ void addRnglistsBase();
+
+ virtual DwarfCompileUnit &getCU() = 0;
+
+ void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+
+ /// addSectionDelta - Add a label delta attribute data and value.
+ void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ const MCSymbol *Lo);
+
+ /// Add a Dwarf section label attribute data and value.
+ void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec);
+
+ /// Add DW_TAG_LLVM_annotation.
+ void addAnnotation(DIE &Buffer, DINodeArray Annotations);
+
+ /// Create a type DIE for the given composite type.
+ DIE *createTypeDIE(const DICompositeType *Ty);
+
+protected:
+ ~DwarfUnit();
+
+ /// Create new static data member DIE.
+ DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
+
+ /// Look up the source ID for the given file. If none currently exists,
+ /// create a new ID and insert it in the line table.
+ virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;
+
+ /// Emit the common part of the header for this unit.
+ void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
+
+private:
+ void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
+ void constructTypeDIE(DIE &Buffer, const DIStringType *BTy);
+ void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
+ void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy);
+ void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);
+ void constructGenericSubrangeDIE(DIE &Buffer, const DIGenericSubrange *SR,
+ DIE *IndexTy);
+ void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
+ void constructTemplateTypeParameterDIE(DIE &Buffer,
+ const DITemplateTypeParameter *TP);
+ void constructTemplateValueParameterDIE(DIE &Buffer,
+ const DITemplateValueParameter *TVP);
+
+ /// Return the default lower bound for an array.
+ ///
+ /// If the DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
+
+ /// Get an anonymous type for index type.
+ DIE *getIndexTyDie();
+
+ /// Set D as anonymous type for index which can be reused later.
+ void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+
+ virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0;
+
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
+ virtual bool isDwoUnit() const = 0;
+ const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
+
+ /// Returns 'true' if the current DwarfVersion is compatible
+ /// with the specified \p Version.
+ bool isCompatibleWithVersion(uint16_t Version) const;
+};
+
+class DwarfTypeUnit final : public DwarfUnit {
+ uint64_t TypeSignature;
+ const DIE *Ty;
+ DwarfCompileUnit &CU;
+ MCDwarfDwoLineTable *SplitLineTable;
+ bool UsedLineTable = false;
+
+ unsigned getOrCreateSourceID(const DIFile *File) override;
+ void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+ bool isDwoUnit() const override;
+
+public:
+ DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr);
+
+ void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
+ void setType(const DIE *Ty) { this->Ty = Ty; }
+
+ /// Emit the header for this unit, not including the initial length field.
+ void emitHeader(bool UseOffsets) override;
+ unsigned getHeaderSize() const override {
+ return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
+ Asm->getDwarfOffsetByteSize(); // Type DIE Offset
+ }
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
+ DwarfCompileUnit &getCU() override { return CU; }
+};
+} // end llvm namespace
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
new file mode 100644
index 000000000000..eef6b1d93f36
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -0,0 +1,850 @@
+//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EHStreamer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+
+EHStreamer::~EHStreamer() = default;
+
+/// How many leading type ids two landing pads have in common.
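+/// For example, type id lists {1, 2, 3} and {1, 2, 5} share their first two
+/// entries, so this returns 2.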
+unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ return std::mismatch(LIds.begin(), LIds.end(), RIds.begin(), RIds.end())
+ .first -
+ LIds.begin();
+}
+
+/// Compute the actions table and gather the first action index for each landing
+/// pad site.
+void EHStreamer::computeActionsTable(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
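+ //
+ // As a hypothetical example, with FilterIds = {2, 200} the FilterOffsets are
+ // {-1, -2}, so a type ID of -1 is written as -1 and a type ID of -2 as -2.
+ // Had the order been {200, 2}, the ULEB128 encoding of 200 takes two bytes,
+ // so the second offset would be -3 even though the second type ID is -2.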
+
+ const std::vector<unsigned> &FilterIds = Asm->MF->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (unsigned FilterId : FilterIds) {
+ FilterOffsets.push_back(Offset);
+ Offset -= getULEB128Size(FilterId);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0; // Total size of all action entries for a function
+ const LandingPadInfo *PrevLPI = nullptr;
+
+ for (const LandingPadInfo *LPI : LandingPads) {
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad
+
+ if (NumShared < TypeIds.size()) {
+ // Size of one action entry (typeid + next action)
+ unsigned SizeActionEntry = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeActionEntry = getSLEB128Size(Actions[PrevAction].NextAction) +
+ getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeActionEntry -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID =
+ isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeActionEntry ? -(SizeActionEntry + SizeTypeID) : 0;
+ SizeActionEntry = SizeTypeID + getSLEB128Size(NextAction);
+ SizeSiteActions += SizeActionEntry;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeActionEntry + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
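+ //
+ // Worked example (hypothetical sizes, not from the source): if previous
+ // sites contributed 6 bytes of actions and this site added two 2-byte
+ // entries, FirstAction = 6 + 4 - 2 + 1 = 9, the 1-biased byte offset of
+ // the entry that starts this site's backward-linked action chain.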
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+}
+
+/// Return `true' if this is a call to a function marked `nounwind'. Return
+/// `false' otherwise.
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+void EHStreamer::computePadMap(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap) {
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced so we can put them in the LSDA.
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad->EndLabels[j];
+ // If we have deleted the code for a given invoke after registering it in
+ // the LandingPad label list, the associated symbols will not have been
+ // emitted. In that case, ignore this callsite entry.
+ if (!BeginLabel->isDefined() || !EndLabel->isDefined())
+ continue;
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+}
+
+/// Compute the call-site table. The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action. The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action. Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table. Entries must be ordered by try-range address.
+///
+/// Call-sites are split into one or more call-site ranges associated with
+/// different sections of the function.
+///
+/// - Without -basic-block-sections, all call-sites are grouped into one
+/// call-site-range corresponding to the function section.
+///
+/// - With -basic-block-sections, one call-site range is created for each
+/// section, with its FragmentBeginLabel and FragmentEndLabel respectively
+/// set to the beginning and ending of the corresponding section and its
+/// ExceptionLabel set to the exception symbol dedicated for this section.
+/// Later, one LSDA header will be emitted for each call-site range with its
+/// call-sites following. The action table and type info table will be
+/// shared across all ranges.
+void EHStreamer::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
+
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = Asm->getFunctionBegin();
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+ // Visit all instructions in order of address.
+ for (const auto &MBB : *Asm->MF) {
+ if (&MBB == &Asm->MF->front() || MBB.isBeginSection()) {
+ // We start a call-site range upon function entry and at the beginning of
+ // every basic block section.
+ CallSiteRanges.push_back(
+ {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel,
+ Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel,
+ Asm->getMBBExceptionSym(MBB), CallSites.size()});
+ PreviousIsInvoke = false;
+ SawPotentiallyThrowing = false;
+ LastLabel = nullptr;
+ }
+
+ if (MBB.isEHPad())
+ CallSiteRanges.back().IsLPRange = true;
+
+ for (const auto &MI : MBB) {
+ if (!MI.isEHLabel()) {
+ if (MI.isCall())
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf and AIX exception handling (SjLj handling doesn't use this).
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (SawPotentiallyThrowing &&
+ (Asm->MAI->usesCFIForEH() ||
+ Asm->MAI->getExceptionHandlingType() == ExceptionHandling::AIX)) {
+ CallSites.push_back({LastLabel, BeginLabel, nullptr, 0});
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this.
+ if (PreviousIsInvoke && !IsSJLJ) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (!IsSJLJ)
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = Asm->MF->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+
+ // We end the call-site range upon function exit and at the end of every
+ // basic block section.
+ if (&MBB == &Asm->MF->back() || MBB.isEndSection()) {
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for
+ // the region following the try-range.
+ if (SawPotentiallyThrowing && !IsSJLJ) {
+ CallSiteEntry Site = {LastLabel, CallSiteRanges.back().FragmentEndLabel,
+ nullptr, 0};
+ CallSites.push_back(Site);
+ SawPotentiallyThrowing = false;
+ }
+ CallSiteRanges.back().CallSiteEndIdx = CallSites.size();
+ }
+ }
+}
+
+/// Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+///
+/// Returns the starting symbol of an exception table.
+MCSymbol *EHStreamer::emitExceptionTable() {
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MF->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (const LandingPadInfo &LPI : PadInfos) {
+ // If a landing-pad has an associated label, but the label wasn't ever
+ // emitted, then skip it. (This can occur if the landingpad's MBB was
+ // deleted).
+ if (LPI.LandingPadLabel && !LPI.LandingPadLabel->isDefined())
+ continue;
+ LandingPads.push_back(&LPI);
+ }
+
+ // Order landing pads lexicographically by type id.
+ llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
+ return L->TypeIds < R->TypeIds;
+ });
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ computeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Compute the call-site table and call-site ranges. Normally, there is only
+ // one call-site-range which covers the whole function. With
+ // -basic-block-sections, there is one call-site-range per basic block
+ // section.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ SmallVector<CallSiteRange, 4> CallSiteRanges;
+ computeCallSiteTable(CallSites, CallSiteRanges, LandingPads, FirstActions);
+
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
+ bool HasLEB128Directives = Asm->MAI->hasLEB128Directives();
+ unsigned CallSiteEncoding =
+ IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
+ Asm->getObjFileLowering().getCallSiteEncoding();
+ bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
+
+ // Type infos.
+ MCSection *LSDASection = Asm->getObjFileLowering().getSectionForLSDA(
+ MF->getFunction(), *Asm->CurrentFnSym, Asm->TM);
+ unsigned TTypeEncoding;
+
+ if (!HaveTTData) {
+ // If there is no TypeInfo, then we just explicitly say that we're omitting
+ // that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ }
+
+ // Begin the exception table.
+ // Sometimes we do not want to emit the data into a separate section (e.g.
+ // ARM EHABI). In that case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer->switchSection(LSDASection);
+ Asm->emitAlignment(Align(4));
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer->emitLabel(GCCETSym);
+ MCSymbol *CstEndLabel = Asm->createTempSymbol(
+ CallSiteRanges.size() > 1 ? "action_table_base" : "cst_end");
+
+ MCSymbol *TTBaseLabel = nullptr;
+ if (HaveTTData)
+ TTBaseLabel = Asm->createTempSymbol("ttbase");
+
+ const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ // Helper for emitting references (offsets) for type table and the end of the
+ // call-site table (which marks the beginning of the action table).
+ // * For Itanium, these references will be emitted for every callsite range.
+ // * For SJLJ and Wasm, they will be emitted only once in the LSDA header.
+ auto EmitTypeTableRefAndCallSiteTableEndRef = [&]() {
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ if (HaveTTData) {
+ // N.B.: There is a dependency loop between the size of the TTBase uleb128
+ // here and the amount of padding before the aligned type table. The
+ // assembler must sometimes pad this uleb128 or insert extra padding
+ // before the type table. See PR35809 or GNU as bug 4029.
+ MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+ Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
+ Asm->OutStreamer->emitLabel(TTBaseRefLabel);
+ }
+
+ // The Action table follows the call-site table. So we emit the
+ // label difference from here (start of the call-site table for SJLJ and
+ // Wasm, and start of a call-site range for Itanium) to the end of the
+ // whole call-site table (end of the last call-site range for Itanium).
+ MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
+ Asm->OutStreamer->emitLabel(CstBeginLabel);
+ };
+
+ // An alternative path to EmitTypeTableRefAndCallSiteTableEndRef.
+ // For some platforms, the system assembler does not accept the form of
+ // `.uleb128 label2 - label1`. In those situations, we would need to calculate
+ // the size between label1 and label2 manually.
+ // In this case, we would need to calculate the LSDA size and the call
+ // site table size.
+ auto EmitTypeTableOffsetAndCallSiteTableOffset = [&]() {
+ assert(CallSiteEncoding == dwarf::DW_EH_PE_udata4 && !HasLEB128Directives &&
+ "Targets supporting .uleb128 do not need to take this path.");
+ if (CallSiteRanges.size() > 1)
+ report_fatal_error(
+ "-fbasic-block-sections is not yet supported on "
+ "platforms that do not have general LEB128 directive support.");
+
+ uint64_t CallSiteTableSize = 0;
+ const CallSiteRange &CSRange = CallSiteRanges.back();
+ for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx;
+ CallSiteIdx < CSRange.CallSiteEndIdx; ++CallSiteIdx) {
+ const CallSiteEntry &S = CallSites[CallSiteIdx];
+ // Each call site entry consists of 3 udata4 fields (12 bytes) and
+ // 1 ULEB128 field.
+ CallSiteTableSize += 12 + getULEB128Size(S.Action);
+ assert(isUInt<32>(CallSiteTableSize) && "CallSiteTableSize overflows.");
+ }
+
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ if (HaveTTData) {
+ const unsigned ByteSizeOfCallSiteOffset =
+ getULEB128Size(CallSiteTableSize);
+ uint64_t ActionTableSize = 0;
+ for (const ActionEntry &Action : Actions) {
+ // Each action entry consists of two SLEB128 fields.
+ ActionTableSize += getSLEB128Size(Action.ValueForTypeID) +
+ getSLEB128Size(Action.NextAction);
+ assert(isUInt<32>(ActionTableSize) && "ActionTableSize overflows.");
+ }
+
+ const unsigned TypeInfoSize =
+ Asm->GetSizeOfEncodedValue(TTypeEncoding) * MF->getTypeInfos().size();
+
+ const uint64_t LSDASizeBeforeAlign =
+ 1 // Call site encoding byte.
+ + ByteSizeOfCallSiteOffset // ULEB128 encoding of CallSiteTableSize.
+ + CallSiteTableSize // Call site table content.
+ + ActionTableSize; // Action table content.
+
+ const uint64_t LSDASizeWithoutAlign = LSDASizeBeforeAlign + TypeInfoSize;
+ const unsigned ByteSizeOfLSDAWithoutAlign =
+ getULEB128Size(LSDASizeWithoutAlign);
+ const uint64_t DisplacementBeforeAlign =
+ 2 // LPStartEncoding and TypeTableEncoding.
+ + ByteSizeOfLSDAWithoutAlign + LSDASizeBeforeAlign;
+
+ // The type info area starts with 4 byte alignment.
+ const unsigned NeedAlignVal = (4 - DisplacementBeforeAlign % 4) % 4;
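+ // For instance (illustrative value only), a DisplacementBeforeAlign of 10
+ // gives NeedAlignVal == 2, padding the type info area out to offset 12.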
+ uint64_t LSDASizeWithAlign = LSDASizeWithoutAlign + NeedAlignVal;
+ const unsigned ByteSizeOfLSDAWithAlign =
+ getULEB128Size(LSDASizeWithAlign);
+
+ // The LSDASizeWithAlign could use 1 byte less padding for alignment
+ // when the data we use to represent the LSDA Size "needs" to be 1 byte
+ // larger than the one previously calculated without alignment.
+ if (ByteSizeOfLSDAWithAlign > ByteSizeOfLSDAWithoutAlign)
+ LSDASizeWithAlign -= 1;
+
+ Asm->OutStreamer->emitULEB128IntValue(LSDASizeWithAlign,
+ ByteSizeOfLSDAWithAlign);
+ }
+
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->OutStreamer->emitULEB128IntValue(CallSiteTableSize);
+ };
+
+ // SjLj / Wasm Exception handling
+ if (IsSJLJ || IsWasm) {
+ Asm->OutStreamer->emitLabel(Asm->getMBBExceptionSym(Asm->MF->front()));
+
+ // emit the LSDA header.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ EmitTypeTableRefAndCallSiteTableEndRef();
+
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Index of the call site entry.
+ if (VerboseAsm) {
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx));
+ }
+ Asm->emitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer->AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer->AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->emitULEB128(S.Action);
+ }
+ Asm->OutStreamer->emitLabel(CstEndLabel);
+ } else {
+ // Itanium LSDA exception handling
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an LEB128
+ // compressed, unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
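+ //
+ // Sketch of one record (hypothetical numbers, assuming a uleb128 call-site
+ // encoding; not emitted verbatim by this code): a call at offset 0x10 of
+ // length 0x8 that unwinds to a landing pad at offset 0x40 with first
+ // action 1 would come out roughly as:
+ // .uleb128 0x10 # call-site offset from the fragment start
+ // .uleb128 0x8 # call-site length
+ // .uleb128 0x40 # landing pad offset (0 means no landing pad)
+ // .uleb128 1 # first action, 1-biased (0 means no actions)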
+
+ assert(CallSiteRanges.size() != 0 && "No call-site ranges!");
+
+ // There should be only one call-site range which includes all the landing
+ // pads. Find that call-site range here.
+ const CallSiteRange *LandingPadRange = nullptr;
+ for (const CallSiteRange &CSRange : CallSiteRanges) {
+ if (CSRange.IsLPRange) {
+ assert(LandingPadRange == nullptr &&
+ "All landing pads must be in a single callsite range.");
+ LandingPadRange = &CSRange;
+ }
+ }
+
+ // The call-site table is split into its call-site ranges, each being
+ // emitted as:
+ // [ LPStartEncoding | LPStart ]
+ // [ TypeTableEncoding | TypeTableOffset ]
+ // [ CallSiteEncoding | CallSiteTableEndOffset ]
+ // cst_begin -> { call-site entries contained in this range }
+ //
+ // and is followed by the next call-site range.
+ //
+ // For each call-site range, CallSiteTableEndOffset is computed as the
+ // difference between cst_begin of that range and the last call-site-table's
+ // end label. This offset is used to find the action table.
+
+ unsigned Entry = 0;
+ for (const CallSiteRange &CSRange : CallSiteRanges) {
+ if (CSRange.CallSiteBeginIdx != 0) {
+ // Align the call-site range for all ranges except the first. The
+ // first range is already aligned due to the exception table alignment.
+ Asm->emitAlignment(Align(4));
+ }
+ Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel);
+
+ // Emit the LSDA header.
+ // LPStart is omitted if either we have a single call-site range (in which
+ // case the function entry is treated as @LPStart) or if this function has
+ // no landing pads (in which case @LPStart is undefined).
+ if (CallSiteRanges.size() == 1 || LandingPadRange == nullptr) {
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ } else if (!Asm->isPositionIndependent()) {
+ // For more than one call-site range, LPStart must be explicitly
+ // specified.
+ // For non-PIC we can simply use the absolute value.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_absptr, "@LPStart");
+ Asm->OutStreamer->emitSymbolValue(LandingPadRange->FragmentBeginLabel,
+ Asm->MAI->getCodePointerSize());
+ } else {
+ // For PIC mode, we emit a PC-relative address for LPStart.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_pcrel, "@LPStart");
+ MCContext &Context = Asm->OutStreamer->getContext();
+ MCSymbol *Dot = Context.createTempSymbol();
+ Asm->OutStreamer->emitLabel(Dot);
+ Asm->OutStreamer->emitValue(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LandingPadRange->FragmentBeginLabel,
+ Context),
+ MCSymbolRefExpr::create(Dot, Context), Context),
+ Asm->MAI->getCodePointerSize());
+ }
+
+ if (HasLEB128Directives)
+ EmitTypeTableRefAndCallSiteTableEndRef();
+ else
+ EmitTypeTableOffsetAndCallSiteTableOffset();
+
+ for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx;
+ CallSiteIdx != CSRange.CallSiteEndIdx; ++CallSiteIdx) {
+ const CallSiteEntry &S = CallSites[CallSiteIdx];
+
+ MCSymbol *EHFuncBeginSym = CSRange.FragmentBeginLabel;
+ MCSymbol *EHFuncEndSym = CSRange.FragmentEndLabel;
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (!BeginLabel)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (!EndLabel)
+ EndLabel = EHFuncEndSym;
+
+ // Offset of the call site relative to the start of the procedure.
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) +
+ " <<");
+ Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
+
+ // Offset of the landing pad relative to the start of the landing pad
+ // fragment.
+ if (!S.LPad) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(" has no landing pad");
+ Asm->emitCallSiteValue(0, CallSiteEncoding);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(Twine(" jumps to ") +
+ S.LPad->LandingPadLabel->getName());
+ Asm->emitCallSiteOffset(S.LPad->LandingPadLabel,
+ LandingPadRange->FragmentBeginLabel,
+ CallSiteEncoding);
+ }
+
+ // Offset of the first associated action record, relative to the start
+ // of the action table. This value is biased by 1 (1 indicates the start
+ // of the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer->AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer->AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->emitULEB128(S.Action);
+ }
+ }
+ Asm->OutStreamer->emitLabel(CstEndLabel);
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (const ActionEntry &Action : Actions) {
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<");
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ if (VerboseAsm) {
+ if (Action.ValueForTypeID > 0)
+ Asm->OutStreamer->AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else if (Action.ValueForTypeID < 0)
+ Asm->OutStreamer->AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer->AddComment(" Cleanup");
+ }
+ Asm->emitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ if (VerboseAsm) {
+ if (Action.Previous == unsigned(-1)) {
+ Asm->OutStreamer->AddComment(" No further actions");
+ } else {
+ Asm->OutStreamer->AddComment(" Continue to action " +
+ Twine(Action.Previous + 1));
+ }
+ }
+ Asm->emitSLEB128(Action.NextAction);
+ }
+
+ if (HaveTTData) {
+ Asm->emitAlignment(Align(4));
+ emitTypeInfos(TTypeEncoding, TTBaseLabel);
+ }
+
+ Asm->emitAlignment(Align(4));
+ return GCCETSym;
+}
+
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
+
+ const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer->addBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (const GlobalValue *GV : llvm::reverse(TypeInfos)) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
+ Asm->emitTTypeReference(GV, TTypeEncoding);
+ }
+
+ Asm->OutStreamer->emitLabel(TTBaseLabel);
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer->addBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (isFilterEHSelector(TypeID))
+ Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->emitULEB128(TypeID);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
new file mode 100644
index 000000000000..234e62506a56
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -0,0 +1,165 @@
+//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class AsmPrinter;
+struct LandingPadInfo;
+class MachineInstr;
+class MachineModuleInfo;
+class MCSymbol;
+template <typename T> class SmallVectorImpl;
+
+/// Emits exception handling directives.
+class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler {
+protected:
+ /// Target of directive emission.
+ AsmPrinter *Asm;
+
+ /// Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// How many leading type ids two landing pads have in common.
+ static unsigned sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ using RangeMapType = DenseMap<MCSymbol *, PadRange>;
+
+ /// Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // Null indicates the start of the function.
+ MCSymbol *EndLabel; // Null indicates the end of the function.
+
+ // LPad contains the landing pad start labels.
+ const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
+
+ unsigned Action;
+ };
+
+ /// Structure describing a contiguous range of call-sites which reside
+ /// in the same procedure fragment. With -fbasic-block-sections, there will
+ /// be one call site range per basic block section. Otherwise, we will have
+ /// one call site range containing all the call sites in the function.
+ struct CallSiteRange {
+ // Symbol marking the beginning of the procedure fragment.
+ MCSymbol *FragmentBeginLabel = nullptr;
+ // Symbol marking the end of the procedure fragment.
+ MCSymbol *FragmentEndLabel = nullptr;
+ // LSDA symbol for this call-site range.
+ MCSymbol *ExceptionLabel = nullptr;
+ // Index of the first call-site entry in the call-site table which
+ // belongs to this range.
+ size_t CallSiteBeginIdx = 0;
+ // Index just after the last call-site entry in the call-site table which
+ // belongs to this range.
+ size_t CallSiteEndIdx = 0;
+ // Whether this is the call-site range containing all the landing pads.
+ bool IsLPRange = false;
+ };
+
+ /// Compute the actions table and gather the first action index for each
+ /// landing pad site.
+ void computeActionsTable(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap);
+
+ /// Compute the call-site table and the call-site ranges. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address. CallSiteRanges vector is only populated for Itanium
+ /// exception handling.
+ virtual void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This table is reverse indexed, base 1.
+ ///
+ /// Returns the starting symbol of an exception table.
+ MCSymbol *emitExceptionTable();
+
+ virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel);
+
+ // Helpers for identifying what kind of clause an EH typeid or selector
+ // corresponds to. Negative selectors are for filter clauses, the zero
+ // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; }
+ static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
+ static bool isCatchEHSelector(int Selector) { return Selector > 0; }
+
+public:
+ EHStreamer(AsmPrinter *A);
+ ~EHStreamer() override;
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+
+ /// Return `true' if this is a call to a function marked `nounwind'. Return
+ /// `false' otherwise.
+ static bool callToNoUnwindFunction(const MachineInstr *MI);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 000000000000..62fd15d89512
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,117 @@
+//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/BuiltinGCs.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+namespace {
+
+class ErlangGCPrinter : public GCMetadataPrinter {
+public:
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
+
+} // end anonymous namespace
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+ X("erlang", "erlang-compatible garbage collector");
+
+void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ MCStreamer &OS = *AP.OutStreamer;
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
+
+ // Put this in a custom .note section.
+ OS.switchSection(AP.getObjFileLowering().getContext().getELFSection(
+ ".note.gc", ELF::SHT_PROGBITS, 0));
+
+ // For each function...
+ for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ FI != IE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+ if (MD.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
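+ //
+ // Purely illustrative instance: on a 64-bit target, a function with two
+ // safe points, a 48-byte frame and one live root at stack offset 16 would
+ // be emitted as PointCount=2, two safe point addresses, StackFrameSize=6
+ // (words), StackArity=0, LiveCount=1 and LiveOffsets={2}.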
+
+ // Align to address width.
+ AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.emitInt16(MD.size());
+
+ // And each safe point...
+ for (const GCPoint &P : MD) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = P.Label;
+ AP.emitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
+ }
+
+ // Stack information never changes in safe points! Only print info from the
+ // first call-site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.emitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs
+ ? MD.getFunction().arg_size() - RegisteredArgs
+ : 0;
+ OS.AddComment("stack arity");
+ AP.emitInt16(StackArity);
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.emitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.emitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
+
+void llvm::linkErlangGCPrinter() {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 000000000000..74fa30ab321b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,182 @@
+//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/BuiltinGCs.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+public:
+ void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
+
+} // end anonymous namespace
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+ Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() {}
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), llvm::find(MId, '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ Mangler::getNameWithPrefix(TmpStr, SymName, M.getDataLayout());
+
+ MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer->emitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(M, AP, "code_begin");
+
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(M, AP, "data_begin");
+}
+
+/// emitAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
+///
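+/// Purely illustrative instance (names and numbers are hypothetical): a
+/// module "foo" with one function containing a single call site, a 32-byte
+/// frame and live roots at byte offsets 8 and 16 yields camlFoo__frametable
+/// with NumDescriptors=1 followed by {ReturnAddress, FrameSize=32,
+/// NumLiveOffsets=2, LiveOffsets={8, 16}}.
+///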
+void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
+
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(M, AP, "code_end");
+
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(M, AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer->emitIntValue(0, IntPtrSize);
+
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(M, AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (std::unique_ptr<GCFunctionInfo> &FI :
+ llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) {
+ if (FI->getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
+ NumDescriptors += FI->size();
+ }
+
+ if (NumDescriptors >= 1 << 16) {
+ // Very rude!
+ report_fatal_error(" Too much descriptor for ocaml GC");
+ }
+ AP.emitInt16(NumDescriptors);
+ AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
+
+ for (std::unique_ptr<GCFunctionInfo> &FI :
+ llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) {
+ if (FI->getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
+
+ uint64_t FrameSize = FI->getFrameSize();
+ if (FrameSize >= 1 << 16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI->getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " +
+ Twine(FrameSize) +
+ ">= 65536.\n"
+ "(" +
+ Twine(reinterpret_cast<uintptr_t>(FI.get())) + ")");
+ }
+
+ AP.OutStreamer->AddComment("live roots for " +
+ Twine(FI->getFunction().getName()));
+ AP.OutStreamer->addBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE;
+ ++J) {
+ size_t LiveCount = FI->live_size(J);
+ if (LiveCount >= 1 << 16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI->getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count " +
+ Twine(LiveCount) + " >= 65536.");
+ }
+
+ AP.OutStreamer->emitSymbolValue(J->Label, IntPtrSize);
+ AP.emitInt16(FrameSize);
+ AP.emitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI->live_begin(J),
+ KE = FI->live_end(J);
+ K != KE; ++K) {
+ if (K->StackOffset >= 1 << 16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.emitInt16(K->StackOffset);
+ }
+
+ AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
new file mode 100644
index 000000000000..59c3fa15885e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -0,0 +1,56 @@
+//===- llvm/CodeGen/PseudoProbePrinter.cpp - Pseudo Probe Emission -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PseudoProbePrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+PseudoProbeHandler::~PseudoProbeHandler() = default;
+
+void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
+ uint64_t Type, uint64_t Attr,
+ const DILocation *DebugLoc) {
+ // Gather all the inlined-at nodes.
+ // When it's done ReversedInlineStack looks like ([66, B], [88, A])
+ // which means function A inlines function B at a callsite with probe id 88,
+ // and B inlines C at probe 66 where C is represented by Guid.
+ SmallVector<InlineSite, 8> ReversedInlineStack;
+ auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
+ while (InlinedAt) {
+ auto Name = InlinedAt->getSubprogramLinkageName();
+ // Use caching to avoid redundant md5 computation for build speed.
+ uint64_t &CallerGuid = NameGuidMap[Name];
+ if (!CallerGuid)
+ CallerGuid = Function::getGUID(Name);
+ uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
+ InlinedAt->getDiscriminator());
+ ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
+ InlinedAt = InlinedAt->getInlinedAt();
+ }
+ uint64_t Discriminator = 0;
+ // For now only block probes have FS discriminators. See
+ // MIRFSDiscriminator.cpp for more details.
+ if (EnableFSDiscriminator && DebugLoc &&
+ (Type == (uint64_t)PseudoProbeType::Block))
+ Discriminator = DebugLoc->getDiscriminator();
+ assert((EnableFSDiscriminator || Discriminator == 0) &&
+ "Discriminator should not be set in non-FSAFDO mode");
+ SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack));
+ Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, Discriminator,
+ InlineStack, Asm->CurrentFnSym);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
new file mode 100644
index 000000000000..a92a89084cad
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -0,0 +1,47 @@
+//===- PseudoProbePrinter.h - Pseudo probe encoding support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class DILocation;
+
+class PseudoProbeHandler : public AsmPrinterHandler {
+ // Target of pseudo probe emission.
+ AsmPrinter *Asm;
+ // Name to GUID map, used as caching/memoization for speed.
+ DenseMap<StringRef, uint64_t> NameGuidMap;
+
+public:
+ PseudoProbeHandler(AsmPrinter *A) : Asm(A) {}
+ ~PseudoProbeHandler() override;
+
+ void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attr, const DILocation *DebugLoc);
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void endModule() override {}
+ void beginFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+};
+
+} // namespace llvm
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
new file mode 100644
index 000000000000..bf65e525dde1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -0,0 +1,98 @@
+//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WasmException.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+void WasmException::endModule() {
+ // These are symbols used to throw/catch C++ exceptions and C longjmps. These
+ // symbols have to be emitted somewhere once in the module. Check if each of
+ // the symbols has already been created, i.e., we have at least one 'throw' or
+ // 'catch' instruction with the symbol in the module, and emit the symbol only
+ // if so.
+ //
+ // But in dynamic linking, it is in general not possible to come up with a
+ // module instantiating order in which tag-defining modules are loaded before
+ // the importing modules. So we make them undefined symbols here, define tags
+ // in the JS side, and feed them to each importing module.
+ if (!Asm->isPositionIndependent()) {
+ for (const char *SymName : {"__cpp_exception", "__c_longjmp"}) {
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, SymName, Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol(SymName);
+ Asm->OutStreamer->emitLabel(ExceptionSym);
+ }
+ }
+ }
+}
+
+void WasmException::endFunction(const MachineFunction *MF) {
+ bool ShouldEmitExceptionTable = false;
+ for (const LandingPadInfo &Info : MF->getLandingPads()) {
+ if (MF->hasWasmLandingPadIndex(Info.LandingPadBlock)) {
+ ShouldEmitExceptionTable = true;
+ break;
+ }
+ }
+ if (!ShouldEmitExceptionTable)
+ return;
+ MCSymbol *LSDALabel = emitExceptionTable();
+ assert(LSDALabel && "GCC_except_table has not been emitted!");
+
+ // Wasm requires every data section symbol to have a .size set. So we emit an
+ // end marker and set the size as the difference between the start and the end
+ // marker.
+ MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end");
+ Asm->OutStreamer->emitLabel(LSDAEndLabel);
+ MCContext &OutContext = Asm->OutStreamer->getContext();
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LSDAEndLabel, OutContext),
+ MCSymbolRefExpr::create(LSDALabel, OutContext), OutContext);
+ Asm->OutStreamer->emitELFSize(LSDALabel, SizeExp);
+}
+
+// Compute the call-site table for wasm EH. Even though we use the same function
+// name to share the common routines, a call site entry in the table
+// corresponds not to a call site for a possibly-throwing function but to a
+// landing pad. In wasm
+// EH the VM is responsible for stack unwinding. After an exception occurs and
+// the stack is unwound, the control flow is transferred to wasm 'catch'
+// instruction by the VM, after which the personality function is called from
+// the compiler-generated code. Refer to WasmEHPrepare pass for more
+// information.
+void WasmException::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ MachineFunction &MF = *Asm->MF;
+ for (unsigned I = 0, N = LandingPads.size(); I < N; ++I) {
+ const LandingPadInfo *Info = LandingPads[I];
+ MachineBasicBlock *LPad = Info->LandingPadBlock;
+ // We don't emit LSDA for single catch (...).
+ if (!MF.hasWasmLandingPadIndex(LPad))
+ continue;
+ // Wasm EH must maintain the EH pads in the order assigned to them by the
+ // WasmEHPrepare pass.
+ unsigned LPadIndex = MF.getWasmLandingPadIndex(LPad);
+ CallSiteEntry Site = {nullptr, nullptr, Info, FirstActions[I]};
+ if (CallSites.size() < LPadIndex + 1)
+ CallSites.resize(LPadIndex + 1);
+ CallSites[LPadIndex] = Site;
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
new file mode 100644
index 000000000000..86cc37dfde07
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -0,0 +1,44 @@
+//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+
+#include "EHStreamer.h"
+
+namespace llvm {
+class AsmPrinter;
+class MachineFunction;
+struct LandingPadInfo;
+template <typename T> class SmallVectorImpl;
+
+class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer {
+public:
+ WasmException(AsmPrinter *A) : EHStreamer(A) {}
+
+ void endModule() override;
+ void beginFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override;
+
+protected:
+ // Compute the call site table for wasm EH.
+ void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) override;
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
new file mode 100644
index 000000000000..5d813b72c0b7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -0,0 +1,125 @@
+//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing the metadata for Windows Control Flow
+// Guard, including address-taken functions and valid longjmp targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WinCFGuard.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCStreamer.h"
+
+#include <vector>
+
+using namespace llvm;
+
+WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {}
+
+WinCFGuard::~WinCFGuard() = default;
+
+void WinCFGuard::endFunction(const MachineFunction *MF) {
+
+ // Skip functions without any longjmp targets.
+ if (MF->getLongjmpTargets().empty())
+ return;
+
+ // Copy the function's longjmp targets to a module-level list.
+ llvm::append_range(LongjmpTargets, MF->getLongjmpTargets());
+}
+
+/// Returns true if this function's address escapes in a way that might make
+/// it an indirect call target. This differs from Function::hasAddressTaken,
+/// which also reports the address as taken when the function is called
+/// directly through a prototype-mismatching cast.
+static bool isPossibleIndirectCallTarget(const Function *F) {
+ SmallVector<const Value *, 4> Users{F};
+ while (!Users.empty()) {
+ const Value *FnOrCast = Users.pop_back_val();
+ for (const Use &U : FnOrCast->uses()) {
+ const User *FnUser = U.getUser();
+ if (isa<BlockAddress>(FnUser))
+ continue;
+ if (const auto *Call = dyn_cast<CallBase>(FnUser)) {
+ if (!Call->isCallee(&U))
+ return true;
+ } else if (isa<Instruction>(FnUser)) {
+ // Consider any other instruction to be an escape. This has some weird
+ // consequences like no-op intrinsics being an escape or a store *to* a
+ // function address being an escape.
+ return true;
+ } else if (const auto *C = dyn_cast<Constant>(FnUser)) {
+ // If this is a constant pointer cast of the function, don't consider
+ // this escape. Analyze the uses of the cast as well. This ensures that
+ // direct calls with mismatched prototypes don't end up in the CFG
+ // table. Consider other constants, such as vtable initializers, to
+ // escape the function.
+ if (C->stripPointerCasts() == F)
+ Users.push_back(FnUser);
+ else
+ return true;
+ }
+ }
+ }
+ return false;
+}
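+
+// A rough IR-level illustration of the cases handled above (all names are
+// hypothetical):
+//
+//   call void @f()            ; used as the callee: not an escape
+//   store ptr @f, ptr %slot   ; any other instruction use: escape
+//   @tbl = constant ptr @f    ; non-cast constant use (e.g. a vtable
+//                             ; initializer): escape
+//
+// Constant pointer casts of @f are looked through, so a direct call through
+// such a cast is also not treated as an escape.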
+
+MCSymbol *WinCFGuard::lookupImpSymbol(const MCSymbol *Sym) {
+ if (Sym->getName().startswith("__imp_"))
+ return nullptr;
+ return Asm->OutContext.lookupSymbol(Twine("__imp_") + Sym->getName());
+}
+
+void WinCFGuard::endModule() {
+ const Module *M = Asm->MMI->getModule();
+ std::vector<const MCSymbol *> GFIDsEntries;
+ std::vector<const MCSymbol *> GIATsEntries;
+ for (const Function &F : *M) {
+ if (isPossibleIndirectCallTarget(&F)) {
+ // If F is a dllimport function and an "__imp_" symbol is already defined
+ // for it, add that "__imp_" symbol to the .giats section.
+ if (F.hasDLLImportStorageClass()) {
+ if (MCSymbol *impSym = lookupImpSymbol(Asm->getSymbol(&F))) {
+ GIATsEntries.push_back(impSym);
+ }
+ }
+ // Add the function's symbol to the .gfids section.
+ // Note: For dllimport functions, MSVC sometimes does not add this symbol
+ // to the .gfids section, but only adds the corresponding "__imp_" symbol
+ // to the .giats section. Here we always add the symbol to the .gfids
+ // section, since this does not introduce security risks.
+ GFIDsEntries.push_back(Asm->getSymbol(&F));
+ }
+ }
+
+ if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty())
+ return;
+
+ // Emit the symbol index of each GFIDs entry to form the .gfids section.
+ auto &OS = *Asm->OutStreamer;
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
+ for (const MCSymbol *S : GFIDsEntries)
+ OS.emitCOFFSymbolIndex(S);
+
+ // Emit the symbol index of each GIATs entry to form the .giats section.
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
+ for (const MCSymbol *S : GIATsEntries) {
+ OS.emitCOFFSymbolIndex(S);
+ }
+
+ // Emit the symbol index of each longjmp target to form the .gljmp section.
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
+ for (const MCSymbol *S : LongjmpTargets) {
+ OS.emitCOFFSymbolIndex(S);
+ }
+}
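+
+// For reference, the output of endModule() looks roughly like the following
+// assembly (symbol names are hypothetical; the section names come from
+// MCObjectFileInfo):
+//
+//   .section .gfids$y
+//   .symidx  targetFn         # one entry per possible indirect call target
+//   .section .giats$y
+//   .symidx  __imp_importedFn # one entry per address-taken dllimport
+//   .section .gljmp$y
+//   .symidx  setjmpRetLabel   # one entry per valid longjmp target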
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
new file mode 100644
index 000000000000..0e472af52c8f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -0,0 +1,57 @@
+//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing the metadata for Windows Control Flow
+// Guard, including address-taken functions and valid longjmp targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
+
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+#include "llvm/Support/Compiler.h"
+#include <vector>
+
+namespace llvm {
+
+class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler {
+ /// Target of directive emission.
+ AsmPrinter *Asm;
+ std::vector<const MCSymbol *> LongjmpTargets;
+ MCSymbol *lookupImpSymbol(const MCSymbol *Sym);
+
+public:
+ WinCFGuard(AsmPrinter *A);
+ ~WinCFGuard() override;
+
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+
+ /// Emit the Control Flow Guard function ID table.
+ void endModule() override;
+
+ /// Gather pre-function debug information.
+ /// Every beginFunction(MF) call should be followed by an endFunction(MF)
+ /// call.
+ void beginFunction(const MachineFunction *MF) override {}
+
+ /// Gather post-function debug information.
+ /// Please note that some AsmPrinter implementations may not call
+ /// beginFunction at all.
+ void endFunction(const MachineFunction *MF) override;
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override {}
+
+ /// Process end of an instruction.
+ void endInstruction() override {}
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
new file mode 100644
index 000000000000..6d6432b61f2d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -0,0 +1,1345 @@
+//===-- CodeGen/AsmPrinter/WinException.cpp - Windows Exception Impl ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WinException.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
+ // MSVC's EH tables are always composed of 32-bit words. All known 64-bit
+ // platforms use an imagerel32 relocation to refer to symbols.
+ useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
+ isAArch64 = Asm->TM.getTargetTriple().isAArch64();
+ isThumb = Asm->TM.getTargetTriple().isThumb();
+}
+
+WinException::~WinException() = default;
+
+/// endModule - Emit all exception information that should come after the
+/// content.
+void WinException::endModule() {
+ auto &OS = *Asm->OutStreamer;
+ const Module *M = MMI->getModule();
+ for (const Function &F : *M)
+ if (F.hasFnAttribute("safeseh"))
+ OS.emitCOFFSafeSEH(Asm->getSymbol(&F));
+
+ if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) {
+ // Emit the symbol index of each ehcont target.
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection());
+ for (const MCSymbol *S : EHContTargets) {
+ OS.emitCOFFSymbolIndex(S);
+ }
+ }
+}
+
+void WinException::beginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MF->getLandingPads().empty();
+ bool hasEHFunclets = MF->hasEHFunclets();
+
+ const Function &F = MF->getFunction();
+
+ shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ EHPersonality Per = EHPersonality::Unknown;
+ const Function *PerFn = nullptr;
+ if (F.hasPersonalityFn()) {
+ PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ Per = classifyEHPersonality(PerFn);
+ }
+
+ bool forceEmitPersonality = F.hasPersonalityFn() &&
+ !isNoOpWithoutInvoke(Per) &&
+ F.needsUnwindTableEntry();
+
+ shouldEmitPersonality =
+ forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
+ PerEncoding != dwarf::DW_EH_PE_omit && PerFn);
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ // If we're not using CFI, we don't want the CFI or the personality, but we
+ // might want EH tables if we had EH pads.
+ if (!Asm->MAI->usesWindowsCFI()) {
+ if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) {
+ // If this is 32-bit SEH and we don't have any funclets (really invokes),
+ // make sure we emit the parent offset label. Some unreferenced filter
+ // functions may still refer to it.
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ StringRef FLinkageName =
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+ emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
+ }
+ shouldEmitLSDA = hasEHFunclets;
+ shouldEmitPersonality = false;
+ return;
+ }
+
+ beginFunclet(MF->front(), Asm->CurrentFnSym);
+}
+
+void WinException::markFunctionEnd() {
+ if (isAArch64 && CurrentFuncletEntry &&
+ (shouldEmitMoves || shouldEmitPersonality))
+ Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd();
+}
+
+/// endFunction - Gather and emit post-function exception information.
+///
+void WinException::endFunction(const MachineFunction *MF) {
+ if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
+ return;
+
+ const Function &F = MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F.hasPersonalityFn())
+ Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
+
+ endFuncletImpl();
+
+ // endFunclet will emit the necessary .xdata tables for table-based SEH.
+ if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets())
+ return;
+
+ if (shouldEmitPersonality || shouldEmitLSDA) {
+ Asm->OutStreamer->pushSection();
+
+ // Just switch sections to the right xdata section.
+ MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
+ Asm->OutStreamer->getCurrentSectionOnly());
+ Asm->OutStreamer->switchSection(XData);
+
+ // Emit the tables appropriate to the personality function in use. If we
+ // don't recognize the personality, assume it uses an Itanium-style LSDA.
+ if (Per == EHPersonality::MSVC_TableSEH)
+ emitCSpecificHandlerTable(MF);
+ else if (Per == EHPersonality::MSVC_X86SEH)
+ emitExceptHandlerTable(MF);
+ else if (Per == EHPersonality::MSVC_CXX)
+ emitCXXFrameHandler3Table(MF);
+ else if (Per == EHPersonality::CoreCLR)
+ emitCLRExceptionTable(MF);
+ else
+ emitExceptionTable();
+
+ Asm->OutStreamer->popSection();
+ }
+
+ if (!MF->getCatchretTargets().empty()) {
+ // Copy the function's catchret targets to a module-level list.
+ EHContTargets.insert(EHContTargets.end(), MF->getCatchretTargets().begin(),
+ MF->getCatchretTargets().end());
+ }
+}
+
+/// Retrieve the MCSymbol naming the funclet (catch or cleanup handler) whose
+/// entry block is the given MachineBasicBlock.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB) {
+ if (!MBB)
+ return nullptr;
+
+ assert(MBB->isEHFuncletEntry());
+
+ // Give catches and cleanups a name based on their parent function and
+ // their funclet entry block's number.
+ const MachineFunction *MF = MBB->getParent();
+ const Function &F = MF->getFunction();
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
+ MCContext &Ctx = MF->getContext();
+ StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+ return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+ Twine(MBB->getNumber()) + "@?0?" +
+ FuncLinkageName + "@4HA");
+}
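+
+// For example, a catch funclet whose entry block is #3 in a function with
+// linkage name "foo" (name chosen for illustration) is given the symbol
+// "?catch$3@?0?foo@4HA", and a cleanup funclet at the same position would be
+// "?dtor$3@?0?foo@4HA".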
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym) {
+ CurrentFuncletEntry = &MBB;
+
+ const Function &F = Asm->MF->getFunction();
+ // If a symbol was not provided for the funclet, invent one.
+ if (!Sym) {
+ Sym = getMCSymbolForMBB(Asm, &MBB);
+
+ // Describe our funclet symbol as a function with internal linkage.
+ Asm->OutStreamer->beginCOFFSymbolDef(Sym);
+ Asm->OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ Asm->OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ Asm->OutStreamer->endCOFFSymbolDef();
+
+ // We want our funclet's entry point to be aligned such that no nops will be
+ // present after the label.
+ Asm->emitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+ &F);
+
+ // Now that we've emitted the alignment directive, point at our funclet.
+ Asm->OutStreamer->emitLabel(Sym);
+ }
+
+ // Mark 'Sym' as starting our funclet.
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly();
+ Asm->OutStreamer->emitWinCFIStartProc(Sym);
+ }
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *PerFn = nullptr;
+
+ // Determine which personality routine we are using for this funclet.
+ if (F.hasPersonalityFn())
+ PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI);
+
+ // Do not emit a .seh_handler directive for cleanup funclets.
+ // FIXME: This means cleanup funclets cannot handle exceptions. Given that
+ // Clang doesn't produce EH constructs inside cleanup funclets and LLVM's
+ // inliner doesn't allow inlining them, this isn't a major problem in
+ // practice.
+ if (!CurrentFuncletEntry->isCleanupFuncletEntry())
+ Asm->OutStreamer->emitWinEHHandler(PersHandlerSym, true, true);
+ }
+}
+
+void WinException::endFunclet() {
+ if (isAArch64 && CurrentFuncletEntry &&
+ (shouldEmitMoves || shouldEmitPersonality)) {
+ Asm->OutStreamer->switchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd();
+ }
+ endFuncletImpl();
+}
+
+void WinException::endFuncletImpl() {
+ // No funclet to process? Great, we have nothing to do.
+ if (!CurrentFuncletEntry)
+ return;
+
+ const MachineFunction *MF = Asm->MF;
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ const Function &F = MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F.hasPersonalityFn())
+ Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
+
+ if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
+ !CurrentFuncletEntry->isCleanupFuncletEntry()) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->emitWinEHHandlerData();
+
+ // If this is a C++ catch funclet (or the parent function),
+ // emit a reference to the LSDA for the parent function.
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
+ MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$cppxdata$", FuncLinkageName));
+ Asm->OutStreamer->emitValue(create32bitRef(FuncInfoXData), 4);
+ } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() &&
+ !CurrentFuncletEntry->isEHFuncletEntry()) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->emitWinEHHandlerData();
+
+ // If this is the parent function in Win64 SEH, emit the LSDA immediately
+ // following .seh_handlerdata.
+ emitCSpecificHandlerTable(MF);
+ } else if (shouldEmitPersonality || shouldEmitLSDA) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->emitWinEHHandlerData();
+ // In these cases, no further info is written to the .xdata section
+ // right here, but is written by e.g. emitExceptionTable in endFunction()
+ // above.
+ } else {
+ // No need to emit the EH handler data right here if nothing needs
+ // writing to the .xdata section; it will be emitted for all
+ // functions that need it in the end anyway.
+ }
+
+ // Switch back to the funclet start .text section now that we are done
+ // writing to .xdata, and emit an .seh_endproc directive to mark the end of
+ // the function.
+ Asm->OutStreamer->switchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->emitWinCFIEndProc();
+ }
+
+ // Let's make sure we don't try to end the same funclet twice.
+ CurrentFuncletEntry = nullptr;
+}
+
+const MCExpr *WinException::create32bitRef(const MCSymbol *Value) {
+ if (!Value)
+ return MCConstantExpr::create(0, Asm->OutContext);
+ return MCSymbolRefExpr::create(Value, useImageRel32
+ ? MCSymbolRefExpr::VK_COFF_IMGREL32
+ : MCSymbolRefExpr::VK_None,
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
+ if (!GV)
+ return MCConstantExpr::create(0, Asm->OutContext);
+ return create32bitRef(Asm->getSymbol(GV));
+}
+
+const MCExpr *WinException::getLabel(const MCSymbol *Label) {
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(getLabel(Label),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(OffsetOf, Asm->OutContext),
+ MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+int WinException::getFrameIndexOffset(int FrameIndex,
+ const WinEHFuncInfo &FuncInfo) {
+ const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
+ Register UnusedReg;
+ if (Asm->MAI->usesWindowsCFI()) {
+ StackOffset Offset =
+ TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg,
+ /*IgnoreSPUpdates*/ true);
+ assert(UnusedReg ==
+ Asm->MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore());
+ return Offset.getFixed();
+ }
+
+ // For 32-bit, offsets should be relative to the end of the EH registration
+ // node. For 64-bit, it's relative to SP at the end of the prologue.
+ assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
+ StackOffset Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
+ Offset += StackOffset::getFixed(FuncInfo.EHRegNodeEndOffset);
+ assert(!Offset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ return Offset.getFixed();
+}
+
+namespace {
+
+/// Top-level state used to represent unwind to caller
+const int NullState = -1;
+
+struct InvokeStateChange {
+ /// EH Label immediately after the last invoke in the previous state, or
+ /// nullptr if the previous state was the null state.
+ const MCSymbol *PreviousEndLabel;
+
+ /// EH label immediately before the first invoke in the new state, or nullptr
+ /// if the new state is the null state.
+ const MCSymbol *NewStartLabel;
+
+ /// State of the invoke following NewStartLabel, or NullState to indicate
+ /// the presence of calls which may unwind to caller.
+ int NewState;
+};
+
+/// Iterator that reports all the invoke state changes in a range of machine
+/// basic blocks. Changes to the null state are reported whenever a call that
+/// may unwind to caller is encountered. The MBB range is expected to be an
+/// entire function or funclet, and the start and end of the range are treated
+/// as being in the NullState even if there's not an unwind-to-caller call
+/// before the first invoke or after the last one (i.e., the first state change
+/// reported is the first change to something other than NullState, and a
+/// change back to NullState is always reported at the end of iteration).
+class InvokeStateChangeIterator {
+ InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+ MachineFunction::const_iterator MFI,
+ MachineFunction::const_iterator MFE,
+ MachineBasicBlock::const_iterator MBBI,
+ int BaseState)
+ : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+ LastStateChange.PreviousEndLabel = nullptr;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ scan();
+ }
+
+public:
+ static iterator_range<InvokeStateChangeIterator>
+ range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+ MachineFunction::const_iterator End, int BaseState = NullState) {
+ // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+ // the end of the last block.
+ assert(Begin != End);
+ auto BlockBegin = Begin->begin();
+ auto BlockEnd = std::prev(End)->end();
+ return make_range(
+ InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+ InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+ }
+
+ // Iterator methods.
+ bool operator==(const InvokeStateChangeIterator &O) const {
+ assert(BaseState == O.BaseState);
+ // Must be visiting same block.
+ if (MFI != O.MFI)
+ return false;
+ // Must be visiting the same instr.
+ if (MBBI != O.MBBI)
+ return false;
+ // At end of block/instr iteration, we can still have two distinct states:
+ // one to report the final EndLabel, and another indicating the end of the
+ // state change iteration. Check for CurrentEndLabel equality to
+ // distinguish these.
+ return CurrentEndLabel == O.CurrentEndLabel;
+ }
+
+ bool operator!=(const InvokeStateChangeIterator &O) const {
+ return !operator==(O);
+ }
+ InvokeStateChange &operator*() { return LastStateChange; }
+ InvokeStateChange *operator->() { return &LastStateChange; }
+ InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+ InvokeStateChangeIterator &scan();
+
+ const WinEHFuncInfo &EHInfo;
+ const MCSymbol *CurrentEndLabel = nullptr;
+ MachineFunction::const_iterator MFI;
+ MachineFunction::const_iterator MFE;
+ MachineBasicBlock::const_iterator MBBI;
+ InvokeStateChange LastStateChange;
+ bool VisitingInvoke = false;
+ int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+ bool IsNewBlock = false;
+ for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+ if (IsNewBlock)
+ MBBI = MFI->begin();
+ for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ const MachineInstr &MI = *MBBI;
+ if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+ MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+ // Indicate a change of state to the null state. We don't have
+ // start/end EH labels handy but the caller won't expect them for
+ // null state regions.
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ CurrentEndLabel = nullptr;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+
+ // All other state changes are at EH labels before/after invokes.
+ if (!MI.isEHLabel())
+ continue;
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+ if (Label == CurrentEndLabel) {
+ VisitingInvoke = false;
+ continue;
+ }
+ auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+ // Ignore EH labels that aren't the ones inserted before an invoke
+ if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+ continue;
+ auto &StateAndEnd = InvokeMapIter->second;
+ int NewState = StateAndEnd.first;
+ // Keep track of the fact that we're between EH start/end labels so
+ // we know not to treat the invoke we'll see as unwinding to the caller.
+ VisitingInvoke = true;
+ if (NewState == LastStateChange.NewState) {
+ // The state isn't actually changing here. Record the new end and
+ // keep going.
+ CurrentEndLabel = StateAndEnd.second;
+ continue;
+ }
+ // Found a state change to report
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = Label;
+ LastStateChange.NewState = NewState;
+ // Start keeping track of the new current end
+ CurrentEndLabel = StateAndEnd.second;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+ }
+ // Iteration hit the end of the block range.
+ if (LastStateChange.NewState != BaseState) {
+ // Report the end of the last new state
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ // Leave CurrentEndLabel non-null to distinguish this state from end.
+ assert(CurrentEndLabel != nullptr);
+ return *this;
+ }
+ // We've reported all state changes and hit the end state.
+ CurrentEndLabel = nullptr;
+ return *this;
+}
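+
+// As a rough example of the sequence this iterator produces: for a range
+// containing an invoke in state 0, then a plain call that may unwind to the
+// caller, then an invoke in state 1, iteration reports the state changes
+// (null -> 0), (0 -> null), (null -> 1), and finally (1 -> null) when the
+// end of the range is reached.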
+
+/// Emit the language-specific data that __C_specific_handler expects. This
+/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
+/// up after faults with __try, __except, and __finally. The typeinfo values
+/// are not really RTTI data, but pointers to filter functions that return an
+/// integer (1, 0, or -1) indicating how to handle the exception. For __finally
+/// blocks and other cleanups, the landing pad label is zero, and the filter
+/// function is actually a cleanup handler with the same prototype. A catch-all
+/// entry is modeled with a null filter function field and a non-zero landing
+/// pad label.
+///
+/// Possible filter function return values:
+/// EXCEPTION_EXECUTE_HANDLER (1):
+/// Jump to the landing pad label after cleanups.
+/// EXCEPTION_CONTINUE_SEARCH (0):
+/// Continue searching this table or continue unwinding.
+/// EXCEPTION_CONTINUE_EXECUTION (-1):
+/// Resume execution at the trapping PC.
+///
+/// Inferred table structure:
+/// struct Table {
+/// int NumEntries;
+/// struct Entry {
+/// imagerel32 LabelStart; // Inclusive
+/// imagerel32 LabelEnd; // Exclusive
+/// imagerel32 FilterOrFinally; // One means catch-all.
+/// imagerel32 LabelLPad; // Zero means __finally.
+/// } Entries[NumEntries];
+/// };
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ if (!isAArch64) {
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.eh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->emitAssignment(ParentFrameOffset, MCOffset);
+ }
+
+ // Use the assembler to compute the number of table entries through label
+ // difference and division.
+ MCSymbol *TableBegin =
+ Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true);
+ MCSymbol *TableEnd =
+ Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true);
+ const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin);
+ const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx);
+ const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx);
+ AddComment("Number of call sites");
+ OS.emitValue(EntryCount, 4);
+
+ OS.emitLabel(TableBegin);
+
+ // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only
+ // models exceptions from invokes. LLVM also allows arbitrary reordering of
+ // the code, so our tables end up looking a bit different. Rather than
+ // trying to match MSVC's tables exactly, we emit a denormalized table. For
+ // each range of invokes in the same state, we emit table entries for all
+ // the actions that would be taken in that state. This means our tables are
+ // slightly bigger, which is OK.
+ const MCSymbol *LastStartLabel = nullptr;
+ int LastEHState = -1;
+ // Break out before we enter into a finally funclet.
+ // FIXME: We need to emit separate EH tables for cleanups.
+ MachineFunction::const_iterator End = MF->end();
+ MachineFunction::const_iterator Stop = std::next(MF->begin());
+ while (Stop != End && !Stop->isEHFuncletEntry())
+ ++Stop;
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) {
+ // Emit all the actions for the state we just transitioned out of
+ // if it was not the null state
+ if (LastEHState != -1)
+ emitSEHActionsForRange(FuncInfo, LastStartLabel,
+ StateChange.PreviousEndLabel, LastEHState);
+ LastStartLabel = StateChange.NewStartLabel;
+ LastEHState = StateChange.NewState;
+ }
+
+ OS.emitLabel(TableEnd);
+}
+
+void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ assert(BeginLabel && EndLabel);
+ while (State != -1) {
+ const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State];
+ const MCExpr *FilterOrFinally;
+ const MCExpr *ExceptOrNull;
+ auto *Handler = cast<MachineBasicBlock *>(UME.Handler);
+ if (UME.IsFinally) {
+ FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler));
+ ExceptOrNull = MCConstantExpr::create(0, Ctx);
+ } else {
+ // For an except, the filter can be 1 (catch-all) or a function
+ // label.
+ FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter)
+ : MCConstantExpr::create(1, Ctx);
+ ExceptOrNull = create32bitRef(Handler->getSymbol());
+ }
+
+ AddComment("LabelStart");
+ OS.emitValue(getLabel(BeginLabel), 4);
+ AddComment("LabelEnd");
+ OS.emitValue(getLabelPlusOne(EndLabel), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
+ : "CatchAll");
+ OS.emitValue(FilterOrFinally, 4);
+ AddComment(UME.IsFinally ? "Null" : "ExceptionHandler");
+ OS.emitValue(ExceptOrNull, 4);
+
+ assert(UME.ToState < State && "states should decrease");
+ State = UME.ToState;
+ }
+}
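+
+// For instance, if an invoke range is in state 2 and the unwind map chains
+// 2 -> 0 -> -1, the loop above emits two table entries covering the same
+// [BeginLabel, EndLabel) range: one for state 2 and one for state 0. This is
+// the denormalization described in emitCSpecificHandlerTable.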
+
+void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
+ const Function &F = MF->getFunction();
+ auto &OS = *Asm->OutStreamer;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
+
+ SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
+ MCSymbol *FuncInfoXData = nullptr;
+ if (shouldEmitPersonality) {
+ // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from
+ // IPs to state numbers.
+ FuncInfoXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName));
+ computeIP2StateTable(MF, FuncInfo, IPToStateTable);
+ } else {
+ FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName);
+ }
+
+ int UnwindHelpOffset = 0;
+ // TODO: The check for UnwindHelpFrameIdx against max() below (and the
+ // second check further below) can be removed once MS C++ unwinding is
+ // implemented for ARM and test/CodeGen/ARM/Windows/wineh-basic.ll passes
+ // without the check.
+ if (Asm->MAI->usesWindowsCFI() &&
+ FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max())
+ UnwindHelpOffset =
+ getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
+
+ MCSymbol *UnwindMapXData = nullptr;
+ MCSymbol *TryBlockMapXData = nullptr;
+ MCSymbol *IPToStateXData = nullptr;
+ if (!FuncInfo.CxxUnwindMap.empty())
+ UnwindMapXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$stateUnwindMap$", FuncLinkageName));
+ if (!FuncInfo.TryBlockMap.empty())
+ TryBlockMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName));
+ if (!IPToStateTable.empty())
+ IPToStateXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName));
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ // FuncInfo {
+ // uint32_t MagicNumber
+ // int32_t MaxState;
+ // UnwindMapEntry *UnwindMap;
+ // uint32_t NumTryBlocks;
+ // TryBlockMapEntry *TryBlockMap;
+ // uint32_t IPMapEntries; // always 0 for x86
+ // IPToStateMapEntry *IPToStateMap; // always 0 for x86
+ // uint32_t UnwindHelp; // non-x86 only
+ // ESTypeList *ESTypeList;
+ // int32_t EHFlags;
+ // }
+ // EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
+ // EHFlags & 2 -> ???
+ // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
+ OS.emitValueToAlignment(Align(4));
+ OS.emitLabel(FuncInfoXData);
+
+ AddComment("MagicNumber");
+ OS.emitInt32(0x19930522);
+
+ AddComment("MaxState");
+ OS.emitInt32(FuncInfo.CxxUnwindMap.size());
+
+ AddComment("UnwindMap");
+ OS.emitValue(create32bitRef(UnwindMapXData), 4);
+
+ AddComment("NumTryBlocks");
+ OS.emitInt32(FuncInfo.TryBlockMap.size());
+
+ AddComment("TryBlockMap");
+ OS.emitValue(create32bitRef(TryBlockMapXData), 4);
+
+ AddComment("IPMapEntries");
+ OS.emitInt32(IPToStateTable.size());
+
+ AddComment("IPToStateXData");
+ OS.emitValue(create32bitRef(IPToStateXData), 4);
+
+ if (Asm->MAI->usesWindowsCFI() &&
+ FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max()) {
+ AddComment("UnwindHelp");
+ OS.emitInt32(UnwindHelpOffset);
+ }
+
+ AddComment("ESTypeList");
+ OS.emitInt32(0);
+
+ AddComment("EHFlags");
+ if (MMI->getModule()->getModuleFlag("eh-asynch")) {
+ OS.emitInt32(0);
+ } else {
+ OS.emitInt32(1);
+ }
+
+ // UnwindMapEntry {
+ // int32_t ToState;
+ // void (*Action)();
+ // };
+ if (UnwindMapXData) {
+ OS.emitLabel(UnwindMapXData);
+ for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
+ MCSymbol *CleanupSym = getMCSymbolForMBB(
+ Asm, dyn_cast_if_present<MachineBasicBlock *>(UME.Cleanup));
+ AddComment("ToState");
+ OS.emitInt32(UME.ToState);
+
+ AddComment("Action");
+ OS.emitValue(create32bitRef(CleanupSym), 4);
+ }
+ }
+
+ // TryBlockMap {
+ // int32_t TryLow;
+ // int32_t TryHigh;
+ // int32_t CatchHigh;
+ // int32_t NumCatches;
+ // HandlerType *HandlerArray;
+ // };
+ if (TryBlockMapXData) {
+ OS.emitLabel(TryBlockMapXData);
+ SmallVector<MCSymbol *, 1> HandlerMaps;
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+
+ MCSymbol *HandlerMapXData = nullptr;
+ if (!TBME.HandlerArray.empty())
+ HandlerMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$")
+ .concat(Twine(I))
+ .concat("$")
+ .concat(FuncLinkageName));
+ HandlerMaps.push_back(HandlerMapXData);
+
+ // TBMEs should form intervals.
+ assert(0 <= TBME.TryLow && "bad trymap interval");
+ assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval");
+ assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval");
+ assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) &&
+ "bad trymap interval");
+
+ AddComment("TryLow");
+ OS.emitInt32(TBME.TryLow);
+
+ AddComment("TryHigh");
+ OS.emitInt32(TBME.TryHigh);
+
+ AddComment("CatchHigh");
+ OS.emitInt32(TBME.CatchHigh);
+
+ AddComment("NumCatches");
+ OS.emitInt32(TBME.HandlerArray.size());
+
+ AddComment("HandlerArray");
+ OS.emitValue(create32bitRef(HandlerMapXData), 4);
+ }
+
+ // All funclets use the same parent frame offset currently.
+ unsigned ParentFrameOffset = 0;
+ if (shouldEmitPersonality) {
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF);
+ }
+
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = HandlerMaps[I];
+ if (!HandlerMapXData)
+ continue;
+ // HandlerType {
+ // int32_t Adjectives;
+ // TypeDescriptor *Type;
+ // int32_t CatchObjOffset;
+ // void (*Handler)();
+ // int32_t ParentFrameOffset; // x64 and AArch64 only
+ // };
+ OS.emitLabel(HandlerMapXData);
+ for (const WinEHHandlerType &HT : TBME.HandlerArray) {
+ // Get the frame escape label with the offset of the catch object. If
+ // the index is INT_MAX, then there is no catch object, and we should
+ // emit an offset of zero, indicating that no copy will occur.
+ const MCExpr *FrameAllocOffsetRef = nullptr;
+ if (HT.CatchObj.FrameIndex != INT_MAX) {
+ int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo);
+ assert(Offset != 0 && "Illegal offset for catch object!");
+ FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext);
+ } else {
+ FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
+ }
+
+ MCSymbol *HandlerSym = getMCSymbolForMBB(
+ Asm, dyn_cast_if_present<MachineBasicBlock *>(HT.Handler));
+
+ AddComment("Adjectives");
+ OS.emitInt32(HT.Adjectives);
+
+ AddComment("Type");
+ OS.emitValue(create32bitRef(HT.TypeDescriptor), 4);
+
+ AddComment("CatchObjOffset");
+ OS.emitValue(FrameAllocOffsetRef, 4);
+
+ AddComment("Handler");
+ OS.emitValue(create32bitRef(HandlerSym), 4);
+
+ if (shouldEmitPersonality) {
+ AddComment("ParentFrameOffset");
+ OS.emitInt32(ParentFrameOffset);
+ }
+ }
+ }
+ }
+
+ // IPToStateMapEntry {
+ // void *IP;
+ // int32_t State;
+ // };
+ if (IPToStateXData) {
+ OS.emitLabel(IPToStateXData);
+ for (auto &IPStatePair : IPToStateTable) {
+ AddComment("IP");
+ OS.emitValue(IPStatePair.first, 4);
+ AddComment("ToState");
+ OS.emitInt32(IPStatePair.second);
+ }
+ }
+}
+
+void WinException::computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) {
+
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ // Find the end of the funclet
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ break;
+ }
+ }
+
+ // Don't emit ip2state entries for cleanup funclets. Any interesting
+ // exceptional actions in cleanups must be handled in a separate IR
+ // function.
+ if (FuncletStart->isCleanupFuncletEntry())
+ continue;
+
+ MCSymbol *StartLabel;
+ int BaseState;
+ if (FuncletStart == MF->begin()) {
+ BaseState = NullState;
+ StartLabel = Asm->getFunctionBegin();
+ } else {
+ auto *FuncletPad =
+ cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI());
+ assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0);
+ BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second;
+ StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart);
+ }
+ assert(StartLabel && "need local function start label");
+ IPToStateTable.push_back(
+ std::make_pair(create32bitRef(StartLabel), BaseState));
+
+ for (const auto &StateChange : InvokeStateChangeIterator::range(
+ FuncInfo, FuncletStart, FuncletEnd, BaseState)) {
+ // Compute the label to report as the start of this entry: use the EH
+ // start label of the invoke if we have one; otherwise this is a call that
+ // may unwind to our caller and has no EH start label, so use the previous
+ // end label.
+ const MCSymbol *ChangeLabel = StateChange.NewStartLabel;
+ if (!ChangeLabel)
+ ChangeLabel = StateChange.PreviousEndLabel;
+ // Emit an entry indicating that PCs after 'Label' have this EH state.
+ // NOTE: On ARM architectures, the StateFromIp lookup automatically takes
+ // into account that the return address is after the call instruction
+ // (whose EH state we should be using), but on other platforms we need to
+ // add 1 to the label so that we use the correct EH state.
+ const MCExpr *LabelExpression = (isAArch64 || isThumb)
+ ? getLabel(ChangeLabel)
+ : getLabelPlusOne(ChangeLabel);
+ IPToStateTable.push_back(
+ std::make_pair(LabelExpression, StateChange.NewState));
+ // FIXME: assert that NewState is between CatchLow and CatchHigh.
+ }
+ }
+}
+
+void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
+ StringRef FLinkageName) {
+ // Outlined helpers called by the EH runtime need to know the offset of the
+ // EH registration in order to recover the parent frame pointer. Now that we
+ // have code-generated the parent, we can emit the label assignment that
+ // those helpers use to get the offset of the registration node.
+
+ // Compute the parent frame offset. The EHRegNodeFrameIndex will be invalid if
+ // after optimization all the invokes were eliminated. We still need to emit
+ // the parent frame offset label, but it should be garbage and should never be
+ // used.
+ int64_t Offset = 0;
+ int FI = FuncInfo.EHRegNodeFrameIndex;
+ if (FI != INT_MAX) {
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI).getFixed();
+ }
+
+ MCContext &Ctx = Asm->OutContext;
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ Asm->OutStreamer->emitAssignment(ParentFrameOffset,
+ MCConstantExpr::create(Offset, Ctx));
+}
+
+/// Emit the language-specific data that _except_handler3 and 4 expect. This is
+/// functionally equivalent to the __C_specific_handler table, except it is
+/// indexed by state number instead of IP.
+void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
+ MCStreamer &OS = *Asm->OutStreamer;
+ const Function &F = MF->getFunction();
+ StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
+
+ // Emit the __ehtable label that we use for llvm.x86.seh.lsda.
+ MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
+ OS.emitValueToAlignment(Align(4));
+ OS.emitLabel(LSDALabel);
+
+ const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ StringRef PerName = Per->getName();
+ int BaseState = -1;
+ if (PerName == "_except_handler4") {
+ // The LSDA for _except_handler4 starts with this struct, followed by the
+ // scope table:
+ //
+ // struct EH4ScopeTable {
+ // int32_t GSCookieOffset;
+ // int32_t GSCookieXOROffset;
+ // int32_t EHCookieOffset;
+ // int32_t EHCookieXOROffset;
+ // ScopeTableEntry ScopeRecord[];
+ // };
+ //
+ // Offsets are %ebp relative.
+ //
+ // The GS cookie is present only if the function needs stack protection.
+ // GSCookieOffset = -2 means that GS cookie is not used.
+ //
+ // The EH cookie is always present.
+ //
+ // Check is done the following way:
+ // (ebp+CookieXOROffset) ^ [ebp+CookieOffset] == _security_cookie
+
+ // Retrieve the Guard Stack slot.
+ int GSCookieOffset = -2;
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.hasStackProtectorIndex()) {
+ Register UnusedReg;
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int SSPIdx = MFI.getStackProtectorIndex();
+ GSCookieOffset =
+ TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg).getFixed();
+ }
+
+ // Retrieve the EH Guard slot.
+ // TODO(etienneb): Get rid of this value and replace it with an assertion.
+ int EHCookieOffset = 9999;
+ if (FuncInfo.EHGuardFrameIndex != INT_MAX) {
+ Register UnusedReg;
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int EHGuardIdx = FuncInfo.EHGuardFrameIndex;
+ EHCookieOffset =
+ TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg).getFixed();
+ }
+
+ AddComment("GSCookieOffset");
+ OS.emitInt32(GSCookieOffset);
+ AddComment("GSCookieXOROffset");
+ OS.emitInt32(0);
+ AddComment("EHCookieOffset");
+ OS.emitInt32(EHCookieOffset);
+ AddComment("EHCookieXOROffset");
+ OS.emitInt32(0);
+ BaseState = -2;
+ }
+
+ assert(!FuncInfo.SEHUnwindMap.empty());
+ for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) {
+ auto *Handler = cast<MachineBasicBlock *>(UME.Handler);
+ const MCSymbol *ExceptOrFinally =
+ UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol();
+ // -1 is usually the base state for "unwind to caller", but for
+ // _except_handler4 it's -2. Do that replacement here if necessary.
+ int ToState = UME.ToState == -1 ? BaseState : UME.ToState;
+ AddComment("ToState");
+ OS.emitInt32(ToState);
+ AddComment(UME.IsFinally ? "Null" : "FilterFunction");
+ OS.emitValue(create32bitRef(UME.Filter), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler");
+ OS.emitValue(create32bitRef(ExceptOrFinally), 4);
+ }
+}
+
+static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) {
+ int Rank = 0;
+ while (State != -1) {
+ ++Rank;
+ State = FuncInfo.ClrEHUnwindMap[State].TryParentState;
+ }
+ return Rank;
+}
+
+static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
+ int LeftRank = getTryRank(FuncInfo, Left);
+ int RightRank = getTryRank(FuncInfo, Right);
+
+ while (LeftRank < RightRank) {
+ Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
+ --RightRank;
+ }
+
+ while (RightRank < LeftRank) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
+ --LeftRank;
+ }
+
+ while (Left != Right) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
+ Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
+ }
+
+ return Left;
+}
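+
+// Worked example with hypothetical TryParentState chains 5 -> 3 -> 1 -> -1
+// and 4 -> 1 -> -1: getTryRank returns 3 and 2 respectively, so the deeper
+// side is first walked up from 5 to 3, and then both sides step upward in
+// lockstep until they meet at the common ancestor state 1.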
+
+void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
+ // CLR EH "states" are really just IDs that identify handlers/funclets;
+ // states, handlers, and funclets all have 1:1 mappings between them, and a
+ // handler/funclet's "state" is its index in the ClrEHUnwindMap.
+ MCStreamer &OS = *Asm->OutStreamer;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ MCSymbol *FuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *FuncEndSym = Asm->getFunctionEnd();
+
+ // A ClrClause describes a protected region.
+ struct ClrClause {
+ const MCSymbol *StartLabel; // Start of protected region
+ const MCSymbol *EndLabel; // End of protected region
+ int State; // Index of handler protecting the protected region
+ int EnclosingState; // Index of funclet enclosing the protected region
+ };
+ SmallVector<ClrClause, 8> Clauses;
+
+ // Build a map from handler MBBs to their corresponding states (i.e. their
+ // indices in the ClrEHUnwindMap).
+ int NumStates = FuncInfo.ClrEHUnwindMap.size();
+ assert(NumStates > 0 && "Don't need exception table!");
+ DenseMap<const MachineBasicBlock *, int> HandlerStates;
+ for (int State = 0; State < NumStates; ++State) {
+ MachineBasicBlock *HandlerBlock =
+ cast<MachineBasicBlock *>(FuncInfo.ClrEHUnwindMap[State].Handler);
+ HandlerStates[HandlerBlock] = State;
+ // Use this loop through all handlers to verify our assumption (used in
+ // the MinEnclosingState computation) that enclosing funclets have lower
+ // state numbers than their enclosed funclets.
+ assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State &&
+ "ill-formed state numbering");
+ }
+ // Map the main function to the NullState.
+ HandlerStates[&MF->front()] = NullState;
+
+ // Write out a sentinel indicating the end of the standard (Windows) xdata
+ // and the start of the additional (CLR) info.
+ OS.emitInt32(0xffffffff);
+ // Write out the number of funclets
+ OS.emitInt32(NumStates);
+
+ // Walk the machine blocks/instrs, computing and emitting a few things:
+ // 1. Emit a list of the offsets to each handler entry, in lexical order.
+ // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end.
+ // 3. Compute the list of ClrClauses, in the required order (inner before
+ // outer, earlier before later; the order by which a forward scan with
+ // early termination will find the innermost enclosing clause covering
+ // a given address).
+ // 4. A map (MinClauseMap) from each handler index to the index of the
+ // outermost funclet/function which contains a try clause targeting the
+ // key handler. This will be used to determine IsDuplicate-ness when
+ // emitting ClrClauses. The NullState value is used to indicate that the
+ // top-level function contains a try clause targeting the key handler.
+ // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for
+ // try regions we entered before entering the PendingState try but which
+ // we haven't yet exited.
+ SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack;
+ // EndSymbolMap and MinClauseMap are maps described above.
+ std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]);
+ SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
+
+ // Visit the root function and each funclet.
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ int FuncletState = HandlerStates[&*FuncletStart];
+ // Find the end of the funclet
+ MCSymbol *EndSymbol = FuncEndSym;
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd);
+ break;
+ }
+ }
+ // Emit the function/funclet end and, if this is a funclet (and not the
+ // root function), record it in the EndSymbolMap.
+ OS.emitValue(getOffset(EndSymbol, FuncBeginSym), 4);
+ if (FuncletState != NullState) {
+ // Record the end of the handler.
+ EndSymbolMap[FuncletState] = EndSymbol;
+ }
+
+ // Walk the state changes in this function/funclet and compute its clauses.
+ // Funclets always start in the null state.
+ const MCSymbol *CurrentStartLabel = nullptr;
+ int CurrentState = NullState;
+ assert(HandlerStack.empty());
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
+ // Close any try regions we're not still under
+ int StillPendingState =
+ getTryAncestor(FuncInfo, CurrentState, StateChange.NewState);
+ while (CurrentState != StillPendingState) {
+ assert(CurrentState != NullState &&
+ "Failed to find still-pending state!");
+ // Close the pending clause
+ Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
+ CurrentState, FuncletState});
+ // Now the next-outer try region is current
+ CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState;
+ // Pop the new start label from the handler stack if we've exited all
+ // inner try regions of the corresponding try region.
+ if (HandlerStack.back().second == CurrentState)
+ CurrentStartLabel = HandlerStack.pop_back_val().first;
+ }
+
+ if (StateChange.NewState != CurrentState) {
+ // For each clause we're starting, update the MinClauseMap so we can
+ // know which is the topmost funclet containing a clause targeting
+ // it.
+ for (int EnteredState = StateChange.NewState;
+ EnteredState != CurrentState;
+ EnteredState =
+ FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) {
+ int &MinEnclosingState = MinClauseMap[EnteredState];
+ if (FuncletState < MinEnclosingState)
+ MinEnclosingState = FuncletState;
+ }
+ // Save the previous current start/label on the stack and update to
+ // the newly-current start/state.
+ HandlerStack.emplace_back(CurrentStartLabel, CurrentState);
+ CurrentStartLabel = StateChange.NewStartLabel;
+ CurrentState = StateChange.NewState;
+ }
+ }
+ assert(HandlerStack.empty());
+ }
+
+ // Now emit the clause info, starting with the number of clauses.
+ OS.emitInt32(Clauses.size());
+ for (ClrClause &Clause : Clauses) {
+ // Emit a CORINFO_EH_CLAUSE :
+ /*
+ struct CORINFO_EH_CLAUSE
+ {
+ CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag
+ DWORD TryOffset;
+ DWORD TryLength; // actually TryEndOffset
+ DWORD HandlerOffset;
+ DWORD HandlerLength; // actually HandlerEndOffset
+ union
+ {
+ DWORD ClassToken; // use for catch clauses
+ DWORD FilterOffset; // use for filter clauses
+ };
+ };
+
+ enum CORINFO_EH_CLAUSE_FLAGS
+ {
+ CORINFO_EH_CLAUSE_NONE = 0,
+ CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter
+ CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause
+ CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause
+ };
+ typedef enum CorExceptionFlag
+ {
+ COR_ILEXCEPTION_CLAUSE_NONE,
+ COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause
+ COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause
+ COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause
+ COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This
+ // clause was duplicated
+ // to a funclet which was
+ // pulled out of line
+ } CorExceptionFlag;
+ */
+ // Add 1 to the start/end of the EH clause; the IP associated with a
+ // call when the runtime does its scan is the IP of the next instruction
+ // (the one to which control will return after the call), so we need
+ // to add 1 to the end of the clause to cover that offset. We also add
+ // 1 to the start of the clause to make sure that the ranges reported
+ // for all clauses are disjoint. Note that we'll need some additional
+ // logic when machine traps are supported, since in that case the IP
+ // that the runtime uses is the offset of the faulting instruction
+ // itself; if such an instruction immediately follows a call but the
+ // two belong to different clauses, we'll need to insert a nop between
+ // them so the runtime can distinguish the point to which the call will
+ // return from the point at which the fault occurs.
+
+ const MCExpr *ClauseBegin =
+ getOffsetPlusOne(Clause.StartLabel, FuncBeginSym);
+ const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym);
+
+ const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State];
+ MachineBasicBlock *HandlerBlock = cast<MachineBasicBlock *>(Entry.Handler);
+ MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock);
+ const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym);
+ MCSymbol *EndSym = EndSymbolMap[Clause.State];
+ const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym);
+
+ uint32_t Flags = 0;
+ switch (Entry.HandlerType) {
+ case ClrHandlerType::Catch:
+ // Leaving bits 0-2 clear indicates catch.
+ break;
+ case ClrHandlerType::Filter:
+ Flags |= 1;
+ break;
+ case ClrHandlerType::Finally:
+ Flags |= 2;
+ break;
+ case ClrHandlerType::Fault:
+ Flags |= 4;
+ break;
+ }
+ if (Clause.EnclosingState != MinClauseMap[Clause.State]) {
+ // This is a "duplicate" clause; the handler needs to be entered from a
+ // frame above the one holding the invoke.
+ assert(Clause.EnclosingState > MinClauseMap[Clause.State]);
+ Flags |= 8;
+ }
+ OS.emitInt32(Flags);
+
+ // Write the clause start/end
+ OS.emitValue(ClauseBegin, 4);
+ OS.emitValue(ClauseEnd, 4);
+
+ // Write out the handler start/end
+ OS.emitValue(HandlerBegin, 4);
+ OS.emitValue(HandlerEnd, 4);
+
+ // Write out the type token or filter offset
+ assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters");
+ OS.emitInt32(Entry.TypeToken);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
new file mode 100644
index 000000000000..638589adf0dd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -0,0 +1,121 @@
+//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Windows exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+
+#include "EHStreamer.h"
+#include <vector>
+
+namespace llvm {
+class GlobalValue;
+class MachineFunction;
+class MCExpr;
+class MCSection;
+struct WinEHFuncInfo;
+
+class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
+ /// Per-function flag to indicate if personality info should be emitted.
+ bool shouldEmitPersonality = false;
+
+ /// Per-function flag to indicate if the LSDA should be emitted.
+ bool shouldEmitLSDA = false;
+
+ /// Per-function flag to indicate if frame moves info should be emitted.
+ bool shouldEmitMoves = false;
+
+ /// True if this is a 64-bit target and we should use image relative offsets.
+ bool useImageRel32 = false;
+
+ /// True if we are generating exception handling on Windows for ARM64.
+ bool isAArch64 = false;
+
+ /// True if we are generating exception handling on Windows for ARM (Thumb).
+ bool isThumb = false;
+
+ /// Pointer to the current funclet entry BB.
+ const MachineBasicBlock *CurrentFuncletEntry = nullptr;
+
+ /// The section of the last funclet start.
+ MCSection *CurrentFuncletTextSection = nullptr;
+
+ /// The list of symbols to add to the ehcont section
+ std::vector<const MCSymbol *> EHContTargets;
+
+ void emitCSpecificHandlerTable(const MachineFunction *MF);
+
+ void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State);
+
+ /// Emit the EH table data for 32-bit and 64-bit functions using
+ /// the __CxxFrameHandler3 personality.
+ void emitCXXFrameHandler3Table(const MachineFunction *MF);
+
+ /// Emit the EH table data for _except_handler3 and _except_handler4
+ /// personality functions. These are only used on 32-bit and do not use CFI
+ /// tables.
+ void emitExceptHandlerTable(const MachineFunction *MF);
+
+ void emitCLRExceptionTable(const MachineFunction *MF);
+
+ void computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
+
+ /// Emits the label used with llvm.eh.recoverfp, which is used by
+ /// outlined funclets.
+ void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
+ StringRef FLinkageName);
+
+ const MCExpr *create32bitRef(const MCSymbol *Value);
+ const MCExpr *create32bitRef(const GlobalValue *GV);
+ const MCExpr *getLabel(const MCSymbol *Label);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
+ const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom);
+
+ /// Gets the offset that we should use in a table for a stack object with the
+ /// given index. For targets using CFI (Win64, etc), this is relative to the
+ /// established SP at the end of the prologue. For targets without CFI (Win32
+ /// only), it is relative to the frame pointer.
+ int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
+
+ void endFuncletImpl();
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ WinException(AsmPrinter *A);
+ ~WinException() override;
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ void markFunctionEnd() override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ /// Emit target-specific EH funclet machinery.
+ void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
+ void endFunclet() override;
+};
+}
+
+#endif
+
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
new file mode 100644
index 000000000000..5ef850d09d92
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -0,0 +1,2576 @@
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "LiveDebugValues/LiveDebugValues.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <assert.h>
+#include <cstdint>
+#include <optional>
+#include <sstream>
+#include <unordered_map>
+
+using namespace llvm;
+#define DEBUG_TYPE "debug-ata"
+
+STATISTIC(NumDefsScanned, "Number of dbg locs that get scanned for removal");
+STATISTIC(NumDefsRemoved, "Number of dbg locs removed");
+STATISTIC(NumWedgesScanned, "Number of dbg wedges scanned");
+STATISTIC(NumWedgesChanged, "Number of dbg wedges changed");
+
+static cl::opt<unsigned>
+ MaxNumBlocks("debug-ata-max-blocks", cl::init(10000),
+ cl::desc("Maximum num basic blocks before debug info dropped"),
+ cl::Hidden);
+/// Option for debugging the pass; determines if the memory location fragment
+/// filling happens after generating the variable locations.
+static cl::opt<bool> EnableMemLocFragFill("mem-loc-frag-fill", cl::init(true),
+ cl::Hidden);
+/// Print the results of the analysis. Respects -filter-print-funcs.
+static cl::opt<bool> PrintResults("print-debug-ata", cl::init(false),
+ cl::Hidden);
+
+/// Coalesce adjacent dbg locs describing memory locations that have contiguous
+/// fragments. This reduces the cost of LiveDebugValues which does SSA
+/// construction for each explicitly stated variable fragment.
+static cl::opt<cl::boolOrDefault>
+ CoalesceAdjacentFragmentsOpt("debug-ata-coalesce-frags", cl::Hidden);
+
+// Implicit conversions are disabled for enum class types, so unfortunately we
+// need to create a DenseMapInfo wrapper around the specified underlying type.
+template <> struct llvm::DenseMapInfo<VariableID> {
+ using Wrapped = DenseMapInfo<unsigned>;
+ static inline VariableID getEmptyKey() {
+ return static_cast<VariableID>(Wrapped::getEmptyKey());
+ }
+ static inline VariableID getTombstoneKey() {
+ return static_cast<VariableID>(Wrapped::getTombstoneKey());
+ }
+ static unsigned getHashValue(const VariableID &Val) {
+ return Wrapped::getHashValue(static_cast<unsigned>(Val));
+ }
+ static bool isEqual(const VariableID &LHS, const VariableID &RHS) {
+ return LHS == RHS;
+ }
+};
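+
+// Illustrative note (not upstream code): with the specialization above in
+// place, VariableID can be used directly as a DenseMap key; the wrapper just
+// forwards to DenseMapInfo<unsigned> for the empty/tombstone sentinels and
+// the hash. A minimal, hypothetical usage sketch:
+//
+//   llvm::DenseMap<VariableID, unsigned> UseCounts;
+//   void countUse(VariableID ID) { ++UseCounts[ID]; }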
+
+/// Helper class to build FunctionVarLocs, since that class isn't easy to
+/// modify. TODO: There's not a great deal of value in the split; it could be
+/// worth merging the two classes.
+class FunctionVarLocsBuilder {
+ friend FunctionVarLocs;
+ UniqueVector<DebugVariable> Variables;
+ // Use an unordered_map so we don't invalidate iterators after
+ // insert/modifications.
+ std::unordered_map<const Instruction *, SmallVector<VarLocInfo>>
+ VarLocsBeforeInst;
+
+ SmallVector<VarLocInfo> SingleLocVars;
+
+public:
+ unsigned getNumVariables() const { return Variables.size(); }
+
+ /// Find or insert \p V and return the ID.
+ VariableID insertVariable(DebugVariable V) {
+ return static_cast<VariableID>(Variables.insert(V));
+ }
+
+ /// Get a variable from its \p ID.
+ const DebugVariable &getVariable(VariableID ID) const {
+ return Variables[static_cast<unsigned>(ID)];
+ }
+
+  /// Return ptr to wedge of defs or nullptr if no defs come just before \p
+ /// Before.
+ const SmallVectorImpl<VarLocInfo> *getWedge(const Instruction *Before) const {
+ auto R = VarLocsBeforeInst.find(Before);
+ if (R == VarLocsBeforeInst.end())
+ return nullptr;
+ return &R->second;
+ }
+
+  /// Replace the defs that come just before \p Before with \p Wedge.
+ void setWedge(const Instruction *Before, SmallVector<VarLocInfo> &&Wedge) {
+ VarLocsBeforeInst[Before] = std::move(Wedge);
+ }
+
+ /// Add a def for a variable that is valid for its lifetime.
+ void addSingleLocVar(DebugVariable Var, DIExpression *Expr, DebugLoc DL,
+ RawLocationWrapper R) {
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = insertVariable(Var);
+ VarLoc.Expr = Expr;
+ VarLoc.DL = DL;
+ VarLoc.Values = R;
+ SingleLocVars.emplace_back(VarLoc);
+ }
+
+  /// Add a def to the wedge of defs just before \p Before.
+ void addVarLoc(Instruction *Before, DebugVariable Var, DIExpression *Expr,
+ DebugLoc DL, RawLocationWrapper R) {
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = insertVariable(Var);
+ VarLoc.Expr = Expr;
+ VarLoc.DL = DL;
+ VarLoc.Values = R;
+ VarLocsBeforeInst[Before].emplace_back(VarLoc);
+ }
+};
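+
+// Illustrative sketch (hypothetical, not upstream code): a client of the
+// builder records per-instruction location definitions roughly like this,
+// after which FunctionVarLocs::init flattens the per-instruction wedges into
+// a single vector indexed by [start, end) pairs.
+//
+//   void recordDef(FunctionVarLocsBuilder &Builder, Instruction *Before,
+//                  const DbgValueInst *DVI) {
+//     Builder.addVarLoc(Before, DebugVariable(DVI), DVI->getExpression(),
+//                       DVI->getDebugLoc(),
+//                       RawLocationWrapper(DVI->getRawLocation()));
+//   }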
+
+void FunctionVarLocs::print(raw_ostream &OS, const Function &Fn) const {
+ // Print the variable table first. TODO: Sorting by variable could make the
+ // output more stable?
+ unsigned Counter = -1;
+ OS << "=== Variables ===\n";
+ for (const DebugVariable &V : Variables) {
+ ++Counter;
+ // Skip first entry because it is a dummy entry.
+ if (Counter == 0) {
+ continue;
+ }
+ OS << "[" << Counter << "] " << V.getVariable()->getName();
+ if (auto F = V.getFragment())
+ OS << " bits [" << F->OffsetInBits << ", "
+ << F->OffsetInBits + F->SizeInBits << ")";
+ if (const auto *IA = V.getInlinedAt())
+ OS << " inlined-at " << *IA;
+ OS << "\n";
+ }
+
+ auto PrintLoc = [&OS](const VarLocInfo &Loc) {
+ OS << "DEF Var=[" << (unsigned)Loc.VariableID << "]"
+ << " Expr=" << *Loc.Expr << " Values=(";
+ for (auto *Op : Loc.Values.location_ops()) {
+      OS << Op->getName() << " ";
+    }
+    OS << ")\n";
+ };
+
+ // Print the single location variables.
+ OS << "=== Single location vars ===\n";
+ for (auto It = single_locs_begin(), End = single_locs_end(); It != End;
+ ++It) {
+ PrintLoc(*It);
+ }
+
+ // Print the non-single-location defs in line with IR.
+ OS << "=== In-line variable defs ===";
+ for (const BasicBlock &BB : Fn) {
+ OS << "\n" << BB.getName() << ":\n";
+ for (const Instruction &I : BB) {
+ for (auto It = locs_begin(&I), End = locs_end(&I); It != End; ++It) {
+ PrintLoc(*It);
+ }
+ OS << I << "\n";
+ }
+ }
+}
+
+void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) {
+ // Add the single-location variables first.
+ for (const auto &VarLoc : Builder.SingleLocVars)
+ VarLocRecords.emplace_back(VarLoc);
+ // Mark the end of the section.
+ SingleVarLocEnd = VarLocRecords.size();
+
+ // Insert a contiguous block of VarLocInfos for each instruction, mapping it
+ // to the start and end position in the vector with VarLocsBeforeInst.
+ for (auto &P : Builder.VarLocsBeforeInst) {
+ unsigned BlockStart = VarLocRecords.size();
+ for (const VarLocInfo &VarLoc : P.second)
+ VarLocRecords.emplace_back(VarLoc);
+ unsigned BlockEnd = VarLocRecords.size();
+ // Record the start and end indices.
+ if (BlockEnd != BlockStart)
+ VarLocsBeforeInst[P.first] = {BlockStart, BlockEnd};
+ }
+
+ // Copy the Variables vector from the builder's UniqueVector.
+ assert(Variables.empty() && "Expect clear before init");
+  // UniqueVector's IDs are one-based (which means the VarLocInfo VarID values
+ // are one-based) so reserve an extra and insert a dummy.
+ Variables.reserve(Builder.Variables.size() + 1);
+ Variables.push_back(DebugVariable(nullptr, std::nullopt, nullptr));
+ Variables.append(Builder.Variables.begin(), Builder.Variables.end());
+}
+
+void FunctionVarLocs::clear() {
+ Variables.clear();
+ VarLocRecords.clear();
+ VarLocsBeforeInst.clear();
+ SingleVarLocEnd = 0;
+}
+
+/// Walk backwards along constant GEPs and bitcasts to the base storage from \p
+/// Start as far as possible. Prepend \p Expression with the offset and append
+/// it with a DW_OP_deref that has been implicit until now. Returns the
+/// walked-to value and modified expression.
+static std::pair<Value *, DIExpression *>
+walkToAllocaAndPrependOffsetDeref(const DataLayout &DL, Value *Start,
+ DIExpression *Expression) {
+ APInt OffsetInBytes(DL.getTypeSizeInBits(Start->getType()), false);
+ Value *End =
+ Start->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetInBytes);
+ SmallVector<uint64_t, 3> Ops;
+ if (OffsetInBytes.getBoolValue()) {
+ Ops = {dwarf::DW_OP_plus_uconst, OffsetInBytes.getZExtValue()};
+ Expression = DIExpression::prependOpcodes(
+ Expression, Ops, /*StackValue=*/false, /*EntryValue=*/false);
+ }
+ Expression = DIExpression::append(Expression, {dwarf::DW_OP_deref});
+ return {End, Expression};
+}
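+
+// Worked example (illustrative only): given IR such as
+//
+//   %a = alloca { i32, i32 }
+//   %f1 = getelementptr inbounds { i32, i32 }, ptr %a, i32 0, i32 1
+//
+// walkToAllocaAndPrependOffsetDeref(DL, %f1, !DIExpression()) returns
+// {%a, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_deref)}: the 4-byte constant
+// GEP offset is folded into the expression and the previously implicit
+// dereference is made explicit.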
+
+/// Extract the offset used in \p DIExpr. Returns std::nullopt if the expression
+/// doesn't explicitly describe a memory location with DW_OP_deref or if the
+/// expression is too complex to interpret.
+static std::optional<int64_t>
+getDerefOffsetInBytes(const DIExpression *DIExpr) {
+ int64_t Offset = 0;
+ const unsigned NumElements = DIExpr->getNumElements();
+ const auto Elements = DIExpr->getElements();
+ unsigned ExpectedDerefIdx = 0;
+ // Extract the offset.
+ if (NumElements > 2 && Elements[0] == dwarf::DW_OP_plus_uconst) {
+ Offset = Elements[1];
+ ExpectedDerefIdx = 2;
+ } else if (NumElements > 3 && Elements[0] == dwarf::DW_OP_constu) {
+ ExpectedDerefIdx = 3;
+ if (Elements[2] == dwarf::DW_OP_plus)
+ Offset = Elements[1];
+ else if (Elements[2] == dwarf::DW_OP_minus)
+ Offset = -Elements[1];
+ else
+ return std::nullopt;
+ }
+
+ // If that's all there is it means there's no deref.
+ if (ExpectedDerefIdx >= NumElements)
+ return std::nullopt;
+
+ // Check the next element is DW_OP_deref - otherwise this is too complex or
+ // isn't a deref expression.
+ if (Elements[ExpectedDerefIdx] != dwarf::DW_OP_deref)
+ return std::nullopt;
+
+ // Check the final operation is either the DW_OP_deref or is a fragment.
+ if (NumElements == ExpectedDerefIdx + 1)
+ return Offset; // Ends with deref.
+ unsigned ExpectedFragFirstIdx = ExpectedDerefIdx + 1;
+ unsigned ExpectedFragFinalIdx = ExpectedFragFirstIdx + 2;
+ if (NumElements == ExpectedFragFinalIdx + 1 &&
+ Elements[ExpectedFragFirstIdx] == dwarf::DW_OP_LLVM_fragment)
+ return Offset; // Ends with deref + fragment.
+
+ // Don't bother trying to interpret anything more complex.
+ return std::nullopt;
+}
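+
+// Illustrative examples (not upstream code) of what getDerefOffsetInBytes
+// returns for a few expressions:
+//
+//   !DIExpression(DW_OP_deref)                                 --> 0
+//   !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)           --> 8
+//   !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_deref)   --> -8
+//   !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref,
+//                 DW_OP_LLVM_fragment, 0, 32)                  --> 8
+//   !DIExpression(DW_OP_plus_uconst, 8)                        --> std::nullopt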
+
+/// A whole (unfragmented) source variable.
+using DebugAggregate = std::pair<const DILocalVariable *, const DILocation *>;
+static DebugAggregate getAggregate(const DbgVariableIntrinsic *DII) {
+ return DebugAggregate(DII->getVariable(), DII->getDebugLoc().getInlinedAt());
+}
+static DebugAggregate getAggregate(const DebugVariable &Var) {
+ return DebugAggregate(Var.getVariable(), Var.getInlinedAt());
+}
+
+static bool shouldCoalesceFragments(Function &F) {
+ // Enabling fragment coalescing reduces compiler run time when instruction
+ // referencing is enabled. However, it may cause LiveDebugVariables to create
+ // incorrect locations. Since instruction-referencing mode effectively
+ // bypasses LiveDebugVariables we only enable coalescing if the cl::opt flag
+ // has not been explicitly set and instruction-referencing is turned on.
+ switch (CoalesceAdjacentFragmentsOpt) {
+ case cl::boolOrDefault::BOU_UNSET:
+ return debuginfoShouldUseDebugInstrRef(
+ Triple(F.getParent()->getTargetTriple()));
+ case cl::boolOrDefault::BOU_TRUE:
+ return true;
+ case cl::boolOrDefault::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Unknown boolOrDefault value");
+}
+
+namespace {
+/// In dwarf emission, the following sequence
+/// 1. dbg.value ... Fragment(0, 64)
+/// 2. dbg.value ... Fragment(0, 32)
+/// effectively sets Fragment(32, 32) to undef (each def sets all bits not in
+/// the intersection of the fragments to having "no location"). This makes
+/// sense for implicit location values because splitting the computed values
+/// could be troublesome, and is probably quite uncommon. When we convert
+/// dbg.assigns to dbg.value+deref this kind of thing is common, and describing
+/// a location (memory) rather than a value means we don't need to worry about
+/// splitting any values, so we try to recover the rest of the fragment
+/// location here.
+/// This class performs a(nother) dataflow analysis over the function, adding
+/// variable locations so that any bits of a variable with a memory location
+/// have that location explicitly reinstated at each subsequent variable
+/// location definition that doesn't overwrite those bits. i.e. after a
+/// variable location def, insert new defs for the memory location with
+/// fragments for the difference of "all bits currently in memory" and "the
+/// fragment of the second def".
+class MemLocFragmentFill {
+ Function &Fn;
+ FunctionVarLocsBuilder *FnVarLocs;
+ const DenseSet<DebugAggregate> *VarsWithStackSlot;
+ bool CoalesceAdjacentFragments;
+
+ // 0 = no memory location.
+ using BaseAddress = unsigned;
+ using OffsetInBitsTy = unsigned;
+ using FragTraits = IntervalMapHalfOpenInfo<OffsetInBitsTy>;
+ using FragsInMemMap = IntervalMap<
+ OffsetInBitsTy, BaseAddress,
+ IntervalMapImpl::NodeSizer<OffsetInBitsTy, BaseAddress>::LeafSize,
+ FragTraits>;
+ FragsInMemMap::Allocator IntervalMapAlloc;
+ using VarFragMap = DenseMap<unsigned, FragsInMemMap>;
+
+ /// IDs for memory location base addresses in maps. Use 0 to indicate that
+ /// there's no memory location.
+ UniqueVector<RawLocationWrapper> Bases;
+ UniqueVector<DebugAggregate> Aggregates;
+ DenseMap<const BasicBlock *, VarFragMap> LiveIn;
+ DenseMap<const BasicBlock *, VarFragMap> LiveOut;
+
+ struct FragMemLoc {
+ unsigned Var;
+ unsigned Base;
+ unsigned OffsetInBits;
+ unsigned SizeInBits;
+ DebugLoc DL;
+ };
+ using InsertMap = MapVector<Instruction *, SmallVector<FragMemLoc>>;
+
+ /// BBInsertBeforeMap holds a description for the set of location defs to be
+ /// inserted after the analysis is complete. It is updated during the dataflow
+ /// and the entry for a block is CLEARED each time it is (re-)visited. After
+ /// the dataflow is complete, each block entry will contain the set of defs
+ /// calculated during the final (fixed-point) iteration.
+ DenseMap<const BasicBlock *, InsertMap> BBInsertBeforeMap;
+
+ static bool intervalMapsAreEqual(const FragsInMemMap &A,
+ const FragsInMemMap &B) {
+ auto AIt = A.begin(), AEnd = A.end();
+ auto BIt = B.begin(), BEnd = B.end();
+ for (; AIt != AEnd; ++AIt, ++BIt) {
+ if (BIt == BEnd)
+ return false; // B has fewer elements than A.
+ if (AIt.start() != BIt.start() || AIt.stop() != BIt.stop())
+ return false; // Interval is different.
+ if (*AIt != *BIt)
+ return false; // Value at interval is different.
+ }
+ // AIt == AEnd. Check BIt is also now at end.
+ return BIt == BEnd;
+ }
+
+ static bool varFragMapsAreEqual(const VarFragMap &A, const VarFragMap &B) {
+ if (A.size() != B.size())
+ return false;
+ for (const auto &APair : A) {
+ auto BIt = B.find(APair.first);
+ if (BIt == B.end())
+ return false;
+ if (!intervalMapsAreEqual(APair.second, BIt->second))
+ return false;
+ }
+ return true;
+ }
+
+ /// Return a string for the value that \p BaseID represents.
+ std::string toString(unsigned BaseID) {
+ if (BaseID)
+ return Bases[BaseID].getVariableLocationOp(0)->getName().str();
+ else
+ return "None";
+ }
+
+  /// Format a string describing a FragsInMemMap (IntervalMap) interval.
+ std::string toString(FragsInMemMap::const_iterator It, bool Newline = true) {
+ std::string String;
+ std::stringstream S(String);
+ if (It.valid()) {
+ S << "[" << It.start() << ", " << It.stop()
+ << "): " << toString(It.value());
+ } else {
+ S << "invalid iterator (end)";
+ }
+ if (Newline)
+ S << "\n";
+ return S.str();
+  }
+
+ FragsInMemMap meetFragments(const FragsInMemMap &A, const FragsInMemMap &B) {
+ FragsInMemMap Result(IntervalMapAlloc);
+ for (auto AIt = A.begin(), AEnd = A.end(); AIt != AEnd; ++AIt) {
+ LLVM_DEBUG(dbgs() << "a " << toString(AIt));
+ // This is basically copied from process() and inverted (process is
+ // performing something like a union whereas this is more of an
+ // intersect).
+
+ // There's no work to do if interval `a` overlaps no fragments in map `B`.
+ if (!B.overlaps(AIt.start(), AIt.stop()))
+ continue;
+
+ // Does StartBit intersect an existing fragment?
+ auto FirstOverlap = B.find(AIt.start());
+ assert(FirstOverlap != B.end());
+ bool IntersectStart = FirstOverlap.start() < AIt.start();
+ LLVM_DEBUG(dbgs() << "- FirstOverlap " << toString(FirstOverlap, false)
+ << ", IntersectStart: " << IntersectStart << "\n");
+
+ // Does EndBit intersect an existing fragment?
+ auto LastOverlap = B.find(AIt.stop());
+ bool IntersectEnd =
+ LastOverlap != B.end() && LastOverlap.start() < AIt.stop();
+ LLVM_DEBUG(dbgs() << "- LastOverlap " << toString(LastOverlap, false)
+ << ", IntersectEnd: " << IntersectEnd << "\n");
+
+ // Check if both ends of `a` intersect the same interval `b`.
+ if (IntersectStart && IntersectEnd && FirstOverlap == LastOverlap) {
+ // Insert `a` (`a` is contained in `b`) if the values match.
+ // [ a ]
+ // [ - b - ]
+ // -
+ // [ r ]
+ LLVM_DEBUG(dbgs() << "- a is contained within "
+ << toString(FirstOverlap));
+ if (*AIt && *AIt == *FirstOverlap)
+ Result.insert(AIt.start(), AIt.stop(), *AIt);
+ } else {
+ // There's an overlap but `a` is not fully contained within
+ // `b`. Shorten any end-point intersections.
+ // [ - a - ]
+ // [ - b - ]
+ // -
+ // [ r ]
+ auto Next = FirstOverlap;
+ if (IntersectStart) {
+ LLVM_DEBUG(dbgs() << "- insert intersection of a and "
+ << toString(FirstOverlap));
+ if (*AIt && *AIt == *FirstOverlap)
+ Result.insert(AIt.start(), FirstOverlap.stop(), *AIt);
+ ++Next;
+ }
+ // [ - a - ]
+ // [ - b - ]
+ // -
+ // [ r ]
+ if (IntersectEnd) {
+ LLVM_DEBUG(dbgs() << "- insert intersection of a and "
+ << toString(LastOverlap));
+ if (*AIt && *AIt == *LastOverlap)
+ Result.insert(LastOverlap.start(), AIt.stop(), *AIt);
+ }
+
+ // Insert all intervals in map `B` that are contained within interval
+ // `a` where the values match.
+ // [ - - a - - ]
+ // [ b1 ] [ b2 ]
+ // -
+ // [ r1 ] [ r2 ]
+ while (Next != B.end() && Next.start() < AIt.stop() &&
+ Next.stop() <= AIt.stop()) {
+ LLVM_DEBUG(dbgs()
+ << "- insert intersection of a and " << toString(Next));
+ if (*AIt && *AIt == *Next)
+ Result.insert(Next.start(), Next.stop(), *Next);
+ ++Next;
+ }
+ }
+ }
+ return Result;
+ }
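+
+  // Worked example (illustrative only): with base IDs x and y,
+  //   A = { [0, 32) -> x, [32, 64) -> y }
+  //   B = { [0, 64) -> x }
+  // meetFragments(A, B) yields { [0, 32) -> x }: the [0, 32) slice agrees on
+  // base x in both maps, while [32, 64) maps to different bases and is
+  // therefore dropped from the result.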
+
+ /// Meet \p A and \p B, storing the result in \p A.
+ void meetVars(VarFragMap &A, const VarFragMap &B) {
+ // Meet A and B.
+ //
+ // Result = meet(a, b) for a in A, b in B where Var(a) == Var(b)
+ for (auto It = A.begin(), End = A.end(); It != End; ++It) {
+ unsigned AVar = It->first;
+ FragsInMemMap &AFrags = It->second;
+ auto BIt = B.find(AVar);
+ if (BIt == B.end()) {
+ A.erase(It);
+ continue; // Var has no bits defined in B.
+ }
+ LLVM_DEBUG(dbgs() << "meet fragment maps for "
+ << Aggregates[AVar].first->getName() << "\n");
+ AFrags = meetFragments(AFrags, BIt->second);
+ }
+ }
+
+ bool meet(const BasicBlock &BB,
+ const SmallPtrSet<BasicBlock *, 16> &Visited) {
+ LLVM_DEBUG(dbgs() << "meet block info from preds of " << BB.getName()
+ << "\n");
+
+ VarFragMap BBLiveIn;
+ bool FirstMeet = true;
+ // LiveIn locs for BB is the meet of the already-processed preds' LiveOut
+ // locs.
+ for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
+ // Ignore preds that haven't been processed yet. This is essentially the
+ // same as initialising all variables to implicit top value (⊤) which is
+ // the identity value for the meet operation.
+ const BasicBlock *Pred = *I;
+ if (!Visited.count(Pred))
+ continue;
+
+ auto PredLiveOut = LiveOut.find(Pred);
+ assert(PredLiveOut != LiveOut.end());
+
+ if (FirstMeet) {
+ LLVM_DEBUG(dbgs() << "BBLiveIn = " << Pred->getName() << "\n");
+ BBLiveIn = PredLiveOut->second;
+ FirstMeet = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "BBLiveIn = meet BBLiveIn, " << Pred->getName()
+ << "\n");
+ meetVars(BBLiveIn, PredLiveOut->second);
+ }
+
+ // An empty set is ⊥ for the intersect-like meet operation. If we've
+ // already got ⊥ there's no need to run the code - we know the result is
+ // ⊥ since `meet(a, ⊥) = ⊥`.
+ if (BBLiveIn.size() == 0)
+ break;
+ }
+
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
+ // If there's no LiveIn entry for the block yet, add it.
+ if (CurrentLiveInEntry == LiveIn.end()) {
+ LLVM_DEBUG(dbgs() << "change=true (first) on meet on " << BB.getName()
+ << "\n");
+ LiveIn[&BB] = std::move(BBLiveIn);
+ return /*Changed=*/true;
+ }
+
+ // If the LiveIn set has changed (expensive check) update it and return
+ // true.
+ if (!varFragMapsAreEqual(BBLiveIn, CurrentLiveInEntry->second)) {
+ LLVM_DEBUG(dbgs() << "change=true on meet on " << BB.getName() << "\n");
+ CurrentLiveInEntry->second = std::move(BBLiveIn);
+ return /*Changed=*/true;
+ }
+
+ LLVM_DEBUG(dbgs() << "change=false on meet on " << BB.getName() << "\n");
+ return /*Changed=*/false;
+ }
+
+ void insertMemLoc(BasicBlock &BB, Instruction &Before, unsigned Var,
+ unsigned StartBit, unsigned EndBit, unsigned Base,
+ DebugLoc DL) {
+ assert(StartBit < EndBit && "Cannot create fragment of size <= 0");
+ if (!Base)
+ return;
+ FragMemLoc Loc;
+ Loc.Var = Var;
+ Loc.OffsetInBits = StartBit;
+ Loc.SizeInBits = EndBit - StartBit;
+ assert(Base && "Expected a non-zero ID for Base address");
+ Loc.Base = Base;
+ Loc.DL = DL;
+ BBInsertBeforeMap[&BB][&Before].push_back(Loc);
+ LLVM_DEBUG(dbgs() << "Add mem def for " << Aggregates[Var].first->getName()
+ << " bits [" << StartBit << ", " << EndBit << ")\n");
+ }
+
+ /// Inserts a new dbg def if the interval found when looking up \p StartBit
+ /// in \p FragMap starts before \p StartBit or ends after \p EndBit (which
+ /// indicates - assuming StartBit->EndBit has just been inserted - that the
+ /// slice has been coalesced in the map).
+ void coalesceFragments(BasicBlock &BB, Instruction &Before, unsigned Var,
+ unsigned StartBit, unsigned EndBit, unsigned Base,
+ DebugLoc DL, const FragsInMemMap &FragMap) {
+ if (!CoalesceAdjacentFragments)
+ return;
+ // We've inserted the location into the map. The map will have coalesced
+ // adjacent intervals (variable fragments) that describe the same memory
+ // location. Use this knowledge to insert a debug location that describes
+ // that coalesced fragment. This may eclipse other locs we've just
+ // inserted. This is okay as redundant locs will be cleaned up later.
+ auto CoalescedFrag = FragMap.find(StartBit);
+ // Bail if no coalescing has taken place.
+ if (CoalescedFrag.start() == StartBit && CoalescedFrag.stop() == EndBit)
+ return;
+
+ LLVM_DEBUG(dbgs() << "- Insert loc for bits " << CoalescedFrag.start()
+ << " to " << CoalescedFrag.stop() << "\n");
+ insertMemLoc(BB, Before, Var, CoalescedFrag.start(), CoalescedFrag.stop(),
+ Base, DL);
+ }
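+
+  // Worked example (illustrative only): if FragMap already contained
+  // [0, 32) -> base1 and [32, 64) -> base1 has just been inserted, IntervalMap
+  // coalesces the two into [0, 64) -> base1. find(StartBit = 32) then returns
+  // that wider interval, so an additional def covering bits [0, 64) is
+  // emitted.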
+
+ void addDef(const VarLocInfo &VarLoc, Instruction &Before, BasicBlock &BB,
+ VarFragMap &LiveSet) {
+ DebugVariable DbgVar = FnVarLocs->getVariable(VarLoc.VariableID);
+ if (skipVariable(DbgVar.getVariable()))
+ return;
+    // Don't bother doing anything for this variable if we know it's fully
+ // promoted. We're only interested in variables that (sometimes) live on
+ // the stack here.
+ if (!VarsWithStackSlot->count(getAggregate(DbgVar)))
+ return;
+ unsigned Var = Aggregates.insert(
+ DebugAggregate(DbgVar.getVariable(), VarLoc.DL.getInlinedAt()));
+
+ // [StartBit: EndBit) are the bits affected by this def.
+ const DIExpression *DIExpr = VarLoc.Expr;
+ unsigned StartBit;
+ unsigned EndBit;
+ if (auto Frag = DIExpr->getFragmentInfo()) {
+ StartBit = Frag->OffsetInBits;
+ EndBit = StartBit + Frag->SizeInBits;
+ } else {
+ assert(static_cast<bool>(DbgVar.getVariable()->getSizeInBits()));
+ StartBit = 0;
+ EndBit = *DbgVar.getVariable()->getSizeInBits();
+ }
+
+ // We will only fill fragments for simple memory-describing dbg.value
+ // intrinsics. If the fragment offset is the same as the offset from the
+ // base pointer, do The Thing, otherwise fall back to normal dbg.value
+ // behaviour. AssignmentTrackingLowering has generated DIExpressions
+ // written in terms of the base pointer.
+ // TODO: Remove this condition since the fragment offset doesn't always
+ // equal the offset from base pointer (e.g. for a SROA-split variable).
+ const auto DerefOffsetInBytes = getDerefOffsetInBytes(DIExpr);
+ const unsigned Base =
+ DerefOffsetInBytes && *DerefOffsetInBytes * 8 == StartBit
+ ? Bases.insert(VarLoc.Values)
+ : 0;
+ LLVM_DEBUG(dbgs() << "DEF " << DbgVar.getVariable()->getName() << " ["
+ << StartBit << ", " << EndBit << "): " << toString(Base)
+ << "\n");
+
+ // First of all, any locs that use mem that are disrupted need reinstating.
+ // Unfortunately, IntervalMap doesn't let us insert intervals that overlap
+ // with existing intervals so this code involves a lot of fiddling around
+ // with intervals to do that manually.
+ auto FragIt = LiveSet.find(Var);
+
+ // Check if the variable does not exist in the map.
+ if (FragIt == LiveSet.end()) {
+ // Add this variable to the BB map.
+ auto P = LiveSet.try_emplace(Var, FragsInMemMap(IntervalMapAlloc));
+ assert(P.second && "Var already in map?");
+ // Add the interval to the fragment map.
+ P.first->second.insert(StartBit, EndBit, Base);
+ return;
+ }
+ // The variable has an entry in the map.
+
+ FragsInMemMap &FragMap = FragIt->second;
+ // First check the easy case: the new fragment `f` doesn't overlap with any
+ // intervals.
+ if (!FragMap.overlaps(StartBit, EndBit)) {
+ LLVM_DEBUG(dbgs() << "- No overlaps\n");
+ FragMap.insert(StartBit, EndBit, Base);
+ coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
+ FragMap);
+ return;
+ }
+ // There is at least one overlap.
+
+ // Does StartBit intersect an existing fragment?
+ auto FirstOverlap = FragMap.find(StartBit);
+ assert(FirstOverlap != FragMap.end());
+ bool IntersectStart = FirstOverlap.start() < StartBit;
+
+ // Does EndBit intersect an existing fragment?
+ auto LastOverlap = FragMap.find(EndBit);
+ bool IntersectEnd = LastOverlap.valid() && LastOverlap.start() < EndBit;
+
+ // Check if both ends of `f` intersect the same interval `i`.
+ if (IntersectStart && IntersectEnd && FirstOverlap == LastOverlap) {
+ LLVM_DEBUG(dbgs() << "- Intersect single interval @ both ends\n");
+ // Shorten `i` so that there's space to insert `f`.
+ // [ f ]
+ // [ - i - ]
+ // +
+ // [ i ][ f ][ i ]
+
+ // Save values for use after inserting a new interval.
+ auto EndBitOfOverlap = FirstOverlap.stop();
+ unsigned OverlapValue = FirstOverlap.value();
+
+ // Shorten the overlapping interval.
+ FirstOverlap.setStop(StartBit);
+ insertMemLoc(BB, Before, Var, FirstOverlap.start(), StartBit,
+ OverlapValue, VarLoc.DL);
+
+ // Insert a new interval to represent the end part.
+ FragMap.insert(EndBit, EndBitOfOverlap, OverlapValue);
+ insertMemLoc(BB, Before, Var, EndBit, EndBitOfOverlap, OverlapValue,
+ VarLoc.DL);
+
+ // Insert the new (middle) fragment now there is space.
+ FragMap.insert(StartBit, EndBit, Base);
+ } else {
+ // There's an overlap but `f` may not be fully contained within
+ // `i`. Shorten any end-point intersections so that we can then
+ // insert `f`.
+ // [ - f - ]
+ // [ - i - ]
+ // | |
+ // [ i ]
+ // Shorten any end-point intersections.
+ if (IntersectStart) {
+ LLVM_DEBUG(dbgs() << "- Intersect interval at start\n");
+ // Split off at the intersection.
+ FirstOverlap.setStop(StartBit);
+ insertMemLoc(BB, Before, Var, FirstOverlap.start(), StartBit,
+ *FirstOverlap, VarLoc.DL);
+ }
+ // [ - f - ]
+ // [ - i - ]
+ // | |
+ // [ i ]
+ if (IntersectEnd) {
+ LLVM_DEBUG(dbgs() << "- Intersect interval at end\n");
+ // Split off at the intersection.
+ LastOverlap.setStart(EndBit);
+ insertMemLoc(BB, Before, Var, EndBit, LastOverlap.stop(), *LastOverlap,
+ VarLoc.DL);
+ }
+
+ LLVM_DEBUG(dbgs() << "- Erase intervals contained within\n");
+ // FirstOverlap and LastOverlap have been shortened such that they're
+ // no longer overlapping with [StartBit, EndBit). Delete any overlaps
+ // that remain (these will be fully contained within `f`).
+ // [ - f - ] }
+ // [ - i - ] } Intersection shortening that has happened above.
+ // | | }
+ // [ i ] }
+ // -----------------
+ // [i2 ] } Intervals fully contained within `f` get erased.
+ // -----------------
+ // [ - f - ][ i ] } Completed insertion.
+ auto It = FirstOverlap;
+ if (IntersectStart)
+ ++It; // IntersectStart: first overlap has been shortened.
+ while (It.valid() && It.start() >= StartBit && It.stop() <= EndBit) {
+ LLVM_DEBUG(dbgs() << "- Erase " << toString(It));
+ It.erase(); // This increments It after removing the interval.
+ }
+ // We've dealt with all the overlaps now!
+ assert(!FragMap.overlaps(StartBit, EndBit));
+ LLVM_DEBUG(dbgs() << "- Insert DEF into now-empty space\n");
+ FragMap.insert(StartBit, EndBit, Base);
+ }
+
+ coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
+ FragMap);
+ }
+
+ bool skipVariable(const DILocalVariable *V) { return !V->getSizeInBits(); }
+
+ void process(BasicBlock &BB, VarFragMap &LiveSet) {
+ BBInsertBeforeMap[&BB].clear();
+ for (auto &I : BB) {
+ if (const auto *Locs = FnVarLocs->getWedge(&I)) {
+ for (const VarLocInfo &Loc : *Locs) {
+ addDef(Loc, I, *I.getParent(), LiveSet);
+ }
+ }
+ }
+ }
+
+public:
+ MemLocFragmentFill(Function &Fn,
+ const DenseSet<DebugAggregate> *VarsWithStackSlot,
+ bool CoalesceAdjacentFragments)
+ : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot),
+ CoalesceAdjacentFragments(CoalesceAdjacentFragments) {}
+
+ /// Add variable locations to \p FnVarLocs so that any bits of a variable
+ /// with a memory location have that location explicitly reinstated at each
+  /// subsequent variable location definition that doesn't overwrite those
+ /// bits. i.e. after a variable location def, insert new defs for the memory
+ /// location with fragments for the difference of "all bits currently in
+ /// memory" and "the fragment of the second def". e.g.
+ ///
+ /// Before:
+ ///
+ /// var x bits 0 to 63: value in memory
+ /// more instructions
+ /// var x bits 0 to 31: value is %0
+ ///
+ /// After:
+ ///
+ /// var x bits 0 to 63: value in memory
+ /// more instructions
+ /// var x bits 0 to 31: value is %0
+  ///    var x bits 32 to 63: value in memory ; <-- new loc def
+ ///
+ void run(FunctionVarLocsBuilder *FnVarLocs) {
+ if (!EnableMemLocFragFill)
+ return;
+
+ this->FnVarLocs = FnVarLocs;
+
+ // Prepare for traversal.
+ //
+ ReversePostOrderTraversal<Function *> RPOT(&Fn);
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Pending;
+ DenseMap<unsigned int, BasicBlock *> OrderToBB;
+ DenseMap<BasicBlock *, unsigned int> BBToOrder;
+ { // Init OrderToBB and BBToOrder.
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
+ LiveIn.init(RPONumber);
+ LiveOut.init(RPONumber);
+ }
+
+ // Perform the traversal.
+ //
+ // This is a standard "intersect of predecessor outs" dataflow problem. To
+ // solve it, we perform meet() and process() using the two worklist method
+ // until the LiveIn data for each block becomes unchanging.
+ //
+ // This dataflow is essentially working on maps of sets and at each meet we
+ // intersect the maps and the mapped sets. So, initialized live-in maps
+ // monotonically decrease in value throughout the dataflow.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice. We could avoid this with a custom priority queue, but
+ // this is probably not worth it.
+ SmallPtrSet<BasicBlock *, 16> OnPending;
+ LLVM_DEBUG(dbgs() << "Processing Worklist\n");
+ while (!Worklist.empty()) {
+ BasicBlock *BB = OrderToBB[Worklist.top()];
+ LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n");
+ Worklist.pop();
+ bool InChanged = meet(*BB, Visited);
+ // Always consider LiveIn changed on the first visit.
+ InChanged |= Visited.insert(BB).second;
+ if (InChanged) {
+ LLVM_DEBUG(dbgs()
+ << BB->getName() << " has new InLocs, process it\n");
+ // Mutate a copy of LiveIn while processing BB. Once we've processed
+ // the terminator LiveSet is the LiveOut set for BB.
+ // This is an expensive copy!
+ VarFragMap LiveSet = LiveIn[BB];
+
+ // Process the instructions in the block.
+ process(*BB, LiveSet);
+
+ // Relatively expensive check: has anything changed in LiveOut for BB?
+ if (!varFragMapsAreEqual(LiveOut[BB], LiveSet)) {
+ LLVM_DEBUG(dbgs() << BB->getName()
+ << " has new OutLocs, add succs to worklist: [ ");
+ LiveOut[BB] = std::move(LiveSet);
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) {
+ if (OnPending.insert(*I).second) {
+ LLVM_DEBUG(dbgs() << I->getName() << " ");
+ Pending.push(BBToOrder[*I]);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "]\n");
+ }
+ }
+ }
+ Worklist.swap(Pending);
+      // At this point, Pending must be empty, since it was just swapped with
+      // the (now empty) Worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ // Insert new location defs.
+ for (auto &Pair : BBInsertBeforeMap) {
+ InsertMap &Map = Pair.second;
+ for (auto &Pair : Map) {
+ Instruction *InsertBefore = Pair.first;
+ assert(InsertBefore && "should never be null");
+ auto FragMemLocs = Pair.second;
+ auto &Ctx = Fn.getContext();
+
+ for (auto &FragMemLoc : FragMemLocs) {
+ DIExpression *Expr = DIExpression::get(Ctx, std::nullopt);
+ if (FragMemLoc.SizeInBits !=
+ *Aggregates[FragMemLoc.Var].first->getSizeInBits())
+ Expr = *DIExpression::createFragmentExpression(
+ Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefAfter,
+ FragMemLoc.OffsetInBits / 8);
+ DebugVariable Var(Aggregates[FragMemLoc.Var].first, Expr,
+ FragMemLoc.DL.getInlinedAt());
+ FnVarLocs->addVarLoc(InsertBefore, Var, Expr, FragMemLoc.DL,
+ Bases[FragMemLoc.Base]);
+ }
+ }
+ }
+ }
+};
+
+/// AssignmentTrackingLowering encapsulates a dataflow analysis over a function
+/// that interprets assignment tracking debug info metadata and stores in IR to
+/// create a map of variable locations.
+class AssignmentTrackingLowering {
+public:
+ /// The kind of location in use for a variable, where Mem is the stack home,
+ /// Val is an SSA value or const, and None means that there is not one single
+ /// kind (either because there are multiple or because there is none; it may
+ /// prove useful to split this into two values in the future).
+ ///
+ /// LocKind is a join-semilattice with the partial order:
+ /// None > Mem, Val
+ ///
+ /// i.e.
+ /// join(Mem, Mem) = Mem
+ /// join(Val, Val) = Val
+ /// join(Mem, Val) = None
+ /// join(None, Mem) = None
+ /// join(None, Val) = None
+ /// join(None, None) = None
+ ///
+ /// Note: the order is not `None > Val > Mem` because we're using DIAssignID
+ /// to name assignments and are not tracking the actual stored values.
+ /// Therefore currently there's no way to ensure that Mem values and Val
+ /// values are the same. This could be a future extension, though it's not
+ /// clear that many additional locations would be recovered that way in
+  /// practice as the likelihood of this situation arising naturally seems
+ /// incredibly low.
+ enum class LocKind { Mem, Val, None };
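+
+  // Illustrative sketch (an assumption, not the upstream definition): a join
+  // consistent with the table above keeps a kind only when both operands
+  // agree and collapses any disagreement to None:
+  //
+  //   static LocKind joinKind(LocKind A, LocKind B) {
+  //     return A == B ? A : LocKind::None;
+  //   }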
+
+ /// An abstraction of the assignment of a value to a variable or memory
+ /// location.
+ ///
+ /// An Assignment is Known or NoneOrPhi. A Known Assignment means we have a
+ /// DIAssignID ptr that represents it. NoneOrPhi means that we don't (or
+ /// can't) know the ID of the last assignment that took place.
+ ///
+ /// The Status of the Assignment (Known or NoneOrPhi) is another
+ /// join-semilattice. The partial order is:
+ /// NoneOrPhi > Known {id_0, id_1, ...id_N}
+ ///
+ /// i.e. for all values x and y where x != y:
+ /// join(x, x) = x
+ /// join(x, y) = NoneOrPhi
+ struct Assignment {
+ enum S { Known, NoneOrPhi } Status;
+ /// ID of the assignment. nullptr if Status is not Known.
+ DIAssignID *ID;
+ /// The dbg.assign that marks this dbg-def. Mem-defs don't use this field.
+ /// May be nullptr.
+ DbgAssignIntrinsic *Source;
+
+ bool isSameSourceAssignment(const Assignment &Other) const {
+ // Don't include Source in the equality check. Assignments are
+ // defined by their ID, not debug intrinsic(s).
+ return std::tie(Status, ID) == std::tie(Other.Status, Other.ID);
+ }
+ void dump(raw_ostream &OS) {
+ static const char *LUT[] = {"Known", "NoneOrPhi"};
+ OS << LUT[Status] << "(id=";
+ if (ID)
+ OS << ID;
+ else
+ OS << "null";
+ OS << ", s=";
+ if (Source)
+ OS << *Source;
+ else
+ OS << "null";
+ OS << ")";
+ }
+
+ static Assignment make(DIAssignID *ID, DbgAssignIntrinsic *Source) {
+ return Assignment(Known, ID, Source);
+ }
+ static Assignment makeFromMemDef(DIAssignID *ID) {
+ return Assignment(Known, ID, nullptr);
+ }
+ static Assignment makeNoneOrPhi() {
+ return Assignment(NoneOrPhi, nullptr, nullptr);
+ }
+ // Again, need a Top value?
+ Assignment()
+ : Status(NoneOrPhi), ID(nullptr), Source(nullptr) {
+ } // Can we delete this?
+ Assignment(S Status, DIAssignID *ID, DbgAssignIntrinsic *Source)
+ : Status(Status), ID(ID), Source(Source) {
+ // If the Status is Known then we expect there to be an assignment ID.
+ assert(Status == NoneOrPhi || ID);
+ }
+ };
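+
+  // Illustrative sketch (an assumption, not the upstream definition): a join
+  // on Assignment consistent with the partial order above keeps an assignment
+  // only when both sides agree on it:
+  //
+  //   static Assignment joinAssignment(const Assignment &A,
+  //                                    const Assignment &B) {
+  //     if (A.Status == Assignment::NoneOrPhi || !A.isSameSourceAssignment(B))
+  //       return Assignment::makeNoneOrPhi();
+  //     return A; // Same ID; which Source to keep is a policy detail.
+  //   }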
+
+ using AssignmentMap = SmallVector<Assignment>;
+ using LocMap = SmallVector<LocKind>;
+ using OverlapMap = DenseMap<VariableID, SmallVector<VariableID>>;
+ using UntaggedStoreAssignmentMap =
+ DenseMap<const Instruction *,
+ SmallVector<std::pair<VariableID, at::AssignmentInfo>>>;
+
+private:
+ /// The highest numbered VariableID for partially promoted variables plus 1,
+ /// the values for which start at 1.
+ unsigned TrackedVariablesVectorSize = 0;
+ /// Map a variable to the set of variables that it fully contains.
+ OverlapMap VarContains;
+ /// Map untagged stores to the variable fragments they assign to. Used by
+ /// processUntaggedInstruction.
+ UntaggedStoreAssignmentMap UntaggedStoreVars;
+
+ // Machinery to defer inserting dbg.values.
+ using InsertMap = MapVector<Instruction *, SmallVector<VarLocInfo>>;
+ InsertMap InsertBeforeMap;
+  /// Clear the location definitions currently cached for insertion after \p
+ /// After.
+ void resetInsertionPoint(Instruction &After);
+ void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source,
+ Instruction *After);
+
+ static bool mapsAreEqual(const BitVector &Mask, const AssignmentMap &A,
+ const AssignmentMap &B) {
+ return llvm::all_of(Mask.set_bits(), [&](unsigned VarID) {
+ return A[VarID].isSameSourceAssignment(B[VarID]);
+ });
+ }
+
+ /// Represents the stack and debug assignments in a block. Used to describe
+ /// the live-in and live-out values for blocks, as well as the "current"
+ /// value as we process each instruction in a block.
+ struct BlockInfo {
+ /// The set of variables (VariableID) being tracked in this block.
+ BitVector VariableIDsInBlock;
+ /// Dominating assignment to memory for each variable, indexed by
+ /// VariableID.
+ AssignmentMap StackHomeValue;
+    /// Dominating assignment to each variable, indexed by VariableID.
+ AssignmentMap DebugValue;
+ /// Location kind for each variable. LiveLoc indicates whether the
+ /// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue
+ /// (LocKind::Val), or neither (LocKind::None) is valid, in that order of
+ /// preference. This cannot be derived by inspecting DebugValue and
+ /// StackHomeValue due to the fact that there's no distinction in
+ /// Assignment (the class) between whether an assignment is unknown or a
+ /// merge of multiple assignments (both are Status::NoneOrPhi). In other
+ /// words, the memory location may well be valid while both DebugValue and
+ /// StackHomeValue contain Assignments that have a Status of NoneOrPhi.
+ /// Indexed by VariableID.
+ LocMap LiveLoc;
+
+ public:
+ enum AssignmentKind { Stack, Debug };
+ const AssignmentMap &getAssignmentMap(AssignmentKind Kind) const {
+ switch (Kind) {
+ case Stack:
+ return StackHomeValue;
+ case Debug:
+ return DebugValue;
+ }
+ llvm_unreachable("Unknown AssignmentKind");
+ }
+ AssignmentMap &getAssignmentMap(AssignmentKind Kind) {
+ return const_cast<AssignmentMap &>(
+ const_cast<const BlockInfo *>(this)->getAssignmentMap(Kind));
+ }
+
+ bool isVariableTracked(VariableID Var) const {
+ return VariableIDsInBlock[static_cast<unsigned>(Var)];
+ }
+
+ const Assignment &getAssignment(AssignmentKind Kind, VariableID Var) const {
+ assert(isVariableTracked(Var) && "Var not tracked in block");
+ return getAssignmentMap(Kind)[static_cast<unsigned>(Var)];
+ }
+
+ LocKind getLocKind(VariableID Var) const {
+ assert(isVariableTracked(Var) && "Var not tracked in block");
+ return LiveLoc[static_cast<unsigned>(Var)];
+ }
+
+ /// Set LocKind for \p Var only: does not set LocKind for VariableIDs of
+    /// fragments contained within \p Var.
+ void setLocKind(VariableID Var, LocKind K) {
+ VariableIDsInBlock.set(static_cast<unsigned>(Var));
+ LiveLoc[static_cast<unsigned>(Var)] = K;
+ }
+
+ /// Set the assignment in the \p Kind assignment map for \p Var only: does
+    /// not set the assignment for VariableIDs of fragments contained within
+    /// \p Var.
+ void setAssignment(AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) {
+ VariableIDsInBlock.set(static_cast<unsigned>(Var));
+ getAssignmentMap(Kind)[static_cast<unsigned>(Var)] = AV;
+ }
+
+ /// Return true if there is an assignment matching \p AV in the \p Kind
+    /// assignment map. Does not consider assignments for VariableIDs of
+    /// fragments contained within \p Var.
+ bool hasAssignment(AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) const {
+ if (!isVariableTracked(Var))
+ return false;
+ return AV.isSameSourceAssignment(getAssignment(Kind, Var));
+ }
+
+ /// Compare every element in each map to determine structural equality
+ /// (slow).
+ bool operator==(const BlockInfo &Other) const {
+ return VariableIDsInBlock == Other.VariableIDsInBlock &&
+ LiveLoc == Other.LiveLoc &&
+ mapsAreEqual(VariableIDsInBlock, StackHomeValue,
+ Other.StackHomeValue) &&
+ mapsAreEqual(VariableIDsInBlock, DebugValue, Other.DebugValue);
+ }
+ bool operator!=(const BlockInfo &Other) const { return !(*this == Other); }
+ bool isValid() {
+ return LiveLoc.size() == DebugValue.size() &&
+ LiveLoc.size() == StackHomeValue.size();
+ }
+
+ /// Clear everything and initialise with ⊤-values for all variables.
+ void init(int NumVars) {
+ StackHomeValue.clear();
+ DebugValue.clear();
+ LiveLoc.clear();
+ VariableIDsInBlock = BitVector(NumVars);
+ StackHomeValue.insert(StackHomeValue.begin(), NumVars,
+ Assignment::makeNoneOrPhi());
+ DebugValue.insert(DebugValue.begin(), NumVars,
+ Assignment::makeNoneOrPhi());
+ LiveLoc.insert(LiveLoc.begin(), NumVars, LocKind::None);
+ }
+
+ /// Helper for join.
+ template <typename ElmtType, typename FnInputType>
+ static void joinElmt(int Index, SmallVector<ElmtType> &Target,
+ const SmallVector<ElmtType> &A,
+ const SmallVector<ElmtType> &B,
+ ElmtType (*Fn)(FnInputType, FnInputType)) {
+ Target[Index] = Fn(A[Index], B[Index]);
+ }
+
+ /// See comment for AssignmentTrackingLowering::joinBlockInfo.
+ static BlockInfo join(const BlockInfo &A, const BlockInfo &B, int NumVars) {
+ // Join A and B.
+ //
+ // Intersect = join(a, b) for a in A, b in B where Var(a) == Var(b)
+ // Difference = join(x, ⊤) for x where Var(x) is in A xor B
+ // Join = Intersect ∪ Difference
+ //
+ // This is achieved by performing a join on elements from A and B with
+ // variables common to both A and B (join elements indexed by var
+ // intersect), then adding ⊤-value elements for vars in A xor B. The
+ // latter part is equivalent to performing join on elements with variables
+ // in A xor B with the ⊤-value for the map element since join(x, ⊤) = ⊤.
+ // BlockInfo::init initializes all variable entries to the ⊤ value so we
+ // don't need to explicitly perform that step as Join.VariableIDsInBlock
+ // is set to the union of the variables in A and B at the end of this
+ // function.
+ BlockInfo Join;
+ Join.init(NumVars);
+
+ BitVector Intersect = A.VariableIDsInBlock;
+ Intersect &= B.VariableIDsInBlock;
+
+ for (auto VarID : Intersect.set_bits()) {
+ joinElmt(VarID, Join.LiveLoc, A.LiveLoc, B.LiveLoc, joinKind);
+ joinElmt(VarID, Join.DebugValue, A.DebugValue, B.DebugValue,
+ joinAssignment);
+ joinElmt(VarID, Join.StackHomeValue, A.StackHomeValue, B.StackHomeValue,
+ joinAssignment);
+ }
+
+ Join.VariableIDsInBlock = A.VariableIDsInBlock;
+ Join.VariableIDsInBlock |= B.VariableIDsInBlock;
+ assert(Join.isValid());
+ return Join;
+ }
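+
+    // Worked example (illustrative only): if A tracks {x: Mem, Known id1} and
+    // B tracks {x: Mem, Known id1; y: Val, Known id2}, then join(A, B, N)
+    // keeps x as {Mem, Known id1} (both operands agree) and leaves y at the
+    // top value (LocKind::None / NoneOrPhi) because it is only tracked on one
+    // path, while VariableIDsInBlock becomes the union {x, y}.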
+ };
+
+ Function &Fn;
+ const DataLayout &Layout;
+ const DenseSet<DebugAggregate> *VarsWithStackSlot;
+ FunctionVarLocsBuilder *FnVarLocs;
+ DenseMap<const BasicBlock *, BlockInfo> LiveIn;
+ DenseMap<const BasicBlock *, BlockInfo> LiveOut;
+
+ /// Helper for process methods to track variables touched each frame.
+ DenseSet<VariableID> VarsTouchedThisFrame;
+
+ /// The set of variables that sometimes are not located in their stack home.
+ DenseSet<DebugAggregate> NotAlwaysStackHomed;
+
+ VariableID getVariableID(const DebugVariable &Var) {
+ return static_cast<VariableID>(FnVarLocs->insertVariable(Var));
+ }
+
+ /// Join the LiveOut values of preds that are contained in \p Visited into
+ /// LiveIn[BB]. Return True if LiveIn[BB] has changed as a result. LiveIn[BB]
+ /// values monotonically increase. See the @link joinMethods join methods
+ /// @endlink documentation for more info.
+ bool join(const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited);
+ ///@name joinMethods
+ /// Functions that implement `join` (the least upper bound) for the
+ /// join-semilattice types used in the dataflow. There is an explicit bottom
+  /// value (⊥) for some types and an explicit top value (⊤) for all types.
+ /// By definition:
+ ///
+ /// Join(A, B) >= A && Join(A, B) >= B
+ /// Join(A, ⊥) = A
+ /// Join(A, ⊤) = ⊤
+ ///
+ /// These invariants are important for monotonicity.
+ ///
+ /// For the map-type functions, all unmapped keys in an empty map are
+ /// associated with a bottom value (⊥). This represents their values being
+ /// unknown. Unmapped keys in non-empty maps (joining two maps with a key
+ /// only present in one) represents either a variable going out of scope or
+ /// dropped debug info. It is assumed the key is associated with a top value
+ /// (⊤) in this case (unknown location / assignment).
+ ///@{
+ static LocKind joinKind(LocKind A, LocKind B);
+ static Assignment joinAssignment(const Assignment &A, const Assignment &B);
+ BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B);
+ ///@}
+
+ /// Process the instructions in \p BB updating \p LiveSet along the way. \p
+ /// LiveSet must be initialized with the current live-in locations before
+ /// calling this.
+ void process(BasicBlock &BB, BlockInfo *LiveSet);
+ ///@name processMethods
+ /// Methods to process instructions in order to update the LiveSet (current
+ /// location information).
+ ///@{
+ void processNonDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+ void processDbgInstruction(DbgInfoIntrinsic &I, BlockInfo *LiveSet);
+ /// Update \p LiveSet after encountering an instruction with a DIAssignID
+ /// attachment, \p I.
+ void processTaggedInstruction(Instruction &I, BlockInfo *LiveSet);
+  /// Update \p LiveSet after encountering an instruction without a DIAssignID
+ /// attachment, \p I.
+ void processUntaggedInstruction(Instruction &I, BlockInfo *LiveSet);
+ void processDbgAssign(DbgAssignIntrinsic &DAI, BlockInfo *LiveSet);
+ void processDbgValue(DbgValueInst &DVI, BlockInfo *LiveSet);
+  /// Add an assignment to memory for the variable \p Var.
+ void addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
+  /// Add an assignment to the variable \p Var.
+ void addDbgDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
+ ///@}
+
+ /// Set the LocKind for \p Var.
+ void setLocKind(BlockInfo *LiveSet, VariableID Var, LocKind K);
+ /// Get the live LocKind for a \p Var. Requires addMemDef or addDbgDef to
+ /// have been called for \p Var first.
+ LocKind getLocKind(BlockInfo *LiveSet, VariableID Var);
+ /// Return true if \p Var has an assignment in \p M matching \p AV.
+ bool hasVarWithAssignment(BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind,
+ VariableID Var, const Assignment &AV);
+  /// Return the set of VariableIDs corresponding to the fragments contained fully
+ /// within the variable/fragment \p Var.
+ ArrayRef<VariableID> getContainedFragments(VariableID Var) const;
+
+ /// Mark \p Var as having been touched this frame. Note, this applies only
+ /// to the exact fragment \p Var and not to any fragments contained within.
+ void touchFragment(VariableID Var);
+
+ /// Emit info for variables that are fully promoted.
+ bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs);
+
+public:
+ AssignmentTrackingLowering(Function &Fn, const DataLayout &Layout,
+ const DenseSet<DebugAggregate> *VarsWithStackSlot)
+ : Fn(Fn), Layout(Layout), VarsWithStackSlot(VarsWithStackSlot) {}
+ /// Run the analysis, adding variable location info to \p FnVarLocs. Returns
+ /// true if any variable locations have been added to FnVarLocs.
+ bool run(FunctionVarLocsBuilder *FnVarLocs);
+};
+} // namespace
+
+ArrayRef<VariableID>
+AssignmentTrackingLowering::getContainedFragments(VariableID Var) const {
+ auto R = VarContains.find(Var);
+ if (R == VarContains.end())
+ return std::nullopt;
+ return R->second;
+}
+
+void AssignmentTrackingLowering::touchFragment(VariableID Var) {
+ VarsTouchedThisFrame.insert(Var);
+}
+
+void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var,
+ LocKind K) {
+ auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) {
+ LiveSet->setLocKind(Var, K);
+ touchFragment(Var);
+ };
+ SetKind(LiveSet, Var, K);
+
+ // Update the LocKind for all fragments contained within Var.
+ for (VariableID Frag : getContainedFragments(Var))
+ SetKind(LiveSet, Frag, K);
+}
+
+AssignmentTrackingLowering::LocKind
+AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) {
+ return LiveSet->getLocKind(Var);
+}
+
+void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
+ const Assignment &AV) {
+ LiveSet->setAssignment(BlockInfo::Stack, Var, AV);
+
+  // Use this assignment for all fragments contained within Var, but do not
+ // provide a Source because we cannot convert Var's value to a value for the
+ // fragment.
+ Assignment FragAV = AV;
+ FragAV.Source = nullptr;
+ for (VariableID Frag : getContainedFragments(Var))
+ LiveSet->setAssignment(BlockInfo::Stack, Frag, FragAV);
+}
+
+void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
+ const Assignment &AV) {
+ LiveSet->setAssignment(BlockInfo::Debug, Var, AV);
+
+  // Use this assignment for all fragments contained within Var, but do not
+ // provide a Source because we cannot convert Var's value to a value for the
+ // fragment.
+ Assignment FragAV = AV;
+ FragAV.Source = nullptr;
+ for (VariableID Frag : getContainedFragments(Var))
+ LiveSet->setAssignment(BlockInfo::Debug, Frag, FragAV);
+}
+
+static DIAssignID *getIDFromInst(const Instruction &I) {
+ return cast<DIAssignID>(I.getMetadata(LLVMContext::MD_DIAssignID));
+}
+
+static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) {
+ return cast<DIAssignID>(DAI.getAssignID());
+}
+
+/// Return true if \p Var has an assignment in \p M matching \p AV.
+bool AssignmentTrackingLowering::hasVarWithAssignment(
+ BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) {
+ if (!LiveSet->hasAssignment(Kind, Var, AV))
+ return false;
+
+ // Check all the frags contained within Var as these will have all been
+ // mapped to AV at the last store to Var.
+ for (VariableID Frag : getContainedFragments(Var))
+ if (!LiveSet->hasAssignment(Kind, Frag, AV))
+ return false;
+ return true;
+}
+
+#ifndef NDEBUG
+const char *locStr(AssignmentTrackingLowering::LocKind Loc) {
+ using LocKind = AssignmentTrackingLowering::LocKind;
+ switch (Loc) {
+ case LocKind::Val:
+ return "Val";
+ case LocKind::Mem:
+ return "Mem";
+ case LocKind::None:
+ return "None";
+ };
+ llvm_unreachable("unknown LocKind");
+}
+#endif
+
+void AssignmentTrackingLowering::emitDbgValue(
+ AssignmentTrackingLowering::LocKind Kind,
+ const DbgVariableIntrinsic *Source, Instruction *After) {
+
+ DILocation *DL = Source->getDebugLoc();
+ auto Emit = [this, Source, After, DL](Metadata *Val, DIExpression *Expr) {
+ assert(Expr);
+ if (!Val)
+ Val = ValueAsMetadata::get(
+ PoisonValue::get(Type::getInt1Ty(Source->getContext())));
+
+ // Find a suitable insert point.
+ Instruction *InsertBefore = After->getNextNode();
+ assert(InsertBefore && "Shouldn't be inserting after a terminator");
+
+ VariableID Var = getVariableID(DebugVariable(Source));
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = static_cast<VariableID>(Var);
+ VarLoc.Expr = Expr;
+ VarLoc.Values = RawLocationWrapper(Val);
+ VarLoc.DL = DL;
+ // Insert it into the map for later.
+ InsertBeforeMap[InsertBefore].push_back(VarLoc);
+ };
+
+ // NOTE: This block can mutate Kind.
+ if (Kind == LocKind::Mem) {
+ const auto *DAI = cast<DbgAssignIntrinsic>(Source);
+ // Check the address hasn't been dropped (e.g. the debug uses may not have
+ // been replaced before deleting a Value).
+ if (DAI->isKillAddress()) {
+ // The address isn't valid so treat this as a non-memory def.
+ Kind = LocKind::Val;
+ } else {
+ Value *Val = DAI->getAddress();
+ DIExpression *Expr = DAI->getAddressExpression();
+ assert(!Expr->getFragmentInfo() &&
+ "fragment info should be stored in value-expression only");
+ // Copy the fragment info over from the value-expression to the new
+ // DIExpression.
+ if (auto OptFragInfo = Source->getExpression()->getFragmentInfo()) {
+ auto FragInfo = *OptFragInfo;
+ Expr = *DIExpression::createFragmentExpression(
+ Expr, FragInfo.OffsetInBits, FragInfo.SizeInBits);
+ }
+ // The address-expression has an implicit deref, add it now.
+ std::tie(Val, Expr) =
+ walkToAllocaAndPrependOffsetDeref(Layout, Val, Expr);
+ Emit(ValueAsMetadata::get(Val), Expr);
+ return;
+ }
+ }
+
+ if (Kind == LocKind::Val) {
+ Emit(Source->getRawLocation(), Source->getExpression());
+ return;
+ }
+
+ if (Kind == LocKind::None) {
+ Emit(nullptr, Source->getExpression());
+ return;
+ }
+}
+
+void AssignmentTrackingLowering::processNonDbgInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ if (I.hasMetadata(LLVMContext::MD_DIAssignID))
+ processTaggedInstruction(I, LiveSet);
+ else
+ processUntaggedInstruction(I, LiveSet);
+}
+
+void AssignmentTrackingLowering::processUntaggedInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ // Interpret stack stores that are not tagged as an assignment in memory for
+ // the variables associated with that address. These stores may not be tagged
+ // because a) the store cannot be represented using dbg.assigns (non-const
+ // length or offset) or b) the tag was accidentally dropped during
+ // optimisations. For these stores we fall back to assuming that the stack
+ // home is a valid location for the variables. The benefit is that this
+ // prevents us missing an assignment and therefore incorrectly maintaining
+ // earlier location definitions, and in many cases it should be a reasonable
+ // assumption. However, this will occasionally lead to slight
+ // inaccuracies. The value of a hoisted untagged store will be visible
+ // "early", for example.
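+ // For example (illustrative): a memset or memcpy with a non-constant
+ // length, or a store whose DIAssignID was dropped during optimisation,
+ // ends up being handled here.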
+ assert(!I.hasMetadata(LLVMContext::MD_DIAssignID));
+ auto It = UntaggedStoreVars.find(&I);
+ if (It == UntaggedStoreVars.end())
+ return; // No variables associated with the store destination.
+
+ LLVM_DEBUG(dbgs() << "processUntaggedInstruction on UNTAGGED INST " << I
+ << "\n");
+ // Iterate over the variables that this store affects, add a NoneOrPhi dbg
+ // and mem def, set the LocKind to Mem, and emit a location def for each.
+ for (auto [Var, Info] : It->second) {
+ // This instruction is treated as both a debug and memory assignment,
+ // meaning the memory location should be used. We don't have an assignment
+ // ID though so use Assignment::makeNoneOrPhi() to create an imaginary one.
+ addMemDef(LiveSet, Var, Assignment::makeNoneOrPhi());
+ addDbgDef(LiveSet, Var, Assignment::makeNoneOrPhi());
+ setLocKind(LiveSet, Var, LocKind::Mem);
+ LLVM_DEBUG(dbgs() << " setting Stack LocKind to: " << locStr(LocKind::Mem)
+ << "\n");
+ // Build the dbg location def to insert.
+ //
+ // DIExpression: Add fragment and offset.
+ DebugVariable V = FnVarLocs->getVariable(Var);
+ DIExpression *DIE = DIExpression::get(I.getContext(), std::nullopt);
+ if (auto Frag = V.getFragment()) {
+ auto R = DIExpression::createFragmentExpression(DIE, Frag->OffsetInBits,
+ Frag->SizeInBits);
+ assert(R && "unexpected createFragmentExpression failure");
+ DIE = *R;
+ }
+ SmallVector<uint64_t, 3> Ops;
+ if (Info.OffsetInBits)
+ Ops = {dwarf::DW_OP_plus_uconst, Info.OffsetInBits / 8};
+ Ops.push_back(dwarf::DW_OP_deref);
+ DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false,
+ /*EntryValue=*/false);
+ // Find a suitable insert point.
+ Instruction *InsertBefore = I.getNextNode();
+ assert(InsertBefore && "Shouldn't be inserting after a terminator");
+
+ // Get DILocation for this unrecorded assignment.
+ DILocation *InlinedAt = const_cast<DILocation *>(V.getInlinedAt());
+ const DILocation *DILoc = DILocation::get(
+ Fn.getContext(), 0, 0, V.getVariable()->getScope(), InlinedAt);
+
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = static_cast<VariableID>(Var);
+ VarLoc.Expr = DIE;
+ VarLoc.Values = RawLocationWrapper(
+ ValueAsMetadata::get(const_cast<AllocaInst *>(Info.Base)));
+ VarLoc.DL = DILoc;
+ // Insert it into the map for later.
+ InsertBeforeMap[InsertBefore].push_back(VarLoc);
+ }
+}
+
+void AssignmentTrackingLowering::processTaggedInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ auto Linked = at::getAssignmentMarkers(&I);
+ // No dbg.assign intrinsics linked.
+ // FIXME: All vars that have a stack slot this store modifies that don't have
+ // a dbg.assign linked to it should probably treat this like an untagged
+ // store.
+ if (Linked.empty())
+ return;
+
+ LLVM_DEBUG(dbgs() << "processTaggedInstruction on " << I << "\n");
+ for (DbgAssignIntrinsic *DAI : Linked) {
+ VariableID Var = getVariableID(DebugVariable(DAI));
+ // Something has gone wrong if VarsWithStackSlot doesn't contain a variable
+ // that is linked to a store.
+ assert(VarsWithStackSlot->count(getAggregate(DAI)) &&
+ "expected DAI's variable to have stack slot");
+
+ Assignment AV = Assignment::makeFromMemDef(getIDFromInst(I));
+ addMemDef(LiveSet, Var, AV);
+
+ LLVM_DEBUG(dbgs() << " linked to " << *DAI << "\n");
+ LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+ << " -> ");
+
+ // The last assignment to the stack is now AV. Check if the last debug
+ // assignment has a matching Assignment.
+ if (hasVarWithAssignment(LiveSet, BlockInfo::Debug, Var, AV)) {
+ // The StackHomeValue and DebugValue for this variable match so we can
+ // emit a stack home location here.
+ LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
+ LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << " Debug val: ";
+ LiveSet->DebugValue[static_cast<unsigned>(Var)].dump(dbgs());
+ dbgs() << "\n");
+ setLocKind(LiveSet, Var, LocKind::Mem);
+ emitDbgValue(LocKind::Mem, DAI, &I);
+ continue;
+ }
+
+ // The StackHomeValue and DebugValue for this variable do not match, i.e.
+ // the value currently stored in the stack is not what we'd expect to
+ // see, so we cannot emit a stack home location here. Now we will
+ // look at the live LocKind for the variable and determine an appropriate
+ // dbg.value to emit.
+ LocKind PrevLoc = getLocKind(LiveSet, Var);
+ switch (PrevLoc) {
+ case LocKind::Val: {
+ // The value in memory has changed but we're not currently
+ // using the memory location. Do nothing.
+ LLVM_DEBUG(dbgs() << "Val, (unchanged)\n";);
+ setLocKind(LiveSet, Var, LocKind::Val);
+ } break;
+ case LocKind::Mem: {
+ // There's been an assignment to memory that we were using as a
+ // location for this variable, and the Assignment doesn't match what
+ // we'd expect to see in memory.
+ Assignment DbgAV = LiveSet->getAssignment(BlockInfo::Debug, Var);
+ if (DbgAV.Status == Assignment::NoneOrPhi) {
+ // We need to terminate any previously open location now.
+ LLVM_DEBUG(dbgs() << "None, No Debug value available\n";);
+ setLocKind(LiveSet, Var, LocKind::None);
+ emitDbgValue(LocKind::None, DAI, &I);
+ } else {
+ // The previous DebugValue Value can be used here.
+ LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";);
+ setLocKind(LiveSet, Var, LocKind::Val);
+ if (DbgAV.Source) {
+ emitDbgValue(LocKind::Val, DbgAV.Source, &I);
+ } else {
+ // DbgAV.Source is nullptr so we must emit undef here.
+ emitDbgValue(LocKind::None, DAI, &I);
+ }
+ }
+ } break;
+ case LocKind::None: {
+ // There's been an assignment to memory and we currently are
+ // not tracking a location for the variable. Do not emit anything.
+ LLVM_DEBUG(dbgs() << "None, (unchanged)\n";);
+ setLocKind(LiveSet, Var, LocKind::None);
+ } break;
+ }
+ }
+}
+
+void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI,
+ BlockInfo *LiveSet) {
+ // Only bother tracking variables that are at some point stack homed. Other
+ // variables can be dealt with trivially later.
+ if (!VarsWithStackSlot->count(getAggregate(&DAI)))
+ return;
+
+ VariableID Var = getVariableID(DebugVariable(&DAI));
+ Assignment AV = Assignment::make(getIDFromMarker(DAI), &DAI);
+ addDbgDef(LiveSet, Var, AV);
+
+ LLVM_DEBUG(dbgs() << "processDbgAssign on " << DAI << "\n";);
+ LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+ << " -> ");
+
+ // Check if the DebugValue and StackHomeValue both hold the same
+ // Assignment.
+ if (hasVarWithAssignment(LiveSet, BlockInfo::Stack, Var, AV)) {
+ // They match. We can use the stack home because the debug intrinsics state
+ // that an assignment happened here, and we know that specific assignment
+ // was the last one to take place in memory for this variable.
+ LocKind Kind;
+ if (DAI.isKillAddress()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Val, Stack matches Debug program but address is killed\n";);
+ Kind = LocKind::Val;
+ } else {
+ LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
+ Kind = LocKind::Mem;
+ };
+ setLocKind(LiveSet, Var, Kind);
+ emitDbgValue(Kind, &DAI, &DAI);
+ } else {
+ // The last assignment to the memory location isn't the one that we want to
+ // show to the user so emit a dbg.value(Value). Value may be undef.
+ LLVM_DEBUG(dbgs() << "Val, Stack contents is unknown\n";);
+ setLocKind(LiveSet, Var, LocKind::Val);
+ emitDbgValue(LocKind::Val, &DAI, &DAI);
+ }
+}
+
+void AssignmentTrackingLowering::processDbgValue(DbgValueInst &DVI,
+ BlockInfo *LiveSet) {
+ // Only bother tracking variables that are at some point stack homed.
+ // Other variables can be dealt with trivially later.
+ if (!VarsWithStackSlot->count(getAggregate(&DVI)))
+ return;
+
+ VariableID Var = getVariableID(DebugVariable(&DVI));
+ // We have no ID to create an Assignment with so we mark this assignment as
+ // NoneOrPhi. Note that the dbg.value still exists, we just cannot determine
+ // the assignment responsible for setting this value.
+ // This is fine; dbg.values are essentially interchangeable with unlinked
+ // dbg.assigns, and some passes such as mem2reg and instcombine add them to
+ // PHIs for promoted variables.
+ Assignment AV = Assignment::makeNoneOrPhi();
+ addDbgDef(LiveSet, Var, AV);
+
+ LLVM_DEBUG(dbgs() << "processDbgValue on " << DVI << "\n";);
+ LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+ << " -> Val, dbg.value override");
+
+ setLocKind(LiveSet, Var, LocKind::Val);
+ emitDbgValue(LocKind::Val, &DVI, &DVI);
+}
+
+static bool hasZeroSizedFragment(DbgVariableIntrinsic &DVI) {
+ if (auto F = DVI.getExpression()->getFragmentInfo())
+ return F->SizeInBits == 0;
+ return false;
+}
+
+void AssignmentTrackingLowering::processDbgInstruction(
+ DbgInfoIntrinsic &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ return;
+
+ // Ignore assignments to zero bits of the variable.
+ if (hasZeroSizedFragment(*DVI))
+ return;
+
+ if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
+ processDbgAssign(*DAI, LiveSet);
+ else if (auto *DVI = dyn_cast<DbgValueInst>(&I))
+ processDbgValue(*DVI, LiveSet);
+}
+
+void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) {
+ assert(!After.isTerminator() && "Can't insert after a terminator");
+ auto R = InsertBeforeMap.find(After.getNextNode());
+ if (R == InsertBeforeMap.end())
+ return;
+ R->second.clear();
+}
+
+void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
+ for (auto II = BB.begin(), EI = BB.end(); II != EI;) {
+ assert(VarsTouchedThisFrame.empty());
+ // Process the instructions in "frames". A "frame" includes a single
+ // non-debug instruction followed by any debug instructions before the
+ // next non-debug instruction.
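+ // For example, in the sequence "store; dbg.assign; dbg.value; add", the
+ // first frame is the store together with the two debug intrinsics that
+ // follow it.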
+ if (!isa<DbgInfoIntrinsic>(&*II)) {
+ if (II->isTerminator())
+ break;
+ resetInsertionPoint(*II);
+ processNonDbgInstruction(*II, LiveSet);
+ assert(LiveSet->isValid());
+ ++II;
+ }
+ while (II != EI) {
+ auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II);
+ if (!Dbg)
+ break;
+ resetInsertionPoint(*II);
+ processDbgInstruction(*Dbg, LiveSet);
+ assert(LiveSet->isValid());
+ ++II;
+ }
+
+ // We've processed everything in the "frame". Now determine which variables
+ // cannot be represented by a dbg.declare.
+ for (auto Var : VarsTouchedThisFrame) {
+ LocKind Loc = getLocKind(LiveSet, Var);
+ // If a variable's LocKind is anything other than LocKind::Mem then we
+ // must note that it cannot be represented with a dbg.declare.
+ // Note that this check is enough without having to check the result of
+ // joins() because for join to produce anything other than Mem after
+ // we've already seen a Mem we'd be joining None or Val with Mem. In that
+ // case, we've already hit this codepath when we set the LocKind to Val
+ // or None in that block.
+ if (Loc != LocKind::Mem) {
+ DebugVariable DbgVar = FnVarLocs->getVariable(Var);
+ DebugAggregate Aggr{DbgVar.getVariable(), DbgVar.getInlinedAt()};
+ NotAlwaysStackHomed.insert(Aggr);
+ }
+ }
+ VarsTouchedThisFrame.clear();
+ }
+}
+
+AssignmentTrackingLowering::LocKind
+AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) {
+ // Partial order:
+ // None > Mem, Val
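+ // For illustration, given the implementation below:
+ //   joinKind(Mem, Mem) == Mem, joinKind(Val, Val) == Val,
+ //   joinKind(Mem, Val) == None, joinKind(None, X) == None.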
+ return A == B ? A : LocKind::None;
+}
+
+AssignmentTrackingLowering::Assignment
+AssignmentTrackingLowering::joinAssignment(const Assignment &A,
+ const Assignment &B) {
+ // Partial order:
+ // NoneOrPhi(null, null) > Known(v, ?s)
+
+ // If either is NoneOrPhi the join is NoneOrPhi.
+ // If the values are different then the result is
+ // NoneOrPhi (joining two different values is a Phi).
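+ // Illustrative examples (see the implementation below):
+ //   join(Known(!1, S), Known(!1, S))   == Known(!1, S)
+ //   join(Known(!1, S1), Known(!1, S2)) == Known(!1, nullptr) when S1 and S2
+ //                                         are not identical
+ //   join(Known(!1, _), Known(!2, _))   == NoneOrPhi
+ //   join(NoneOrPhi, _)                 == NoneOrPhi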
+ if (!A.isSameSourceAssignment(B))
+ return Assignment::makeNoneOrPhi();
+ if (A.Status == Assignment::NoneOrPhi)
+ return Assignment::makeNoneOrPhi();
+
+ // Source is used to lookup the value + expression in the debug program if
+ // the stack slot gets assigned a value earlier than expected. Because
+ // we're only tracking the one dbg.assign, we can't capture debug PHIs.
+ // It's unlikely that we're losing out on much coverage by avoiding that
+ // extra work.
+ // The Source may differ in this situation:
+ // Pred.1:
+ // dbg.assign i32 0, ..., !1, ...
+ // Pred.2:
+ // dbg.assign i32 1, ..., !1, ...
+ // Here the same assignment (!1) was performed in both preds in the source,
+ // but we can't use either one unless they are identical (e.g. we don't
+ // want to arbitrarily pick between constant values).
+ auto JoinSource = [&]() -> DbgAssignIntrinsic * {
+ if (A.Source == B.Source)
+ return A.Source;
+ if (A.Source == nullptr || B.Source == nullptr)
+ return nullptr;
+ if (A.Source->isIdenticalTo(B.Source))
+ return A.Source;
+ return nullptr;
+ };
+ DbgAssignIntrinsic *Source = JoinSource();
+ assert(A.Status == B.Status && A.Status == Assignment::Known);
+ assert(A.ID == B.ID);
+ return Assignment::make(A.ID, Source);
+}
+
+AssignmentTrackingLowering::BlockInfo
+AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A,
+ const BlockInfo &B) {
+ return BlockInfo::join(A, B, TrackedVariablesVectorSize);
+}
+
+bool AssignmentTrackingLowering::join(
+ const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited) {
+
+ SmallVector<const BasicBlock *> VisitedPreds;
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block. This
+ // is essentially the same as initialising all LocKinds and Assignments to
+ // an implicit ⊥ value which is the identity value for the join operation.
+ for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
+ const BasicBlock *Pred = *I;
+ if (Visited.count(Pred))
+ VisitedPreds.push_back(Pred);
+ }
+
+ // No preds visited yet.
+ if (VisitedPreds.empty()) {
+ auto It = LiveIn.try_emplace(&BB, BlockInfo());
+ bool DidInsert = It.second;
+ if (DidInsert)
+ It.first->second.init(TrackedVariablesVectorSize);
+ return /*Changed*/ DidInsert;
+ }
+
+ // Exactly one visited pred. Copy the LiveOut from that pred into BB LiveIn.
+ if (VisitedPreds.size() == 1) {
+ const BlockInfo &PredLiveOut = LiveOut.find(VisitedPreds[0])->second;
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
+
+ // Check if there isn't an entry, or there is but the LiveIn set has
+ // changed (expensive check).
+ if (CurrentLiveInEntry == LiveIn.end())
+ LiveIn.insert(std::make_pair(&BB, PredLiveOut));
+ else if (PredLiveOut != CurrentLiveInEntry->second)
+ CurrentLiveInEntry->second = PredLiveOut;
+ else
+ return /*Changed*/ false;
+ return /*Changed*/ true;
+ }
+
+ // More than one pred. Join LiveOuts of blocks 1 and 2.
+ assert(VisitedPreds.size() > 1);
+ const BlockInfo &PredLiveOut0 = LiveOut.find(VisitedPreds[0])->second;
+ const BlockInfo &PredLiveOut1 = LiveOut.find(VisitedPreds[1])->second;
+ BlockInfo BBLiveIn = joinBlockInfo(PredLiveOut0, PredLiveOut1);
+
+ // Join the LiveOuts of subsequent blocks.
+ ArrayRef Tail = ArrayRef(VisitedPreds).drop_front(2);
+ for (const BasicBlock *Pred : Tail) {
+ const auto &PredLiveOut = LiveOut.find(Pred);
+ assert(PredLiveOut != LiveOut.end() &&
+ "block should have been processed already");
+ BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second);
+ }
+
+ // Save the joined result for BB.
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
+ // Check if there isn't an entry, or there is but the LiveIn set has changed
+ // (expensive check).
+ if (CurrentLiveInEntry == LiveIn.end())
+ LiveIn.try_emplace(&BB, std::move(BBLiveIn));
+ else if (BBLiveIn != CurrentLiveInEntry->second)
+ CurrentLiveInEntry->second = std::move(BBLiveIn);
+ else
+ return /*Changed*/ false;
+ return /*Changed*/ true;
+}
+
+/// Return true if A fully contains B.
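+/// For example, a 64-bit fragment at offset 0 fully contains a 16-bit
+/// fragment at offset 16, but not a 32-bit fragment at offset 48.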
+static bool fullyContains(DIExpression::FragmentInfo A,
+ DIExpression::FragmentInfo B) {
+ auto ALeft = A.OffsetInBits;
+ auto BLeft = B.OffsetInBits;
+ if (BLeft < ALeft)
+ return false;
+
+ auto ARight = ALeft + A.SizeInBits;
+ auto BRight = BLeft + B.SizeInBits;
+ if (BRight > ARight)
+ return false;
+ return true;
+}
+
+static std::optional<at::AssignmentInfo>
+getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) {
+ // Don't bother checking if this is an AllocaInst. We know this
+ // instruction has no tag which means there are no variables associated
+ // with it.
+ if (const auto *SI = dyn_cast<StoreInst>(&I))
+ return at::getAssignmentInfo(Layout, SI);
+ if (const auto *MI = dyn_cast<MemIntrinsic>(&I))
+ return at::getAssignmentInfo(Layout, MI);
+ // Alloca or non-store-like inst.
+ return std::nullopt;
+}
+
+/// Build a map of {Variable x: Variables y} where all variable fragments
+/// contained within the variable fragment x are in set y. This means that
+/// y does not contain all overlaps because partial overlaps are excluded.
+///
+/// While we're iterating over the function, add single location defs for
+/// dbg.declares to \p FnVarLocs.
+///
+/// Variables that are interesting to this pass are added to
+/// FnVarLocs->Variables first. TrackedVariablesVectorSize is set to the ID of
+/// the last interesting variable plus 1, meaning variables with ID 1
+/// (inclusive) to TrackedVariablesVectorSize (exclusive) are interesting. The
+/// subsequent variables are either stack homed or fully promoted.
+///
+/// Finally, populate UntaggedStoreVars with a mapping of untagged stores to
+/// the stored-to variable fragments.
+///
+/// These tasks are bundled together to reduce the number of times we need
+/// to iterate over the function as they can be achieved together in one pass.
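+///
+/// For illustration, if a variable has fragments covering bits [0, 64),
+/// [0, 32) and [32, 64), the resulting map is
+///   [0, 64) -> { [0, 32), [32, 64) }
+/// (keyed by VariableID); the smaller fragments map to nothing because they
+/// contain no other fragment, and partial overlaps are not recorded.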
+static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
+ Function &Fn, FunctionVarLocsBuilder *FnVarLocs,
+ const DenseSet<DebugAggregate> &VarsWithStackSlot,
+ AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars,
+ unsigned &TrackedVariablesVectorSize) {
+ DenseSet<DebugVariable> Seen;
+ // Map of Variable: [Fragments].
+ DenseMap<DebugAggregate, SmallVector<DebugVariable, 8>> FragmentMap;
+ // Iterate over all instructions:
+ // - dbg.declare -> add single location variable record
+ // - dbg.* -> Add fragments to FragmentMap
+ // - untagged store -> Add fragments to FragmentMap and update
+ // UntaggedStoreVars.
+ // We need to add fragments for untagged stores too so that we can correctly
+ // clobber overlapped fragment locations later.
+ SmallVector<DbgDeclareInst *> Declares;
+ for (auto &BB : Fn) {
+ for (auto &I : BB) {
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
+ Declares.push_back(DDI);
+ } else if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ DebugVariable DV = DebugVariable(DII);
+ DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (!VarsWithStackSlot.contains(DA))
+ continue;
+ if (Seen.insert(DV).second)
+ FragmentMap[DA].push_back(DV);
+ } else if (auto Info = getUntaggedStoreAssignmentInfo(
+ I, Fn.getParent()->getDataLayout())) {
+ // Find markers linked to this alloca.
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(Info->Base)) {
+ // Discard the fragment if it covers the entire variable.
+ std::optional<DIExpression::FragmentInfo> FragInfo =
+ [&Info, DAI]() -> std::optional<DIExpression::FragmentInfo> {
+ DIExpression::FragmentInfo F;
+ F.OffsetInBits = Info->OffsetInBits;
+ F.SizeInBits = Info->SizeInBits;
+ if (auto ExistingFrag = DAI->getExpression()->getFragmentInfo())
+ F.OffsetInBits += ExistingFrag->OffsetInBits;
+ if (auto Sz = DAI->getVariable()->getSizeInBits()) {
+ if (F.OffsetInBits == 0 && F.SizeInBits == *Sz)
+ return std::nullopt;
+ }
+ return F;
+ }();
+
+ DebugVariable DV = DebugVariable(DAI->getVariable(), FragInfo,
+ DAI->getDebugLoc().getInlinedAt());
+ DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (!VarsWithStackSlot.contains(DA))
+ continue;
+
+ // Cache this info for later.
+ UntaggedStoreVars[&I].push_back(
+ {FnVarLocs->insertVariable(DV), *Info});
+
+ if (Seen.insert(DV).second)
+ FragmentMap[DA].push_back(DV);
+ }
+ }
+ }
+ }
+
+ // Sort the fragment map for each DebugAggregate in ascending
+ // order of fragment size - there should be no duplicates.
+ for (auto &Pair : FragmentMap) {
+ SmallVector<DebugVariable, 8> &Frags = Pair.second;
+ std::sort(Frags.begin(), Frags.end(),
+ [](const DebugVariable &Next, const DebugVariable &Elmt) {
+ return Elmt.getFragmentOrDefault().SizeInBits >
+ Next.getFragmentOrDefault().SizeInBits;
+ });
+ // Check for duplicates.
+ assert(std::adjacent_find(Frags.begin(), Frags.end()) == Frags.end());
+ }
+
+ // Build the map.
+ AssignmentTrackingLowering::OverlapMap Map;
+ for (auto &Pair : FragmentMap) {
+ auto &Frags = Pair.second;
+ for (auto It = Frags.begin(), IEnd = Frags.end(); It != IEnd; ++It) {
+ DIExpression::FragmentInfo Frag = It->getFragmentOrDefault();
+ // Find the frags that this is contained within.
+ //
+ // Because Frags is sorted by size and none have the same offset and
+ // size, we know that this frag can only be contained by subsequent
+ // elements.
+ SmallVector<DebugVariable, 8>::iterator OtherIt = It;
+ ++OtherIt;
+ VariableID ThisVar = FnVarLocs->insertVariable(*It);
+ for (; OtherIt != IEnd; ++OtherIt) {
+ DIExpression::FragmentInfo OtherFrag = OtherIt->getFragmentOrDefault();
+ VariableID OtherVar = FnVarLocs->insertVariable(*OtherIt);
+ if (fullyContains(OtherFrag, Frag))
+ Map[OtherVar].push_back(ThisVar);
+ }
+ }
+ }
+
+ // VariableIDs are 1-based so the variable-tracking bitvector needs
+ // NumVariables plus 1 bits.
+ TrackedVariablesVectorSize = FnVarLocs->getNumVariables() + 1;
+
+ // Finally, insert the declares afterwards, so the first IDs are all
+ // partially stack homed vars.
+ for (auto *DDI : Declares)
+ FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
+ DDI->getDebugLoc(), DDI->getWrappedLocation());
+ return Map;
+}
+
+bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
+ if (Fn.size() > MaxNumBlocks) {
+ LLVM_DEBUG(dbgs() << "[AT] Dropping var locs in: " << Fn.getName()
+ << ": too many blocks (" << Fn.size() << ")\n");
+ at::deleteAll(&Fn);
+ return false;
+ }
+
+ FnVarLocs = FnVarLocsBuilder;
+
+ // The general structure here is inspired by VarLocBasedImpl.cpp
+ // (LiveDebugValues).
+
+ // Build the variable fragment overlap map.
+ // Note that this pass doesn't handle partial overlaps correctly (FWIW
+ // neither does LiveDebugVariables) because that is difficult to do and
+ // appears to be a rare occurrence.
+ VarContains = buildOverlapMapAndRecordDeclares(
+ Fn, FnVarLocs, *VarsWithStackSlot, UntaggedStoreVars,
+ TrackedVariablesVectorSize);
+
+ // Prepare for traversal.
+ ReversePostOrderTraversal<Function *> RPOT(&Fn);
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Pending;
+ DenseMap<unsigned int, BasicBlock *> OrderToBB;
+ DenseMap<BasicBlock *, unsigned int> BBToOrder;
+ { // Init OrderToBB and BBToOrder.
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
+ LiveIn.init(RPONumber);
+ LiveOut.init(RPONumber);
+ }
+
+ // Perform the traversal.
+ //
+ // This is a standard "union of predecessor outs" dataflow problem. To solve
+ // it, we perform join() and process() using the two worklist method until
+ // the LiveIn data for each block becomes unchanging. The "proof" that this
+ // terminates can be put together by looking at the comments around LocKind,
+ // Assignment, and the various join methods, which show that all the elements
+ // involved are made up of join-semilattices; LiveIn(n) can only
+ // monotonically increase in value throughout the dataflow.
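+ //
+ // Concretely: blocks are visited in RPO order; when a block's LiveOut
+ // changes, its successors are pushed onto the pending worklist, which
+ // becomes the active worklist for the next sweep, until a fixed point is
+ // reached.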
+ //
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ while (!Worklist.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice.
+ SmallPtrSet<BasicBlock *, 16> OnPending;
+ LLVM_DEBUG(dbgs() << "Processing Worklist\n");
+ while (!Worklist.empty()) {
+ BasicBlock *BB = OrderToBB[Worklist.top()];
+ LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n");
+ Worklist.pop();
+ bool InChanged = join(*BB, Visited);
+ // Always consider LiveIn changed on the first visit.
+ InChanged |= Visited.insert(BB).second;
+ if (InChanged) {
+ LLVM_DEBUG(dbgs() << BB->getName() << " has new InLocs, process it\n");
+ // Mutate a copy of LiveIn while processing BB. After calling process,
+ // LiveSet is the LiveOut set for BB.
+ BlockInfo LiveSet = LiveIn[BB];
+
+ // Process the instructions in the block.
+ process(*BB, &LiveSet);
+
+ // Relatively expensive check: has anything changed in LiveOut for BB?
+ if (LiveOut[BB] != LiveSet) {
+ LLVM_DEBUG(dbgs() << BB->getName()
+ << " has new OutLocs, add succs to worklist: [ ");
+ LiveOut[BB] = std::move(LiveSet);
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) {
+ if (OnPending.insert(*I).second) {
+ LLVM_DEBUG(dbgs() << I->getName() << " ");
+ Pending.push(BBToOrder[*I]);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "]\n");
+ }
+ }
+ }
+ Worklist.swap(Pending);
+ // At this point, Pending must be empty, since it was just swapped with the
+ // (now empty) Worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ // That's the hard part over. Now we just have some admin to do.
+
+ // Record whether we inserted any intrinsics.
+ bool InsertedAnyIntrinsics = false;
+
+ // Identify and add defs for single location variables.
+ //
+ // Go through all of the defs that we plan to add. If the aggregate variable
+ // it's a part of is not in the NotAlwaysStackHomed set we can emit a single
+ // location def and omit the rest. Add an entry to AlwaysStackHomed so that
+ // we can identify those unneeded defs later.
+ DenseSet<DebugAggregate> AlwaysStackHomed;
+ for (const auto &Pair : InsertBeforeMap) {
+ const auto &Vec = Pair.second;
+ for (VarLocInfo VarLoc : Vec) {
+ DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
+ DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()};
+
+ // Skip this Var if it's not always stack homed.
+ if (NotAlwaysStackHomed.contains(Aggr))
+ continue;
+
+ // Skip complex cases such as when different fragments of a variable have
+ // been split into different allocas. Skipping in this case means falling
+ // back to using a list of defs (which could reduce coverage, but is no
+ // less correct).
+ bool Simple =
+ VarLoc.Expr->getNumElements() == 1 && VarLoc.Expr->startsWithDeref();
+ if (!Simple) {
+ NotAlwaysStackHomed.insert(Aggr);
+ continue;
+ }
+
+ // All source assignments to this variable remain and all stores to any
+ // part of the variable store to the same address (with varying
+ // offsets). We can just emit a single location for the whole variable.
+ //
+ // Unless we've already done so, create the single location def now.
+ if (AlwaysStackHomed.insert(Aggr).second) {
+ assert(!VarLoc.Values.hasArgList());
+ // TODO: When more complex cases are handled VarLoc.Expr should be
+ // built appropriately rather than always using an empty DIExpression.
+ // The assert below is a reminder.
+ assert(Simple);
+ VarLoc.Expr = DIExpression::get(Fn.getContext(), std::nullopt);
+ DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
+ FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.Values);
+ InsertedAnyIntrinsics = true;
+ }
+ }
+ }
+
+ // Insert the other DEFs.
+ for (const auto &[InsertBefore, Vec] : InsertBeforeMap) {
+ SmallVector<VarLocInfo> NewDefs;
+ for (const VarLocInfo &VarLoc : Vec) {
+ DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
+ DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()};
+ // If this variable is always stack homed then we have already inserted a
+ // dbg.declare and deleted this dbg.value.
+ if (AlwaysStackHomed.contains(Aggr))
+ continue;
+ NewDefs.push_back(VarLoc);
+ InsertedAnyIntrinsics = true;
+ }
+
+ FnVarLocs->setWedge(InsertBefore, std::move(NewDefs));
+ }
+
+ InsertedAnyIntrinsics |= emitPromotedVarLocs(FnVarLocs);
+
+ return InsertedAnyIntrinsics;
+}
+
+bool AssignmentTrackingLowering::emitPromotedVarLocs(
+ FunctionVarLocsBuilder *FnVarLocs) {
+ bool InsertedAnyIntrinsics = false;
+ // Go through every block, translating debug intrinsics for fully promoted
+ // variables into FnVarLocs location defs. No analysis required for these.
+ for (auto &BB : Fn) {
+ for (auto &I : BB) {
+ // Skip instructions other than dbg.values and dbg.assigns.
+ auto *DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+ // Skip variables that haven't been promoted - we've dealt with those
+ // already.
+ if (VarsWithStackSlot->contains(getAggregate(DVI)))
+ continue;
+ Instruction *InsertBefore = I.getNextNode();
+ assert(InsertBefore && "Unexpected: debug intrinsics after a terminator");
+ FnVarLocs->addVarLoc(InsertBefore, DebugVariable(DVI),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ DVI->getWrappedLocation());
+ InsertedAnyIntrinsics = true;
+ }
+ }
+ return InsertedAnyIntrinsics;
+}
+
+/// Remove redundant definitions within sequences of consecutive location defs.
+/// This is done using a backward scan to keep the last def describing a
+/// specific variable/fragment.
+///
+/// This implements removeRedundantDbgInstrsUsingBackwardScan from
+/// lib/Transforms/Utils/BasicBlockUtils.cpp for locations described with
+/// FunctionVarLocsBuilder instead of with intrinsics.
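+///
+/// For example (illustrative), if a single wedge defines bits [0, 32) of a
+/// variable and later defines bits [0, 64), the earlier def is fully eclipsed
+/// by the later one and is removed.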
+static bool
+removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ bool Changed = false;
+ SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBits;
+ // Scan over the entire block, not just over the instructions mapped by
+ // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
+ // instructions.
+ for (const Instruction &I : reverse(*BB)) {
+ if (!isa<DbgVariableIntrinsic>(I)) {
+ // Sequence of consecutive defs ended. Clear map for the next one.
+ VariableDefinedBits.clear();
+ }
+
+ // Get the location defs that start just before this instruction.
+ const auto *Locs = FnVarLocs.getWedge(&I);
+ if (!Locs)
+ continue;
+
+ NumWedgesScanned++;
+ bool ChangedThisWedge = false;
+ // The new pruned set of defs, reversed because we're scanning backwards.
+ SmallVector<VarLocInfo> NewDefsReversed;
+
+ // Iterate over the existing defs in reverse.
+ for (auto RIt = Locs->rbegin(), REnd = Locs->rend(); RIt != REnd; ++RIt) {
+ NumDefsScanned++;
+ DebugAggregate Aggr =
+ getAggregate(FnVarLocs.getVariable(RIt->VariableID));
+ uint64_t SizeInBits = Aggr.first->getSizeInBits().value_or(0);
+
+ if (SizeInBits == 0) {
+ // If the size is unknown (0) then keep this location def to be safe.
+ NewDefsReversed.push_back(*RIt);
+ continue;
+ }
+
+ // Only keep this location definition if it is not fully eclipsed by
+ // other definitions in this wedge that come after it.
+
+ // Insert the bits that the location definition defines.
+ auto InsertResult =
+ VariableDefinedBits.try_emplace(Aggr, BitVector(SizeInBits));
+ bool FirstDefinition = InsertResult.second;
+ BitVector &DefinedBits = InsertResult.first->second;
+
+ DIExpression::FragmentInfo Fragment =
+ RIt->Expr->getFragmentInfo().value_or(
+ DIExpression::FragmentInfo(SizeInBits, 0));
+ bool InvalidFragment = Fragment.endInBits() > SizeInBits;
+
+ // If this defines any previously undefined bits, keep it.
+ if (FirstDefinition || InvalidFragment ||
+ DefinedBits.find_first_unset_in(Fragment.startInBits(),
+ Fragment.endInBits()) != -1) {
+ if (!InvalidFragment)
+ DefinedBits.set(Fragment.startInBits(), Fragment.endInBits());
+ NewDefsReversed.push_back(*RIt);
+ continue;
+ }
+
+ // Redundant def found: throw it away. Since the wedge of defs is being
+ // rebuilt, doing nothing is the same as deleting an entry.
+ ChangedThisWedge = true;
+ NumDefsRemoved++;
+ }
+
+ // Un-reverse the defs and replace the wedge with the pruned version.
+ if (ChangedThisWedge) {
+ std::reverse(NewDefsReversed.begin(), NewDefsReversed.end());
+ FnVarLocs.setWedge(&I, std::move(NewDefsReversed));
+ NumWedgesChanged++;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Remove redundant location defs using a forward scan. This can remove a
+/// location definition that is redundant due to indicating that a variable has
+/// the same value as is already being indicated by an earlier def.
+///
+/// This implements removeRedundantDbgInstrsUsingForwardScan from
+/// lib/Transforms/Utils/BasicBlockUtils.cpp for locations described with
+/// FunctionVarLocsBuilder instead of with intrinsics.
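+///
+/// For example (illustrative), a def that re-states the same value and
+/// expression already in effect for a variable at that point is dropped.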
+static bool
+removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ bool Changed = false;
+ DenseMap<DebugVariable, std::pair<RawLocationWrapper, DIExpression *>>
+ VariableMap;
+
+ // Scan over the entire block, not just over the instructions mapped by
+ // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
+ // instructions.
+ for (const Instruction &I : *BB) {
+ // Get the defs that come just before this instruction.
+ const auto *Locs = FnVarLocs.getWedge(&I);
+ if (!Locs)
+ continue;
+
+ NumWedgesScanned++;
+ bool ChangedThisWedge = false;
+ // The new pruned set of defs.
+ SmallVector<VarLocInfo> NewDefs;
+
+ // Iterate over the existing defs.
+ for (const VarLocInfo &Loc : *Locs) {
+ NumDefsScanned++;
+ DebugVariable Key(FnVarLocs.getVariable(Loc.VariableID).getVariable(),
+ std::nullopt, Loc.DL.getInlinedAt());
+ auto VMI = VariableMap.find(Key);
+
+ // Update the map if we found a new value/expression describing the
+ // variable, or if the variable wasn't mapped already.
+ if (VMI == VariableMap.end() || VMI->second.first != Loc.Values ||
+ VMI->second.second != Loc.Expr) {
+ VariableMap[Key] = {Loc.Values, Loc.Expr};
+ NewDefs.push_back(Loc);
+ continue;
+ }
+
+ // Did not insert this Loc, which is the same as removing it.
+ ChangedThisWedge = true;
+ NumDefsRemoved++;
+ }
+
+ // Replace the existing wedge with the pruned version.
+ if (ChangedThisWedge) {
+ FnVarLocs.setWedge(&I, std::move(NewDefs));
+ NumWedgesChanged++;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+static bool
+removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ assert(BB->isEntryBlock());
+ // Do extra work to ensure that we remove semantically unimportant undefs.
+ //
+ // This is to work around the fact that SelectionDAG will hoist dbg.values
+ // using argument values to the top of the entry block. That can move arg
+ // dbg.values before undef and constant dbg.values which they previously
+ // followed. The easiest thing to do is to just try to feed SelectionDAG
+ // input it's happy with.
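+ //
+ // For example (illustrative), a kill (undef) location def for a variable
+ // that appears before any non-undef def of that variable in the entry block
+ // is removed; undef defs that follow a non-undef def of overlapping bits
+ // are kept.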
+ //
+ // Map of {Variable x: Fragments y} where the fragments y of variable x
+ // have at least one non-undef location defined already. Don't use directly,
+ // instead call DefineBits and HasDefinedBits.
+ SmallDenseMap<DebugAggregate, SmallDenseSet<DIExpression::FragmentInfo>>
+ VarsWithDef;
+ // Specify that V (a fragment of A) has a non-undef location.
+ auto DefineBits = [&VarsWithDef](DebugAggregate A, DebugVariable V) {
+ VarsWithDef[A].insert(V.getFragmentOrDefault());
+ };
+ // Return true if a non-undef location has been defined for V (a fragment of
+ // A). Doesn't imply that the location is currently non-undef, just that a
+ // non-undef location has been seen previously.
+ auto HasDefinedBits = [&VarsWithDef](DebugAggregate A, DebugVariable V) {
+ auto FragsIt = VarsWithDef.find(A);
+ if (FragsIt == VarsWithDef.end())
+ return false;
+ return llvm::any_of(FragsIt->second, [V](auto Frag) {
+ return DIExpression::fragmentsOverlap(Frag, V.getFragmentOrDefault());
+ });
+ };
+
+ bool Changed = false;
+ DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap;
+
+ // Scan over the entire block, not just over the instructions mapped by
+ // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
+ // instructions.
+ for (const Instruction &I : *BB) {
+ // Get the defs that come just before this instruction.
+ const auto *Locs = FnVarLocs.getWedge(&I);
+ if (!Locs)
+ continue;
+
+ NumWedgesScanned++;
+ bool ChangedThisWedge = false;
+ // The new pruned set of defs.
+ SmallVector<VarLocInfo> NewDefs;
+
+ // Iterate over the existing defs.
+ for (const VarLocInfo &Loc : *Locs) {
+ NumDefsScanned++;
+ DebugAggregate Aggr{FnVarLocs.getVariable(Loc.VariableID).getVariable(),
+ Loc.DL.getInlinedAt()};
+ DebugVariable Var = FnVarLocs.getVariable(Loc.VariableID);
+
+ // Remove undef entries that are encountered before any non-undef
+ // intrinsics from the entry block.
+ if (Loc.Values.isKillLocation(Loc.Expr) && !HasDefinedBits(Aggr, Var)) {
+ // Did not insert this Loc, which is the same as removing it.
+ NumDefsRemoved++;
+ ChangedThisWedge = true;
+ continue;
+ }
+
+ DefineBits(Aggr, Var);
+ NewDefs.push_back(Loc);
+ }
+
+ // Replace the existing wedge with the pruned version.
+ if (ChangedThisWedge) {
+ FnVarLocs.setWedge(&I, std::move(NewDefs));
+ NumWedgesChanged++;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+static bool removeRedundantDbgLocs(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ bool MadeChanges = false;
+ MadeChanges |= removeRedundantDbgLocsUsingBackwardScan(BB, FnVarLocs);
+ if (BB->isEntryBlock())
+ MadeChanges |= removeUndefDbgLocsFromEntryBlock(BB, FnVarLocs);
+ MadeChanges |= removeRedundantDbgLocsUsingForwardScan(BB, FnVarLocs);
+
+ if (MadeChanges)
+ LLVM_DEBUG(dbgs() << "Removed redundant dbg locs from: " << BB->getName()
+ << "\n");
+ return MadeChanges;
+}
+
+static DenseSet<DebugAggregate> findVarsWithStackSlot(Function &Fn) {
+ DenseSet<DebugAggregate> Result;
+ for (auto &BB : Fn) {
+ for (auto &I : BB) {
+ // Any variable linked to an instruction is considered
+ // interesting. Ideally we only need to check Allocas, however, a
+ // DIAssignID might get dropped from an alloca but not stores. In that
+ // case, we need to consider the variable interesting for NFC behaviour
+ // with this change. TODO: Consider only looking at allocas.
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(&I)) {
+ Result.insert({DAI->getVariable(), DAI->getDebugLoc().getInlinedAt()});
+ }
+ }
+ }
+ return Result;
+}
+
+static void analyzeFunction(Function &Fn, const DataLayout &Layout,
+ FunctionVarLocsBuilder *FnVarLocs) {
+ // The analysis will generate location definitions for all variables, but we
+ // only need to perform a dataflow on the set of variables which have a stack
+ // slot. Find those now.
+ DenseSet<DebugAggregate> VarsWithStackSlot = findVarsWithStackSlot(Fn);
+
+ bool Changed = false;
+
+ // Use a scope block to clean up AssignmentTrackingLowering before running
+ // MemLocFragmentFill to reduce peak memory consumption.
+ {
+ AssignmentTrackingLowering Pass(Fn, Layout, &VarsWithStackSlot);
+ Changed = Pass.run(FnVarLocs);
+ }
+
+ if (Changed) {
+ MemLocFragmentFill Pass(Fn, &VarsWithStackSlot,
+ shouldCoalesceFragments(Fn));
+ Pass.run(FnVarLocs);
+
+ // Remove redundant entries. As well as reducing memory consumption and
+ // burning some cycles now to avoid waiting later, this has another
+ // important job: working around some SelectionDAG quirks. See the
+ // removeRedundantDbgLocsUsingForwardScan comments for more info on that.
+ for (auto &BB : Fn)
+ removeRedundantDbgLocs(&BB, *FnVarLocs);
+ }
+}
+
+bool AssignmentTrackingAnalysis::runOnFunction(Function &F) {
+ if (!isAssignmentTrackingEnabled(*F.getParent()))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "AssignmentTrackingAnalysis run on " << F.getName()
+ << "\n");
+ auto DL = std::make_unique<DataLayout>(F.getParent());
+
+ // Clear previous results.
+ Results->clear();
+
+ FunctionVarLocsBuilder Builder;
+ analyzeFunction(F, *DL.get(), &Builder);
+
+ // Save these results.
+ Results->init(Builder);
+
+ if (PrintResults && isFunctionInPrintList(F.getName()))
+ Results->print(errs(), F);
+
+ // Return false because this pass does not modify the function.
+ return false;
+}
+
+AssignmentTrackingAnalysis::AssignmentTrackingAnalysis()
+ : FunctionPass(ID), Results(std::make_unique<FunctionVarLocs>()) {}
+
+char AssignmentTrackingAnalysis::ID = 0;
+
+INITIALIZE_PASS(AssignmentTrackingAnalysis, DEBUG_TYPE,
+ "Assignment Tracking Analysis", false, true)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
new file mode 100644
index 000000000000..80a0bb957cfc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -0,0 +1,1974 @@
+//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions with
+// __atomic_* library calls, or target specific instructions which implement
+// same semantics in a way which better fits the target backend. This can
+// include the use of (intrinsic-based) load-linked/store-conditional loops,
+// AtomicCmpXchg, or type coercions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/AtomicExpandUtils.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "atomic-expand"
+
+namespace {
+
+class AtomicExpand : public FunctionPass {
+ const TargetLowering *TLI = nullptr;
+ const DataLayout *DL = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ AtomicExpand() : FunctionPass(ID) {
+ initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
+ IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+ LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
+ bool tryExpandAtomicStore(StoreInst *SI);
+ void expandAtomicStore(StoreInst *SI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+ AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
+ Value *
+ insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
+ void expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
+ void expandPartwordAtomicRMW(
+ AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
+ bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
+ void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
+
+ AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
+ static Value *insertRMWCmpXchgLoop(
+ IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg);
+ bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *RMWI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
+
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
+ Value *PointerOperand, Value *ValueOperand,
+ Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2,
+ ArrayRef<RTLIB::Libcall> Libcalls);
+ void expandAtomicLoadToLibcall(LoadInst *LI);
+ void expandAtomicStoreToLibcall(StoreInst *LI);
+ void expandAtomicRMWToLibcall(AtomicRMWInst *I);
+ void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
+
+ friend bool
+ llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg);
+};
+
+// IRBuilder to be used for replacement atomic instructions.
+struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
+ // Preserves the DebugLoc from I, and preserves still valid metadata.
+ explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
+ : IRBuilder(I->getContext(), DL) {
+ SetInsertPoint(I);
+ this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
+ }
+};
+
+} // end anonymous namespace
+
+char AtomicExpand::ID = 0;
+
+char &llvm::AtomicExpandID = AtomicExpand::ID;
+
+INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
+ false)
+
+FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
+
+// Helper functions to retrieve the size of atomic instructions.
+static unsigned getAtomicOpSize(LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(LI->getType());
+}
+
+static unsigned getAtomicOpSize(StoreInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(SI->getValueOperand()->getType());
+}
+
+static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
+ const DataLayout &DL = RMWI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
+}
+
+static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
+ const DataLayout &DL = CASI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
+}
+
+// Determine if a particular atomic operation has a supported size,
+// and is of appropriate alignment, to be passed through for target
+// lowering. (Versus turning into a __atomic libcall)
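+// For example (illustrative), an 8-byte atomic load that is only 4-byte
+// aligned, or one wider than getMaxAtomicSizeInBitsSupported(), fails this
+// check and is expanded to a libcall by the caller.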
+template <typename Inst>
+static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
+ unsigned Size = getAtomicOpSize(I);
+ Align Alignment = I->getAlign();
+ return Alignment >= Size &&
+ Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+}
+
+bool AtomicExpand::runOnFunction(Function &F) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ const auto *Subtarget = TM.getSubtargetImpl(F);
+ if (!Subtarget->enableAtomicExpand())
+ return false;
+ TLI = Subtarget->getTargetLowering();
+ DL = &F.getParent()->getDataLayout();
+
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (Instruction &I : instructions(F))
+ if (I.isAtomic() && !isa<FenceInst>(&I))
+ AtomicInsts.push_back(&I);
+
+ bool MadeChange = false;
+ for (auto *I : AtomicInsts) {
+ auto LI = dyn_cast<LoadInst>(I);
+ auto SI = dyn_cast<StoreInst>(I);
+ auto RMWI = dyn_cast<AtomicRMWInst>(I);
+ auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
+ assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
+
+ // If the Size/Alignment is not supported, replace with a libcall.
+ if (LI) {
+ if (!atomicSizeSupported(TLI, LI)) {
+ expandAtomicLoadToLibcall(LI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (SI) {
+ if (!atomicSizeSupported(TLI, SI)) {
+ expandAtomicStoreToLibcall(SI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (RMWI) {
+ if (!atomicSizeSupported(TLI, RMWI)) {
+ expandAtomicRMWToLibcall(RMWI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (CASI) {
+ if (!atomicSizeSupported(TLI, CASI)) {
+ expandAtomicCASToLibcall(CASI);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = LI = convertAtomicLoadToIntegerType(LI);
+ MadeChange = true;
+ } else if (SI &&
+ TLI->shouldCastAtomicStoreInIR(SI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = SI = convertAtomicStoreToIntegerType(SI);
+ MadeChange = true;
+ } else if (RMWI &&
+ TLI->shouldCastAtomicRMWIInIR(RMWI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = RMWI = convertAtomicXchgToIntegerType(RMWI);
+ MadeChange = true;
+ } else if (CASI) {
+ // TODO: when we're ready to make the change at the IR level, we can
+ // extend convertCmpXchgToInteger for floating point too.
+ if (CASI->getCompareOperand()->getType()->isPointerTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ I = CASI = convertCmpXchgToIntegerType(CASI);
+ MadeChange = true;
+ }
+ }
+
+ if (TLI->shouldInsertFencesForAtomic(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ if (LI && isAcquireOrStronger(LI->getOrdering())) {
+ FenceOrdering = LI->getOrdering();
+ LI->setOrdering(AtomicOrdering::Monotonic);
+ } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
+ FenceOrdering = SI->getOrdering();
+ SI->setOrdering(AtomicOrdering::Monotonic);
+ } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
+ isAcquireOrStronger(RMWI->getOrdering()))) {
+ FenceOrdering = RMWI->getOrdering();
+ RMWI->setOrdering(AtomicOrdering::Monotonic);
+ } else if (CASI &&
+ TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+ TargetLoweringBase::AtomicExpansionKind::None &&
+ (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getFailureOrdering()))) {
+ // If a compare and swap is lowered to LL/SC, we can do smarter fence
+ // insertion, with a stronger one on the success path than on the
+ // failure path. As a result, fence insertion is directly done by
+ // expandAtomicCmpXchg in that case.
+ FenceOrdering = CASI->getMergedOrdering();
+ CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
+ CASI->setFailureOrdering(AtomicOrdering::Monotonic);
+ }
+
+ if (FenceOrdering != AtomicOrdering::Monotonic) {
+ MadeChange |= bracketInstWithFences(I, FenceOrdering);
+ }
+ } else if (I->hasAtomicStore() &&
+ TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ if (SI)
+ FenceOrdering = SI->getOrdering();
+ else if (RMWI)
+ FenceOrdering = RMWI->getOrdering();
+ else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
+ TargetLoweringBase::AtomicExpansionKind::LLSC)
+ // LLSC is handled in expandAtomicCmpXchg().
+ FenceOrdering = CASI->getSuccessOrdering();
+
+ IRBuilder Builder(I);
+ if (auto TrailingFence =
+ TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
+ TrailingFence->moveAfter(I);
+ MadeChange = true;
+ }
+ }
+
+ if (LI)
+ MadeChange |= tryExpandAtomicLoad(LI);
+ else if (SI)
+ MadeChange |= tryExpandAtomicStore(SI);
+ else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(RMWI);
+ if (ValueSize < MinCASSize &&
+ (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And)) {
+ RMWI = widenPartwordAtomicRMW(RMWI);
+ MadeChange = true;
+ }
+
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
+ } else if (CASI)
+ MadeChange |= tryExpandAtomicCmpXchg(CASI);
+ }
+ return MadeChange;
+}
+
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
+ ReplacementIRBuilder Builder(I, *DL);
+
+ auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
+
+ auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
+ // We have a guard here because not every atomic operation generates a
+ // trailing fence.
+ if (TrailingFence)
+ TrailingFence->moveAfter(I);
+
+ return (LeadingFence || TrailingFence);
+}
+
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+ const DataLayout &DL) {
+ EVT VT = TLI->getMemValueType(DL, T);
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+ return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
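+///
+/// Rough sketch (illustrative, not the exact value names emitted): an atomic
+/// float load such as
+///   %v = load atomic float, float* %addr acquire, align 4
+/// becomes
+///   %addr.i = bitcast float* %addr to i32*
+///   %v.i = load atomic i32, i32* %addr.i acquire, align 4
+///   %v = bitcast i32 %v.i to float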
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+ auto *M = LI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
+
+ ReplacementIRBuilder Builder(LI, *DL);
+
+ Value *Addr = LI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
+ NewLI->setAlignment(LI->getAlign());
+ NewLI->setVolatile(LI->isVolatile());
+ NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
+ LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+ Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+ LI->replaceAllUsesWith(NewVal);
+ LI->eraseFromParent();
+ return NewLI;
+}
+
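+// Convert an atomic xchg of a non-integral type (floating point or pointer)
+// to an integer xchg of the equivalent bitwidth, mirroring
+// convertAtomicLoadToIntegerType above. Rough sketch for a float operand
+// (illustrative only):
+//   %old = atomicrmw xchg float* %addr, float %val seq_cst
+// becomes
+//   %addr.i = bitcast float* %addr to i32*
+//   %val.i = bitcast float %val to i32
+//   %old.i = atomicrmw xchg i32* %addr.i, i32 %val.i seq_cst
+//   %old = bitcast i32 %old.i to float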
+AtomicRMWInst *
+AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
+ auto *M = RMWI->getModule();
+ Type *NewTy =
+ getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
+
+ ReplacementIRBuilder Builder(RMWI, *DL);
+
+ Value *Addr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+ Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+ Value *NewVal = Val->getType()->isPointerTy()
+ ? Builder.CreatePtrToInt(Val, NewTy)
+ : Builder.CreateBitCast(Val, NewTy);
+
+ auto *NewRMWI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
+ RMWI->getAlign(), RMWI->getOrdering());
+ NewRMWI->setVolatile(RMWI->isVolatile());
+ LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
+
+ Value *NewRVal = RMWI->getType()->isPointerTy()
+ ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
+ : Builder.CreateBitCast(NewRMWI, RMWI->getType());
+ RMWI->replaceAllUsesWith(NewRVal);
+ RMWI->eraseFromParent();
+ return NewRMWI;
+}
+
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+ switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ expandAtomicOpToLLSC(
+ LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
+ LI->getOrdering(),
+ [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
+ return true;
+ case TargetLoweringBase::AtomicExpansionKind::LLOnly:
+ return expandAtomicLoadToLL(LI);
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+ return expandAtomicLoadToCmpXchg(LI);
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ LI->setAtomic(AtomicOrdering::NotAtomic);
+ return true;
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
+ }
+}
+
+bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
+ switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::Expand:
+ expandAtomicStore(SI);
+ return true;
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ SI->setAtomic(AtomicOrdering::NotAtomic);
+ return true;
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicStore");
+ }
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
+ ReplacementIRBuilder Builder(LI, *DL);
+
+ // On some architectures, load-linked instructions are atomic for larger
+ // sizes than normal loads. For example, the only 64-bit load guaranteed
+ // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
+ Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
+ LI->getPointerOperand(), LI->getOrdering());
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+
+ LI->replaceAllUsesWith(Val);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+ ReplacementIRBuilder Builder(LI, *DL);
+ AtomicOrdering Order = LI->getOrdering();
+ if (Order == AtomicOrdering::Unordered)
+ Order = AtomicOrdering::Monotonic;
+
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = LI->getType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, LI->getAlign(), Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all. The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of an atomic
+/// float or vector store. The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
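+///
+/// Rough sketch (illustrative): on a target requesting this cast,
+///   store atomic float %v, float* %addr release, align 4
+/// becomes
+///   %v.i = bitcast float %v to i32
+///   %addr.i = bitcast float* %addr to i32*
+///   store atomic i32 %v.i, i32* %addr.i release, align 4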
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+ ReplacementIRBuilder Builder(SI, *DL);
+ auto *M = SI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+ M->getDataLayout());
+ Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+
+ Value *Addr = SI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+ NewSI->setAlignment(SI->getAlign());
+ NewSI->setVolatile(SI->isVolatile());
+ NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
+ LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+ SI->eraseFromParent();
+ return NewSI;
+}
+
+void AtomicExpand::expandAtomicStore(StoreInst *SI) {
+ // This function is only called on atomic stores that are too large to be
+ // atomic if implemented as a native store. So we replace them by an atomic
+ // swap that can be implemented, for example, as ldrex/strex on ARM or as
+ // lock cmpxchg8b/16b on X86, since these are atomic for larger sizes.
+ // It is the responsibility of the target to only signal expansion via
+ // shouldExpandAtomicRMW in cases where this is required and possible.
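+ //
+ // Rough sketch of the rewrite (illustrative only):
+ //   store atomic i64 %v, i64* %addr seq_cst, align 8
+ // becomes
+ //   atomicrmw xchg i64* %addr, i64 %v seq_cst
+ // with the xchg result unused; tryExpandAtomicRMW below then lowers the
+ // swap like any other atomicrmw.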
+ ReplacementIRBuilder Builder(SI, *DL);
+ AtomicOrdering Ordering = SI->getOrdering();
+ assert(Ordering != AtomicOrdering::NotAtomic);
+ AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
+ ? AtomicOrdering::Monotonic
+ : Ordering;
+ AtomicRMWInst *AI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
+ SI->getAlign(), RMWOrdering);
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ tryExpandAtomicRMW(AI);
+}
+
+static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
+ Value *&Success, Value *&NewLoaded) {
+ Type *OrigTy = NewVal->getType();
+
+ // This code can go away when cmpxchg supports FP types.
+ assert(!OrigTy->isPointerTy());
+ bool NeedBitcast = OrigTy->isFloatingPointTy();
+ if (NeedBitcast) {
+ IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
+ unsigned AS = Addr->getType()->getPointerAddressSpace();
+ Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
+ NewVal = Builder.CreateBitCast(NewVal, IntTy);
+ Loaded = Builder.CreateBitCast(Loaded, IntTy);
+ }
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ if (NeedBitcast)
+ NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
+}
+
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ LLVMContext &Ctx = AI->getModule()->getContext();
+ TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+ switch (Kind) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ expandPartwordAtomicRMW(AI,
+ TargetLoweringBase::AtomicExpansionKind::LLSC);
+ } else {
+ auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
+ };
+ expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
+ AI->getAlign(), AI->getOrdering(), PerformOp);
+ }
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ expandPartwordAtomicRMW(AI,
+ TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+ } else {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[AI->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[AI->getSyncScopeID()];
+ OptimizationRemarkEmitter ORE(AI->getFunction());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
+ << "A compare and swap loop was generated for an atomic "
+ << AI->getOperationName(AI->getOperation()) << " operation at "
+ << MemScope << " memory scope";
+ });
+ expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ }
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+ expandAtomicRMWToMaskedIntrinsic(AI);
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
+ TLI->emitBitTestAtomicRMWIntrinsic(AI);
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
+ TLI->emitCmpArithAtomicRMWIntrinsic(AI);
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ return lowerAtomicRMWInst(AI);
+ case TargetLoweringBase::AtomicExpansionKind::Expand:
+ TLI->emitExpandAtomicRMW(AI);
+ return true;
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+ }
+}
+
+namespace {
+
+struct PartwordMaskValues {
+ // These three fields are guaranteed to be set by createMaskInstrs.
+ Type *WordType = nullptr;
+ Type *ValueType = nullptr;
+ Type *IntValueType = nullptr;
+ Value *AlignedAddr = nullptr;
+ Align AlignedAddrAlignment;
+ // The remaining fields can be null.
+ Value *ShiftAmt = nullptr;
+ Value *Mask = nullptr;
+ Value *Inv_Mask = nullptr;
+};
+
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
+ auto PrintObj = [&O](auto *V) {
+ if (V)
+ O << *V;
+ else
+ O << "nullptr";
+ O << '\n';
+ };
+ O << "PartwordMaskValues {\n";
+ O << " WordType: ";
+ PrintObj(PMV.WordType);
+ O << " ValueType: ";
+ PrintObj(PMV.ValueType);
+ O << " AlignedAddr: ";
+ PrintObj(PMV.AlignedAddr);
+ O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
+ O << " ShiftAmt: ";
+ PrintObj(PMV.ShiftAmt);
+ O << " Mask: ";
+ PrintObj(PMV.Mask);
+ O << " Inv_Mask: ";
+ PrintObj(PMV.Inv_Mask);
+ O << "}\n";
+ return O;
+}
+
+} // end anonymous namespace
+
+/// This is a helper function which builds instructions to provide
+/// values necessary for partword atomic operations. It takes an
+/// incoming address, Addr, and ValueType, and constructs the address,
+/// shift-amounts and masks needed to work with a larger value of size
+/// WordSize.
+///
+/// AlignedAddr: Addr rounded down to a multiple of WordSize
+///
+/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
+/// from AlignedAddr for it to have the same value as if
+/// ValueType was loaded from Addr.
+///
+/// Mask: Value to mask with the value loaded from AlignedAddr to
+/// include only the part that would've been loaded from Addr.
+///
+/// Inv_Mask: The inverse of Mask.
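+///
+/// Worked example (assuming, for illustration, a little-endian target with a
+/// 32-bit minimum cmpxchg width and an i8 located at byte offset 2 of its
+/// containing word):
+///   AlignedAddr = Addr with the low two bits cleared
+///   ShiftAmt    = 16
+///   Mask        = 0x00FF0000
+///   Inv_Mask    = 0xFF00FFFF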
+static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
+ Instruction *I, Type *ValueType,
+ Value *Addr, Align AddrAlign,
+ unsigned MinWordSize) {
+ PartwordMaskValues PMV;
+
+ Module *M = I->getModule();
+ LLVMContext &Ctx = M->getContext();
+ const DataLayout &DL = M->getDataLayout();
+ unsigned ValueSize = DL.getTypeStoreSize(ValueType);
+
+ PMV.ValueType = PMV.IntValueType = ValueType;
+ if (PMV.ValueType->isFloatingPointTy())
+ PMV.IntValueType =
+ Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
+
+ PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
+ : ValueType;
+ if (PMV.ValueType == PMV.WordType) {
+ PMV.AlignedAddr = Addr;
+ PMV.AlignedAddrAlignment = AddrAlign;
+ PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
+ PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
+ return PMV;
+ }
+
+ PMV.AlignedAddrAlignment = Align(MinWordSize);
+
+ assert(ValueSize < MinWordSize);
+
+ PointerType *PtrTy = cast<PointerType>(Addr->getType());
+ Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace());
+ IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
+ Value *PtrLSB;
+
+ if (AddrAlign < MinWordSize) {
+ PMV.AlignedAddr = Builder.CreateIntrinsic(
+ Intrinsic::ptrmask, {PtrTy, IntTy},
+ {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
+ "AlignedAddr");
+
+ Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
+ PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
+ } else {
+ // If the alignment is high enough, the low bits are known to be zero.
+ PMV.AlignedAddr = Addr;
+ PtrLSB = ConstantInt::getNullValue(IntTy);
+ }
+
+ if (DL.isLittleEndian()) {
+ // turn bytes into bits
+ PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+ } else {
+ // turn bytes into bits, and count from the other side.
+ PMV.ShiftAmt = Builder.CreateShl(
+ Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
+ }
+
+ PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
+ PMV.Mask = Builder.CreateShl(
+ ConstantInt::get(PMV.WordType, ((uint64_t)1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
+ "Mask");
+
+ PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
+
+ // Cast for typed pointers.
+ PMV.AlignedAddr =
+ Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");
+
+ return PMV;
+}
+
+static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
+ const PartwordMaskValues &PMV) {
+ assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+ if (PMV.WordType == PMV.ValueType)
+ return WideWord;
+
+ Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
+ Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
+ return Builder.CreateBitCast(Trunc, PMV.ValueType);
+}
+
+static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
+ Value *Updated, const PartwordMaskValues &PMV) {
+ assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+ assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
+ if (PMV.WordType == PMV.ValueType)
+ return Updated;
+
+ Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
+
+ Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
+ Value *Shift =
+ Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
+ Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
+ Value *Or = Builder.CreateOr(And, Shift, "inserted");
+ return Or;
+}
+
+/// Emit IR to implement a masked version of a given atomicrmw
+/// operation. (That is, only the bits under the Mask should be
+/// affected by the operation)
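+///
+/// For example (informally), the masked Xchg keeps the bits outside the mask
+/// and substitutes the shifted operand inside it:
+///   FinalVal = (Loaded & Inv_Mask) | Shifted_Inc
+/// Add/Sub/Nand compute on the shifted operand and merge the result back
+/// under the mask; the remaining operations extract the narrow value,
+/// operate on it, and re-insert it.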
+static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
+ IRBuilderBase &Builder, Value *Loaded,
+ Value *Shifted_Inc, Value *Inc,
+ const PartwordMaskValues &PMV) {
+ // TODO: update to use
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
+ // to merge bits from two values without requiring PMV.Inv_Mask.
+ switch (Op) {
+ case AtomicRMWInst::Xchg: {
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::And:
+ llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Nand: {
+ // The other arithmetic ops need to be masked into place.
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::UIncWrap:
+ case AtomicRMWInst::UDecWrap: {
+ // Finally, other ops will operate on the full value, so truncate down to
+ // the original size, and expand out again after doing the
+ // operation. Bitcasts will be inserted for FP values.
+ Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
+ Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
+ return FinalVal;
+ }
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+/// Expand a sub-word atomicrmw operation into an appropriate
+/// word-sized operation.
+///
+/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
+/// way as a typical atomicrmw expansion. The only difference here is
+/// that the operation inside of the loop may operate upon only a
+/// part of the value.
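+///
+/// For example (a rough sketch): an i8 "atomicrmw add" on a target with a
+/// 32-bit minimum cmpxchg width becomes a loop that loads the containing i32
+/// word, adds the zero-extended, shifted operand, merges the sum back under
+/// the byte's mask, and retries the cmpxchg or store-conditional until the
+/// full word is stored successfully.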
+void AtomicExpand::expandPartwordAtomicRMW(
+ AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
+ AtomicOrdering MemOpOrder = AI->getOrdering();
+ SyncScope::ID SSID = AI->getSyncScopeID();
+
+ ReplacementIRBuilder Builder(AI, *DL);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted = nullptr;
+ if (AI->getOperation() == AtomicRMWInst::Xchg ||
+ AI->getOperation() == AtomicRMWInst::Add ||
+ AI->getOperation() == AtomicRMWInst::Sub ||
+ AI->getOperation() == AtomicRMWInst::Nand) {
+ ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ }
+
+ auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
+ return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
+ ValOperand_Shifted, AI->getValOperand(), PMV);
+ };
+
+ Value *OldResult;
+ if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
+ OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+ PMV.AlignedAddrAlignment, MemOpOrder, SSID,
+ PerformPartwordOp, createCmpXchgInstFun);
+ } else {
+ assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
+ OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+ PMV.AlignedAddrAlignment, MemOpOrder,
+ PerformPartwordOp);
+ }
+
+ Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
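+// For example (informal sketch): an i8 "atomicrmw or" becomes an i32
+// "atomicrmw or" of the zero-extended, shifted operand on the aligned word,
+// since the zero bits outside the i8 lane leave the neighbouring bytes
+// unchanged. For "and", the shifted operand is first OR'd with Inv_Mask so
+// that every bit outside the lane is one and is therefore preserved.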
+AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
+ ReplacementIRBuilder Builder(AI, *DL);
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+
+ assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And) &&
+ "Unable to widen operation");
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+
+ Value *NewOperand;
+
+ if (Op == AtomicRMWInst::And)
+ NewOperand =
+ Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
+ else
+ NewOperand = ValOperand_Shifted;
+
+ AtomicRMWInst *NewAI =
+ Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
+ PMV.AlignedAddrAlignment, AI->getOrdering());
+
+ Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+ return NewAI;
+}
+
+bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+ // The basic idea here is that we're expanding a cmpxchg of a
+ // smaller memory size up to a word-sized cmpxchg. To do this, we
+ // need to add a retry-loop for strong cmpxchg, so that
+ // modifications to other parts of the word don't cause a spurious
+ // failure.
+
+ // This generates code like the following:
+ // [[Setup mask values PMV.*]]
+ // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
+ // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
+ // %InitLoaded = load i32* %addr
+ // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
+ // br partword.cmpxchg.loop
+ // partword.cmpxchg.loop:
+ // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
+ // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
+ // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
+ // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
+ // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
+ // i32 %FullWord_NewVal success_ordering failure_ordering
+ // %OldVal = extractvalue { i32, i1 } %NewCI, 0
+ // %Success = extractvalue { i32, i1 } %NewCI, 1
+ // br i1 %Success, label %partword.cmpxchg.end,
+ // label %partword.cmpxchg.failure
+ // partword.cmpxchg.failure:
+ // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
+ // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
+ // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
+ // label %partword.cmpxchg.end
+ // partword.cmpxchg.end:
+ // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
+ // %FinalOldVal = trunc i32 %tmp1 to i8
+ // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
+ // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
+
+ Value *Addr = CI->getPointerOperand();
+ Value *Cmp = CI->getCompareOperand();
+ Value *NewVal = CI->getNewValOperand();
+
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ ReplacementIRBuilder Builder(CI, *DL);
+ LLVMContext &Ctx = Builder.getContext();
+
+ BasicBlock *EndBB =
+ BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
+ auto FailureBB =
+ BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
+
+ // The split call above "helpfully" added a branch at the end of BB
+ // (to the wrong place).
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+ CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
+
+ // Shift the incoming values over, into the right location in the word.
+ Value *NewVal_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Cmp_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
+
+ // Load the entire current word, and mask into place the expected and new
+ // values
+ LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
+ InitLoaded->setVolatile(CI->isVolatile());
+ Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
+ Builder.CreateBr(LoopBB);
+
+ // partword.cmpxchg.loop:
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
+ Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
+
+ // Mask/Or the expected and new values into place in the loaded word.
+ Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
+ Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
+ AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
+ PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
+ CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
+ NewCI->setVolatile(CI->isVolatile());
+ // When we're building a strong cmpxchg, we need a retry loop, so it might
+ // seem natural to use a weak cmpxchg inside it. Using a strong one instead
+ // is what allows the ShouldContinue comparison below, and the underlying
+ // cmpxchg is expected to be a machine instruction, which is strong anyway.
+ NewCI->setWeak(CI->isWeak());
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Success = Builder.CreateExtractValue(NewCI, 1);
+
+ if (CI->isWeak())
+ Builder.CreateBr(EndBB);
+ else
+ Builder.CreateCondBr(Success, EndBB, FailureBB);
+
+ // partword.cmpxchg.failure:
+ Builder.SetInsertPoint(FailureBB);
+ // Upon failure, check whether the masked-out part of the loaded value has
+ // changed. If it has, some other part of the word was modified underneath
+ // us, so retry the loop; if it has not, the masked-in part must have
+ // mismatched, so the cmpxchg has genuinely failed and we exit.
+ Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
+ Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
+ Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
+
+ // Add the second value to the phi from above
+ Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
+
+ // partword.cmpxchg.end:
+ Builder.SetInsertPoint(CI);
+
+ Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
+ Value *Res = PoisonValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return true;
+}
+
+void AtomicExpand::expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
+ ReplacementIRBuilder Builder(I, *DL);
+ Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
+ MemOpOrder, PerformOp);
+
+ I->replaceAllUsesWith(Loaded);
+ I->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+ ReplacementIRBuilder Builder(AI, *DL);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
+
+ // The value operand must be sign-extended for signed min/max so that the
+ // target's signed comparison instructions can be used. Otherwise, just
+ // zero-ext.
+ Instruction::CastOps CastOp = Instruction::ZExt;
+ AtomicRMWInst::BinOp RMWOp = AI->getOperation();
+ if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
+ CastOp = Instruction::SExt;
+
+ Value *ValOperand_Shifted = Builder.CreateShl(
+ Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
+ Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
+ AI->getOrdering());
+ Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
+ ReplacementIRBuilder Builder(CI, *DL);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
+ CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *CmpVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
+ "CmpVal_Shifted");
+ Value *NewVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
+ "NewVal_Shifted");
+ Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
+ Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
+ CI->getMergedOrdering());
+ Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
+ Value *Res = PoisonValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Value *Success = Builder.CreateICmpEQ(
+ CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+}
+
+Value *AtomicExpand::insertRMWLLSCLoop(
+ IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
+ LLVMContext &Ctx = Builder.getContext();
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Function *F = BB->getParent();
+
+ assert(AddrAlign >=
+ F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
+ "Expected at least natural alignment at this point.");
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // atomicrmw.start:
+ // %loaded = @load.linked(%addr)
+ // %new = some_op iN %loaded, %incr
+ // %stored = @store_conditional(%new, %addr)
+ // %try_again = icmp ne i32 %stored, 0
+ // br i1 %try_again, label %loop, label %atomicrmw.end
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB =
+ BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place).
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
+
+ Value *NewVal = PerformOp(Builder, Loaded);
+
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return Loaded;
+}
+
+/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
+/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
+/// IR. As a migration step, we convert back to what used to be the standard
+/// way to represent a pointer cmpxchg so that we can update backends one by
+/// one.
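+///
+/// Rough sketch (illustrative): a cmpxchg of i8* values
+///   %r = cmpxchg i8** %addr, i8* %cmp, i8* %new seq_cst seq_cst
+/// becomes, on a 64-bit target,
+///   %addr.i = bitcast i8** %addr to i64*
+///   %cmp.i = ptrtoint i8* %cmp to i64
+///   %new.i = ptrtoint i8* %new to i64
+///   %r.i = cmpxchg i64* %addr.i, i64 %cmp.i, i64 %new.i seq_cst seq_cst
+/// with the loaded i64 converted back via inttoptr and the { i64, i1 }
+/// result repacked into { i8*, i1 }.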
+AtomicCmpXchgInst *
+AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+ auto *M = CI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
+ M->getDataLayout());
+
+ ReplacementIRBuilder Builder(CI, *DL);
+
+ Value *Addr = CI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
+ Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
+
+ auto *NewCI = Builder.CreateAtomicCmpXchg(
+ NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
+ CI->getFailureOrdering(), CI->getSyncScopeID());
+ NewCI->setVolatile(CI->isVolatile());
+ NewCI->setWeak(CI->isWeak());
+ LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Succ = Builder.CreateExtractValue(NewCI, 1);
+
+ OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
+
+ Value *Res = PoisonValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, OldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Succ, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return NewCI;
+}
+
+bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
+ AtomicOrdering FailureOrder = CI->getFailureOrdering();
+ Value *Addr = CI->getPointerOperand();
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+ // If shouldInsertFencesForAtomic() returns true, then the target does not
+ // want to deal with memory orders, and emitLeading/TrailingFence should take
+ // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
+ // should preserve the ordering.
+ bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
+ AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
+ ? AtomicOrdering::Monotonic
+ : CI->getMergedOrdering();
+
+ // In implementations which use a barrier to achieve release semantics, we can
+ // delay emitting this barrier until we know a store is actually going to be
+ // attempted. The cost of this delay is that we need 2 copies of the block
+ // emitting the load-linked, affecting code size.
+ //
+ // Ideally, this logic would be unconditional except for the minsize check
+ // since in other cases the extra blocks naturally collapse down to the
+ // minimal loop. Unfortunately, this puts too much stress on later
+ // optimisations so we avoid emitting the extra logic in those cases too.
+ bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
+ SuccessOrder != AtomicOrdering::Monotonic &&
+ SuccessOrder != AtomicOrdering::Acquire &&
+ !F->hasMinSize();
+
+ // There's no overhead for sinking the release barrier in a weak cmpxchg, so
+ // do it even on minsize.
+ bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
+
+ // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+ //
+ // The full expansion we produce is:
+ // [...]
+ // %aligned.addr = ...
+ // cmpxchg.start:
+ // %unreleasedload = @load.linked(%aligned.addr)
+ // %unreleasedload.extract = extract value from %unreleasedload
+ // %should_store = icmp eq %unreleasedload.extract, %desired
+ // br i1 %should_store, label %cmpxchg.releasingstore,
+ // label %cmpxchg.nostore
+ // cmpxchg.releasingstore:
+ // fence?
+ // br label cmpxchg.trystore
+ // cmpxchg.trystore:
+ // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
+ // [%releasedload, %cmpxchg.releasedload]
+ // %updated.new = insert %new into %loaded.trystore
+ // %stored = @store_conditional(%updated.new, %aligned.addr)
+ // %success = icmp eq i32 %stored, 0
+ // br i1 %success, label %cmpxchg.success,
+ // label %cmpxchg.releasedload/%cmpxchg.failure
+ // cmpxchg.releasedload:
+ // %releasedload = @load.linked(%aligned.addr)
+ // %releasedload.extract = extract value from %releasedload
+ // %should_store = icmp eq %releasedload.extract, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.failure
+ // cmpxchg.success:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.nostore:
+ // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
+ // [%releasedload,
+ // %cmpxchg.releasedload/%cmpxchg.trystore]
+ // @load_linked_fail_balance()?
+ // br label %cmpxchg.failure
+ // cmpxchg.failure:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.end:
+ // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.trystore]
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %loaded = extract value from %loaded.exit
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
+ auto ReleasedLoadBB =
+ BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
+ auto TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
+ auto ReleasingStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
+ auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
+
+ ReplacementIRBuilder Builder(CI, *DL);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, CI, SuccessOrder);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+ CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
+ Builder.CreateBr(StartBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(StartBB);
+ Value *UnreleasedLoad =
+ TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
+ Value *UnreleasedLoadExtract =
+ extractMaskedValue(Builder, UnreleasedLoad, PMV);
+ Value *ShouldStore = Builder.CreateICmpEQ(
+ UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+
+ Builder.SetInsertPoint(ReleasingStoreBB);
+ if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, CI, SuccessOrder);
+ Builder.CreateBr(TryStoreBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ PHINode *LoadedTryStore =
+ Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
+ LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+ Value *NewValueInsert =
+ insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
+ Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
+ PMV.AlignedAddr, MemOpOrder);
+ StoreSuccess = Builder.CreateICmpEQ(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : RetryBB);
+
+ Builder.SetInsertPoint(ReleasedLoadBB);
+ Value *SecondLoad;
+ if (HasReleasedLoadBB) {
+ SecondLoad =
+ TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
+ Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
+ ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
+ CI->getCompareOperand(), "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ // Update PHI node in TryStoreBB.
+ LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
+ } else
+ Builder.CreateUnreachable();
+
+ // Make sure later instructions don't get reordered with a fence if
+ // necessary.
+ Builder.SetInsertPoint(SuccessBB);
+ if (ShouldInsertFencesForAtomic ||
+ TLI->shouldInsertTrailingFenceForAtomicStore(CI))
+ TLI->emitTrailingFence(Builder, CI, SuccessOrder);
+ Builder.CreateBr(ExitBB);
+
+ Builder.SetInsertPoint(NoStoreBB);
+ PHINode *LoadedNoStore =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
+ LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
+ if (HasReleasedLoadBB)
+ LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ // In the failing case, where we don't execute the store-conditional, the
+ // target might want to balance out the load-linked with a dedicated
+ // instruction (e.g., on ARM, clearing the exclusive monitor).
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+ Builder.CreateBr(FailureBB);
+
+ Builder.SetInsertPoint(FailureBB);
+ PHINode *LoadedFailure =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
+ LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
+ if (CI->isWeak())
+ LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
+ if (ShouldInsertFencesForAtomic)
+ TLI->emitTrailingFence(Builder, CI, FailureOrder);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
+ // PHI.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *LoadedExit =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
+ LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
+ LoadedExit->addIncoming(LoadedFailure, FailureBB);
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // This is the "exit value" from the cmpxchg expansion. It may be of
+ // a type wider than the one in the cmpxchg instruction.
+ Value *LoadedFull = LoadedExit;
+
+ Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
+ Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto *User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+ // We can remove the instructions now we're no longer iterating through them.
+ for (auto *EV : PrunedInsts)
+ EV->eraseFromParent();
+
+ if (!CI->use_empty()) {
+ // Some use of the full struct return that we don't understand has happened,
+ // so we've got to reconstruct it properly.
+ Value *Res;
+ Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ }
+
+ CI->eraseFromParent();
+ return true;
+}
+
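+// Return true if this atomicrmw can never change the value in memory, making
+// it effectively an (expensive) atomic load. For example, an "or" or "add"
+// of constant 0, or an "and" of -1, always writes back the value that was
+// already there.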
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if (!C)
+ return false;
+
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch (Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ tryExpandAtomicLoad(ResultingLoad);
+ return true;
+ }
+ return false;
+}
+
+Value *AtomicExpand::insertRMWCmpXchgLoop(
+ IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ LLVMContext &Ctx = Builder.getContext();
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Function *F = BB->getParent();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB =
+ BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal = PerformOp(Builder, Loaded);
+
+ Value *NewLoaded = nullptr;
+ Value *Success = nullptr;
+
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
+ MemOpOrder == AtomicOrdering::Unordered
+ ? AtomicOrdering::Monotonic
+ : MemOpOrder,
+ SSID, Success, NewLoaded);
+ assert(Success && NewLoaded);
+
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return NewLoaded;
+}
+
+bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(CI);
+
+ switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ if (ValueSize < MinCASSize)
+ return expandPartwordCmpXchg(CI);
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ return expandAtomicCmpXchg(CI);
+ }
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
+ expandAtomicCmpXchgToMaskedIntrinsic(CI);
+ return true;
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ return lowerAtomicCmpXchgInst(CI);
+ }
+}
+
+// Note: This function is exposed externally by AtomicExpandUtils.h
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
+ Builder.setIsFPConstrained(
+ AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
+
+ // FIXME: If FP exceptions are observable, we should force them off for the
+ // loop for the FP atomics.
+ Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
+ Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
+ AI->getOrdering(), AI->getSyncScopeID(),
+ [&](IRBuilderBase &Builder, Value *Loaded) {
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
+ },
+ CreateCmpXchg);
+
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+ return true;
+}
+
+// In order to use one of the sized library calls such as
+// __atomic_fetch_add_4, the alignment must be sufficient, the size
+// must be one of the potentially-specialized sizes, and the value
+// type must actually exist in C on the target (otherwise, the
+// function wouldn't actually be defined).
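+// For example (illustrative): a naturally aligned 4-byte operation can use a
+// sized call such as __atomic_fetch_add_4, whereas an under-aligned access,
+// a 3-byte access, or a 16-byte access on a target whose largest legal
+// integer type is only 32 bits cannot, and is handled by the generic
+// libcalls (or, for RMW operations, by the cmpxchg-loop fallback below).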
+static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
+ const DataLayout &DL) {
+ // TODO: "LargestSize" is an approximation for "largest type that
+ // you can express in C". It seems to be the case that int128 is
+ // supported on all 64-bit platforms, otherwise only up to 64-bit
+ // integers are supported. If we get this wrong, then we'll try to
+ // call a sized libcall that doesn't actually exist. There should
+ // really be some more reliable way in LLVM of determining integer
+ // sizes which are valid in the target's C ABI...
+ unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
+ return Alignment >= Size &&
+ (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
+ Size <= LargestSize;
+}
+
+void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
+ RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
+ unsigned Size = getAtomicOpSize(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
+}
+
+void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
+ RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
+ unsigned Size = getAtomicOpSize(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
+ nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
+}
+
+void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
+ unsigned Size = getAtomicOpSize(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
+ I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
+ Libcalls);
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
+}
+
+static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
+ static const RTLIB::Libcall LibcallsXchg[6] = {
+ RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
+ RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
+ RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
+ static const RTLIB::Libcall LibcallsAdd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
+ RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
+ RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
+ static const RTLIB::Libcall LibcallsSub[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
+ RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
+ RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
+ static const RTLIB::Libcall LibcallsAnd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
+ RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
+ RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
+ static const RTLIB::Libcall LibcallsOr[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
+ RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
+ RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
+ static const RTLIB::Libcall LibcallsXor[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
+ RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
+ RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
+ static const RTLIB::Libcall LibcallsNand[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
+ RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
+ RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
+
+ switch (Op) {
+ case AtomicRMWInst::BAD_BINOP:
+ llvm_unreachable("Should not have BAD_BINOP.");
+ case AtomicRMWInst::Xchg:
+ return ArrayRef(LibcallsXchg);
+ case AtomicRMWInst::Add:
+ return ArrayRef(LibcallsAdd);
+ case AtomicRMWInst::Sub:
+ return ArrayRef(LibcallsSub);
+ case AtomicRMWInst::And:
+ return ArrayRef(LibcallsAnd);
+ case AtomicRMWInst::Or:
+ return ArrayRef(LibcallsOr);
+ case AtomicRMWInst::Xor:
+ return ArrayRef(LibcallsXor);
+ case AtomicRMWInst::Nand:
+ return ArrayRef(LibcallsNand);
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ case AtomicRMWInst::UIncWrap:
+ case AtomicRMWInst::UDecWrap:
+ // No atomic libcalls are available for these operations (min/max, the
+ // floating-point operations, and the wrap operations).
+ return {};
+ }
+ llvm_unreachable("Unexpected AtomicRMW operation.");
+}
+
+void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
+ ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
+
+ unsigned Size = getAtomicOpSize(I);
+
+ bool Success = false;
+ if (!Libcalls.empty())
+ Success = expandAtomicOpToLibcall(
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
+ nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+
+ // The expansion failed: either there were no libcalls at all for
+ // the operation (min/max), or there were only size-specialized
+ // libcalls (add/sub/etc) and we needed a generic. So, expand to a
+ // CAS libcall, via a CAS loop, instead.
+ if (!Success) {
+ expandAtomicRMWToCmpXchg(
+ I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
+ Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
+ SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
+ // Create the CAS instruction normally...
+ AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, Alignment, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ // ...and then expand the CAS into a libcall.
+ expandAtomicCASToLibcall(Pair);
+ });
+ }
+}
+
+// A helper routine for the above expandAtomic*ToLibcall functions.
+//
+// 'Libcalls' contains an array of enum values for the particular
+// ATOMIC libcalls to be emitted. All of the other arguments besides
+// 'I' are extracted from the Instruction subclass by the
+// caller. Depending on the particular call, some will be null.
+bool AtomicExpand::expandAtomicOpToLibcall(
+ Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
+ Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
+ assert(Libcalls.size() == 6);
+
+ LLVMContext &Ctx = I->getContext();
+ Module *M = I->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ IRBuilder<> Builder(I);
+ IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
+
+ bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+ Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
+
+ const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
+
+ // TODO: the "order" argument type is "int", not int32. So
+ // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
+ ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
+ assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Constant *OrderingVal =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
+ Constant *Ordering2Val = nullptr;
+ if (CASExpected) {
+ assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Ordering2Val =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
+ }
+ bool HasResult = I->getType() != Type::getVoidTy(Ctx);
+
+ RTLIB::Libcall RTLibType;
+ if (UseSizedLibcall) {
+ switch (Size) {
+ case 1:
+ RTLibType = Libcalls[1];
+ break;
+ case 2:
+ RTLibType = Libcalls[2];
+ break;
+ case 4:
+ RTLibType = Libcalls[3];
+ break;
+ case 8:
+ RTLibType = Libcalls[4];
+ break;
+ case 16:
+ RTLibType = Libcalls[5];
+ break;
+ }
+ } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
+ RTLibType = Libcalls[0];
+ } else {
+ // Can't use sized function, and there's no generic for this
+ // operation, so give up.
+ return false;
+ }
+
+ if (!TLI->getLibcallName(RTLibType)) {
+ // This target does not implement the requested atomic libcall so give up.
+ return false;
+ }
+
+ // Build up the function call. There are two kinds. First, the sized
+ // variants. These calls are going to be one of the following (with
+ // N=1,2,4,8,16):
+ // iN __atomic_load_N(iN *ptr, int ordering)
+ // void __atomic_store_N(iN *ptr, iN val, int ordering)
+ // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
+ // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
+ // int success_order, int failure_order)
+ //
+ // Note that these functions can be used for non-integer atomic
+ // operations, the values just need to be bitcast to integers on the
+ // way in and out.
+ //
+ // And, then, the generic variants. They look like the following:
+ // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
+ // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
+ // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
+ // int ordering)
+ // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
+ // void *desired, int success_order,
+ // int failure_order)
+ //
+ // The different signatures are built up depending on the
+ // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
+ // variables.
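+  //
+  // For example (a sketch, not verbatim output): an 'atomicrmw add i32' that
+  // can use a sized libcall becomes a call to
+  //   i32 __atomic_fetch_add_4(i32 *ptr, i32 val, int ordering)
+  // whereas a 16-byte cmpxchg on a target without a sized variant becomes
+  //   bool __atomic_compare_exchange(16, ptr, expected, desired,
+  //                                  success_order, failure_order)
+  // with 'expected' and 'desired' spilled to stack allocas as done below.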
+
+ AllocaInst *AllocaCASExpected = nullptr;
+ Value *AllocaCASExpected_i8 = nullptr;
+ AllocaInst *AllocaValue = nullptr;
+ Value *AllocaValue_i8 = nullptr;
+ AllocaInst *AllocaResult = nullptr;
+ Value *AllocaResult_i8 = nullptr;
+
+ Type *ResultTy;
+ SmallVector<Value *, 6> Args;
+ AttributeList Attr;
+
+ // 'size' argument.
+ if (!UseSizedLibcall) {
+ // Note, getIntPtrType is assumed equivalent to size_t.
+ Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
+ }
+
+ // 'ptr' argument.
+  // Note: This assumes all address spaces share a common libfunc
+  // implementation and that addresses are convertible. For systems without
+ // that property, we'd need to extend this mechanism to support AS-specific
+ // families of atomic intrinsics.
+ auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
+ Value *PtrVal =
+ Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
+ PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
+ Args.push_back(PtrVal);
+
+ // 'expected' argument, if present.
+ if (CASExpected) {
+ AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
+ AllocaCASExpected->setAlignment(AllocaAlignment);
+ unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
+
+ AllocaCASExpected_i8 = Builder.CreateBitCast(
+ AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
+ Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
+ Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
+ Args.push_back(AllocaCASExpected_i8);
+ }
+
+ // 'val' argument ('desired' for cas), if present.
+ if (ValueOperand) {
+ if (UseSizedLibcall) {
+ Value *IntValue =
+ Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
+ Args.push_back(IntValue);
+ } else {
+ AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
+ AllocaValue->setAlignment(AllocaAlignment);
+ AllocaValue_i8 =
+ Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
+ Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
+ Args.push_back(AllocaValue_i8);
+ }
+ }
+
+ // 'ret' argument.
+ if (!CASExpected && HasResult && !UseSizedLibcall) {
+ AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
+ AllocaResult->setAlignment(AllocaAlignment);
+ unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
+ AllocaResult_i8 =
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
+ Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
+ Args.push_back(AllocaResult_i8);
+ }
+
+ // 'ordering' ('success_order' for cas) argument.
+ Args.push_back(OrderingVal);
+
+ // 'failure_order' argument, if present.
+ if (Ordering2Val)
+ Args.push_back(Ordering2Val);
+
+ // Now, the return type.
+ if (CASExpected) {
+ ResultTy = Type::getInt1Ty(Ctx);
+ Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
+ } else if (HasResult && UseSizedLibcall)
+ ResultTy = SizedIntTy;
+ else
+ ResultTy = Type::getVoidTy(Ctx);
+
+ // Done with setting up arguments and return types, create the call:
+ SmallVector<Type *, 6> ArgTys;
+ for (Value *Arg : Args)
+ ArgTys.push_back(Arg->getType());
+ FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
+ FunctionCallee LibcallFn =
+ M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
+ CallInst *Call = Builder.CreateCall(LibcallFn, Args);
+ Call->setAttributes(Attr);
+ Value *Result = Call;
+
+ // And then, extract the results...
+ if (ValueOperand && !UseSizedLibcall)
+ Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
+
+ if (CASExpected) {
+ // The final result from the CAS is {load of 'expected' alloca, bool result
+ // from call}
+ Type *FinalResultTy = I->getType();
+ Value *V = PoisonValue::get(FinalResultTy);
+ Value *ExpectedOut = Builder.CreateAlignedLoad(
+ CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
+ Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
+ V = Builder.CreateInsertValue(V, ExpectedOut, 0);
+ V = Builder.CreateInsertValue(V, Result, 1);
+ I->replaceAllUsesWith(V);
+ } else if (HasResult) {
+ Value *V;
+ if (UseSizedLibcall)
+ V = Builder.CreateBitOrPointerCast(Result, I->getType());
+ else {
+ V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
+ AllocaAlignment);
+ Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
+ }
+ I->replaceAllUsesWith(V);
+ }
+ I->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
new file mode 100644
index 000000000000..6967ca5160c0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -0,0 +1,406 @@
+//===-- BasicBlockSections.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// BasicBlockSections implementation.
+//
+// The purpose of this pass is to assign sections to basic blocks when the
+// -fbasic-block-sections= option is used. Further, with profile information,
+// only the subset of basic blocks with profiles are placed in separate sections
+// and the rest are grouped in a cold section. The exception handling blocks are
+// treated specially to ensure they are all in one section.
+//
+// Basic Block Sections
+// ====================
+//
+// With the option -fbasic-block-sections=list, every function may be split into
+// clusters of basic blocks. Every cluster will be emitted into a separate
+// section with its basic blocks sequenced in the given order. To get the
+// optimized performance, the clusters must form an optimal BB layout for the
+// function. We insert a symbol at the beginning of every cluster's section to
+// allow the linker to reorder the sections in any arbitrary sequence. A global
+// order of these sections would encapsulate the function layout.
+// For example, consider the following clusters for a function foo (consisting
+// of 6 basic blocks 0, 1, ..., 5).
+//
+// 0 2
+// 1 3 5
+//
+// * Basic blocks 0 and 2 are placed in one section with symbol `foo`
+// referencing the beginning of this section.
+// * Basic blocks 1, 3, 5 are placed in a separate section. A new symbol
+// `foo.__part.1` will reference the beginning of this section.
+// * Basic block 4 (note that it is not referenced in the list) is placed in
+// one section, and a new symbol `foo.cold` will point to it.
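+//
+// With -fbasic-block-sections=list=<file>, a profile describing this
+// clustering (a sketch in the format parsed by
+// BasicBlockSectionsProfileReader) could be:
+//   !foo
+//   !!0 2
+//   !!1 3 5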
+//
+// There are a couple of challenges to be addressed:
+//
+// 1. The last basic block of every cluster should not have any implicit
+// fallthrough to its next basic block, as it can be reordered by the linker.
+// The compiler should make these fallthroughs explicit by adding
+//    unconditional jumps.
+//
+// 2. All inter-cluster branch targets would now need to be resolved by the
+// linker as they cannot be calculated during compile time. This is done
+// using static relocations. Further, the compiler tries to use short branch
+// instructions on some ISAs for small branch offsets. This is not possible
+// for inter-cluster branches as the offset is not determined at compile
+// time, and therefore, long branch instructions have to be used for those.
+//
+// 3. Debug Information (DebugInfo) and Call Frame Information (CFI) emission
+// needs special handling with basic block sections. DebugInfo needs to be
+// emitted with more relocations as basic block sections can break a
+// function into potentially several disjoint pieces, and CFI needs to be
+// emitted per cluster. This also bloats the object file and binary sizes.
+//
+// Basic Block Labels
+// ==================
+//
+// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of
+// every function into the .llvm_bb_addr_map section. Along with the function
+// symbols, this allows for mapping of virtual addresses in PMU profiles back to
+// the corresponding basic blocks. This logic is implemented in AsmPrinter. This
+// pass only assigns the BBSectionType of every function to ``labels``.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include <optional>
+
+using namespace llvm;
+
+// Placing the cold clusters in a separate section mitigates the impact of poor
+// profiles and allows optimizations such as hugepage mapping to be applied at a
+// section granularity. Defaults to ".text.split." which is recognized by lld
+// via the `-z keep-text-section-prefix` flag.
+cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
+ "bbsections-cold-text-prefix",
+ cl::desc("The text prefix to use for cold basic block clusters"),
+ cl::init(".text.split."), cl::Hidden);
+
+static cl::opt<bool> BBSectionsDetectSourceDrift(
+ "bbsections-detect-source-drift",
+    cl::desc("This checks if there is an FDO instr. profile hash "
+ "mismatch for this function"),
+ cl::init(true), cl::Hidden);
+
+namespace {
+
+class BasicBlockSections : public MachineFunctionPass {
+public:
+ static char ID;
+
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
+
+ BasicBlockSections() : MachineFunctionPass(ID) {
+ initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Basic Block Sections Analysis";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Identify basic blocks that need separate sections and prepare to emit them
+ /// accordingly.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char BasicBlockSections::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ BasicBlockSections, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
+
+// This function updates and optimizes the branching instructions of every basic
+// block in a given function to account for changes in the layout.
+static void
+updateBranches(MachineFunction &MF,
+ const SmallVector<MachineBasicBlock *> &PreLayoutFallThroughs) {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (auto &MBB : MF) {
+ auto NextMBBI = std::next(MBB.getIterator());
+ auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()];
+    // If this block had a fallthrough before, we need an explicit unconditional
+    // branch to that block if either
+    //     1- the block ends a section, which means its next block may be
+    //        reordered by the linker, or
+ // 2- the fallthrough block is not adjacent to the block in the new
+ // order.
+ if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB))
+ TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
+
+ // We do not optimize branches for machine basic blocks ending sections, as
+ // their adjacent block might be reordered by the linker.
+ if (MBB.isEndSection())
+ continue;
+
+ // It might be possible to optimize branches by flipping the branch
+ // condition.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
+ continue;
+ MBB.updateTerminator(FTMBB);
+ }
+}
+
+// This function provides the BBCluster information associated with a function.
+// Returns true if a valid association exists and false otherwise.
+bool getBBClusterInfoForFunction(
+ const MachineFunction &MF,
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader,
+ DenseMap<unsigned, BBClusterInfo> &V) {
+
+  // Find the associated cluster information.
+ std::pair<bool, SmallVector<BBClusterInfo, 4>> P =
+ BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName());
+ if (!P.first)
+ return false;
+
+ if (P.second.empty()) {
+    // This indicates that sections are desired for all basic blocks of this
+    // function. We clear the BBClusterInfo map to denote this.
+ V.clear();
+ return true;
+ }
+
+ for (const BBClusterInfo &BBCI : P.second)
+ V[BBCI.BBID] = BBCI;
+ return true;
+}
+
+// This function assigns sections to basic blocks according to the cluster
+// information. All explicitly specified clusters of basic blocks will be
+// ordered accordingly. All non-specified BBs go into a separate "Cold" section.
+// Additionally, if exception handling landing pads end up in more than one
+// cluster, they are moved into a single "Exception" section. Finally, clusters
+// are ordered in increasing order of their IDs, with the "Exception" and "Cold"
+// sections succeeding all other clusters.
+// FuncBBClusterInfo represents the cluster information for basic blocks. It
+// maps the BBID of each basic block to its cluster information. If it is
+// empty, it means unique sections are used for all basic blocks in the function.
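+//
+// Continuing the example from the file header (with the 'list' option and
+// clusters {0, 2} and {1, 3, 5}, block 4 unlisted), this would roughly assign
+// section ID 0 to blocks 0 and 2, section ID 1 to blocks 1, 3 and 5, and the
+// special ColdSectionID to block 4.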
+static void
+assignSections(MachineFunction &MF,
+ const DenseMap<unsigned, BBClusterInfo> &FuncBBClusterInfo) {
+ assert(MF.hasBBSections() && "BB Sections is not set for function.");
+ // This variable stores the section ID of the cluster containing eh_pads (if
+ // all eh_pads are one cluster). If more than one cluster contain eh_pads, we
+ // set it equal to ExceptionSectionID.
+ std::optional<MBBSectionID> EHPadsSectionID;
+
+ for (auto &MBB : MF) {
+ // With the 'all' option, every basic block is placed in a unique section.
+ // With the 'list' option, every basic block is placed in a section
+ // associated with its cluster, unless we want individual unique sections
+ // for every basic block in this function (if FuncBBClusterInfo is empty).
+ if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All ||
+ FuncBBClusterInfo.empty()) {
+ // If unique sections are desired for all basic blocks of the function, we
+ // set every basic block's section ID equal to its original position in
+ // the layout (which is equal to its number). This ensures that basic
+ // blocks are ordered canonically.
+ MBB.setSectionID(MBB.getNumber());
+ } else {
+ // TODO: Replace `getBBIDOrNumber` with `getBBID` once version 1 is
+ // deprecated.
+ auto I = FuncBBClusterInfo.find(MBB.getBBIDOrNumber());
+ if (I != FuncBBClusterInfo.end()) {
+ MBB.setSectionID(I->second.ClusterID);
+ } else {
+ // BB goes into the special cold section if it is not specified in the
+ // cluster info map.
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
+ }
+
+ if (MBB.isEHPad() && EHPadsSectionID != MBB.getSectionID() &&
+ EHPadsSectionID != MBBSectionID::ExceptionSectionID) {
+ // If we already have one cluster containing eh_pads, this must be updated
+ // to ExceptionSectionID. Otherwise, we set it equal to the current
+ // section ID.
+ EHPadsSectionID = EHPadsSectionID ? MBBSectionID::ExceptionSectionID
+ : MBB.getSectionID();
+ }
+ }
+
+ // If EHPads are in more than one section, this places all of them in the
+ // special exception section.
+ if (EHPadsSectionID == MBBSectionID::ExceptionSectionID)
+ for (auto &MBB : MF)
+ if (MBB.isEHPad())
+ MBB.setSectionID(*EHPadsSectionID);
+}
+
+void llvm::sortBasicBlocksAndUpdateBranches(
+ MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
+ [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front();
+ SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs());
+ for (auto &MBB : MF)
+ PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+
+ MF.sort(MBBCmp);
+ assert(&MF.front() == EntryBlock &&
+ "Entry block should not be displaced by basic block sections");
+
+ // Set IsBeginSection and IsEndSection according to the assigned section IDs.
+ MF.assignBeginEndSections();
+
+ // After reordering basic blocks, we must update basic block branches to
+ // insert explicit fallthrough branches when required and optimize branches
+ // when possible.
+ updateBranches(MF, PreLayoutFallThroughs);
+}
+
+// If the exception section begins with a landing pad, that landing pad will
+// assume a zero offset (relative to @LPStart) in the LSDA. However, a value of
+// zero implies "no landing pad." This function inserts a NOP just before the EH
+// pad label to ensure a nonzero offset.
+void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) {
+ for (auto &MBB : MF) {
+ if (MBB.isBeginSection() && MBB.isEHPad()) {
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (!MI->isEHLabel())
+ ++MI;
+ MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop();
+ BuildMI(MBB, MI, DebugLoc(),
+ MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode()));
+ }
+ }
+}
+
+// This checks if the source of this function has drifted since this binary was
+// profiled previously. For now, we are piggybacking on what PGO does to
+// detect this with instrumented profiles. PGO emits a hash of the IR and
+// checks if the hash has changed. Advanced basic block layout is usually done
+// on top of PGO-optimized binaries, and hence this check works well in practice.
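+//
+// Concretely (an illustrative IR sketch, not something this pass emits), the
+// instrumentation-based PGO pipeline marks such functions roughly as:
+//   define void @foo() !annotation !0 { ... }
+//   !0 = !{!"instr_prof_hash_mismatch"}
+// and the helper below simply looks for that operand on the function's
+// MD_annotation node.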
+static bool hasInstrProfHashMismatch(MachineFunction &MF) {
+ if (!BBSectionsDetectSourceDrift)
+ return false;
+
+ const char MetadataName[] = "instr_prof_hash_mismatch";
+ auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation);
+ if (Existing) {
+ MDTuple *Tuple = cast<MDTuple>(Existing);
+ for (const auto &N : Tuple->operands())
+ if (N.equalsStr(MetadataName))
+ return true;
+ }
+
+ return false;
+}
+
+bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
+ auto BBSectionsType = MF.getTarget().getBBSectionsType();
+ assert(BBSectionsType != BasicBlockSection::None &&
+ "BB Sections not enabled!");
+
+ // Check for source drift. If the source has changed since the profiles
+ // were obtained, optimizing basic blocks might be sub-optimal.
+ // This only applies to BasicBlockSection::List as it creates
+ // clusters of basic blocks using basic block ids. Source drift can
+  // invalidate these groupings, leading to sub-optimal code generation with
+  // regard to performance.
+ if (BBSectionsType == BasicBlockSection::List &&
+ hasInstrProfHashMismatch(MF))
+ return true;
+  // Renumber blocks before sorting them. This is useful during sorting:
+  // basic blocks in the same section will retain the default order.
+ // This renumbering should also be done for basic block labels to match the
+ // profiles with the correct blocks.
+ // For LLVM_BB_ADDR_MAP versions 2 and higher, this renumbering serves
+ // the different purpose of accessing the original layout positions and
+ // finding the original fallthroughs.
+ // TODO: Change the above comment accordingly when version 1 is deprecated.
+ MF.RenumberBlocks();
+
+ if (BBSectionsType == BasicBlockSection::Labels) {
+ MF.setBBSectionsType(BBSectionsType);
+ return true;
+ }
+
+ BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>();
+
+ // Map from BBID of blocks to their cluster information.
+ DenseMap<unsigned, BBClusterInfo> FuncBBClusterInfo;
+ if (BBSectionsType == BasicBlockSection::List &&
+ !getBBClusterInfoForFunction(MF, BBSectionsProfileReader,
+ FuncBBClusterInfo))
+ return true;
+ MF.setBBSectionsType(BBSectionsType);
+ assignSections(MF, FuncBBClusterInfo);
+
+ // We make sure that the cluster including the entry basic block precedes all
+ // other clusters.
+ auto EntryBBSectionID = MF.front().getSectionID();
+
+ // Helper function for ordering BB sections as follows:
+ // * Entry section (section including the entry block).
+ // * Regular sections (in increasing order of their Number).
+ // ...
+ // * Exception section
+ // * Cold section
+ auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
+ const MBBSectionID &RHS) {
+ // We make sure that the section containing the entry block precedes all the
+ // other sections.
+ if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
+ return LHS == EntryBBSectionID;
+ return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
+ };
+
+ // We sort all basic blocks to make sure the basic blocks of every cluster are
+ // contiguous and ordered accordingly. Furthermore, clusters are ordered in
+ // increasing order of their section IDs, with the exception and the
+ // cold section placed at the end of the function.
+ auto Comparator = [&](const MachineBasicBlock &X,
+ const MachineBasicBlock &Y) {
+ auto XSectionID = X.getSectionID();
+ auto YSectionID = Y.getSectionID();
+ if (XSectionID != YSectionID)
+ return MBBSectionOrder(XSectionID, YSectionID);
+    // If the two basic blocks are in the same section, the order is decided by
+ // their position within the section.
+ if (XSectionID.Type == MBBSectionID::SectionType::Default)
+ return FuncBBClusterInfo.lookup(X.getBBIDOrNumber()).PositionInCluster <
+ FuncBBClusterInfo.lookup(Y.getBBIDOrNumber()).PositionInCluster;
+ return X.getNumber() < Y.getNumber();
+ };
+
+ sortBasicBlocksAndUpdateBranches(MF, Comparator);
+ avoidZeroOffsetLandingPad(MF);
+ return true;
+}
+
+void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReader>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createBasicBlockSectionsPass() {
+ return new BasicBlockSections();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
new file mode 100644
index 000000000000..5dede452ec34
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -0,0 +1,200 @@
+//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the basic block sections profile reader pass. It parses
+// and stores the basic block sections profile file (which is specified via the
+// `-basic-block-sections` flag).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <llvm/ADT/STLExtras.h>
+
+using namespace llvm;
+
+char BasicBlockSectionsProfileReader::ID = 0;
+INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
+ "Reads and parses a basic block sections profile.", false,
+ false)
+
+bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
+ return getBBClusterInfoForFunction(FuncName).first;
+}
+
+std::pair<bool, SmallVector<BBClusterInfo>>
+BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
+ StringRef FuncName) const {
+ auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
+ return R != ProgramBBClusterInfo.end()
+ ? std::pair(true, R->second)
+ : std::pair(false, SmallVector<BBClusterInfo>{});
+}
+
+// Basic Block Sections can be enabled for a subset of machine basic blocks.
+// This is done by passing a file containing names of functions for which basic
+// block sections are desired. Additionally, machine basic block ids of the
+// functions can also be specified for a finer granularity. Moreover, a cluster
+// of basic blocks can be assigned to the same section.
+// Optionally, a debug-info filename can be specified for each function to allow
+// distinguishing internal-linkage functions of the same name.
+// A file with basic block sections for all of function main and three blocks
+// for function foo (of which 1 and 2 are placed in a cluster) looks like this
+// (the profile for function foo is only loaded when its debug-info filename
+// matches 'path/to/foo_file.cc'):
+// ----------------------------
+// list.txt:
+// !main
+// !foo M=path/to/foo_file.cc
+// !!1 2
+// !!4
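+//
+// For the example above, this yields two clusters for foo: cluster 0 holding
+// blocks 1 and 2 (in that order) and cluster 1 holding block 4. Every other
+// block of foo is left for the cold section, while main, having no explicit
+// cluster lines, gets a separate section for each of its basic blocks.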
+Error BasicBlockSectionsProfileReader::ReadProfile() {
+ assert(MBuf);
+ line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+
+ auto invalidProfileError = [&](auto Message) {
+ return make_error<StringError>(
+ Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
+ Twine(LineIt.line_number()) + ": " + Message),
+ inconvertibleErrorCode());
+ };
+
+ auto FI = ProgramBBClusterInfo.end();
+
+ // Current cluster ID corresponding to this function.
+ unsigned CurrentCluster = 0;
+ // Current position in the current cluster.
+ unsigned CurrentPosition = 0;
+
+ // Temporary set to ensure every basic block ID appears once in the clusters
+ // of a function.
+ SmallSet<unsigned, 4> FuncBBIDs;
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef S(*LineIt);
+ if (S[0] == '@')
+ continue;
+ // Check for the leading "!"
+ if (!S.consume_front("!") || S.empty())
+ break;
+ // Check for second "!" which indicates a cluster of basic blocks.
+ if (S.consume_front("!")) {
+      // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramBBClusterInfo.end())
+ continue;
+ SmallVector<StringRef, 4> BBIDs;
+ S.split(BBIDs, ' ');
+ // Reset current cluster position.
+ CurrentPosition = 0;
+ for (auto BBIDStr : BBIDs) {
+ unsigned long long BBID;
+ if (getAsUnsignedInteger(BBIDStr, 10, BBID))
+ return invalidProfileError(Twine("Unsigned integer expected: '") +
+ BBIDStr + "'.");
+ if (!FuncBBIDs.insert(BBID).second)
+ return invalidProfileError(Twine("Duplicate basic block id found '") +
+ BBIDStr + "'.");
+ if (BBID == 0 && CurrentPosition)
+ return invalidProfileError("Entry BB (0) does not begin a cluster.");
+
+ FI->second.emplace_back(
+ BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
+ }
+ CurrentCluster++;
+ } else {
+ // This is a function name specifier. It may include a debug info filename
+ // specifier starting with `M=`.
+ auto [AliasesStr, DIFilenameStr] = S.split(' ');
+ SmallString<128> DIFilename;
+ if (DIFilenameStr.startswith("M=")) {
+ DIFilename =
+ sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
+ if (DIFilename.empty())
+ return invalidProfileError("Empty module name specifier.");
+ } else if (!DIFilenameStr.empty()) {
+ return invalidProfileError("Unknown string found: '" + DIFilenameStr +
+ "'.");
+ }
+ // Function aliases are separated using '/'. We use the first function
+ // name for the cluster info mapping and delegate all other aliases to
+ // this one.
+ SmallVector<StringRef, 4> Aliases;
+ AliasesStr.split(Aliases, '/');
+ bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
+ auto It = FunctionNameToDIFilename.find(Alias);
+ // No match if this function name is not found in this module.
+ if (It == FunctionNameToDIFilename.end())
+ return false;
+ // Return a match if debug-info-filename is not specified. Otherwise,
+ // check for equality.
+ return DIFilename.empty() || It->second.equals(DIFilename);
+ });
+ if (!FunctionFound) {
+ // Skip the following profile by setting the profile iterator (FI) to
+ // the past-the-end element.
+ FI = ProgramBBClusterInfo.end();
+ continue;
+ }
+ for (size_t i = 1; i < Aliases.size(); ++i)
+ FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
+
+ // Prepare for parsing clusters of this function name.
+ // Start a new cluster map for this function name.
+ auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
+ // Report error when multiple profiles have been specified for the same
+ // function.
+ if (!R.second)
+ return invalidProfileError("Duplicate profile for function '" +
+ Aliases.front() + "'.");
+ FI = R.first;
+ CurrentCluster = 0;
+ FuncBBIDs.clear();
+ }
+ }
+ return Error::success();
+}
+
+bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
+ if (!MBuf)
+ return false;
+ // Get the function name to debug info filename mapping.
+ FunctionNameToDIFilename.clear();
+ for (const Function &F : M) {
+ SmallString<128> DIFilename;
+ if (F.isDeclaration())
+ continue;
+ DISubprogram *Subprogram = F.getSubprogram();
+ if (Subprogram) {
+ llvm::DICompileUnit *CU = Subprogram->getUnit();
+ if (CU)
+ DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
+ }
+ [[maybe_unused]] bool inserted =
+ FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
+ assert(inserted);
+ }
+ if (auto Err = ReadProfile())
+ report_fatal_error(std::move(Err));
+ return false;
+}
+
+ImmutablePass *
+llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
+ return new BasicBlockSectionsProfileReader(Buf);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
new file mode 100644
index 000000000000..57cefae2066a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -0,0 +1,34 @@
+//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the implementation of a basic TargetTransformInfo pass
+/// predicated on the target abstractions present in the target independent
+/// code generator. It uses these (primarily TargetLowering) to model as much
+/// of the TTI query interface as possible. It is included by most targets so
+/// that they can specialize only a small subset of the query space.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+// This flag is used by the template base class for BasicTTIImpl, and here to
+// provide a definition.
+cl::opt<unsigned>
+llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+ cl::desc("Threshold for partial unrolling"),
+ cl::Hidden);
+
+BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 000000000000..3830f25debaf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,2046 @@
+//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form. It also must handle virtual registers for targets that emit virtual
+// ISA (e.g. NVPTX).
+//
+//===----------------------------------------------------------------------===//
+
+#include "BranchFolding.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "branch-folder"
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+ };
+
+} // end anonymous namespace
+
+char BranchFolderPass::ID = 0;
+
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
+
+INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,
+ "Control Flow Optimizer", false, false)
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+  // TailMerge can create jumps into the middle of if-branches, which makes the
+  // CFG irreducible for HW that requires a structured CFG.
+ bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge();
+ MBFIWrapper MBBFreqInfo(
+ getAnalysis<MachineBlockFrequencyInfo>());
+ BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
+ getAnalysis<MachineBranchProbabilityInfo>(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
+ return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
+ MF.getSubtarget().getRegisterInfo());
+}
+
+BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
+ MBFIWrapper &FreqInfo,
+ const MachineBranchProbabilityInfo &ProbInfo,
+ ProfileSummaryInfo *PSI, unsigned MinTailLength)
+ : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
+ MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
+ if (MinCommonTailLength == 0)
+ MinCommonTailLength = TailMergeSize;
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET:
+ EnableTailMerge = DefaultEnableTailMerge;
+ break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+}
+
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
+ // Update call site info.
+ for (const MachineInstr &MI : *MBB)
+ if (MI.shouldUpdateCallSiteInfo())
+ MF->eraseCallSiteInfo(&MI);
+
+ // Remove the block.
+ MF->erase(MBB);
+ EHScopeMembership.erase(MBB);
+ if (MLI)
+ MLI->removeBlock(MBB);
+}
+
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineLoopInfo *mli, bool AfterPlacement) {
+ if (!tii) return false;
+
+ TriedMerging.clear();
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ AfterBlockPlacement = AfterPlacement;
+ TII = tii;
+ TRI = tri;
+ MLI = mli;
+ this->MRI = &MRI;
+
+ UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
+ if (!UpdateLiveIns)
+ MRI.invalidateLiveness();
+
+ bool MadeChange = false;
+
+ // Recalculate EH scope membership.
+ EHScopeMembership = getEHScopeMembership(MF);
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ // No need to clean up if tail merging does not change anything after the
+ // block placement.
+ if (!AfterBlockPlacement || MadeChangeThisIteration)
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (!JTI)
+ return MadeChange;
+
+ // Walk the function to find jump tables that are live.
+ BitVector JTIsLive(JTI->getJumpTables().size());
+ for (const MachineBasicBlock &BB : MF) {
+ for (const MachineInstr &I : BB)
+ for (const MachineOperand &Op : I.operands()) {
+ if (!Op.isJTI()) continue;
+
+ // Remember that this JT is live.
+ JTIsLive.set(Op.getIndex());
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens when the
+ // indirect jump was unreachable (and thus deleted).
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr &MI) {
+ unsigned Hash = MI.getOpcode();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI.getOperand(i);
+
+ // Merge in bits from the operand if easy. We can't use MachineOperand's
+ // hash_code here because it's not deterministic and we sort by hash value
+ // later.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register:
+ OperandHash = Op.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ OperandHash = Op.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default:
+ break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i & 31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(false);
+ if (I == MBB.end())
+ return 0;
+
+ return HashMachineInstr(*I);
+}
+
+/// Whether MI should be counted as an instruction when calculating common tail.
+static bool countsAsInstruction(const MachineInstr &MI) {
+ return !(MI.isDebugInstr() || MI.isCFIInstruction());
+}
+
+/// Iterate backwards from the given iterator \p I, towards the beginning of the
+/// block. If an MI satisfying 'countsAsInstruction' is found, return an iterator
+/// pointing to that MI. If no such MI is found, return the end iterator.
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+ MachineBasicBlock *MBB) {
+ while (I != MBB->begin()) {
+ --I;
+ if (countsAsInstruction(*I))
+ return I;
+ }
+ return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// actually have in common at their end. If a common tail is found (at
+/// least by one instruction), then iterators for the first shared instruction
+/// in each block are returned as well.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ MachineBasicBlock::iterator MBBI1 = MBB1->end();
+ MachineBasicBlock::iterator MBBI2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (true) {
+ MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+ MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+ if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+ break;
+ if (!MBBI1->isIdenticalTo(*MBBI2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ MBBI1->isInlineAsm()) {
+ break;
+ }
+ if (MBBI1->getFlag(MachineInstr::NoMerge) ||
+ MBBI2->getFlag(MachineInstr::NoMerge))
+ break;
+ ++TailLen;
+ I1 = MBBI1;
+ I2 = MBBI2;
+ }
+
+ return TailLen;
+}
+
+void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock &NewDest) {
+ if (UpdateLiveIns) {
+ // OldInst should always point to an instruction.
+ MachineBasicBlock &OldMBB = *OldInst->getParent();
+ LiveRegs.clear();
+ LiveRegs.addLiveOuts(OldMBB);
+    // Move backward to the place where we will insert the jump.
+ MachineBasicBlock::iterator I = OldMBB.end();
+ do {
+ --I;
+ LiveRegs.stepBackward(*I);
+ } while (I != OldInst);
+
+    // Merging the tails may have switched some undef operands to non-undef ones.
+ // Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the
+ // register.
+ for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) {
+ // We computed the liveins with computeLiveIn earlier and should only see
+ // full registers:
+ assert(P.LaneMask == LaneBitmask::getAll() &&
+ "Can only handle full register.");
+ MCPhysReg Reg = P.PhysReg;
+ if (!LiveRegs.available(*MRI, Reg))
+ continue;
+ DebugLoc DL;
+ BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+ }
+
+ TII->ReplaceTailWithBranchTo(OldInst, &NewDest);
+ ++NumTailMerge;
+}
+
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1,
+ const BasicBlock *BB) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return nullptr;
+
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = CurMBB.getIterator();
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB);
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // NewMBB belongs to the same loop as CurMBB.
+ if (MLI)
+ if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
+ ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
+
+ // NewMBB inherits CurMBB's block frequency.
+ MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
+
+ if (UpdateLiveIns)
+ computeAndAddLiveIns(LiveRegs, *NewMBB);
+
+ // Add the new block to the EH scope.
+ const auto &EHScopeI = EHScopeMembership.find(&CurMBB);
+ if (EHScopeI != EHScopeMembership.end()) {
+ auto n = EHScopeI->second;
+ EHScopeMembership[NewMBB] = n;
+ }
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ if (!countsAsInstruction(*I))
+ continue;
+ if (I->isCall())
+ Time += 10;
+ else if (I->mayLoadOrStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl = CurMBB->findBranchDebugLoc();
+ if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = &*I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->reverseBranchCondition(Cond)) {
+ TII->removeBranch(*CurMBB);
+ TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
+ return;
+ }
+ }
+ }
+ TII->insertBranch(*CurMBB, SuccBB, nullptr,
+ SmallVector<MachineOperand, 0>(), dl);
+}
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ if (getHash() > o.getHash())
+ return false;
+ if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#else
+ return false;
+#endif
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ while (true) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// A block with no successors that does not return probably ends in
+/// unreachable and is cold. Also consider a block that ends in an indirect
+/// branch to be a return block, since many targets use plain indirect branches
+/// to return.
+static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {
+ if (!MBB->succ_empty())
+ return false;
+ if (MBB->empty())
+ return true;
+ return !(MBB->back().isReturn() || MBB->back().isIndirectBranch());
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return the
+/// length of the common tail and iterators to the first common instruction
+/// in each block.
+/// MBB1, MBB2 The blocks to check
+/// MinCommonTailLength Minimum size of tail block to be merged.
+/// CommonTailLen Out parameter to record the size of the shared tail between
+/// MBB1 and MBB2
+/// I1, I2 Iterator references that will be changed to point to the first
+/// instruction in the common tail shared by MBB1,MBB2
+/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form
+/// relative to SuccBB
+/// PredBB The layout predecessor of SuccBB, if any.
+/// EHScopeMembership map from block to EH scope #.
+/// AfterPlacement True if we are merging blocks after layout. Stricter
+/// thresholds apply to prevent undoing tail-duplication.
+static bool
+ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
+ unsigned MinCommonTailLength, unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
+ bool AfterPlacement,
+ MBFIWrapper &MBBFreqInfo,
+ ProfileSummaryInfo *PSI) {
+ // It is never profitable to tail-merge blocks from two different EH scopes.
+ if (!EHScopeMembership.empty()) {
+ auto EHScope1 = EHScopeMembership.find(MBB1);
+ assert(EHScope1 != EHScopeMembership.end());
+ auto EHScope2 = EHScopeMembership.find(MBB2);
+ assert(EHScope2 != EHScopeMembership.end());
+ if (EHScope1->second != EHScope2->second)
+ return false;
+ }
+
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ if (CommonTailLen == 0)
+ return false;
+ LLVM_DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)
+ << " and " << printMBBReference(*MBB2) << " is "
+ << CommonTailLen << '\n');
+
+  // Move the iterators to the beginning of the MBB if we only got debug
+  // instructions before the tail. This avoids splitting such a block and keeps
+  // the result invariant with respect to -g.
+ if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end(), false) == I1)
+ I1 = MBB1->begin();
+ if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end(), false) == I2)
+ I2 = MBB2->begin();
+
+ bool FullBlockTail1 = I1 == MBB1->begin();
+ bool FullBlockTail2 = I2 == MBB2->begin();
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ // This is true only for a single successor. For multiple successors, we are
+ // trading a conditional branch for an unconditional one.
+ // TODO: Re-visit successor size for non-layout tail merging.
+ if ((MBB1 == PredBB || MBB2 == PredBB) &&
+ (!AfterPlacement || MBB1->succ_size() == 1)) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If these are identical non-return blocks with no successors, merge them.
+ // Such blocks are typically cold calls to noreturn functions like abort, and
+ // are unlikely to become a fallthrough target after machine block placement.
+ // Tail merging these blocks is unlikely to create additional unconditional
+ // branches, and will reduce the size of this cold code.
+ if (FullBlockTail1 && FullBlockTail2 &&
+ blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
+ return true;
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
+ return true;
+
+ // If both blocks are identical and end in a branch, merge them unless they
+ // both have a fallthrough predecessor and successor.
+ // We can only do this after block placement because it depends on whether
+ // there are fallthroughs, and we don't know until after layout.
+ if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
+ auto BothFallThrough = [](MachineBasicBlock *MBB) {
+ if (!MBB->succ_empty() && !MBB->canFallThrough())
+ return false;
+ MachineFunction::iterator I(MBB);
+ MachineFunction *MF = MBB->getParent();
+ return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough();
+ };
+ if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2))
+ return true;
+ }
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+  // heuristics. This heuristic is only accurate for single-successor blocks,
+  // so to make sure that merging and duplicating during layout don't crash, we
+  // check for that when merging during layout.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ (MBB1->succ_size() == 1 || !AfterPlacement) &&
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= MinCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
+ bool OptForSize =
+ MF->getFunction().hasOptSize() ||
+ (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
+ llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
+ return EffectiveTailLen >= 2 && OptForSize &&
+ (FullBlockTail1 || FullBlockTail2);
+}
+
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned MinCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = std::prev(MergePotentials.end());
+ for (MPIterator CurMPIter = std::prev(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {
+ for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ MinCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB,
+ EHScopeMembership,
+ AfterBlockPlacement, MBBFreqInfo, PSI)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
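+        // Record the second block of the pair only while CurMPIter is still
+        // the block that established the current maximum, so every SameTails
+        // entry shares the same (maximal) tail length.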
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = std::prev(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash; --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
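+  // The loop either stepped just past the group of blocks with hash CurHash
+  // or stopped at MergePotentials.begin(); adjust so the erase below removes
+  // exactly the blocks with hash CurHash.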
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ MachineBasicBlock *SuccBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+    // Otherwise, make a (fairly bogus) choice based on an estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ LLVM_DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "
+ << maxCommonTailLength);
+
+  // If the split block unconditionally falls through to SuccBB, it will be
+  // merged. In control-flow terms it should then take SuccBB's name; e.g., if
+  // SuccBB is an inner loop, the common tail is still part of the inner loop.
+ const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ?
+ SuccBB->getBasicBlock() : MBB->getBasicBlock();
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);
+ if (!newMBB) {
+ LLVM_DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return true;
+}
+
+static void
+mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
+ MachineBasicBlock *MBB = MBBIStartPos->getParent();
+  // Note that CommonTailLen does not necessarily match the size of the common
+  // BB, nor count all of its instructions, because of differences in debug
+  // instructions.
+ unsigned CommonTailLen = 0;
+ for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
+ ++CommonTailLen;
+
+ MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
+ MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
+ MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
+ MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
+
+ while (CommonTailLen--) {
+ assert(MBBI != MBBIE && "Reached BB end within common tail length!");
+ (void)MBBIE;
+
+ if (!countsAsInstruction(*MBBI)) {
+ ++MBBI;
+ continue;
+ }
+
+ while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon))
+ ++MBBICommon;
+
+ assert(MBBICommon != MBBIECommon &&
+ "Reached BB end within common tail length!");
+ assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
+
+ // Merge MMOs from memory operations in the common block.
+ if (MBBICommon->mayLoadOrStore())
+ MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
+ // Drop undef flags if they aren't present in all merged instructions.
+ for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBICommon->getOperand(I);
+ if (MO.isReg() && MO.isUndef()) {
+ const MachineOperand &OtherMO = MBBI->getOperand(I);
+ if (!OtherMO.isUndef())
+ MO.setIsUndef(false);
+ }
+ }
+
+ ++MBBI;
+ ++MBBICommon;
+ }
+}
+
+void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
+  for (unsigned int i = 0; i != SameTails.size(); ++i) {
+ if (i != commonTailIndex) {
+ NextCommonInsts[i] = SameTails[i].getTailStartPos();
+ mergeOperations(SameTails[i].getTailStartPos(), *MBB);
+ } else {
+ assert(SameTails[i].getTailStartPos() == MBB->begin() &&
+ "MBB is not a common tail only block");
+ }
+ }
+
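+  // Walk the common tail; for each real instruction, merge its DebugLoc with
+  // the corresponding instruction in every other block, advancing the
+  // per-block cursors in NextCommonInsts in lockstep.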
+ for (auto &MI : *MBB) {
+ if (!countsAsInstruction(MI))
+ continue;
+ DebugLoc DL = MI.getDebugLoc();
+    for (unsigned int i = 0; i < NextCommonInsts.size(); ++i) {
+ if (i == commonTailIndex)
+ continue;
+
+ auto &Pos = NextCommonInsts[i];
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ while (!countsAsInstruction(*Pos)) {
+ ++Pos;
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ }
+ assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
+ DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
+ NextCommonInsts[i] = ++Pos;
+ }
+ MI.setDebugLoc(DL);
+ }
+
+ if (UpdateLiveIns) {
+ LivePhysRegs NewLiveIns(*TRI);
+ computeLiveIns(NewLiveIns, *MBB);
+ LiveRegs.init(*TRI);
+
+ // The flag merging may lead to some register uses no longer using the
+    // <undef> flag; add IMPLICIT_DEFs in the predecessors as necessary.
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveRegs.clear();
+ LiveRegs.addLiveOuts(*Pred);
+ MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
+ for (Register Reg : NewLiveIns) {
+ if (!LiveRegs.available(*MRI, Reg))
+ continue;
+
+ // Skip the register if we are about to add one of its super registers.
+        // TODO: Common this up with the same logic in addLiveIns().
+ if (any_of(TRI->superregs(Reg), [&](MCPhysReg SReg) {
+ return NewLiveIns.contains(SReg) && !MRI->isReserved(SReg);
+ }))
+ continue;
+
+ DebugLoc DL;
+ BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
+ Reg);
+ }
+ }
+
+ MBB->clearLiveIns();
+ addLiveIns(*MBB, NewLiveIns);
+ }
+}
+
+// See if any of the blocks in MergePotentials (which all have SuccBB as a
+// successor, or all have no successor if it is null) can be tail-merged.
+// If there is a successor, any blocks in MergePotentials that are not
+// tail-merged and are not immediately before Succ must have an unconditional
+// branch to Succ added (but the predecessor/successor lists need no
+// adjustment). The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+// MinCommonTailLength - Except for the special cases below, tail-merge if
+// there are at least this many instructions in common.
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB,
+ unsigned MinCommonTailLength) {
+ bool MadeChange = false;
+
+ LLVM_DEBUG(
+ dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs()
+ << printMBBReference(*MergePotentials[i].getBlock())
+ << (i == e - 1 ? "" : ", ");
+ dbgs() << "\n"; if (SuccBB) {
+ dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from "
+ << printMBBReference(*PredBB) << "\n";
+ } dbgs() << "Looking for common tails of at least "
+ << MinCommonTailLength << " instruction"
+ << (MinCommonTailLength == 1 ? "" : "s") << '\n';);
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ array_pod_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ MinCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least MinCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and is not the entry
+ // block/an EH pad, which we can't jump to), we can treat all blocks with
+ // this same tail at once. Use PredBB if that is one of the possibilities,
+ // as that will not introduce any extra branches.
+ MachineBasicBlock *EntryBB =
+ &MergePotentials.front().getBlock()->getParent()->front();
+ unsigned commonTailIndex = SameTails.size();
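+    // commonTailIndex == SameTails.size() means no suitable block has been
+    // chosen yet.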
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock() && !SameTails[1].getBlock()->isEHPad())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock() &&
+ !SameTails[0].getBlock()->isEHPad())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if ((MBB == EntryBB || MBB->isEHPad()) &&
+ SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // Recompute common tail MBB's edge weights and block frequency.
+ setCommonTailEdgeWeights(*MBB);
+
+ // Merge debug locations, MMOs and undef flags across identical instructions
+    // for the common tail.
+ mergeCommonTails(commonTailIndex);
+
+    // MBB is the common tail. Adjust all other BBs to jump to this one.
+ // Traversal must be forwards so erases work.
+ LLVM_DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)
+ << " for ");
+    for (unsigned int i = 0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ LLVM_DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())
+ << (i == e - 1 ? "" : ", "));
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+ if (!EnableTailMerge)
+ return MadeChange;
+
+ // First find blocks with no successors.
+ // Block placement may create new tail merging opportunities for these blocks.
+ MergePotentials.clear();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (const MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+  // (2) altering conditional branches so they branch to the other block,
+  // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() < 2) continue;
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = &*I;
+ MachineBasicBlock *PredBB = &*std::prev(I);
+ MergePotentials.clear();
+ MachineLoop *ML;
+
+ // Bail if merging after placement and IBB is the loop header because
+ // -- If merging predecessors that belong to the same loop as IBB, the
+    // common tail of merged predecessors may become the loop top if block
+    // placement is called again, and the predecessors may then branch to this
+    // common tail and require more branches. This can be relaxed if
+    // MachineBlockPlacement::findBestLoopTop is more flexible.
+    // -- If merging predecessors that do not belong to the same loop as IBB,
+    // the loop info of IBB's loop and the other loops may be affected.
+    // Calling block placement again may make a big change to the layout and
+    // eliminate the reason to do tail merging here.
+ if (AfterBlockPlacement && MLI) {
+ ML = MLI->getLoopFor(IBB);
+ if (ML && IBB == ML->getHeader())
+ continue;
+ }
+
+ for (MachineBasicBlock *PBB : I->predecessors()) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+
+ if (TriedMerging.count(PBB))
+ continue;
+
+      // Skip blocks that loop to themselves; we can't tail merge these.
+ if (PBB == IBB)
+ continue;
+
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB).second)
+ continue;
+
+ // Skip blocks which may jump to a landing pad or jump from an asm blob.
+ // Can't tail merge these.
+ if (PBB->hasEHPadSuccessor() || PBB->mayHaveInlineAsmBr())
+ continue;
+
+ // After block placement, only consider predecessors that belong to the
+      // same loop as IBB. The reason is the same as above for skipping the
+      // loop header.
+ if (AfterBlockPlacement && MLI)
+ if (ML != MLI->getLoopFor(PBB))
+ continue;
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and we cannot reverse the
+ // branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->reverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB) {
+ auto Next = ++PBB->getIterator();
+ if (Next != MF.end())
+ FBB = &*Next;
+ }
+ }
+
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl = PBB->findBranchDebugLoc();
+ TII->removeBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
+ NewCond, dl);
+ }
+
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB));
+ }
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks multiple
+ // times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
+
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);
+
+ // Reinsert an unconditional branch if needed. The 1 below can occur as a
+ // result of removing blocks in TryTailMergeBlocks.
+ PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+
+ return MadeChange;
+}
+
+void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
+ SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
+ BlockFrequency AccumulatedMBBFreq;
+
+ // Aggregate edge frequency of successor edge j:
+ // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
+ // where bb is a basic block that is in SameTails.
+ for (const auto &Src : SameTails) {
+ const MachineBasicBlock *SrcMBB = Src.getBlock();
+ BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
+ AccumulatedMBBFreq += BlockFreq;
+
+    // It is not necessary to recompute edge weights if TailMBB has fewer than
+    // two successors.
+ if (TailMBB.succ_size() <= 1)
+ continue;
+
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq)
+ *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);
+ }
+
+ MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);
+
+ if (TailMBB.succ_size() <= 1)
+ return;
+
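+  // Normalize the accumulated edge frequencies into branch probabilities on
+  // the common tail's successor edges.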
+ auto SumEdgeFreq =
+ std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0))
+ .getFrequency();
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ if (SumEdgeFreq > 0) {
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq) {
+ auto Prob = BranchProbability::getBranchProbability(
+ EdgeFreq->getFrequency(), SumEdgeFreq);
+ TailMBB.setSuccProbability(SuccI, Prob);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+ // Renumbering blocks alters EH scope membership, recalculate it.
+ EHScopeMembership = getEHScopeMembership(MF);
+
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(MF))) {
+ MadeChange |= OptimizeBlock(&MBB);
+
+ // If it is dead, remove it.
+ if (MBB.pred_empty() && !MBB.isMachineBlockAddressTaken()) {
+ RemoveDeadBlock(&MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+
+ return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+ return MBB->getFirstNonDebugInstr(true) == MBB->end();
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr();
+ assert(I != MBB->end() && "empty block!");
+ return I->isBranch();
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// would result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ assert(MBB1 && MBB2 && "Unknown MachineBasicBlock");
+
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr();
+ MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr();
+ if (MBB1I == MBB1->end() || MBB2I == MBB2->end())
+ return false;
+
+  // If there is a clear successor ordering, we make sure that one block
+  // will fall through to the next.
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions on the block.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I != MBB.end() && I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
+}
+
+static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock &PredMBB) {
+ auto InsertBefore = PredMBB.getFirstTerminator();
+ for (MachineInstr &MI : MBB.instrs())
+ if (MI.isDebugInstr()) {
+ TII->duplicate(PredMBB, InsertBefore, MI);
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: "
+ << MI);
+ }
+}
+
+static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB) {
+ auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin());
+ for (MachineInstr &MI : MBB.instrs())
+ if (MI.isDebugInstr()) {
+ TII->duplicate(SuccMBB, InsertBefore, MI);
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: "
+ << MI);
+ }
+}
+
+// Try to salvage DBG_VALUE instructions from an otherwise empty block. If such
+// a basic block is removed we would lose the debug information unless we have
+// copied the information to a predecessor/successor.
+//
+// TODO: This function only handles some simple cases. An alternative would be
+// to run a heavier analysis, such as the LiveDebugValues pass, before we do
+// branch folding.
+static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII,
+ MachineBasicBlock &MBB) {
+ assert(IsEmptyBlock(&MBB) && "Expected an empty block (except debug info).");
+ // If this MBB is the only predecessor of a successor it is legal to copy
+ // DBG_VALUE instructions to the beginning of the successor.
+ for (MachineBasicBlock *SuccBB : MBB.successors())
+ if (SuccBB->pred_size() == 1)
+ copyDebugInfoToSuccessor(TII, MBB, *SuccBB);
+ // If this MBB is the only successor of a predecessor it is legal to copy the
+ // DBG_VALUE instructions to the end of the predecessor (just before the
+ // terminators, assuming that the terminator isn't affecting the DBG_VALUE).
+ for (MachineBasicBlock *PredBB : MBB.predecessors())
+ if (PredBB->succ_size() == 1)
+ copyDebugInfoToPredecessor(TII, MBB, *PredBB);
+}
+
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB->getIterator();
+ ++FallThrough;
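+  // FallThrough now points at the block laid out after MBB, or at MF.end()
+  // if MBB is the last block in the function.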
+
+ // Make sure MBB and FallThrough belong to the same EH scope.
+ bool SameEHScope = true;
+ if (!EHScopeMembership.empty() && FallThrough != MF.end()) {
+ auto MBBEHScope = EHScopeMembership.find(MBB);
+ assert(MBBEHScope != EHScopeMembership.end());
+ auto FallThroughEHScope = EHScopeMembership.find(&*FallThrough);
+ assert(FallThroughEHScope != EHScopeMembership.end());
+ SameEHScope = MBBEHScope->second == FallThroughEHScope->second;
+ }
+
+ // Analyze the branch in the current block. As a side-effect, this may cause
+ // the block to become empty.
+ MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable =
+ TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
+ SameEHScope) {
+ salvageDebugInfoFromEmptyBlock(TII, *MBB);
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else if (FallThrough->isEHPad()) {
+      // Don't rewrite to a landing pad fallthrough. That could lead to the case
+ // where a BB jumps to more than one landing pad.
+ // TODO: Is it ever worth rewriting predecessors which don't already
+ // jump to a landing pad, and so can safely jump to the fallthrough?
+ } else if (MBB->isSuccessor(&*FallThrough)) {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->removeBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // analyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
+ LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (!PrevBB.empty()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugInstr() && MBBIter->isDebugInstr()) {
+ if (!MBBIter->isIdenticalTo(*PrevBBIter))
+ break;
+ MachineInstr &DuplicateDbg = *MBBIter;
+          ++MBBIter; --PrevBBIter;
+ DuplicateDbg.eraseFromParent();
+ }
+ }
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && !PriorFBB) {
+ TII->removeBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->reverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+  // due to loops) than it is to exit it. This helps with asserts in loops
+  // etc., by moving the assert block out of the loop body.
+ if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+      // blocks. If neither has successors and PredBB is the second-to-last
+      // block in the function, we'd just keep swapping the two blocks for the
+      // last position. Only do the swap if one is clearly better to fall
+      // through than the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->reverseBranchCondition(NewPriorCond)) {
+ LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(&MF.back());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ if (!IsEmptyBlock(MBB)) {
+ MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+ if (TII->isUnconditionalTailCall(TailCall)) {
+ SmallVector<MachineBasicBlock *> PredsChanged;
+ for (auto &Pred : MBB->predecessors()) {
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ // Only eliminate if MBB == TBB (Taken Basic Block)
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
+ PredTBB != PredFBB) {
+ // The predecessor has a conditional branch to this block which
+ // consists of only a tail call. Try to fold the tail call into the
+ // conditional branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ PredsChanged.push_back(Pred);
+ }
+ }
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ if (!PredsChanged.empty()) {
+ NumTailCalls += PredsChanged.size();
+ for (auto &Pred : PredsChanged)
+ Pred->removeSuccessor(MBB);
+
+ return true;
+ }
+ }
+ }
+
+ if (!CurUnAnalyzable) {
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->reverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ TII->removeBranch(*MBB);
+ TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && !CurFBB &&
+ IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->removeBranch(*MBB);
+ // If the only things remaining in the block are debug info, remove these
+ // as well, so this will behave the same as an empty block in non-debug
+ // mode.
+ if (IsEmptyBlock(MBB)) {
+ // Make the block empty, losing the debug info (we could probably
+ // improve this in some cases.)
+ MBB->erase(MBB->begin(), MBB->end());
+ }
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (!PriorTBB) {
+ assert(PriorCond.empty() && !PriorFBB &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(!PriorFBB && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+ }
+
+ // Iterate through all the predecessors, revectoring each in-turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+        while (PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch.
+ MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->analyzeBranch(
+ *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
+ TII->removeBranch(*PMBB);
+ NewCurCond.clear();
+ TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isEHPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, and analyzeBranch thinks it _could_ fallthrough to this
+ // block, move this block right after it.
+ for (MachineBasicBlock *PredBB : MBB->predecessors()) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) &&
+ (PredTBB == MBB || PredFBB == MBB) &&
+ (!CurFallsThru || !CurTBB || !CurFBB) &&
+ (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
+ CurCond.clear();
+ TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check analyzable branch-successors to see if we can move this block
+ // before one.
+ if (!CurUnAnalyzable) {
+ for (MachineBasicBlock *SuccBB : {CurFBB, CurTBB}) {
+ if (!SuccBB)
+ continue;
+ // Analyze the branch at the end of the block before the succ.
+ MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
+
+ // If this block doesn't already fall-through to that successor, and
+ // if the succ doesn't already have a block that can fall through into
+ // it, we can arrange for the fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function. There is no real
+ // advantage in "falling through" to an EH block, so we don't want to
+ // perform this transformation for that case.
+ //
+ // Also, Windows EH introduced the possibility of an arbitrary number of
+ // successors to a given block. The analyzeBranch call does not consider
+ // exception handling and so we can get in a state where a block
+ // containing a call is followed by multiple EH blocks that would be
+ // rotated infinitely at the end of the function if the transformation
+ // below were performed for EH "FallThrough" blocks. Therefore, even if
+ // that appears not to be happening anymore, we should assume that it is
+      // possible and not remove the "!FallThrough->isEHPad()" condition below.
+ MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !FallThrough->isEHPad() &&
+ !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(&*FallThrough)) {
+ MBB->moveAfter(&MF.back());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF))
+ MadeChange |= HoistCommonCodeInSuccs(&MBB);
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock *SuccBB : BB->successors())
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ return nullptr;
+}
+
+template <class Container>
+static void addRegAndItsAliases(Register Reg, const TargetRegisterInfo *TRI,
+ Container &Set) {
+ if (Reg.isPhysical()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Set.insert(*AI);
+ } else {
+ Set.insert(Reg);
+ }
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator,
+/// however if the terminator is a conditional branch and its previous
+/// instruction is the flag setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<Register, 4> &Uses,
+ SmallSet<Register, 4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(*Loc))
+ return MBB->end();
+
+ for (const MachineOperand &MO : Loc->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ addRegAndItsAliases(Reg, TRI, Uses);
+ } else {
+ if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+
+ // If the terminator defines a register, make sure we don't hoist
+ // the instruction whose def might be clobbered by the terminator.
+ addRegAndItsAliases(Reg, TRI, Defs);
+ }
+ }
+
+ if (Uses.empty())
+ return Loc;
+ // If the terminator is the only instruction in the block and Uses is not
+ // empty (or we would have returned above), we can still safely hoist
+ // instructions just before the terminator as long as the Defs/Uses are not
+ // violated (which is checked in HoistCommonCodeInSuccs).
+ if (Loc == MBB->begin())
+ return Loc;
+
+  // The terminator is probably a conditional branch; try not to separate the
+  // branch from the condition-setting instruction.
+ MachineBasicBlock::iterator PI = prev_nodbg(Loc, MBB->begin());
+
+ bool IsDef = false;
+ for (const MachineOperand &MO : PI->operands()) {
+ // If PI has a regmask operand, it is probably a call. Separate away.
+ if (MO.isRegMask())
+ return Loc;
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg)) {
+ IsDef = true;
+ break;
+ }
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+  // Be conservative: don't insert an instruction above something that may
+  // have side effects. And since it's potentially bad to separate a
+  // flag-setting instruction from the conditional branch, just abort the
+  // optimization completely.
+  // Also avoid moving code above a predicated instruction, since it's hard to
+  // reason about register liveness with predicated instructions.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))
+ return MBB->end();
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (const MachineOperand &MO : PI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ addRegAndItsAliases(Reg, TRI, Uses);
+ } else {
+ if (Uses.erase(Reg)) {
+ if (Reg.isPhysical()) {
+ for (MCPhysReg SubReg : TRI->subregs(Reg))
+ Uses.erase(SubReg); // Use sub-registers to be conservative
+ }
+ }
+ addRegAndItsAliases(Reg, TRI, Defs);
+ }
+ }
+
+ return PI;
+}
+
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+  // Restrict the optimization to cases where MBB is the only predecessor;
+  // there it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<Register, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallSet<Register, 4> ActiveDefsSet, AllDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
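+  // Walk the two successors in lockstep, identifying the longest identical
+  // prefix of instructions that is safe to hoist above the branch in MBB.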
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ TIB = skipDebugInstructionsForward(TIB, TIE, false);
+ FIB = skipDebugInstructionsForward(FIB, FIE, false);
+ if (TIB == TIE || FIB == FIE)
+ break;
+
+ if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(*TIB))
+      // Hard to reason about register liveness with predicated instructions.
+ break;
+
+ bool IsSafe = true;
+ for (MachineOperand &MO : TIB->operands()) {
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+          // Don't hoist the instruction if the def would be clobbered by the
+          // instruction at the point of insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, killed r1
+ IsSafe = false;
+ break;
+ }
+ } else if (!ActiveDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore))
+ break;
+
+    // Remove kills from ActiveDefsSet; these registers had short live ranges.
+ for (const MachineOperand &MO : TIB->all_uses()) {
+ if (!MO.isKill())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!AllDefsSet.count(Reg)) {
+ continue;
+ }
+ if (Reg.isPhysical()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ ActiveDefsSet.erase(*AI);
+ } else {
+ ActiveDefsSet.erase(Reg);
+ }
+ }
+
+ // Track local defs so we can update liveins.
+ for (const MachineOperand &MO : TIB->all_defs()) {
+ if (MO.isDead())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg || Reg.isVirtual())
+ continue;
+ addRegAndItsAliases(Reg, TRI, ActiveDefsSet);
+ addRegAndItsAliases(Reg, TRI, AllDefsSet);
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ if (UpdateLiveIns) {
+ recomputeLiveIns(*TBB);
+ recomputeLiveIns(*FBB);
+ }
+
+ ++NumHoist;
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
new file mode 100644
index 000000000000..63b2ef04b21b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,200 @@
+//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Compiler.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class MBFIWrapper;
+class ProfileSummaryInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+ class LLVM_LIBRARY_VISIBILITY BranchFolder {
+ public:
+ explicit BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
+ MBFIWrapper &FreqInfo,
+ const MachineBranchProbabilityInfo &ProbInfo,
+ ProfileSummaryInfo *PSI,
+ // Min tail length to merge. Defaults to commandline
+ // flag. Ignored for optsize.
+ unsigned MinTailLength = 0);
+
+    /// Perform branch folding, tail merging and other CFG optimizations on the
+ /// given function. Block placement changes the layout and may create new
+ /// tail merging opportunities.
+ bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineLoopInfo *mli = nullptr,
+ bool AfterPlacement = false);
+
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+
+ using MPIterator = std::vector<MergePotentialsElt>::iterator;
+
+ std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+ DenseMap<const MachineBasicBlock *, int> EHScopeMembership;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool AfterBlockPlacement = false;
+ bool EnableTailMerge = false;
+ bool EnableHoistCommonCode = false;
+ bool UpdateLiveIns = false;
+ unsigned MinCommonTailLength;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+ LivePhysRegs LiveRegs;
+
+ private:
+ MBFIWrapper &MBBFreqInfo;
+ const MachineBranchProbabilityInfo &MBPI;
+ ProfileSummaryInfo *PSI;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB,
+ unsigned MinCommonTailLength);
+ void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
+
+ /// Delete the instruction OldInst and everything after it, replacing it
+ /// with an unconditional branch to NewDest.
+ void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock &NewDest);
+
+ /// Given a machine basic block and an iterator into it, split the MBB so
+    /// that the part before the iterator falls through into the part starting
+    /// at the iterator. This returns the new MBB.
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1,
+ const BasicBlock *BB);
+
+ /// Look through all the blocks in MergePotentials that have hash CurHash
+ /// (guaranteed to match the last element). Build the vector SameTails of
+ /// all those that have the (same) largest number of instructions in common
+ /// of any pair of these blocks. SameTails entries contain an iterator into
+ /// MergePotentials (from which the MachineBasicBlock can be found) and a
+ /// MachineBasicBlock::iterator into that MBB indicating the instruction
+ /// where the matching code sequence begins. Order of elements in SameTails
+ /// is the reverse of the order in which those blocks appear in
+ /// MergePotentials (where they are not necessarily consecutive).
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+
+ /// Remove all blocks with hash CurHash from MergePotentials, restoring
+ /// branches at ends of blocks as appropriate.
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+
+ /// None of the blocks to be tail-merged consist only of the common tail.
+ /// Create a block that does by splitting one.
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ MachineBasicBlock *SuccBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
+
+ /// Create merged DebugLocs of identical instructions across SameTails and
+    /// assign them to the instructions in the common tail; also merge MMOs
+    /// and undef flags.
+ void mergeCommonTails(unsigned commonTailIndex);
+
+ bool OptimizeBranches(MachineFunction &MF);
+
+ /// Analyze and optimize control flow related to the specified block. This
+ /// is never called on the entry block.
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+
+ /// Remove the specified dead machine basic block from the function,
+ /// updating the CFG.
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+
+ /// Hoist common instruction sequences at the start of basic blocks to their
+ /// common predecessor.
+ bool HoistCommonCode(MachineFunction &MF);
+
+    /// If the successors of MBB have a common instruction sequence at their
+    /// start, move those instructions before MBB's terminator if it's legal.
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
new file mode 100644
index 000000000000..05494f1ddc67
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -0,0 +1,637 @@
+//===- BranchRelaxation.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "branch-relaxation"
+
+STATISTIC(NumSplit, "Number of basic blocks split");
+STATISTIC(NumConditionalRelaxed, "Number of conditional branches relaxed");
+STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
+
+#define BRANCH_RELAX_NAME "Branch relaxation pass"
+
+namespace {
+
+class BranchRelaxation : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset = 0;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size = 0;
+
+ BasicBlockInfo() = default;
+
+ /// Compute the offset immediately following this block. \p MBB is the next
+ /// block.
+ unsigned postOffset(const MachineBasicBlock &MBB) const {
+ const unsigned PO = Offset + Size;
+ const Align Alignment = MBB.getAlignment();
+ const Align ParentAlign = MBB.getParent()->getAlignment();
+ if (Alignment <= ParentAlign)
+ return alignTo(PO, Alignment);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not it will insert nops. Assume that it will.
+ return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
+ }
+ };
+
+ SmallVector<BasicBlockInfo, 16> BlockInfo;
+ std::unique_ptr<RegScavenger> RS;
+ LivePhysRegs LiveRegs;
+
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ bool relaxBranchInstructions();
+ void scanFunction();
+
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB);
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB,
+ const BasicBlock *BB);
+
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB);
+ void adjustBlockOffsets(MachineBasicBlock &Start);
+ bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const;
+
+ bool fixupConditionalBranch(MachineInstr &MI);
+ bool fixupUnconditionalBranch(MachineInstr &MI);
+ uint64_t computeBlockSize(const MachineBasicBlock &MBB) const;
+ unsigned getInstrOffset(const MachineInstr &MI) const;
+ void dumpBBs();
+ void verify();
+
+public:
+ static char ID;
+
+ BranchRelaxation() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return BRANCH_RELAX_NAME; }
+};
+
+} // end anonymous namespace
+
+char BranchRelaxation::ID = 0;
+
+char &llvm::BranchRelaxationPassID = BranchRelaxation::ID;
+
+INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false)
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void BranchRelaxation::verify() {
+#ifndef NDEBUG
+ unsigned PrevNum = MF->begin()->getNumber();
+ for (MachineBasicBlock &MBB : *MF) {
+ const unsigned Num = MBB.getNumber();
+ assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
+ assert(BlockInfo[Num].Size == computeBlockSize(MBB));
+ PrevNum = Num;
+ }
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
+ J != MBB.end(); J = std::next(J)) {
+ MachineInstr &MI = *J;
+ if (!MI.isConditionalBranch() && !MI.isUnconditionalBranch())
+ continue;
+ if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+ continue;
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ assert(isBlockInRange(MI, *DestBB));
+ }
+ }
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// print block size and offset information - debugging
+LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
+ for (auto &MBB : *MF) {
+ const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
+ dbgs() << format("%%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+ << format("size=%#x\n", BBI.Size);
+ }
+}
+#endif
+
+/// scanFunction - Do the initial scan of the function, building up
+/// information about each block.
+void BranchRelaxation::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineBasicBlock &MBB : *MF)
+ BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
+
+ // Compute block offsets and known bits.
+ adjustBlockOffsets(*MF->begin());
+}
+
+/// computeBlockSize - Compute the size for MBB.
+uint64_t BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) const {
+ uint64_t Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+/// getInstrOffset - Return the current offset of the specified machine
+/// instruction from the start of the function. This offset changes as stuff is
+/// moved around inside the function.
+unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const {
+ const MachineBasicBlock *MBB = MI.getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+
+ return Offset;
+}
+
+void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
+ unsigned PrevNum = Start.getNumber();
+ for (auto &MBB :
+ make_range(std::next(MachineFunction::iterator(Start)), MF->end())) {
+ unsigned Num = MBB.getNumber();
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB);
+
+ PrevNum = Num;
+ }
+}
+
+/// Create a new empty MachineBasicBlock and insert it after \p OrigBB.
+MachineBasicBlock *
+BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigBB) {
+ return createNewBlockAfter(OrigBB, OrigBB.getBasicBlock());
+}
+
+/// Create a new empty MachineBasicBlock with \p BB as its BasicBlock
+/// and insert it after \p OrigMBB.
+MachineBasicBlock *
+BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB,
+ const BasicBlock *BB) {
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++OrigMBB.getIterator(), NewBB);
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ return NewBB;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB) {
+ MachineBasicBlock *OrigBB = MI.getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->insert(++OrigBB->getIterator(), NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ TII->insertUnconditionalBranch(*OrigBB, NewBB, DebugLoc());
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ NewBB->transferSuccessors(OrigBB);
+ OrigBB->addSuccessor(NewBB);
+ OrigBB->addSuccessor(DestBB);
+
+ // Cleanup potential unconditional branch to successor block.
+ // Note that updateTerminator may change the size of the blocks.
+ OrigBB->updateTerminator(NewBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ BlockInfo[OrigBB->getNumber()].Size = computeBlockSize(*OrigBB);
+
+ // Figure out how large the NewBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBlockOffsets(*OrigBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *NewBB);
+
+ ++NumSplit;
+
+ return NewBB;
+}
+
+/// isBlockInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool BranchRelaxation::isBlockInRange(
+ const MachineInstr &MI, const MachineBasicBlock &DestBB) const {
+ int64_t BrOffset = getInstrOffset(MI);
+ int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
+
+ if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
+ return true;
+
+ LLVM_DEBUG(dbgs() << "Out of range branch to destination "
+ << printMBBReference(DestBB) << " from "
+ << printMBBReference(*MI.getParent()) << " to "
+ << DestOffset << " offset " << DestOffset - BrOffset << '\t'
+ << MI);
+
+ return false;
+}
+
+/// fixupConditionalBranch - Fix up a conditional branch whose destination is
+/// too far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ MachineBasicBlock *NewBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+
+ auto insertUncondBranch = [&](MachineBasicBlock *MBB,
+ MachineBasicBlock *DestBB) {
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int NewBrSize = 0;
+ TII->insertUnconditionalBranch(*MBB, DestBB, DL, &NewBrSize);
+ BBSize += NewBrSize;
+ };
+ auto insertBranch = [&](MachineBasicBlock *MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ SmallVectorImpl<MachineOperand>& Cond) {
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int NewBrSize = 0;
+ TII->insertBranch(*MBB, TBB, FBB, Cond, DL, &NewBrSize);
+ BBSize += NewBrSize;
+ };
+ auto removeBranch = [&](MachineBasicBlock *MBB) {
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int RemovedSize = 0;
+ TII->removeBranch(*MBB, &RemovedSize);
+ BBSize -= RemovedSize;
+ };
+
+ auto finalizeBlockChanges = [&](MachineBasicBlock *MBB,
+ MachineBasicBlock *NewBB) {
+ // Keep the block offsets up to date.
+ adjustBlockOffsets(*MBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (NewBB && TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *NewBB);
+ };
+
+ bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
+ assert(!Fail && "branches to be relaxed must be analyzable");
+ (void)Fail;
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // tbz L1
+ // =>
+ // tbnz L2
+ // b L1
+ // L2:
+
+ bool ReversedCond = !TII->reverseBranchCondition(Cond);
+ if (ReversedCond) {
+ if (FBB && isBlockInRange(MI, *FBB)) {
+ // Last MI in the BB is an unconditional branch. We can simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ LLVM_DEBUG(dbgs() << " Invert condition and swap "
+ "its destination with "
+ << MBB->back());
+
+ removeBranch(MBB);
+ insertBranch(MBB, FBB, TBB, Cond);
+ finalizeBlockChanges(MBB, nullptr);
+ return true;
+ }
+ if (FBB) {
+ // We need to split the basic block here to obtain two long-range
+ // unconditional branches.
+ NewBB = createNewBlockAfter(*MBB);
+
+ insertUncondBranch(NewBB, FBB);
+ // Update the successor lists according to the transformation to follow.
+ // Do it here since if there's no split, no update is needed.
+ MBB->replaceSuccessor(FBB, NewBB);
+ NewBB->addSuccessor(FBB);
+ }
+
+ // We now have an appropriate fall-through block in place (either naturally or
+ // just created), so we can use the inverted condition.
+ MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+
+ LLVM_DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB)
+ << ", invert condition and change dest. to "
+ << printMBBReference(NextBB) << '\n');
+
+ removeBranch(MBB);
+ // Insert a new conditional branch and a new unconditional branch.
+ insertBranch(MBB, &NextBB, TBB, Cond);
+
+ finalizeBlockChanges(MBB, NewBB);
+ return true;
+ }
+ // Branch cond can't be inverted.
+ // In this case we always add a block after the MBB.
+ LLVM_DEBUG(dbgs() << " The branch condition can't be inverted. "
+ << " Insert a new BB after " << MBB->back());
+
+ if (!FBB)
+ FBB = &(*std::next(MachineFunction::iterator(MBB)));
+
+ // This is the block with cond. branch and the distance to TBB is too long.
+ // beq L1
+ // L2:
+
+ // We do the following transformation:
+ // beq NewBB
+ // b L2
+ // NewBB:
+ // b L1
+ // L2:
+
+ NewBB = createNewBlockAfter(*MBB);
+ insertUncondBranch(NewBB, TBB);
+
+ LLVM_DEBUG(dbgs() << " Insert cond B to the new BB "
+ << printMBBReference(*NewBB)
+ << " Keep the exiting condition.\n"
+ << " Insert B to " << printMBBReference(*FBB) << ".\n"
+ << " In the new BB: Insert B to "
+ << printMBBReference(*TBB) << ".\n");
+
+ // Update the successor lists according to the transformation to follow.
+ MBB->replaceSuccessor(TBB, NewBB);
+ NewBB->addSuccessor(TBB);
+
+ // Replace branch in the current (MBB) block.
+ removeBranch(MBB);
+ insertBranch(MBB, NewBB, FBB, Cond);
+
+ finalizeBlockChanges(MBB, NewBB);
+ return true;
+}
+
+bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+ SmallVector<MachineOperand, 4> Cond;
+ unsigned OldBrSize = TII->getInstSizeInBytes(MI);
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+
+ int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
+ int64_t SrcOffset = getInstrOffset(MI);
+
+ assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset));
+
+ BlockInfo[MBB->getNumber()].Size -= OldBrSize;
+
+ MachineBasicBlock *BranchBB = MBB;
+
+ // If this was an expanded conditional branch, there is already a single
+ // unconditional branch in a block.
+ if (!MBB->empty()) {
+ BranchBB = createNewBlockAfter(*MBB);
+
+ // Add live outs.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : Succ->liveins())
+ BranchBB->addLiveIn(LiveIn);
+ }
+
+ BranchBB->sortUniqueLiveIns();
+ BranchBB->addSuccessor(DestBB);
+ MBB->replaceSuccessor(DestBB, BranchBB);
+ }
+
+ DebugLoc DL = MI.getDebugLoc();
+ MI.eraseFromParent();
+
+ // Create the optional restore block and, initially, place it at the end of
+ // function. That block will be placed later if it's used; otherwise, it will
+ // be erased.
+ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(),
+ DestBB->getBasicBlock());
+
+ TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+ DestOffset - SrcOffset, RS.get());
+
+ BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
+ adjustBlockOffsets(*MBB);
+
+ // If RestoreBB is required, try to place just before DestBB.
+ if (!RestoreBB->empty()) {
+ // TODO: For multiple far branches to the same destination, there are
+ // chances that some restore blocks could be shared if they clobber the
+ // same registers and share the same restore sequence. So far, those
+ // restore blocks are just duplicated for each far branch.
+ assert(!DestBB->isEntryBlock());
+ MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+ // Fall through only if PrevBB has no unconditional branch as one of its
+ // terminators.
+ if (auto *FT = PrevBB->getLogicalFallThrough()) {
+ assert(FT == DestBB);
+ TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+ BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+ }
+ // Now, RestoreBB could be placed directly before DestBB.
+ MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+ // Update successors and predecessors.
+ RestoreBB->addSuccessor(DestBB);
+ BranchBB->replaceSuccessor(DestBB, RestoreBB);
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *RestoreBB);
+ // Compute the restore block size.
+ BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+ // Update the offset starting from the previous block.
+ adjustBlockOffsets(*PrevBB);
+ } else {
+ // Remove restore block if it's not required.
+ MF->erase(RestoreBB);
+ }
+
+ return true;
+}
+
+bool BranchRelaxation::relaxBranchInstructions() {
+ bool Changed = false;
+
+ // Relaxing branches involves creating new basic blocks, so re-eval
+ // end() for termination.
+ for (MachineBasicBlock &MBB : *MF) {
+ // Empty block?
+ MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
+ if (Last == MBB.end())
+ continue;
+
+ // Expand the unconditional branch first if necessary. If there is a
+ // conditional branch, this will end up changing the branch destination of
+ // it to be over the newly inserted indirect branch block, which may avoid
+ // the need to try expanding the conditional branch first, saving an extra
+ // jump.
+ if (Last->isUnconditionalBranch()) {
+ // The unconditional branch destination might be unanalyzable; assume these
+ // are OK.
+ if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) {
+ if (!isBlockInRange(*Last, *DestBB)) {
+ fixupUnconditionalBranch(*Last);
+ ++NumUnconditionalRelaxed;
+ Changed = true;
+ }
+ }
+ }
+
+ // Loop over the conditional branches.
+ MachineBasicBlock::iterator Next;
+ for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
+ J != MBB.end(); J = Next) {
+ Next = std::next(J);
+ MachineInstr &MI = *J;
+
+ if (!MI.isConditionalBranch())
+ continue;
+
+ if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+ // FAULTING_OP's destination is not encoded in the instruction stream
+ // and thus never needs to be relaxed.
+ continue;
+
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ if (!isBlockInRange(MI, *DestBB)) {
+ if (Next != MBB.end() && Next->isConditionalBranch()) {
+ // If there are multiple conditional branches, this isn't an
+ // analyzable block. Split later terminators into a new block so
+ // each one will be analyzable.
+
+ splitBlockBeforeInstr(*Next, DestBB);
+ } else {
+ fixupConditionalBranch(MI);
+ ++NumConditionalRelaxed;
+ }
+
+ Changed = true;
+
+ // This may have modified all of the terminators, so start over.
+ Next = MBB.getFirstTerminator();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+
+ LLVM_DEBUG(dbgs() << "***** BranchRelaxation *****\n");
+
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+
+ TRI = ST.getRegisterInfo();
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ RS.reset(new RegScavenger());
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block.
+ scanFunction();
+
+ LLVM_DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs(););
+
+ bool MadeChange = false;
+ while (relaxBranchInstructions())
+ MadeChange = true;
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs());
+
+ BlockInfo.clear();
+
+ return MadeChange;
+}
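
For illustration, a minimal standalone sketch of the worst-case alignment padding estimate made by BasicBlockInfo::postOffset above; the plain-integer alignment model and helper names are assumptions for this sketch, not LLVM's Align API:

#include <cassert>
#include <cstdint>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// Offset/Size describe the current block; BlockAlign/FuncAlign are the byte
// alignments (powers of two) of the next block and of the function.
static uint64_t postOffset(uint64_t Offset, uint64_t Size, uint64_t BlockAlign,
                           uint64_t FuncAlign) {
  const uint64_t PO = Offset + Size;
  if (BlockAlign <= FuncAlign)
    return alignTo(PO, BlockAlign); // Padding is exactly known.
  // The block is aligned more strictly than the function guarantees, so
  // assume the maximum possible nop padding is inserted.
  return alignTo(PO, BlockAlign) + BlockAlign - FuncAlign;
}

int main() {
  assert(postOffset(0, 6, 4, 16) == 8);       // 2 bytes of known padding.
  assert(postOffset(0, 6, 16, 4) == 16 + 12); // plus 12 bytes of worst case.
  return 0;
}
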
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
new file mode 100644
index 000000000000..618e41894b29
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -0,0 +1,305 @@
+//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Break False Dependency pass.
+///
+/// Some instructions have false dependencies which cause unnecessary stalls.
+/// For example, instructions may write part of a register and implicitly
+/// need to read the other parts of the register. This may cause unwanted
+/// stalls preventing otherwise unrelated instructions from executing in
+/// parallel in an out-of-order CPU.
+/// This pass is aimed at identifying and avoiding these dependencies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class BreakFalseDeps : public MachineFunctionPass {
+private:
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+ /// List of undefined register reads in this block in forward order.
+ std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
+
+ /// Storage for register unit liveness.
+ LivePhysRegs LiveRegSet;
+
+ ReachingDefAnalysis *RDA = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ BreakFalseDeps() : MachineFunctionPass(ID) {
+ initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<ReachingDefAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ /// Process the given basic block.
+ void processBasicBlock(MachineBasicBlock *MBB);
+
+ /// Update def-ages for registers defined by MI.
+ /// Also break dependencies on partial defs and undef uses.
+ void processDefs(MachineInstr *MI);
+
+ /// Helps avoid false dependencies on undef registers by updating the
+ /// machine instructions' undef operand to use a register that the instruction
+ /// is truly dependent on, or use a register with clearance higher than Pref.
+ /// Returns true if it was able to find a true dependency, thus not requiring
+ /// a dependency breaking instruction regardless of clearance.
+ bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref);
+
+ /// Return true if it makes sense to break the dependence on a partial
+ /// def or undef use.
+ bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref);
+
+ /// Break false dependencies on undefined register reads.
+ /// Walk the block backward computing precise liveness. This is expensive, so
+ /// we only do it on demand. Note that the occurrence of undefined register
+ /// reads that should be broken is very rare, but when they occur we may have
+ /// many in a single block.
+ void processUndefReads(MachineBasicBlock *);
+};
+
+} // namespace llvm
+
+#define DEBUG_TYPE "break-false-deps"
+
+char BreakFalseDeps::ID = 0;
+INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
+INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
+
+FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }
+
+bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+
+ // We can't change tied operands.
+ if (MI->isRegTiedToDefOperand(OpIdx))
+ return false;
+
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ assert(MO.isUndef() && "Expected undef machine operand");
+
+ // We can't change registers that aren't renamable.
+ if (!MO.isRenamable())
+ return false;
+
+ MCRegister OriginalReg = MO.getReg().asMCReg();
+
+ // Update only undef operands that have reg units that are mapped to one root.
+ for (MCRegUnit Unit : TRI->regunits(OriginalReg)) {
+ unsigned NumRoots = 0;
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ NumRoots++;
+ if (NumRoots > 1)
+ return false;
+ }
+ }
+
+ // Get the undef operand's register class
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+ assert(OpRC && "Not a valid register class");
+
+ // If the instruction has a true dependency, we can hide the false dependency
+ // behind it.
+ for (MachineOperand &CurrMO : MI->all_uses()) {
+ if (CurrMO.isUndef() || !OpRC->contains(CurrMO.getReg()))
+ continue;
+ // We found a true dependency - replace the undef register with the true
+ // dependency.
+ MO.setReg(CurrMO.getReg());
+ return true;
+ }
+
+ // Go over all registers in the register class and find the register with
+ // max clearance or clearance higher than Pref.
+ unsigned MaxClearance = 0;
+ unsigned MaxClearanceReg = OriginalReg;
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
+ for (MCPhysReg Reg : Order) {
+ unsigned Clearance = RDA->getClearance(MI, Reg);
+ if (Clearance <= MaxClearance)
+ continue;
+ MaxClearance = Clearance;
+ MaxClearanceReg = Reg;
+
+ if (MaxClearance > Pref)
+ break;
+ }
+
+ // Update the operand if we found a register with better clearance.
+ if (MaxClearanceReg != OriginalReg)
+ MO.setReg(MaxClearanceReg);
+
+ return false;
+}
+
+bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg();
+ unsigned Clearance = RDA->getClearance(MI, Reg);
+ LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+
+ if (Pref > Clearance) {
+ LLVM_DEBUG(dbgs() << ": Break dependency.\n");
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << ": OK .\n");
+ return false;
+}
+
+void BreakFalseDeps::processDefs(MachineInstr *MI) {
+ assert(!MI->isDebugInstr() && "Won't process debug values");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // Break dependence on undef uses. Do this before updating LiveRegs below.
+ // This can remove a false dependence with no additional instructions.
+ for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef())
+ continue;
+
+ unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref))
+ UndefReads.push_back(std::make_pair(MI, i));
+ }
+ }
+
+ // The code below allows the target to create a new instruction to break the
+ // dependence. That opposes the goal of minimizing size, so bail out now.
+ if (MF->getFunction().hasMinSize())
+ return;
+
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isUse())
+ continue;
+ // Check clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(*MI, i, TRI);
+ }
+}
+
+void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
+ if (UndefReads.empty())
+ return;
+
+ // The code below allows the target to create a new instruction to break the
+ // dependence. That opposes the goal of minimizing size, so bail out now.
+ if (MF->getFunction().hasMinSize())
+ return;
+
+ // Collect this block's live out register units.
+ LiveRegSet.init(*TRI);
+ // We do not need to care about pristine registers as they are just preserved
+ // but not actually used in the function.
+ LiveRegSet.addLiveOutsNoPristines(*MBB);
+
+ MachineInstr *UndefMI = UndefReads.back().first;
+ unsigned OpIdx = UndefReads.back().second;
+
+ for (MachineInstr &I : llvm::reverse(*MBB)) {
+ // Update liveness, including the current instruction's defs.
+ LiveRegSet.stepBackward(I);
+
+ if (UndefMI == &I) {
+ if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
+ TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
+
+ UndefReads.pop_back();
+ if (UndefReads.empty())
+ return;
+
+ UndefMI = UndefReads.back().first;
+ OpIdx = UndefReads.back().second;
+ }
+ }
+}
+
+void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) {
+ UndefReads.clear();
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+ // to try and break dependencies now.
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isDebugInstr())
+ processDefs(&MI);
+ }
+ processUndefReads(MBB);
+}
+
+bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+ MF = &mf;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ RDA = &getAnalysis<ReachingDefAnalysis>();
+
+ RegClassInfo.runOnMachineFunction(mf);
+
+ LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
+
+ // Skip dead blocks, since ReachingDefAnalysis has no idea about the
+ // instructions in them.
+ df_iterator_default_set<MachineBasicBlock *> Reachable;
+ for (MachineBasicBlock *MBB : depth_first_ext(&mf, Reachable))
+ (void)MBB /* Mark all reachable blocks */;
+
+ // Traverse the basic blocks.
+ for (MachineBasicBlock &MBB : mf)
+ if (Reachable.count(&MBB))
+ processBasicBlock(&MBB);
+
+ return false;
+}
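
A rough standalone model of the clearance test applied by shouldBreakDependence above; the register-name map and instruction indices are invented for illustration and are not the ReachingDefAnalysis API:

#include <cstdio>
#include <map>
#include <string>

struct ClearanceModel {
  // Instruction index of the most recent def of each register.
  std::map<std::string, unsigned> LastDef;

  // Distance (in instructions) since Reg was last written.
  unsigned clearance(unsigned CurInstr, const std::string &Reg) const {
    auto It = LastDef.find(Reg);
    return It == LastDef.end() ? ~0u : CurInstr - It->second;
  }

  // Break the false dependency only if the preferred clearance is not met.
  bool shouldBreakDependence(unsigned CurInstr, const std::string &Reg,
                             unsigned Pref) const {
    return Pref > clearance(CurInstr, Reg);
  }
};

int main() {
  ClearanceModel M;
  M.LastDef["xmm0"] = 10; // xmm0 written at instruction 10.
  // At instruction 14 the clearance is only 4; with a preference of 16 a
  // dependency-breaking instruction (e.g. a self-xor) would be inserted.
  std::printf("break? %d\n", M.shouldBreakDependence(14, "xmm0", 16));
  return 0;
}
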
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
new file mode 100644
index 000000000000..c3bf93855111
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
@@ -0,0 +1,120 @@
+//===-- CFGuardLongjmp.cpp - Longjmp symbols for CFGuard --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a machine function pass to insert a symbol after each
+/// call to _setjmp and store this in the MachineFunction's LongjmpTargets
+/// vector. This will be used to emit the table of valid longjmp targets used
+/// by Control Flow Guard.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfguard-longjmp"
+
+STATISTIC(CFGuardLongjmpTargets,
+ "Number of Control Flow Guard longjmp targets");
+
+namespace {
+
+/// MachineFunction pass to insert a symbol after each call to _setjmp and store
+/// this in the MachineFunction's LongjmpTargets vector.
+class CFGuardLongjmp : public MachineFunctionPass {
+public:
+ static char ID;
+
+ CFGuardLongjmp() : MachineFunctionPass(ID) {
+ initializeCFGuardLongjmpPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Control Flow Guard longjmp targets";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char CFGuardLongjmp::ID = 0;
+
+INITIALIZE_PASS(CFGuardLongjmp, "CFGuardLongjmp",
+ "Insert symbols at valid longjmp targets for /guard:cf", false,
+ false)
+FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); }
+
+bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) {
+
+ // Skip modules for which the cfguard flag is not set.
+ if (!MF.getMMI().getModule()->getModuleFlag("cfguard"))
+ return false;
+
+ // Skip functions that do not have calls to _setjmp.
+ if (!MF.getFunction().callsFunctionThatReturnsTwice())
+ return false;
+
+ SmallVector<MachineInstr *, 8> SetjmpCalls;
+
+ // Iterate over all instructions in the function and add calls to functions
+ // that return twice to the list of targets.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // Skip instructions that are not calls.
+ if (!MI.isCall() || MI.getNumOperands() < 1)
+ continue;
+
+ // Iterate over operands to find calls to global functions.
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isGlobal())
+ continue;
+
+ auto *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+ // If the instruction calls a function that returns twice, add
+ // it to the list of targets.
+ if (F->hasFnAttribute(Attribute::ReturnsTwice)) {
+ SetjmpCalls.push_back(&MI);
+ break;
+ }
+ }
+ }
+ }
+
+ if (SetjmpCalls.empty())
+ return false;
+
+ unsigned SetjmpNum = 0;
+
+ // For each possible target, create a new symbol and insert it immediately
+ // after the call to setjmp. Add this symbol to the MachineFunction's list
+ // of longjmp targets.
+ for (MachineInstr *Setjmp : SetjmpCalls) {
+ SmallString<128> SymbolName;
+ raw_svector_ostream(SymbolName) << "$cfgsj_" << MF.getName() << SetjmpNum++;
+ MCSymbol *SjSymbol = MF.getContext().getOrCreateSymbol(SymbolName);
+
+ Setjmp->setPostInstrSymbol(MF, SjSymbol);
+ MF.addLongjmpTarget(SjSymbol);
+ CFGuardLongjmpTargets++;
+ }
+
+ return true;
+}
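
A small standalone sketch of the symbol-naming scheme used in the loop above; the helper function is hypothetical and only mirrors the "$cfgsj_<function><n>" names built with raw_svector_ostream:

#include <cstdio>
#include <string>
#include <vector>

// One symbol per setjmp call site, numbered in the order the calls are found.
static std::vector<std::string>
makeLongjmpTargetSymbols(const std::string &FuncName, unsigned NumSetjmpCalls) {
  std::vector<std::string> Symbols;
  for (unsigned I = 0; I < NumSetjmpCalls; ++I)
    Symbols.push_back("$cfgsj_" + FuncName + std::to_string(I));
  return Symbols;
}

int main() {
  for (const std::string &S : makeLongjmpTargetSymbols("main", 2))
    std::puts(S.c_str()); // prints $cfgsj_main0 and $cfgsj_main1
  return 0;
}
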
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
new file mode 100644
index 000000000000..837dbd77d073
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
@@ -0,0 +1,225 @@
+//===------ CFIFixup.cpp - Insert CFI remember/restore instructions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+// This pass inserts the necessary instructions to adjust for the inconsistency
+// of the call-frame information caused by final machine basic block layout.
+// The pass relies on constraints LLVM imposes on the placement of
+// save/restore points (cf. ShrinkWrap):
+// * there is a single basic block, containing the function prologue
+// * possibly multiple epilogue blocks, where each epilogue block is
+// complete and self-contained, i.e. CSR restore instructions (and the
+// corresponding CFI instructions) are not split across two or more blocks.
+// * prologue and epilogue blocks are outside of any loops
+// Thus, during execution, at the beginning and at the end of each basic block
+// the function can be in one of two states:
+// - "has a call frame", if the function has executed the prologue, and
+// has not executed any epilogue
+// - "does not have a call frame", if the function has not executed the
+// prologue, or has executed an epilogue
+// which can be computed by a single RPO traversal.
+
+// In order to accommodate backends which do not generate unwind info in
+// epilogues we compute an additional property "strong no call frame on entry",
+// which is set for the entry point of the function and for every block
+// reachable from the entry along a path that does not execute the prologue. If
+// this property holds, it takes precedence over the "has a call frame"
+// property.
+
+// From the point of view of the unwind tables, the "has/does not have call
+// frame" state at beginning of each block is determined by the state at the end
+// of the previous block, in layout order. Where these states differ, we insert
+// compensating CFI instructions, which come in two flavours:
+
+// - CFI instructions, which reset the unwind table state to the initial one.
+// This is done by a target specific hook and is expected to be trivial
+// to implement, for example it could be:
+// .cfi_def_cfa <sp>, 0
+// .cfi_same_value <rN>
+// .cfi_same_value <rN-1>
+// ...
+// where <rN> are the callee-saved registers.
+// - CFI instructions, which reset the unwind table state to the one
+// created by the function prologue. These are
+// .cfi_restore_state
+// .cfi_remember_state
+// In this case we also insert a `.cfi_remember_state` after the last CFI
+// instruction in the function prologue.
+//
+// Known limitations:
+// * the pass cannot handle an epilogue preceding the prologue in the basic
+// block layout
+// * the pass does not handle functions where SP is used as a frame pointer and
+// SP adjustments up and down are done in different basic blocks (TODO)
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CFIFixup.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfi-fixup"
+
+char CFIFixup::ID = 0;
+
+INITIALIZE_PASS(CFIFixup, "cfi-fixup",
+ "Insert CFI remember/restore state instructions", false, false)
+FunctionPass *llvm::createCFIFixup() { return new CFIFixup(); }
+
+static bool isPrologueCFIInstruction(const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+ MI.getFlag(MachineInstr::FrameSetup);
+}
+
+static bool containsPrologue(const MachineBasicBlock &MBB) {
+ return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
+}
+
+static bool containsEpilogue(const MachineBasicBlock &MBB) {
+ return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
+ return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+ MI.getFlag(MachineInstr::FrameDestroy);
+ });
+}
+
+bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
+ const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
+ if (!TFL.enableCFIFixup(MF))
+ return false;
+
+ const unsigned NumBlocks = MF.getNumBlockIDs();
+ if (NumBlocks < 2)
+ return false;
+
+ struct BlockFlags {
+ bool Reachable : 1;
+ bool StrongNoFrameOnEntry : 1;
+ bool HasFrameOnEntry : 1;
+ bool HasFrameOnExit : 1;
+ };
+ SmallVector<BlockFlags, 32> BlockInfo(NumBlocks, {false, false, false, false});
+ BlockInfo[0].Reachable = true;
+ BlockInfo[0].StrongNoFrameOnEntry = true;
+
+ // Compute the presence/absence of frame at each basic block.
+ MachineBasicBlock *PrologueBlock = nullptr;
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ for (MachineBasicBlock *MBB : RPOT) {
+ BlockFlags &Info = BlockInfo[MBB->getNumber()];
+
+ // Set to true if the current block contains the prologue or the epilogue,
+ // respectively.
+ bool HasPrologue = false;
+ bool HasEpilogue = false;
+
+ if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
+ PrologueBlock = MBB;
+ HasPrologue = true;
+ }
+
+ if (Info.HasFrameOnEntry || HasPrologue)
+ HasEpilogue = containsEpilogue(*MBB);
+
+ // If the function has a call frame at the entry of the current block or the
+ // current block contains the prologue, then the function has a call frame
+ // at the exit of the block, unless the block contains the epilogue.
+ Info.HasFrameOnExit = (Info.HasFrameOnEntry || HasPrologue) && !HasEpilogue;
+
+ // Set the successors' state on entry.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ BlockFlags &SuccInfo = BlockInfo[Succ->getNumber()];
+ SuccInfo.Reachable = true;
+ SuccInfo.StrongNoFrameOnEntry |=
+ Info.StrongNoFrameOnEntry && !HasPrologue;
+ SuccInfo.HasFrameOnEntry = Info.HasFrameOnExit;
+ }
+ }
+
+ if (!PrologueBlock)
+ return false;
+
+ // Walk the blocks of the function in "physical" order.
+ // Every block inherits the frame state (as recorded in the unwind tables)
+ // of the previous block. If the intended frame state is different, insert
+ // compensating CFI instructions.
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool Change = false;
+ // `InsertPt` always points to the point in a preceding block where we have to
+ // insert a `.cfi_remember_state`, in the case that the current block needs a
+ // `.cfi_restore_state`.
+ MachineBasicBlock *InsertMBB = PrologueBlock;
+ MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
+ for (MachineInstr &MI : *PrologueBlock)
+ if (isPrologueCFIInstruction(MI))
+ InsertPt = std::next(MI.getIterator());
+
+ assert(InsertPt != PrologueBlock->begin() &&
+ "Inconsistent notion of \"prologue block\"");
+
+ // No point starting before the prologue block.
+ // TODO: the unwind tables will still be incorrect if an epilogue physically
+// precedes the prologue.
+ MachineFunction::iterator CurrBB = std::next(PrologueBlock->getIterator());
+ bool HasFrame = BlockInfo[PrologueBlock->getNumber()].HasFrameOnExit;
+ while (CurrBB != MF.end()) {
+ const BlockFlags &Info = BlockInfo[CurrBB->getNumber()];
+ if (!Info.Reachable) {
+ ++CurrBB;
+ continue;
+ }
+
+#ifndef NDEBUG
+ if (!Info.StrongNoFrameOnEntry) {
+ for (auto *Pred : CurrBB->predecessors()) {
+ BlockFlags &PredInfo = BlockInfo[Pred->getNumber()];
+ assert((!PredInfo.Reachable ||
+ Info.HasFrameOnEntry == PredInfo.HasFrameOnExit) &&
+ "Inconsistent call frame state");
+ }
+ }
+#endif
+ if (!Info.StrongNoFrameOnEntry && Info.HasFrameOnEntry && !HasFrame) {
+ // Reset to the "after prologue" state.
+
+ // Insert a `.cfi_remember_state` into the last block known to have a
+ // stack frame.
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+ BuildMI(*InsertMBB, InsertPt, DebugLoc(),
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ // Insert a `.cfi_restore_state` at the beginning of the current block.
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+ InsertPt = BuildMI(*CurrBB, CurrBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ ++InsertPt;
+ InsertMBB = &*CurrBB;
+ Change = true;
+ } else if ((Info.StrongNoFrameOnEntry || !Info.HasFrameOnEntry) &&
+ HasFrame) {
+ // Reset to the state upon function entry.
+ TFL.resetCFIToInitialState(*CurrBB);
+ Change = true;
+ }
+
+ HasFrame = Info.HasFrameOnExit;
+ ++CurrBB;
+ }
+
+ return Change;
+}
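
A standalone sketch of the layout walk at the end of CFIFixup::runOnMachineFunction above; the three-block layout and the BlockState struct are assumptions for illustration (the StrongNoFrameOnEntry refinement is omitted), not the pass's own data structures:

#include <cstdio>
#include <vector>

struct BlockState {
  bool HasFrameOnEntry; // Intended state, from the RPO dataflow.
  bool HasFrameOnExit;
};

int main() {
  // Layout: prologue block, an epilogue block, then a cold block that still
  // expects the frame (e.g. sunk error-handling code).
  std::vector<BlockState> Layout = {{false, true}, {true, false}, {true, true}};
  bool HasFrame = Layout.front().HasFrameOnExit;
  for (unsigned I = 1; I < Layout.size(); ++I) {
    const BlockState &B = Layout[I];
    if (B.HasFrameOnEntry && !HasFrame)
      std::printf("block %u: insert .cfi_restore_state (and a matching "
                  ".cfi_remember_state in an earlier frame block)\n", I);
    else if (!B.HasFrameOnEntry && HasFrame)
      std::printf("block %u: reset CFI to the initial (no-frame) state\n", I);
    HasFrame = B.HasFrameOnExit;
  }
  return 0;
}
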
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
new file mode 100644
index 000000000000..6a024287f002
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -0,0 +1,449 @@
+//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass verifies incoming and outgoing CFA information of basic
+/// blocks. CFA information describes the offset and register set by CFI
+/// directives, valid at the start and end of a basic block. This pass checks
+/// that outgoing information of predecessors matches incoming information of
+/// their successors. Then it checks if blocks have correct CFA calculation rule
+/// set and inserts additional CFI instructions at their beginnings if they
+/// don't. CFI instructions are inserted if basic blocks have incorrect offset
+/// or register set by previous blocks, as a result of a non-linear layout of
+/// blocks in a function.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCDwarf.h"
+using namespace llvm;
+
+static cl::opt<bool> VerifyCFI("verify-cfiinstrs",
+ cl::desc("Verify Call Frame Information instructions"),
+ cl::init(false),
+ cl::Hidden);
+
+namespace {
+class CFIInstrInserter : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ CFIInstrInserter() : MachineFunctionPass(ID) {
+ initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!MF.needsFrameMoves())
+ return false;
+
+ MBBVector.resize(MF.getNumBlockIDs());
+ calculateCFAInfo(MF);
+
+ if (VerifyCFI) {
+ if (unsigned ErrorNum = verify(MF))
+ report_fatal_error("Found " + Twine(ErrorNum) +
+ " in/out CFI information errors.");
+ }
+ bool insertedCFI = insertCFIInstrs(MF);
+ MBBVector.clear();
+ return insertedCFI;
+ }
+
+ private:
+ struct MBBCFAInfo {
+ MachineBasicBlock *MBB;
+ /// Value of cfa offset valid at basic block entry.
+ int IncomingCFAOffset = -1;
+ /// Value of cfa offset valid at basic block exit.
+ int OutgoingCFAOffset = -1;
+ /// Value of cfa register valid at basic block entry.
+ unsigned IncomingCFARegister = 0;
+ /// Value of cfa register valid at basic block exit.
+ unsigned OutgoingCFARegister = 0;
+ /// Set of callee saved registers saved at basic block entry.
+ BitVector IncomingCSRSaved;
+ /// Set of callee saved registers saved at basic block exit.
+ BitVector OutgoingCSRSaved;
+ /// If in/out cfa offset and register values for this block have already
+ /// been set or not.
+ bool Processed = false;
+ };
+
+#define INVALID_REG UINT_MAX
+#define INVALID_OFFSET INT_MAX
+ /// Contains the location where a CSR register is saved.
+ struct CSRSavedLocation {
+ CSRSavedLocation(std::optional<unsigned> R, std::optional<int> O)
+ : Reg(R), Offset(O) {}
+ std::optional<unsigned> Reg;
+ std::optional<int> Offset;
+ };
+
+ /// Contains cfa offset and register values valid at entry and exit of basic
+ /// blocks.
+ std::vector<MBBCFAInfo> MBBVector;
+
+ /// Map the callee save registers to the locations where they are saved.
+ SmallDenseMap<unsigned, CSRSavedLocation, 16> CSRLocMap;
+
+ /// Calculate cfa offset and register values valid at entry and exit for all
+ /// basic blocks in a function.
+ void calculateCFAInfo(MachineFunction &MF);
+ /// Calculate cfa offset and register values valid at basic block exit by
+ /// checking the block for CFI instructions. Block's incoming CFA info remains
+ /// the same.
+ void calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo);
+ /// Update in/out cfa offset and register values for successors of the basic
+ /// block.
+ void updateSuccCFAInfo(MBBCFAInfo &MBBInfo);
+
+ /// Check if incoming CFA information of a basic block matches outgoing CFA
+ /// information of the previous block. If it doesn't, insert CFI instruction
+ /// at the beginning of the block that corrects the CFA calculation rule for
+ /// that block.
+ bool insertCFIInstrs(MachineFunction &MF);
+ /// Return the cfa offset value that should be set at the beginning of a MBB
+ /// if needed. The negated value is needed when creating CFI instructions that
+ /// set absolute offset.
+ int getCorrectCFAOffset(MachineBasicBlock *MBB) {
+ return MBBVector[MBB->getNumber()].IncomingCFAOffset;
+ }
+
+ void reportCFAError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+ void reportCSRError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+ /// Go through each MBB in a function and check that outgoing offset and
+ /// register of its predecessors match incoming offset and register of that
+ /// MBB, as well as that incoming offset and register of its successors match
+ /// outgoing offset and register of the MBB.
+ unsigned verify(MachineFunction &MF);
+};
+} // namespace
+
+char CFIInstrInserter::ID = 0;
+INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter",
+ "Check CFA info and insert CFI instructions if needed", false,
+ false)
+FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
+
+void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ // Initial CFA offset value i.e. the one valid at the beginning of the
+ // function.
+ int InitialOffset =
+ MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF);
+ // Initial CFA register value i.e. the one valid at the beginning of the
+ // function.
+ Register InitialRegister =
+ MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
+ InitialRegister = TRI.getDwarfRegNum(InitialRegister, true);
+ unsigned NumRegs = TRI.getNumRegs();
+
+ // Initialize MBBMap.
+ for (MachineBasicBlock &MBB : MF) {
+ MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()];
+ MBBInfo.MBB = &MBB;
+ MBBInfo.IncomingCFAOffset = InitialOffset;
+ MBBInfo.OutgoingCFAOffset = InitialOffset;
+ MBBInfo.IncomingCFARegister = InitialRegister;
+ MBBInfo.OutgoingCFARegister = InitialRegister;
+ MBBInfo.IncomingCSRSaved.resize(NumRegs);
+ MBBInfo.OutgoingCSRSaved.resize(NumRegs);
+ }
+ CSRLocMap.clear();
+
+ // Set in/out cfa info for all blocks in the function. This traversal is based
+ // on the assumption that the first block in the function is the entry block
+ // i.e. that it has initial cfa offset and register values as incoming CFA
+ // information.
+ updateSuccCFAInfo(MBBVector[MF.front().getNumber()]);
+}
+
+void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
+ // Outgoing cfa offset set by the block.
+ int SetOffset = MBBInfo.IncomingCFAOffset;
+ // Outgoing cfa register set by the block.
+ unsigned SetRegister = MBBInfo.IncomingCFARegister;
+ MachineFunction *MF = MBBInfo.MBB->getParent();
+ const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ unsigned NumRegs = TRI.getNumRegs();
+ BitVector CSRSaved(NumRegs), CSRRestored(NumRegs);
+
+ // Determine cfa offset and register set by the block.
+ for (MachineInstr &MI : *MBBInfo.MBB) {
+ if (MI.isCFIInstruction()) {
+ std::optional<unsigned> CSRReg;
+ std::optional<int> CSROffset;
+ unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+ const MCCFIInstruction &CFI = Instrs[CFIIndex];
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpDefCfaRegister:
+ SetRegister = CFI.getRegister();
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ SetOffset = CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ SetOffset += CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ SetRegister = CFI.getRegister();
+ SetOffset = CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpOffset:
+ CSROffset = CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRegister:
+ CSRReg = CFI.getRegister2();
+ break;
+ case MCCFIInstruction::OpRelOffset:
+ CSROffset = CFI.getOffset() - SetOffset;
+ break;
+ case MCCFIInstruction::OpRestore:
+ CSRRestored.set(CFI.getRegister());
+ break;
+ case MCCFIInstruction::OpLLVMDefAspaceCfa:
+ // TODO: Add support for handling cfi_def_aspace_cfa.
+#ifndef NDEBUG
+ report_fatal_error(
+ "Support for cfi_llvm_def_aspace_cfa not implemented! Value of CFA "
+ "may be incorrect!\n");
+#endif
+ break;
+ case MCCFIInstruction::OpRememberState:
+ // TODO: Add support for handling cfi_remember_state.
+#ifndef NDEBUG
+ report_fatal_error(
+ "Support for cfi_remember_state not implemented! Value of CFA "
+ "may be incorrect!\n");
+#endif
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ // TODO: Add support for handling cfi_restore_state.
+#ifndef NDEBUG
+ report_fatal_error(
+ "Support for cfi_restore_state not implemented! Value of CFA may "
+ "be incorrect!\n");
+#endif
+ break;
+ // Other CFI directives do not affect CFA value.
+ case MCCFIInstruction::OpUndefined:
+ case MCCFIInstruction::OpSameValue:
+ case MCCFIInstruction::OpEscape:
+ case MCCFIInstruction::OpWindowSave:
+ case MCCFIInstruction::OpNegateRAState:
+ case MCCFIInstruction::OpGnuArgsSize:
+ break;
+ }
+ if (CSRReg || CSROffset) {
+ auto It = CSRLocMap.find(CFI.getRegister());
+ if (It == CSRLocMap.end()) {
+ CSRLocMap.insert(
+ {CFI.getRegister(), CSRSavedLocation(CSRReg, CSROffset)});
+ } else if (It->second.Reg != CSRReg || It->second.Offset != CSROffset) {
+ llvm_unreachable("Different saved locations for the same CSR");
+ }
+ CSRSaved.set(CFI.getRegister());
+ }
+ }
+ }
+
+ MBBInfo.Processed = true;
+
+ // Update outgoing CFA info.
+ MBBInfo.OutgoingCFAOffset = SetOffset;
+ MBBInfo.OutgoingCFARegister = SetRegister;
+
+ // Update outgoing CSR info.
+ BitVector::apply([](auto x, auto y, auto z) { return (x | y) & ~z; },
+ MBBInfo.OutgoingCSRSaved, MBBInfo.IncomingCSRSaved, CSRSaved,
+ CSRRestored);
+}
+
+void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ Stack.push_back(MBBInfo.MBB);
+
+ do {
+ MachineBasicBlock *Current = Stack.pop_back_val();
+ MBBCFAInfo &CurrentInfo = MBBVector[Current->getNumber()];
+ calculateOutgoingCFAInfo(CurrentInfo);
+ for (auto *Succ : CurrentInfo.MBB->successors()) {
+ MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()];
+ if (!SuccInfo.Processed) {
+ SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset;
+ SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister;
+ SuccInfo.IncomingCSRSaved = CurrentInfo.OutgoingCSRSaved;
+ Stack.push_back(Succ);
+ }
+ }
+ } while (!Stack.empty());
+}
+
+bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
+ const MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()];
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool InsertedCFIInstr = false;
+
+ BitVector SetDifference;
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip the first MBB in a function
+ if (MBB.getNumber() == MF.front().getNumber()) continue;
+
+ const MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()];
+ auto MBBI = MBBInfo.MBB->begin();
+ DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI);
+
+ // If the current MBB will be placed in a unique section, a full DefCfa
+ // must be emitted.
+ const bool ForceFullCFA = MBB.isBeginSection();
+
+ if ((PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset &&
+ PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) ||
+ ForceFullCFA) {
+ // If both outgoing offset and register of a previous block don't match
+ // incoming offset and register of this block, or if this block begins a
+ // section, add a def_cfa instruction with the correct offset and
+ // register for this block.
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB)));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ } else if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) {
+ // If outgoing offset of a previous block doesn't match incoming offset
+ // of this block, add a def_cfa_offset instruction with the correct
+ // offset for this block.
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
+ nullptr, getCorrectCFAOffset(&MBB)));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ } else if (PrevMBBInfo->OutgoingCFARegister !=
+ MBBInfo.IncomingCFARegister) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MBBInfo.IncomingCFARegister));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ }
+
+ if (ForceFullCFA) {
+ MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMovesFullCFA(
+ *MBBInfo.MBB, MBBI);
+ InsertedCFIInstr = true;
+ PrevMBBInfo = &MBBInfo;
+ continue;
+ }
+
+ BitVector::apply([](auto x, auto y) { return x & ~y; }, SetDifference,
+ PrevMBBInfo->OutgoingCSRSaved, MBBInfo.IncomingCSRSaved);
+ for (int Reg : SetDifference.set_bits()) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ }
+
+ BitVector::apply([](auto x, auto y) { return x & ~y; }, SetDifference,
+ MBBInfo.IncomingCSRSaved, PrevMBBInfo->OutgoingCSRSaved);
+ for (int Reg : SetDifference.set_bits()) {
+ auto it = CSRLocMap.find(Reg);
+ assert(it != CSRLocMap.end() && "Reg should have an entry in CSRLocMap");
+ unsigned CFIIndex;
+ CSRSavedLocation RO = it->second;
+ if (!RO.Reg && RO.Offset) {
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, *RO.Offset));
+ } else if (RO.Reg && !RO.Offset) {
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createRegister(nullptr, Reg, *RO.Reg));
+ } else {
+ llvm_unreachable("RO.Reg and RO.Offset cannot both be valid/invalid");
+ }
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ }
+
+ PrevMBBInfo = &MBBInfo;
+ }
+ return InsertedCFIInstr;
+}
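The chain of branches above reduces to a small decision table; this is only a
restatement of the logic already implemented, not additional behavior:

    // difference vs. predecessor's outgoing state | directive emitted
    // --------------------------------------------+-------------------------------
    // offset and register both differ             | .cfi_def_cfa <reg>, <offset>
    // block begins a new section (ForceFullCFA)   | .cfi_def_cfa + full CSR moves
    // only the offset differs                     | .cfi_def_cfa_offset <offset>
    // only the register differs                   | .cfi_def_cfa_register <reg>
    // CSR saved before but not incoming here      | .cfi_restore <reg>
    // CSR incoming here but not saved before      | .cfi_offset or .cfi_register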
+
+void CFIInstrInserter::reportCFAError(const MBBCFAInfo &Pred,
+ const MBBCFAInfo &Succ) {
+ errs() << "*** Inconsistent CFA register and/or offset between pred and succ "
+ "***\n";
+ errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
+ << " in " << Pred.MBB->getParent()->getName()
+ << " outgoing CFA Reg:" << Pred.OutgoingCFARegister << "\n";
+ errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
+ << " in " << Pred.MBB->getParent()->getName()
+ << " outgoing CFA Offset:" << Pred.OutgoingCFAOffset << "\n";
+ errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber()
+ << " incoming CFA Reg:" << Succ.IncomingCFARegister << "\n";
+ errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber()
+ << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n";
+}
+
+void CFIInstrInserter::reportCSRError(const MBBCFAInfo &Pred,
+ const MBBCFAInfo &Succ) {
+ errs() << "*** Inconsistent CSR Saved between pred and succ in function "
+ << Pred.MBB->getParent()->getName() << " ***\n";
+ errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
+ << " outgoing CSR Saved: ";
+ for (int Reg : Pred.OutgoingCSRSaved.set_bits())
+ errs() << Reg << " ";
+ errs() << "\n";
+ errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber()
+ << " incoming CSR Saved: ";
+ for (int Reg : Succ.IncomingCSRSaved.set_bits())
+ errs() << Reg << " ";
+ errs() << "\n";
+}
+
+unsigned CFIInstrInserter::verify(MachineFunction &MF) {
+ unsigned ErrorNum = 0;
+ for (auto *CurrMBB : depth_first(&MF)) {
+ const MBBCFAInfo &CurrMBBInfo = MBBVector[CurrMBB->getNumber()];
+ for (MachineBasicBlock *Succ : CurrMBB->successors()) {
+ const MBBCFAInfo &SuccMBBInfo = MBBVector[Succ->getNumber()];
+ // Check that incoming offset and register values of successors match the
+ // outgoing offset and register values of CurrMBB
+ if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset ||
+ SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) {
+ // Inconsistent offsets/registers are ok for 'noreturn' blocks because
+ // we don't generate epilogues inside such blocks.
+ if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock())
+ continue;
+ reportCFAError(CurrMBBInfo, SuccMBBInfo);
+ ErrorNum++;
+ }
+ // Check that IncomingCSRSaved of every successor matches the
+ // OutgoingCSRSaved of CurrMBB
+ if (SuccMBBInfo.IncomingCSRSaved != CurrMBBInfo.OutgoingCSRSaved) {
+ reportCSRError(CurrMBBInfo, SuccMBBInfo);
+ ErrorNum++;
+ }
+ }
+ }
+ return ErrorNum;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 000000000000..5a005ba7b414
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,323 @@
+//===- CalcSpillWeights.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "calcspillweights"
+
+void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
+ LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ calculateSpillWeightAndHint(LIS.getInterval(Reg));
+ }
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+Register VirtRegAuxInfo::copyHint(const MachineInstr *MI, unsigned Reg,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ unsigned Sub, HSub;
+ Register HReg;
+ if (MI->getOperand(0).getReg() == Reg) {
+ Sub = MI->getOperand(0).getSubReg();
+ HReg = MI->getOperand(1).getReg();
+ HSub = MI->getOperand(1).getSubReg();
+ } else {
+ Sub = MI->getOperand(1).getSubReg();
+ HReg = MI->getOperand(0).getReg();
+ HSub = MI->getOperand(0).getSubReg();
+ }
+
+ if (!HReg)
+ return 0;
+
+ if (HReg.isVirtual())
+ return Sub == HSub ? HReg : Register();
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg();
+ if (RC->contains(CopiedPReg))
+ return CopiedPReg;
+
+ // Check if reg:sub matches so that a super register could be hinted.
+ if (Sub)
+ return TRI.getMatchingSuperReg(CopiedPReg, Sub, RC);
+
+ return 0;
+}
+
+// Check if all values in LI are rematerializable
+bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ const VirtRegMap &VRM,
+ const TargetInstrInfo &TII) {
+ Register Reg = LI.reg();
+ Register Original = VRM.getOriginal(Reg);
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ if (VNI->isPHIDef())
+ return false;
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+
+ // Trace copies introduced by live range splitting. The inline
+ // spiller can rematerialize through these copies, so the spill
+ // weight must reflect this.
+ while (MI->isFullCopy()) {
+ // The copy destination must match the interval register.
+ if (MI->getOperand(0).getReg() != Reg)
+ return false;
+
+ // Get the source register.
+ Reg = MI->getOperand(1).getReg();
+
+      // If the original (pre-splitting) registers match, this copy came
+      // from a split; otherwise give up.
+ if (!Reg.isVirtual() || VRM.getOriginal(Reg) != Original)
+ return false;
+
+ // Follow the copy live-in value.
+ const LiveInterval &SrcLI = LIS.getInterval(Reg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ VNI = SrcQ.valueIn();
+ assert(VNI && "Copy from non-existing value");
+ if (VNI->isPHIDef())
+ return false;
+ MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+ }
+
+ if (!TII.isTriviallyReMaterializable(*MI))
+ return false;
+ }
+ return true;
+}
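A toy restatement of the copy-chain walk above, over a deliberately simplified
value representation (the struct and its fields are hypothetical, not LLVM
types), showing why every full copy must lead back to a rematerializable def
of the same original register:

    struct ToyDef {
      bool IsFullCopy;        // introduced by live range splitting
      bool IsRematerializable;
      const ToyDef *CopySrc;  // defining value of the copy source, if any
    };

    bool allCopiesLeadToRemat(const ToyDef *D) {
      while (D && D->IsFullCopy)  // trace copies introduced by splitting
        D = D->CopySrc;           // follow the copy's live-in value
      return D && D->IsRematerializable;
    }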
+
+bool VirtRegAuxInfo::isLiveAtStatepointVarArg(LiveInterval &LI) {
+ return any_of(VRM.getRegInfo().reg_operands(LI.reg()),
+ [](MachineOperand &MO) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ return StatepointOpers(MI).getVarIdx() <= MO.getOperandNo();
+ });
+}
+
+void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
+ float Weight = weightCalcHelper(LI);
+ // Check if unspillable.
+ if (Weight < 0)
+ return;
+ LI.setWeight(Weight);
+}
+
+float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
+ SlotIndex *End) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineBasicBlock *MBB = nullptr;
+ MachineLoop *Loop = nullptr;
+ bool IsExiting = false;
+ float TotalWeight = 0;
+ unsigned NumInstr = 0; // Number of instructions using LI
+ SmallPtrSet<MachineInstr *, 8> Visited;
+
+ std::pair<unsigned, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
+
+ if (LI.isSpillable()) {
+ Register Reg = LI.reg();
+ Register Original = VRM.getOriginal(Reg);
+ const LiveInterval &OrigInt = LIS.getInterval(Original);
+ // li comes from a split of OrigInt. If OrigInt was marked
+ // as not spillable, make sure the new interval is marked
+ // as not spillable as well.
+ if (!OrigInt.isSpillable())
+ LI.markNotSpillable();
+ }
+
+ // Don't recompute spill weight for an unspillable register.
+ bool IsSpillable = LI.isSpillable();
+
+ bool IsLocalSplitArtifact = Start && End;
+
+ // Do not update future local split artifacts.
+ bool ShouldUpdateLI = !IsLocalSplitArtifact;
+
+ if (IsLocalSplitArtifact) {
+ MachineBasicBlock *LocalMBB = LIS.getMBBFromIndex(*End);
+ assert(LocalMBB == LIS.getMBBFromIndex(*Start) &&
+ "start and end are expected to be in the same basic block");
+
+ // Local split artifact will have 2 additional copy instructions and they
+ // will be in the same BB.
+ // localLI = COPY other
+ // ...
+ // other = COPY localLI
+ TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
+
+ NumInstr += 2;
+ }
+
+ // CopyHint is a sortable hint derived from a COPY instruction.
+ struct CopyHint {
+ const Register Reg;
+ const float Weight;
+ CopyHint(Register R, float W) : Reg(R), Weight(W) {}
+ bool operator<(const CopyHint &Rhs) const {
+ // Always prefer any physreg hint.
+ if (Reg.isPhysical() != Rhs.Reg.isPhysical())
+ return Reg.isPhysical();
+ if (Weight != Rhs.Weight)
+ return (Weight > Rhs.Weight);
+ return Reg.id() < Rhs.Reg.id(); // Tie-breaker.
+ }
+ };
+
+ std::set<CopyHint> CopyHints;
+ DenseMap<unsigned, float> Hint;
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ I = MRI.reg_instr_nodbg_begin(LI.reg()),
+ E = MRI.reg_instr_nodbg_end();
+ I != E;) {
+ MachineInstr *MI = &*(I++);
+
+ // For local split artifacts, we are interested only in instructions between
+ // the expected start and end of the range.
+ SlotIndex SI = LIS.getInstructionIndex(*MI);
+ if (IsLocalSplitArtifact && ((SI < *Start) || (SI > *End)))
+ continue;
+
+ NumInstr++;
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
+ continue;
+ if (!Visited.insert(MI).second)
+ continue;
+
+ // For terminators that produce values, ask the backend if the register is
+ // not spillable.
+ if (TII.isUnspillableTerminator(MI) && MI->definesRegister(LI.reg())) {
+ LI.markNotSpillable();
+ return -1.0f;
+ }
+
+ float Weight = 1.0f;
+ if (IsSpillable) {
+ // Get loop info for mi.
+ if (MI->getParent() != MBB) {
+ MBB = MI->getParent();
+ Loop = Loops.getLoopFor(MBB);
+ IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
+ }
+
+ // Calculate instr weight.
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+ Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
+ Weight *= 3;
+
+ TotalWeight += Weight;
+ }
+
+ // Get allocation hints from copies.
+ if (!MI->isCopy())
+ continue;
+ Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
+ if (!HintReg)
+ continue;
+ // Force hweight onto the stack so that x86 doesn't add hidden precision,
+ // making the comparison incorrectly pass (i.e., 1 > 1 == true??).
+ //
+ // FIXME: we probably shouldn't use floats at all.
+ volatile float HWeight = Hint[HintReg] += Weight;
+ if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
+ CopyHints.insert(CopyHint(HintReg, HWeight));
+ }
+
+ // Pass all the sorted copy hints to mri.
+ if (ShouldUpdateLI && CopyHints.size()) {
+ // Remove a generic hint if previously added by target.
+ if (TargetHint.first == 0 && TargetHint.second)
+ MRI.clearSimpleHint(LI.reg());
+
+ SmallSet<Register, 4> HintedRegs;
+ for (const auto &Hint : CopyHints) {
+ if (!HintedRegs.insert(Hint.Reg).second ||
+ (TargetHint.first != 0 && Hint.Reg == TargetHint.second))
+ // Don't add the same reg twice or the target-type hint again.
+ continue;
+ MRI.addRegAllocationHint(LI.reg(), Hint.Reg);
+ }
+
+ // Weakly boost the spill weight of hinted registers.
+ TotalWeight *= 1.01F;
+ }
+
+ // If the live interval was already unspillable, leave it that way.
+ if (!IsSpillable)
+ return -1.0;
+
+  // Mark LI as unspillable if all live ranges are tiny and the interval is
+  // not live at any reg mask. If the interval is live at a reg mask, spilling
+  // may be required. If LI is used by a STATEPOINT instruction, spilling may
+  // also be required: marking such an interval as unspillable would risk
+  // leaving us with no register to allocate. At the same time, a STATEPOINT
+  // is perfectly fine with having this operand on the stack, so spilling such
+  // an interval and folding its load from the stack into the instruction
+  // itself makes perfect sense.
+ if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) &&
+ !LI.isLiveAtIndexes(LIS.getRegMaskSlots()) &&
+ !isLiveAtStatepointVarArg(LI)) {
+ LI.markNotSpillable();
+ return -1.0;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
+ TotalWeight *= 0.5F;
+
+ if (IsLocalSplitArtifact)
+ return normalize(TotalWeight, Start->distance(*End), NumInstr);
+ return normalize(TotalWeight, LI.getSize(), NumInstr);
+}
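The final normalize() step converts the accumulated use/def frequency into a
density, so that a short, hot interval outranks a long, rarely used one. A
rough self-contained sketch of that idea (the exact formula used by LLVM's
normalize() hook may differ):

    float normalizedWeightSketch(float TotalWeight, unsigned RangeSize) {
      // Weight per unit of live-range length; guard against an empty range.
      return RangeSize ? TotalWeight / static_cast<float>(RangeSize)
                       : TotalWeight;
    }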
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
new file mode 100644
index 000000000000..db243a0bfebe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -0,0 +1,231 @@
+//===-- CallBrPrepare - Prepare callbr for code generation ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers callbrs in LLVM IR in order to assist SelectionDAG's
+// codegen.
+//
+// In particular, this pass assists in inserting register copies for the output
+// values of a callbr along the edges leading to the indirect target blocks.
+// Though the output SSA value is defined by the callbr instruction itself in
+// the IR representation, the value cannot be copied to the appropriate virtual
+// registers prior to jumping to an indirect label, since the jump occurs
+// within the user-provided assembly blob.
+//
+// Instead, those copies must occur separately at the beginning of each
+// indirect target. That requires that we create a separate SSA definition in
+// each of them (via llvm.callbr.landingpad), and may require splitting
+// critical edges so we have a location to place the intrinsic. Finally, we
+// remap users of the original callbr output SSA value to instead point to the
+// appropriate llvm.callbr.landingpad value.
+//
+// Ideally, this could be done inside SelectionDAG, or in the
+// MachineInstruction representation, without the use of an IR-level intrinsic.
+// But, within the current framework, it's simpler to implement as an IR pass.
+// (If support for callbr in GlobalISel is implemented, it's worth considering
+// whether this is still required.)
+//
+//===----------------------------------------------------------------------===//
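A rough sketch of the shape of the rewrite described above, in IR form (labels,
asm constraints, and the exact intrinsic name mangling are illustrative only):

    //   %ret = callbr i32 asm "...", "=r,!i"()
    //             to label %fallthrough [label %indirect]
    //   ...
    // indirect:
    //   %landed = call i32 @llvm.callbr.landingpad.i32(i32 %ret)
    //   ; uses of %ret that are reached through %indirect are rewritten to
    //   ; use %landed instead.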
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "callbrprepare"
+
+namespace {
+
+class CallBrPrepare : public FunctionPass {
+ bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+ bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) const;
+ void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) const;
+
+public:
+ CallBrPrepare() : FunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &Fn) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char CallBrPrepare::ID = 0;
+INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+
+FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
+
+void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
+ SmallVector<CallBrInst *, 2> CBRs;
+ for (BasicBlock &BB : Fn)
+ if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
+ if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
+ CBRs.push_back(CBR);
+ return CBRs;
+}
+
+bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) {
+ bool Changed = false;
+ CriticalEdgeSplittingOptions Options(&DT);
+ Options.setMergeIdenticalEdges();
+
+  // The indirect destination might be duplicated in another label operand...
+  //   %0 = callbr ... [label %x, label %x]
+  // ...hence MergeIdenticalEdges and AllowIdenticalEdges, but we don't need
+  // to split the default destination if it's duplicated as an indirect
+  // destination...
+  //   %1 = callbr ... to label %x [label %x]
+  // ...hence starting at 1 and checking against successor 0 (aka the default
+  // destination).
+ for (CallBrInst *CBR : CBRs)
+ for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
+ if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
+ isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
+ if (SplitKnownCriticalEdge(CBR, i, Options))
+ Changed = true;
+ return Changed;
+}
+
+bool CallBrPrepare::InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) const {
+ bool Changed = false;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ IRBuilder<> Builder(CBRs[0]->getContext());
+ for (CallBrInst *CBR : CBRs) {
+ if (!CBR->getNumIndirectDests())
+ continue;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(CBR->getType(), CBR->getName());
+ SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
+ SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+
+ for (BasicBlock *IndDest : CBR->getIndirectDests()) {
+ if (!Visited.insert(IndDest).second)
+ continue;
+ Builder.SetInsertPoint(&*IndDest->begin());
+ CallInst *Intrinsic = Builder.CreateIntrinsic(
+ CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
+ SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
+ UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
+ const auto *I = dyn_cast<Instruction>(U.getUser());
+ return I && I->getParent() == BB;
+}
+
+#ifndef NDEBUG
+static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
+ const BasicBlock *BB, bool IsDefaultDest) {
+ if (!isa<Instruction>(U.getUser()))
+ return;
+ LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
+ << cast<Instruction>(U.getUser())->getParent()->getName()
+ << ", is " << (DT.dominates(BB, U) ? "" : "NOT ")
+ << "dominated by " << BB->getName() << " ("
+ << (IsDefaultDest ? "in" : "") << "direct)\n");
+}
+#endif
+
+void CallBrPrepare::UpdateSSA(DominatorTree &DT, CallBrInst *CBR,
+ CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) const {
+
+ SmallPtrSet<Use *, 4> Visited;
+ BasicBlock *DefaultDest = CBR->getDefaultDest();
+ BasicBlock *LandingPad = Intrinsic->getParent();
+
+ SmallVector<Use *, 4> Uses(make_pointer_range(CBR->uses()));
+ for (Use *U : Uses) {
+ if (!Visited.insert(U).second)
+ continue;
+
+#ifndef NDEBUG
+ PrintDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
+ PrintDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
+#endif
+
+ // Don't rewrite the use in the newly inserted intrinsic.
+ if (const auto *II = dyn_cast<IntrinsicInst>(U->getUser()))
+ if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
+ continue;
+
+ // If the Use is in the same BasicBlock as the Intrinsic call, replace
+ // the Use with the value of the Intrinsic call.
+ if (IsInSameBasicBlock(*U, LandingPad)) {
+ U->set(Intrinsic);
+ continue;
+ }
+
+ // If the Use is dominated by the default dest, do not touch it.
+ if (DT.dominates(DefaultDest, *U))
+ continue;
+
+ SSAUpdate.RewriteUse(*U);
+ }
+}
+
+bool CallBrPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+ SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
+
+ if (CBRs.empty())
+ return Changed;
+
+ // It's highly likely that most programs do not contain CallBrInsts. Follow a
+  // pattern similar to SafeStackLegacyPass::runOnFunction to reuse a previous
+  // domtree analysis if available, otherwise compute it lazily. This avoids
+ // forcing Dominator Tree Construction at -O0 for programs that likely do not
+ // contain CallBrInsts. It does pessimize programs with callbr at higher
+ // optimization levels, as the DominatorTree created here is not reused by
+ // subsequent passes.
+ DominatorTree *DT;
+ std::optional<DominatorTree> LazilyComputedDomTree;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ else {
+ LazilyComputedDomTree.emplace(Fn);
+ DT = &*LazilyComputedDomTree;
+ }
+
+ if (SplitCriticalEdges(CBRs, *DT))
+ Changed = true;
+
+ if (InsertIntrinsicCalls(CBRs, *DT))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 000000000000..b7152587a9fa
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,292 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &Locs, LLVMContext &Context,
+ bool NegativeOffsets)
+ : CallingConv(CC), IsVarArg(IsVarArg), MF(MF),
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(Locs), Context(Context),
+ NegativeOffsets(NegativeOffsets) {
+
+ // No stack is used.
+ StackSize = 0;
+
+ clearByValRegsInfo();
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+/// Allocate space on the stack large enough to pass an argument by value.
+/// The size and alignment information of the argument is encoded in
+/// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, int MinSize,
+ Align MinAlign, ISD::ArgFlagsTy ArgFlags) {
+ Align Alignment = ArgFlags.getNonZeroByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > Alignment)
+ Alignment = MinAlign;
+ ensureMaxAlignment(Alignment);
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Alignment);
+ Size = unsigned(alignTo(Size, MinAlign));
+ uint64_t Offset = AllocateStack(Size, Alignment);
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(MCPhysReg Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI / 32] |= 1 << (*AI & 31);
+}
+
+void CCState::MarkUnallocated(MCPhysReg Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI / 32] &= ~(1 << (*AI & 31));
+}
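MarkAllocated and MarkUnallocated pack one bit per physical register (and its
aliases) into the 32-bit words of UsedRegs. A self-contained sketch of the same
indexing, with a plain std::vector standing in for the member:

    #include <cstdint>
    #include <vector>

    bool isRegMarked(const std::vector<uint32_t> &Words, unsigned Reg) {
      // Register Reg lives in word Reg / 32, at bit position Reg % 32.
      return (Words[Reg / 32] >> (Reg & 31)) & 1u;
    }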
+
+bool CCState::IsShadowAllocatedReg(MCRegister Reg) const {
+ if (!isAllocated(Reg))
+ return false;
+
+ for (auto const &ValAssign : Locs)
+ if (ValAssign.isRegLoc() && TRI.regsOverlap(ValAssign.getLocReg(), Reg))
+ return false;
+ return true;
+}
+
+/// Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this))
+ report_fatal_error("unable to allocate function argument #" + Twine(i));
+ }
+}
+
+/// Analyze the return values of a function, returning true if the return can
+/// be performed without sret-demotion and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ report_fatal_error("unable to allocate function return #" + Twine(i));
+ }
+}
+
+/// Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Same as above except it takes vectors of types and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Analyze the return values of a call, incorporating info about the passed
+/// values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Same as above except it's specialized for calls that produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << VT << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+}
+
+void CCState::ensureMaxAlignment(Align Alignment) {
+ if (!AnalyzingMustTailForwardedRegs)
+ MF.getFrameInfo().ensureMaxAlignment(Alignment);
+}
+
+static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
+ if (VT.isVector())
+ return true; // Assume -msse-regparm might be in effect.
+ if (!VT.isInteger())
+ return false;
+ return (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall);
+}
+
+void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
+ MVT VT, CCAssignFn Fn) {
+ uint64_t SavedStackSize = StackSize;
+ Align SavedMaxStackArgAlign = MaxStackArgAlign;
+ unsigned NumLocs = Locs.size();
+
+ // Set the 'inreg' flag if it is used for this calling convention.
+ ISD::ArgFlagsTy Flags;
+ if (isValueTypeInRegForCC(CallingConv, VT))
+ Flags.setInReg();
+
+ // Allocate something of this value type repeatedly until we get assigned a
+ // location in memory.
+ bool HaveRegParm;
+ do {
+ if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call has unhandled type " << VT
+ << " while computing remaining regparms\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ HaveRegParm = Locs.back().isRegLoc();
+ } while (HaveRegParm);
+
+ // Copy all the registers from the value locations we added.
+ assert(NumLocs < Locs.size() && "CC assignment failed to add location");
+ for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
+ if (Locs[I].isRegLoc())
+ Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
+
+ // Clear the assigned values and stack memory. We leave the registers marked
+ // as allocated so that future queries don't return the same registers, i.e.
+ // when i64 and f64 are both passed in GPRs.
+ StackSize = SavedStackSize;
+ MaxStackArgAlign = SavedMaxStackArgAlign;
+ Locs.truncate(NumLocs);
+}
+
+void CCState::analyzeMustTailForwardedRegisters(
+ SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+ CCAssignFn Fn) {
+  // Oftentimes calling conventions will not use register parameters for
+ // variadic functions, so we need to assume we're not variadic so that we get
+ // all the registers that might be used in a non-variadic call.
+ SaveAndRestore SavedVarArg(IsVarArg, false);
+ SaveAndRestore SavedMustTail(AnalyzingMustTailForwardedRegs, true);
+
+ for (MVT RegVT : RegParmTypes) {
+ SmallVector<MCPhysReg, 8> RemainingRegs;
+ getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
+ const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
+ const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
+ for (MCPhysReg PReg : RemainingRegs) {
+ Register VReg = MF.addLiveIn(PReg, RC);
+ Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
+ }
+ }
+}
+
+bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, MachineFunction &MF,
+ LLVMContext &C,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn CalleeFn, CCAssignFn CallerFn) {
+ if (CalleeCC == CallerCC)
+ return true;
+ SmallVector<CCValAssign, 4> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, MF, RVLocs1, C);
+ CCInfo1.AnalyzeCallResult(Ins, CalleeFn);
+
+ SmallVector<CCValAssign, 4> RVLocs2;
+ CCState CCInfo2(CallerCC, false, MF, RVLocs2, C);
+ CCInfo2.AnalyzeCallResult(Ins, CallerFn);
+
+ auto AreCompatible = [](const CCValAssign &Loc1, const CCValAssign &Loc2) {
+ assert(!Loc1.isPendingLoc() && !Loc2.isPendingLoc() &&
+ "The location must have been decided by now");
+ // Must fill the same part of their locations.
+ if (Loc1.getLocInfo() != Loc2.getLocInfo())
+ return false;
+ // Must both be in the same registers, or both in memory at the same offset.
+ if (Loc1.isRegLoc() && Loc2.isRegLoc())
+ return Loc1.getLocReg() == Loc2.getLocReg();
+ if (Loc1.isMemLoc() && Loc2.isMemLoc())
+ return Loc1.getLocMemOffset() == Loc2.getLocMemOffset();
+ llvm_unreachable("Unknown location kind");
+ };
+
+ return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
+ RVLocs2.end(), AreCompatible);
+}
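These Analyze* entry points are normally driven from a target's call lowering.
A minimal usage sketch, where CC_MyTarget stands in for a hypothetical
target-generated CCAssignFn (the other calls match signatures defined in this
file):

    // SmallVector<CCValAssign, 16> ArgLocs;
    // CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Context);
    // CCInfo.AnalyzeFormalArguments(Ins, CC_MyTarget);
    // for (const CCValAssign &VA : ArgLocs) {
    //   if (VA.isRegLoc())
    //     ... copy the incoming argument out of VA.getLocReg() ...
    //   else
    //     ... load it from the stack slot at VA.getLocMemOffset() ...
    // }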
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..6272b654b329
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,143 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeAssignmentTrackingAnalysisPass(Registry);
+ initializeAtomicExpandPass(Registry);
+ initializeBasicBlockSectionsPass(Registry);
+ initializeBranchFolderPassPass(Registry);
+ initializeBranchRelaxationPass(Registry);
+ initializeBreakFalseDepsPass(Registry);
+ initializeCallBrPreparePass(Registry);
+ initializeCFGuardLongjmpPass(Registry);
+ initializeCFIFixupPass(Registry);
+ initializeCFIInstrInserterPass(Registry);
+ initializeCheckDebugMachineModulePass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeDebugifyMachineModulePass(Registry);
+ initializeDetectDeadLanesPass(Registry);
+ initializeDwarfEHPrepareLegacyPassPass(Registry);
+ initializeEarlyIfConverterPass(Registry);
+ initializeEarlyIfPredicatorPass(Registry);
+ initializeEarlyMachineLICMPass(Registry);
+ initializeEarlyTailDuplicatePass(Registry);
+ initializeExpandLargeDivRemLegacyPassPass(Registry);
+ initializeExpandLargeFpConvertLegacyPassPass(Registry);
+ initializeExpandMemCmpPassPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeFEntryInserterPass(Registry);
+ initializeFinalizeISelPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeFixupStatepointCallerSavedPass(Registry);
+ initializeFuncletLayoutPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeHardwareLoopsLegacyPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeImplicitNullChecksPass(Registry);
+ initializeIndirectBrExpandPassPass(Registry);
+ initializeInterleavedLoadCombinePass(Registry);
+ initializeInterleavedAccessPass(Registry);
+ initializeJMCInstrumenterPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveRangeShrinkPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
+ initializeLowerGlobalDtorsLegacyPassPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+ initializeMIRAddFSDiscriminatorsPass(Registry);
+ initializeMIRCanonicalizerPass(Registry);
+ initializeMIRNamerPass(Registry);
+ initializeMIRProfileLoaderPassPass(Registry);
+ initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCFGPrinterPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineCombinerPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCycleInfoPrinterPassPass(Registry);
+ initializeMachineCycleInfoWrapperPassPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
+ initializeMachineLateInstrsCleanupPass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoWrapperPassPass(Registry);
+ initializeMachineOptimizationRemarkEmitterPassPass(Registry);
+ initializeMachineOutlinerPass(Registry);
+ initializeMachinePipelinerPass(Registry);
+ initializeMachineSanitizerBinaryMetadataPass(Registry);
+ initializeModuloScheduleTestPass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineRegionInfoPassPass(Registry);
+ initializeMachineSchedulerPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineUniformityAnalysisPassPass(Registry);
+ initializeMachineUniformityInfoPrinterPassPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeObjCARCContractLegacyPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePEIPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePatchableFunctionPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializePostMachineSchedulerPass(Registry);
+ initializePostRAHazardRecognizerPass(Registry);
+ initializePostRAMachineSinkingPass(Registry);
+ initializePostRASchedulerPass(Registry);
+ initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializeRABasicPass(Registry);
+ initializeRAGreedyPass(Registry);
+ initializeRegAllocFastPass(Registry);
+ initializeRegUsageInfoCollectorPass(Registry);
+ initializeRegUsageInfoPropagationPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeRemoveRedundantDebugValuesPass(Registry);
+ initializeRenameIndependentSubregsPass(Registry);
+ initializeSafeStackLegacyPassPass(Registry);
+ initializeSelectOptimizePass(Registry);
+ initializeShadowStackGCLoweringPass(Registry);
+ initializeShrinkWrapPass(Registry);
+ initializeSjLjEHPreparePass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeStackColoringPass(Registry);
+ initializeStackFrameLayoutAnalysisPassPass(Registry);
+ initializeStackMapLivenessPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStripDebugMachineModulePass(Registry);
+ initializeTailDuplicatePass(Registry);
+ initializeTargetPassConfigPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeTypePromotionLegacyPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
+ initializeUnreachableBlockElimLegacyPassPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeVirtRegRewriterPass(Registry);
+ initializeWasmEHPreparePass(Registry);
+ initializeWinEHPreparePass(Registry);
+ initializeXRayInstrumentationPass(Registry);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
new file mode 100644
index 000000000000..577c5dbc8e2d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -0,0 +1,293 @@
+//===-- CodeGenCommonISel.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+#define DEBUG_TYPE "codegen-common"
+
+using namespace llvm;
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB
+/// if SuccMBB is null.
+MachineBasicBlock *
+StackProtectorDescriptor::addSuccessorMBB(
+ const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef()) {
+    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ if (MI.isDebugInstr())
+ return true;
+
+ // For GlobalISel, we may have extension instructions for arguments within
+ // copy sequences. Allow these.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() ||
+ (!OPI->getReg().isPhysical() && OPI2->getReg().isPhysical()))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its successor's
+/// stack protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point cannot travel across basic
+/// blocks. Luckily, SelectionDAG always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ do {
+ --Previous;
+ } while (Previous != Start && Previous->isDebugInstr());
+
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // Call frames cannot be nested, so if this frame is describing the tail
+ // call itself, then we must insert before the sequence even starts. For
+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail
+ // call has no register moves of its own and should be the split point. For
+ // example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
+ while (MIIsInTerminatorSequence(*Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
+}
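An illustrative shape of the terminator sequence this search walks backwards
over (pseudo-MIR; register names are hypothetical):

    //   %vreg42 = ...                ; value computed earlier in the block
    //   $physreg0 = COPY %vreg42     ; ABI copy -- part of the sequence
    //   DBG_VALUE $physreg0, ...     ; debug info stays with the sequence
    //   TAILJMP / RET ...            ; the terminator itself
    //
    // The returned split point is the first COPY, so the whole sequence is
    // spliced into the stack protector check block together with the
    // terminator.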
+
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+ FPClassTest InvertedTest = ~Test;
+ // Pick the direction with fewer tests
+ // TODO: Handle more combinations of cases that can be handled together
+ switch (static_cast<unsigned>(InvertedTest)) {
+ case fcNan:
+ case fcSNan:
+ case fcQNan:
+ case fcInf:
+ case fcPosInf:
+ case fcNegInf:
+ case fcNormal:
+ case fcPosNormal:
+ case fcNegNormal:
+ case fcSubnormal:
+ case fcPosSubnormal:
+ case fcNegSubnormal:
+ case fcZero:
+ case fcPosZero:
+ case fcNegZero:
+ case fcFinite:
+ case fcPosFinite:
+ case fcNegFinite:
+ case fcZero | fcNan:
+ case fcSubnormal | fcZero:
+ case fcSubnormal | fcZero | fcNan:
+ return InvertedTest;
+ default:
+ return fcNone;
+ }
+
+ llvm_unreachable("covered FPClassTest");
+}
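For example, a caller asking to test "everything except NaN" (Test == ~fcNan)
gets fcNan back, because the inverted mask hits one of the single-class cases
above; it can then emit the cheaper NaN check and invert the result. A sketch
of that caller-side pattern (emitClassTest is a hypothetical helper, not an
API in this file):

    // FPClassTest Simpler = invertFPClassTestIfSimpler(Test);
    // if (Simpler != fcNone) {
    //   Value *Inverted = emitClassTest(X, Simpler);   // hypothetical
    //   return Builder.CreateNot(Inverted);            // flip the answer
    // }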
+
+static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI,
+ MachineInstr &Copy) {
+ assert(Copy.getOpcode() == TargetOpcode::COPY && "Must be a COPY");
+
+ return &Copy.getOperand(1);
+}
+
+static MachineOperand *getSalvageOpsForTrunc(const MachineRegisterInfo &MRI,
+ MachineInstr &Trunc,
+ SmallVectorImpl<uint64_t> &Ops) {
+ assert(Trunc.getOpcode() == TargetOpcode::G_TRUNC && "Must be a G_TRUNC");
+
+ const auto FromLLT = MRI.getType(Trunc.getOperand(1).getReg());
+ const auto ToLLT = MRI.getType(Trunc.defs().begin()->getReg());
+
+ // TODO: Support non-scalar types.
+ if (!FromLLT.isScalar()) {
+ return nullptr;
+ }
+
+ auto ExtOps = DIExpression::getExtOps(FromLLT.getSizeInBits(),
+ ToLLT.getSizeInBits(), false);
+ Ops.append(ExtOps.begin(), ExtOps.end());
+ return &Trunc.getOperand(1);
+}
+
+static MachineOperand *salvageDebugInfoImpl(const MachineRegisterInfo &MRI,
+ MachineInstr &MI,
+ SmallVectorImpl<uint64_t> &Ops) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ return getSalvageOpsForTrunc(MRI, MI, Ops);
+ case TargetOpcode::COPY:
+ return getSalvageOpsForCopy(MRI, MI);
+ default:
+ return nullptr;
+ }
+}
+
+void llvm::salvageDebugInfoForDbgValue(const MachineRegisterInfo &MRI,
+ MachineInstr &MI,
+ ArrayRef<MachineOperand *> DbgUsers) {
+  // This is an arbitrarily chosen limit on the maximum size of a debug
+  // expression we can salvage up to, used for performance reasons.
+ const unsigned MaxExpressionSize = 128;
+
+ for (auto *DefMO : DbgUsers) {
+ MachineInstr *DbgMI = DefMO->getParent();
+ if (DbgMI->isIndirectDebugValue()) {
+ continue;
+ }
+
+ int UseMOIdx = DbgMI->findRegisterUseOperandIdx(DefMO->getReg());
+ assert(UseMOIdx != -1 && DbgMI->hasDebugOperandForReg(DefMO->getReg()) &&
+ "Must use salvaged instruction as its location");
+
+ // TODO: Support DBG_VALUE_LIST.
+ if (DbgMI->getOpcode() != TargetOpcode::DBG_VALUE) {
+ assert(DbgMI->getOpcode() == TargetOpcode::DBG_VALUE_LIST &&
+ "Must be either DBG_VALUE or DBG_VALUE_LIST");
+ continue;
+ }
+
+ const DIExpression *SalvagedExpr = DbgMI->getDebugExpression();
+
+ SmallVector<uint64_t, 16> Ops;
+ auto Op0 = salvageDebugInfoImpl(MRI, MI, Ops);
+ if (!Op0)
+ continue;
+ SalvagedExpr = DIExpression::appendOpsToArg(SalvagedExpr, Ops, 0, true);
+
+ bool IsValidSalvageExpr =
+ SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (IsValidSalvageExpr) {
+ auto &UseMO = DbgMI->getOperand(UseMOIdx);
+ UseMO.setReg(Op0->getReg());
+ UseMO.setSubReg(Op0->getSubReg());
+ DbgMI->getDebugExpressionOp().setMetadata(SalvagedExpr);
+
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DbgMI << '\n');
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
new file mode 100644
index 000000000000..7f37f2069a3b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
@@ -0,0 +1,25 @@
+//===--- CodeGenPassBuilder.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenPassBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
new file mode 100644
index 000000000000..b00df0b6c6cb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -0,0 +1,8660 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "codegenprepare"
+
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumMemoryInstsPhiCreated,
+ "Number of phis created when address "
+ "computations were sunk to memory instructions");
+STATISTIC(NumMemoryInstsSelectCreated,
+ "Number of select created when address "
+ "computations were sunk to memory instructions");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumAndsAdded,
+ "Number of and mask instructions added to form ext loads");
+STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
+STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
+STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
+
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
+
+static cl::opt<bool>
+ DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
+static cl::opt<bool>
+ DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable select to branch conversion."));
+
+static cl::opt<bool>
+ AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
+ cl::desc("Address sinking in CGP using GEPs."));
+
+static cl::opt<bool>
+ EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
+ cl::desc("Enable sinkinig and/cmp into branches."));
+
+static cl::opt<bool> DisableStoreExtract(
+ "disable-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> StressStoreExtract(
+ "stress-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> DisableExtLdPromotion(
+ "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
+ "CodeGenPrepare"));
+
+static cl::opt<bool> StressExtLdPromotion(
+ "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
+ "optimization in CodeGenPrepare"));
+
+static cl::opt<bool> DisablePreheaderProtect(
+ "disable-preheader-prot", cl::Hidden, cl::init(false),
+ cl::desc("Disable protection against removing loop preheaders"));
+
+static cl::opt<bool> ProfileGuidedSectionPrefix(
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use profile info to add section prefix for hot/cold functions"));
+
+static cl::opt<bool> ProfileUnknownInSpecialSection(
+ "profile-unknown-in-special-section", cl::Hidden,
+ cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
+ "profile, we cannot tell the function is cold for sure because "
+ "it may be a function newly added without ever being sampled. "
+ "With the flag enabled, compiler can put such profile unknown "
+ "functions into a special section, so runtime system can choose "
+ "to handle it in a different way than .text section, to save "
+ "RAM for example. "));
+
+static cl::opt<bool> BBSectionsGuidedSectionPrefix(
+ "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use the basic-block-sections profile to determine the text "
+ "section prefix for hot functions. Functions with "
+ "basic-block-sections profile will be placed in `.text.hot` "
+ "regardless of their FDO profile info. Other functions won't be "
+ "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
+ "profiles."));
+
+static cl::opt<unsigned> FreqRatioToSkipMerge(
+ "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
+ cl::desc("Skip merging empty blocks if (frequency of empty block) / "
+ "(frequency of destination block) is greater than this ratio"));
+
+static cl::opt<bool> ForceSplitStore(
+ "force-split-store", cl::Hidden, cl::init(false),
+ cl::desc("Force store splitting no matter what the target query says."));
+
+static cl::opt<bool> EnableTypePromotionMerge(
+ "cgp-type-promotion-merge", cl::Hidden,
+ cl::desc("Enable merging of redundant sexts when one is dominating"
+ " the other."),
+ cl::init(true));
+
+static cl::opt<bool> DisableComplexAddrModes(
+ "disable-complex-addr-modes", cl::Hidden, cl::init(false),
+ cl::desc("Disables combining addressing modes with different parts "
+ "in optimizeMemoryInst."));
+
+static cl::opt<bool>
+ AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
+ cl::desc("Allow creation of Phis in Address sinking."));
+
+static cl::opt<bool> AddrSinkNewSelects(
+ "addr-sink-new-select", cl::Hidden, cl::init(true),
+ cl::desc("Allow creation of selects in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseReg(
+ "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of BaseReg field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseGV(
+ "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of BaseGV field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseOffs(
+ "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of BaseOffs field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineScaledReg(
+ "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of ScaledReg field in Address sinking."));
+
+static cl::opt<bool>
+ EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
+ cl::init(true),
+ cl::desc("Enable splitting large offset of GEP."));
+
+static cl::opt<bool> EnableICMP_EQToICMP_ST(
+ "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
+ cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
+
+static cl::opt<bool>
+ VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
+ cl::desc("Enable BFI update verification for "
+ "CodeGenPrepare."));
+
+static cl::opt<bool>
+ OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
+ cl::desc("Enable converting phi types in CodeGenPrepare"));
+
+static cl::opt<unsigned>
+ HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
+ cl::desc("Least BB number of huge function."));
+
+static cl::opt<unsigned>
+ MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
+ cl::Hidden,
+ cl::desc("Max number of address users to look at"));
+namespace {
+
+enum ExtType {
+ ZeroExtension, // Zero extension has been seen.
+ SignExtension, // Sign extension has been seen.
+ BothExtension // This extension type is used if we saw sext after
+ // ZeroExtension had been set, or if we saw zext after
+ // SignExtension had been set. It makes the type
+ // information of a promoted instruction invalid.
+};
+
+enum ModifyDT {
+ NotModifyDT, // Does not modify any DT.
+ ModifyBBDT, // Modifies the basic block dominator tree.
+ ModifyInstDT // Modifies instruction dominance within a basic block.
+ // This usually means we move/delete/insert an instruction
+ // in a basic block, so we should re-iterate the instructions
+ // in that basic block.
+};
+
+using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
+using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
+using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
+using SExts = SmallVector<Instruction *, 16>;
+using ValueToSExts = MapVector<Value *, SExts>;
+
+class TypePromotionTransaction;
+
+class CodeGenPrepare : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+ const TargetSubtargetInfo *SubtargetInfo = nullptr;
+ const TargetLowering *TLI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
+ const TargetLibraryInfo *TLInfo = nullptr;
+ LoopInfo *LI = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI = nullptr;
+
+ /// As we scan instructions optimizing them, this is the next instruction
+ /// to optimize. Transforms that can invalidate this should update it.
+ BasicBlock::iterator CurInstIterator;
+
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address. The usage of WeakTrackingVH
+ /// enables SunkAddrs to be treated as a cache whose entries can be
+ /// invalidated if a sunken address computation has been erased.
+ ValueMap<Value *, WeakTrackingVH> SunkAddrs;
+
+ /// Keeps track of all instructions inserted for the current function.
+ SetOfInstrs InsertedInsts;
+
+ /// Keeps track of the type of each related instruction before its
+ /// promotion for the current function.
+ InstrToOrigTy PromotedInsts;
+
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
+
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+ /// Keep track of GEPs accessing the same data structures such as structs or
+ /// arrays that are candidates to be split later because of their large
+ /// size.
+ MapVector<AssertingVH<Value>,
+ SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
+ LargeOffsetGEPMap;
+
+ /// Keep track of new GEP base after splitting the GEPs having large offset.
+ SmallSet<AssertingVH<Value>, 2> NewGEPBases;
+
+ /// Map large offset GEPs to their serial numbers.
+ DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
+
+ /// Keep track of promoted SExts.
+ ValueToSExts ValToSExtendedUses;
+
+ /// True if the function has the OptSize attribute.
+ bool OptSize;
+
+ /// DataLayout for the Function being processed.
+ const DataLayout *DL = nullptr;
+
+ /// Building the dominator tree can be expensive, so we only build it
+ /// lazily and update it when required.
+ std::unique_ptr<DominatorTree> DT;
+
+public:
+ /// If we encounter a huge function, we need to limit the build time.
+ bool IsHugeFunc = false;
+
+ /// FreshBBs is like a worklist: it collects the updated BBs which need
+ /// to be optimized again.
+ /// Note: To keep the build time of this pass in check, whenever a BB is
+ /// updated we need to insert it into FreshBBs for huge functions.
+ SmallSet<BasicBlock *, 32> FreshBBs;
+
+ static char ID; // Pass identification, replacement for typeid
+
+ CodeGenPrepare() : FunctionPass(ID) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void releaseMemory() override {
+ // Clear per function information.
+ InsertedInsts.clear();
+ PromotedInsts.clear();
+ FreshBBs.clear();
+ BPI.reset();
+ BFI.reset();
+ }
+
+ StringRef getPassName() const override { return "CodeGen Prepare"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // FIXME: When we can selectively preserve passes, preserve the domtree.
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
+ }
+
+private:
+ template <typename F>
+ void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
+ // Substituting can cause recursive simplifications, which can invalidate
+ // our iterator. Use a WeakTrackingVH to hold onto it in case this
+ // happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakTrackingVH IterHandle(CurValue);
+
+ f();
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurValue) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ }
+
+ // Get the DominatorTree, building if necessary.
+ DominatorTree &getDT(Function &F) {
+ if (!DT)
+ DT = std::make_unique<DominatorTree>(F);
+ return *DT;
+ }
+
+ void removeAllAssertingVHReferences(Value *V);
+ bool eliminateAssumptions(Function &F);
+ bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
+ bool eliminateMostlyEmptyBlocks(Function &F);
+ BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
+ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
+ bool isPreheader);
+ bool makeBitReverse(Instruction &I);
+ bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
+ bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
+ bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
+ unsigned AddrSpace);
+ bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
+ bool optimizeInlineAsmInst(CallInst *CS);
+ bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
+ bool optimizeExt(Instruction *&I);
+ bool optimizeExtUses(Instruction *I);
+ bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
+ bool optimizeFunnelShift(IntrinsicInst *Fsh);
+ bool optimizeSelectInst(SelectInst *SI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+ bool optimizeSwitchType(SwitchInst *SI);
+ bool optimizeSwitchPhiConstants(SwitchInst *SI);
+ bool optimizeSwitchInst(SwitchInst *SI);
+ bool optimizeExtractElementInst(Instruction *Inst);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
+ bool fixupDbgValue(Instruction *I);
+ bool placeDbgValues(Function &F);
+ bool placePseudoProbes(Function &F);
+ bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
+ LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
+ bool tryToPromoteExts(TypePromotionTransaction &TPT,
+ const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool splitLargeGEPOffsets();
+ bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
+ SmallPtrSetImpl<Instruction *> &DeletedInstrs);
+ bool optimizePhiTypes(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
+ bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
+ bool simplifyOffsetableRelocate(GCStatepointInst &I);
+
+ bool tryToSinkFreeOperands(Instruction *I);
+ bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
+ CmpInst *Cmp, Intrinsic::ID IID);
+ bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ void verifyBFIUpdates(Function &F);
+};
+
+} // end anonymous namespace
+
+char CodeGenPrepare::ID = 0;
+
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation",
+ false, false)
+
+FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ DL = &F.getParent()->getDataLayout();
+
+ bool EverMadeChange = false;
+
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ SubtargetInfo = TM->getSubtargetImpl(F);
+ TLI = SubtargetInfo->getTargetLowering();
+ TRI = SubtargetInfo->getRegisterInfo();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BBSectionsProfileReader =
+ getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
+ OptSize = F.hasOptSize();
+ // Use the basic-block-sections profile to promote hot functions to .text.hot
+ // if requested.
+ if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
+ BBSectionsProfileReader->isFunctionHot(F.getName())) {
+ F.setSectionPrefix("hot");
+ } else if (ProfileGuidedSectionPrefix) {
+ // The hot attribute overrides profile-count-based hotness, while
+ // profile-count-based hotness overrides the cold attribute.
+ // This is conservative behavior.
+ if (F.hasFnAttribute(Attribute::Hot) ||
+ PSI->isFunctionHotInCallGraph(&F, *BFI))
+ F.setSectionPrefix("hot");
+ // If PSI shows this function is not hot, we place the function into
+ // the unlikely section if (1) PSI shows this is a cold function, or
+ // (2) the function has the cold attribute.
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
+ F.hasFnAttribute(Attribute::Cold))
+ F.setSectionPrefix("unlikely");
+ else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
+ PSI->isFunctionHotnessUnknown(F))
+ F.setSectionPrefix("unknown");
+ }
+
+ /// This optimization identifies DIV instructions that can be
+ /// profitably bypassed and carried out with a shorter, faster divide.
+ if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
+ const DenseMap<unsigned int, unsigned int> &BypassWidths =
+ TLI->getBypassSlowDivWidths();
+ BasicBlock *BB = &*F.begin();
+ while (BB != nullptr) {
+ // bypassSlowDivision may create new BBs, but we don't want to reapply the
+ // optimization to those blocks.
+ BasicBlock *Next = BB->getNextNode();
+ // F.hasOptSize is already checked in the outer if statement.
+ if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+ EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
+ BB = Next;
+ }
+ }
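+
+ // For illustration only (the rewrite is performed by the bypassSlowDivision
+ // utility, not by this pass itself): on a target reporting a 64-bit ->
+ // 32-bit bypass width, a division such as
+ //   %q = udiv i64 %a, %b
+ // is conceptually turned into a guarded fast path, roughly:
+ //   %or   = or i64 %a, %b
+ //   %fits = icmp ult i64 %or, 4294967296   ; do both operands fit in 32 bits?
+ //   br i1 %fits, label %fast, label %slow
+ // fast:                                    ; narrow, cheaper divide
+ //   %a32 = trunc i64 %a to i32
+ //   %b32 = trunc i64 %b to i32
+ //   %q32 = udiv i32 %a32, %b32
+ //   %qf  = zext i32 %q32 to i64
+ //   br label %join
+ // slow:                                    ; original full-width divide
+ //   %qs = udiv i64 %a, %b
+ //   br label %join
+ // join:
+ //   %q = phi i64 [ %qf, %fast ], [ %qs, %slow ]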
+
+ // Get rid of @llvm.assume builtins before attempting to eliminate empty
+ // blocks, since there might be blocks that only contain @llvm.assume calls
+ // (plus arguments that we can get rid of).
+ EverMadeChange |= eliminateAssumptions(F);
+
+ // Eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= eliminateMostlyEmptyBlocks(F);
+
+ ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
+ if (!DisableBranchOpts)
+ EverMadeChange |= splitBranchCondition(F, ModifiedDT);
+
+ // Split some critical edges where one of the sources is an indirect branch,
+ // to help generate sane code for PHIs involving such edges.
+ EverMadeChange |=
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
+
+ // If we are optimizing a huge function, we need to consider the build time,
+ // because the basic algorithm's complexity is close to O(N!).
+ IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
+
+ // Transformations above may invalidate dominator tree and/or loop info.
+ DT.reset();
+ LI->releaseMemory();
+ LI->analyze(getDT(F));
+
+ bool MadeChange = true;
+ bool FuncIterated = false;
+ while (MadeChange) {
+ MadeChange = false;
+
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+ if (FuncIterated && !FreshBBs.contains(&BB))
+ continue;
+
+ ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
+ bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
+
+ if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+ DT.reset();
+
+ MadeChange |= Changed;
+ if (IsHugeFunc) {
+ // If the BB is updated, it may still have a chance to be optimized.
+ // This usually happens during sink optimizations.
+ // For example:
+ //
+ // bb0:
+ // %and = and i32 %a, 4
+ // %cmp = icmp eq i32 %and, 0
+ //
+ // If %cmp sinks to another BB, %and will have a chance to sink as well.
+ if (Changed)
+ FreshBBs.insert(&BB);
+ else if (FuncIterated)
+ FreshBBs.erase(&BB);
+ } else {
+ // For small/normal functions, we restart BB iteration if the dominator
+ // tree of the Function was changed.
+ if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
+ break;
+ }
+ }
+ // We have iterated over all the BBs in the function (only relevant for huge functions).
+ FuncIterated = IsHugeFunc;
+
+ if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+ MadeChange |= mergeSExts(F);
+ if (!LargeOffsetGEPMap.empty())
+ MadeChange |= splitLargeGEPOffsets();
+ MadeChange |= optimizePhiTypes(F);
+
+ if (MadeChange)
+ eliminateFallThrough(F, DT.get());
+
+#ifndef NDEBUG
+ if (MadeChange && VerifyLoopInfo)
+ LI->verify(getDT(F));
+#endif
+
+ // Really free removed instructions during promotion.
+ for (Instruction *I : RemovedInsts)
+ I->deleteValue();
+
+ EverMadeChange |= MadeChange;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
+ LargeOffsetGEPMap.clear();
+ LargeOffsetGEPID.clear();
+ }
+
+ NewGEPBases.clear();
+ SunkAddrs.clear();
+
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ // Use a set vector to get deterministic iteration order. The order the
+ // blocks are removed may affect whether or not PHI nodes in successors
+ // are removed.
+ SmallSetVector<BasicBlock *, 8> WorkList;
+ for (BasicBlock &BB : F) {
+ SmallVector<BasicBlock *, 2> Successors(successors(&BB));
+ MadeChange |= ConstantFoldTerminator(&BB, true);
+ if (!MadeChange)
+ continue;
+
+ for (BasicBlock *Succ : Successors)
+ if (pred_empty(Succ))
+ WorkList.insert(Succ);
+ }
+
+ // Delete the dead blocks and any of their dead successors.
+ MadeChange |= !WorkList.empty();
+ while (!WorkList.empty()) {
+ BasicBlock *BB = WorkList.pop_back_val();
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
+
+ DeleteDeadBlock(BB);
+
+ for (BasicBlock *Succ : Successors)
+ if (pred_empty(Succ))
+ WorkList.insert(Succ);
+ }
+
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= eliminateFallThrough(F);
+
+ EverMadeChange |= MadeChange;
+ }
+
+ if (!DisableGCOpts) {
+ SmallVector<GCStatepointInst *, 2> Statepoints;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (auto *SP = dyn_cast<GCStatepointInst>(&I))
+ Statepoints.push_back(SP);
+ for (auto &I : Statepoints)
+ EverMadeChange |= simplifyOffsetableRelocate(*I);
+ }
+
+ // Do this last to clean up use-before-def scenarios introduced by other
+ // preparatory transforms.
+ EverMadeChange |= placeDbgValues(F);
+ EverMadeChange |= placePseudoProbes(F);
+
+#ifndef NDEBUG
+ if (VerifyBFIUpdates)
+ verifyBFIUpdates(F);
+#endif
+
+ return EverMadeChange;
+}
+
+bool CodeGenPrepare::eliminateAssumptions(Function &F) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ Instruction *I = &*(CurInstIterator++);
+ if (auto *Assume = dyn_cast<AssumeInst>(I)) {
+ MadeChange = true;
+ Value *Operand = Assume->getOperand(0);
+ Assume->eraseFromParent();
+
+ resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
+ });
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// An instruction is about to be deleted, so remove all references to it in our
+/// GEP-tracking data structures.
+void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
+ LargeOffsetGEPMap.erase(V);
+ NewGEPBases.erase(V);
+
+ auto GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ return;
+
+ LargeOffsetGEPID.erase(GEP);
+
+ auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
+ if (VecI == LargeOffsetGEPMap.end())
+ return;
+
+ auto &GEPVector = VecI->second;
+ llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
+
+ if (GEPVector.empty())
+ LargeOffsetGEPMap.erase(VecI);
+}
+
+// Verify BFI has been updated correctly by recomputing BFI and comparing them.
+void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
+ DominatorTree NewDT(F);
+ LoopInfo NewLI(NewDT);
+ BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
+ BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
+ NewBFI.verifyMatch(*BFI);
+}
+
+/// Merge basic blocks which are connected by a single edge, where one of the
+/// basic blocks has a single successor pointing to the other basic block,
+/// which has a single predecessor.
+bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ // Use a temporary array to avoid the iterator being invalidated when
+ // deleting blocks.
+ SmallVector<WeakTrackingVH, 16> Blocks;
+ for (auto &Block : llvm::drop_begin(F))
+ Blocks.push_back(&Block);
+
+ SmallSet<WeakTrackingVH, 16> Preds;
+ for (auto &Block : Blocks) {
+ auto *BB = cast_or_null<BasicBlock>(Block);
+ if (!BB)
+ continue;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
+ continue;
+
+ // Make an effort to skip unreachable blocks.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
+
+ // Merge BB into SinglePred and delete it.
+ MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
+ /* MemDep */ nullptr,
+ /* PredecessorWithTwoSuccessors */ false, DT);
+ Preds.insert(SinglePred);
+
+ if (IsHugeFunc) {
+ // Update FreshBBs to optimize the merged BB.
+ FreshBBs.insert(SinglePred);
+ FreshBBs.erase(BB);
+ }
+ }
+ }
+
+ // (Repeatedly) merging blocks into their predecessors can create redundant
+ // debug intrinsics.
+ for (const auto &Pred : Preds)
+ if (auto *BB = cast_or_null<BasicBlock>(Pred))
+ RemoveRedundantDbgInstrs(BB);
+
+ return Changed;
+}
+
+/// Find a destination block from BB if BB is a mergeable empty block.
+BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ return nullptr;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI->getIterator();
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ return nullptr;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ return nullptr;
+
+ if (!canMergeBlocks(BB, DestBB))
+ DestBB = nullptr;
+
+ return DestBB;
+}
+
+/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
+/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
+/// edges in ways that are non-optimal for isel. Start by eliminating these
+/// blocks so we can split them the way we want them.
+bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+ SmallPtrSet<BasicBlock *, 16> Preheaders;
+ SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+ while (!LoopList.empty()) {
+ Loop *L = LoopList.pop_back_val();
+ llvm::append_range(LoopList, *L);
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ Preheaders.insert(Preheader);
+ }
+
+ bool MadeChange = false;
+ // Copy blocks into a temporary array to avoid iterator invalidation issues
+ // as we remove them.
+ // Note that this intentionally skips the entry block.
+ SmallVector<WeakTrackingVH, 16> Blocks;
+ for (auto &Block : llvm::drop_begin(F))
+ Blocks.push_back(&Block);
+
+ for (auto &Block : Blocks) {
+ BasicBlock *BB = cast_or_null<BasicBlock>(Block);
+ if (!BB)
+ continue;
+ BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
+ if (!DestBB ||
+ !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
+ continue;
+
+ eliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
+ BasicBlock *DestBB,
+ bool isPreheader) {
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && isPreheader &&
+ !(BB->getSinglePredecessor() &&
+ BB->getSinglePredecessor()->getSingleSuccessor()))
+ return false;
+
+ // Skip merging if the block's successor is also a successor to any callbr
+ // that leads to this block.
+ // FIXME: Is this really needed? Is this a correctness issue?
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator()))
+ for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
+ if (DestBB == CBI->getSuccessor(i))
+ return false;
+ }
+
+ // Try to skip merging if the unique predecessor of BB is terminated by a
+ // switch or indirect branch instruction, and BB is used as an incoming block
+ // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
+ // add COPY instructions in the predecessor of BB instead of BB (if it is not
+ // merged). Note that the critical edge created by merging such blocks won't be
+ // split in MachineSink because the jump table is not analyzable. By keeping
+ // such empty block (BB), ISel will place COPY instructions in BB, not in the
+ // predecessor of BB.
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
+ isa<IndirectBrInst>(Pred->getTerminator())))
+ return true;
+
+ if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
+ return true;
+
+ // We use a simple cost heuristic which determines that skipping merging is
+ // profitable if the cost of skipping merging is less than the cost of
+ // merging: Cost(skipping merging) < Cost(merging BB), where the
+ // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
+ // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
+ // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
+ // Freq(Pred) / Freq(BB) > 2.
+ // Note that if there are multiple empty blocks sharing the same incoming
+ // value for the PHIs in the DestBB, we consider them together. In such
+ // case, Cost(merging BB) will be the sum of their frequencies.
+
+ if (!isa<PHINode>(DestBB->begin()))
+ return true;
+
+ SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
+
+ // Find all other incoming blocks from which incoming values of all PHIs in
+ // DestBB are the same as the ones from BB.
+ for (BasicBlock *DestBBPred : predecessors(DestBB)) {
+ if (DestBBPred == BB)
+ continue;
+
+ if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
+ return DestPN.getIncomingValueForBlock(BB) ==
+ DestPN.getIncomingValueForBlock(DestBBPred);
+ }))
+ SameIncomingValueBBs.insert(DestBBPred);
+ }
+
+ // See if all of BB's incoming values are the same as the value from Pred.
+ // In this case, there is no reason to skip merging because COPYs are
+ // expected to be placed in Pred already.
+ if (SameIncomingValueBBs.count(Pred))
+ return true;
+
+ BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
+ BlockFrequency BBFreq = BFI->getBlockFreq(BB);
+
+ for (auto *SameValueBB : SameIncomingValueBBs)
+ if (SameValueBB->getUniquePredecessor() == Pred &&
+ DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
+ BBFreq += BFI->getBlockFreq(SameValueBB);
+
+ return PredFreq.getFrequency() <=
+ BBFreq.getFrequency() * FreqRatioToSkipMerge;
+}
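+
+// A small worked example of the heuristic above, with illustrative numbers
+// and the default -cgp-freq-ratio-to-skip-merge=2: if Freq(Pred) = 300 and
+// Freq(BB) = 100, then 300 > 100 * 2, so this returns false and the empty
+// block is kept; the copy + branch executed at BB's frequency (~200 cost
+// units) is cheaper than a COPY executed at Pred's frequency (~300 units).
+// With Freq(Pred) = 150 instead, 150 <= 200 and the merge goes ahead.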
+
+/// Return true if we can merge BB into DestBB if there is a single
+/// unconditional branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+ // the successor. If there are more complex conditions (e.g. preheaders),
+ // don't mess around with them.
+ for (const PHINode &PN : BB->phis()) {
+ for (const User *U : PN.users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != DestBB || !isa<PHINode>(UI))
+ return false;
+ // If User is inside DestBB block and it is a PHINode then check
+ // incoming value. If incoming value is not from BB then this is
+ // a complex condition (e.g. preheaders) we want to avoid here.
+ if (UI->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(UI))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN)
+ return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock *, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ for (const PHINode &PN : DestBB->phis()) {
+ const Value *V1 = PN.getIncomingValueForBlock(Pred);
+ const Value *V2 = PN.getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2)
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
+static void replaceAllUsesWith(Value *Old, Value *New,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHuge) {
+ auto *OldI = dyn_cast<Instruction>(Old);
+ if (OldI) {
+ for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (IsHuge)
+ FreshBBs.insert(User->getParent());
+ }
+ }
+ Old->replaceAllUsesWith(New);
+}
+
+/// Eliminate a basic block that has only phi's and an unconditional branch in
+/// it.
+void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
+ << *BB << *DestBB);
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ assert(SinglePred == BB &&
+ "Single predecessor not the same as predecessor");
+ // Merge DestBB into SinglePred/BB and delete it.
+ MergeBlockIntoPredecessor(DestBB);
+ // Note: BB(=SinglePred) will not be deleted on this path.
+ // DestBB(=its single successor) is the one that was deleted.
+ LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
+
+ if (IsHugeFunc) {
+ // Update FreshBBs to optimize the merged BB.
+ FreshBBs.insert(SinglePred);
+ FreshBBs.erase(DestBB);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ for (PHINode &PN : DestBB->phis()) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN.removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN.addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (BasicBlock *Pred : predecessors(BB))
+ PN.addIncoming(InVal, Pred);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ BB->eraseFromParent();
+ ++NumBlocksElim;
+
+ LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+// Computes a map of base pointer relocation instructions to corresponding
+// derived pointer relocation instructions given a vector of all relocate calls
+static void computeBaseDerivedRelocateMap(
+ const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
+ DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
+ &RelocateInstMap) {
+ // Collect information in two maps: one primarily for locating the base object
+ // while filling the second map; the second map is the final structure holding
+ // a mapping between Base and corresponding Derived relocate calls
+ DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
+ for (auto *ThisRelocate : AllRelocateCalls) {
+ auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
+ ThisRelocate->getDerivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
+ }
+ for (auto &Item : RelocateIdxMap) {
+ std::pair<unsigned, unsigned> Key = Item.first;
+ if (Key.first == Key.second)
+ // Base relocation: nothing to insert
+ continue;
+
+ GCRelocateInst *I = Item.second;
+ auto BaseKey = std::make_pair(Key.first, Key.first);
+
+ // We're iterating over RelocateIdxMap so we cannot modify it.
+ auto MaybeBase = RelocateIdxMap.find(BaseKey);
+ if (MaybeBase == RelocateIdxMap.end())
+ // TODO: We might want to insert a new base object relocate and gep off
+ // that, if there are enough derived object relocates.
+ continue;
+
+ RelocateInstMap[MaybeBase->second].push_back(I);
+ }
+}
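+
+// For example (indices chosen for illustration): given a statepoint whose gc
+// arguments hold the base pointer at index 4 and a derived pointer at index
+// 5, the relocates
+//   %base'    = gc.relocate(%tok, i32 4, i32 4)   ; key (4, 4), base
+//   %derived' = gc.relocate(%tok, i32 4, i32 5)   ; key (4, 5), derived
+// yield RelocateInstMap[%base'] = { %derived' }: the entry whose two indices
+// match is the base relocation, and every other entry sharing base index 4
+// is attached to it.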
+
+// Accepts a GEP and extracts the operands into a vector provided they're all
+// small integer constants
+static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
+ SmallVectorImpl<Value *> &OffsetV) {
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ // Only accept small constant integer operands
+ auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Op || Op->getZExtValue() > 20)
+ return false;
+ }
+
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+ OffsetV.push_back(GEP->getOperand(i));
+ return true;
+}
+
+// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
+// replace, computes a replacement, and applies it.
+static bool
+simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
+ const SmallVectorImpl<GCRelocateInst *> &Targets) {
+ bool MadeChange = false;
+ // We must ensure that the relocation of the derived pointer is defined after
+ // the relocation of the base pointer. If we find a relocation corresponding
+ // to the base that is defined earlier than the relocation of the base, then
+ // we move the relocation of the base right before the found relocation. We
+ // consider only relocations in the same basic block as the relocation of the
+ // base; relocations from other basic blocks are skipped by this optimization
+ // and we do not care about them.
+ for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
+ &*R != RelocatedBase; ++R)
+ if (auto *RI = dyn_cast<GCRelocateInst>(R))
+ if (RI->getStatepoint() == RelocatedBase->getStatepoint())
+ if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
+ RelocatedBase->moveBefore(RI);
+ break;
+ }
+
+ for (GCRelocateInst *ToReplace : Targets) {
+ assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
+ "Not relocating a derived object of the original base object");
+ if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
+ // A duplicate relocate call. TODO: coalesce duplicates.
+ continue;
+ }
+
+ if (RelocatedBase->getParent() != ToReplace->getParent()) {
+ // Base and derived relocates are in different basic blocks.
+ // In this case transform is only valid when base dominates derived
+ // relocate. However it would be too expensive to check dominance
+ // for each such relocate, so we skip the whole transformation.
+ continue;
+ }
+
+ Value *Base = ToReplace->getBasePtr();
+ auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ assert(RelocatedBase->getNextNode() &&
+ "Should always have one since it's not a terminator");
+
+ // Insert after RelocatedBase
+ IRBuilder<> Builder(RelocatedBase->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+
+ // If gc_relocate does not match the actual type, cast it to the right type.
+ // In theory, there must be a bitcast after gc_relocate if the type does not
+ // match, and we should reuse it to get the derived pointer. But there could be
+ // cases like this:
+ // bb1:
+ // ...
+ // %g1 = call coldcc i8 addrspace(1)*
+ // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
+ //
+ // bb2:
+ // ...
+ // %g2 = call coldcc i8 addrspace(1)*
+ // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
+ //
+ // merge:
+ // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
+ // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
+ //
+ // In this case, we can no longer find the bitcast. So we insert a new
+ // bitcast whether there is already one or not. In this way, we can handle
+ // all cases, and the extra bitcast should be optimized away in later
+ // passes.
+ Value *ActualRelocatedBase = RelocatedBase;
+ if (RelocatedBase->getType() != Base->getType()) {
+ ActualRelocatedBase =
+ Builder.CreateBitCast(RelocatedBase, Base->getType());
+ }
+ Value *Replacement =
+ Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
+ ArrayRef(OffsetV));
+ Replacement->takeName(ToReplace);
+ // If the newly generated derived pointer's type does not match the original
+ // derived pointer's type, cast the new derived pointer to match it. Same
+ // reasoning as above.
+ Value *ActualReplacement = Replacement;
+ if (Replacement->getType() != ToReplace->getType()) {
+ ActualReplacement =
+ Builder.CreateBitCast(Replacement, ToReplace->getType());
+ }
+ ToReplace->replaceAllUsesWith(ActualReplacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
+ bool MadeChange = false;
+ SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
+ for (auto *U : I.users())
+ if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(Relocate);
+
+ // We need at least one base pointer relocation + one derived pointer
+ // relocation to mangle
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
+/// Sink the specified cast instruction into its user blocks.
+static bool SinkCast(CastInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock *, CastInst *> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(TheUse);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // The first insertion point of a block containing an EH pad is after the
+ // pad. If the pad is the user, we cannot sink the cast past the pad.
+ if (User->isEHPad())
+ continue;
+
+ // If the block selected to receive the cast is an EH pad that does not
+ // allow non-PHI instructions before the terminator, we can't sink the
+ // cast.
+ if (UserBB->getTerminator()->isEHPad())
+ continue;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB)
+ continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
+ CI->getType(), "", &*InsertPt);
+ InsertedCast->setDebugLoc(CI->getDebugLoc());
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ MadeChange = true;
+ ++NumCastUses;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ salvageDebugInfo(*CI);
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// If the specified cast instruction is a noop copy (e.g. it's casting from
+/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
+/// reduce the number of virtual registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
+ const DataLayout &DL) {
+ // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
+ // than sinking only nop casts, but is helpful on some platforms.
+ if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
+ if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
+ ASC->getDestAddressSpace()))
+ return false;
+ }
+
+ // If this is a noop copy,
+ EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, CI->getType());
+
+ // Is this an fp<->int conversion?
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT))
+ return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ return SinkCast(CI);
+}
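+
+// For illustration (hypothetical blocks %def and %use): on a target where i8
+// is promoted to i32, %t = trunc i32 %x to i8 is a noop copy. If %t is
+// defined in %def but only used in %use, SinkCast recreates the trunc in
+// %use and rewrites the use there, so no extra virtual register needs to be
+// live across the block boundary and coalesced later.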
+
+// Match a simple increment by constant operation. Note that if a sub is
+// matched, the step is negated (as if the step had been canonicalized to
+// an add, even though we leave the instruction alone.)
+bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
+ Constant *&Step) {
+ if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
+ match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
+ m_Instruction(LHS), m_Constant(Step)))))
+ return true;
+ if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
+ match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
+ m_Instruction(LHS), m_Constant(Step))))) {
+ Step = ConstantExpr::getNeg(Step);
+ return true;
+ }
+ return false;
+}
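+
+// For example (illustrative IR): %iv.next = add i32 %iv, 1 matches with
+// LHS = %iv and Step = i32 1, while %iv.next = sub i32 %iv, 4 matches with
+// LHS = %iv and Step = i32 -4, i.e. the step is reported as if the sub had
+// been rewritten as an add.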
+
+/// If given \p PN is an inductive variable with value IVInc coming from the
+/// backedge, and on each iteration it gets increased by Step, return pair
+/// <IVInc, Step>. Otherwise, return std::nullopt.
+static std::optional<std::pair<Instruction *, Constant *>>
+getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
+ const Loop *L = LI->getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
+ return std::nullopt;
+ auto *IVInc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
+ return std::nullopt;
+ Instruction *LHS = nullptr;
+ Constant *Step = nullptr;
+ if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
+ return std::make_pair(IVInc, Step);
+ return std::nullopt;
+}
+
+static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+ Instruction *LHS = nullptr;
+ Constant *Step = nullptr;
+ if (!matchIncrement(I, LHS, Step))
+ return false;
+ if (auto *PN = dyn_cast<PHINode>(LHS))
+ if (auto IVInc = getIVIncrement(PN, LI))
+ return IVInc->first == I;
+ return false;
+}
+
+bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+ Value *Arg0, Value *Arg1,
+ CmpInst *Cmp,
+ Intrinsic::ID IID) {
+ auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
+ if (!isIVIncrement(BO, LI))
+ return false;
+ const Loop *L = LI->getLoopFor(BO->getParent());
+ assert(L && "L should not be null after isIVIncrement()");
+ // Do not risk moving the increment into a child loop.
+ if (LI->getLoopFor(Cmp->getParent()) != L)
+ return false;
+
+ // Finally, we need to ensure that the insert point will dominate all
+ // existing uses of the increment.
+
+ auto &DT = getDT(*BO->getParent()->getParent());
+ if (DT.dominates(Cmp->getParent(), BO->getParent()))
+ // If we're moving up the dom tree, all uses are trivially dominated.
+ // (This is the common case for code produced by LSR.)
+ return true;
+
+ // Otherwise, special case the single use in the phi recurrence.
+ return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
+ };
+ if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
+ // We used to use a dominator tree here to allow multi-block optimization.
+ // But that was problematic because:
+ // 1. It could cause a perf regression by hoisting the math op into the
+ // critical path.
+ // 2. It could cause a perf regression by creating a value that was live
+ // across multiple blocks and increasing register pressure.
+ // 3. Use of a dominator tree could cause large compile-time regression.
+ // This is because we recompute the DT on every change in the main CGP
+ // run-loop. The recomputing is probably unnecessary in many cases, so if
+ // that was fixed, using a DT here would be ok.
+ //
+ // There is one important particular case we still want to handle: if BO is
+ // the IV increment. Important properties that make it profitable:
+ // - We can speculate IV increment anywhere in the loop (as long as the
+ // indvar Phi is its only user);
+ // - Upon computing Cmp, we effectively compute something equivalent to the
+ // IV increment (despite it being spelled differently in the IR). So moving
+ // it up to the cmp point does not really increase register pressure.
+ return false;
+ }
+
+ // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+ if (BO->getOpcode() == Instruction::Add &&
+ IID == Intrinsic::usub_with_overflow) {
+ assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+ Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+ }
+
+ // Insert at the first instruction of the pair.
+ Instruction *InsertPt = nullptr;
+ for (Instruction &Iter : *Cmp->getParent()) {
+ // If BO is an XOR, it is not guaranteed that it comes after both inputs to
+ // the overflow intrinsic are defined.
+ if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
+ InsertPt = &Iter;
+ break;
+ }
+ }
+ assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
+
+ IRBuilder<> Builder(InsertPt);
+ Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
+ if (BO->getOpcode() != Instruction::Xor) {
+ Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+ replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
+ } else
+ assert(BO->hasOneUse() &&
+ "Patterns with XOr should use the BO only in the compare");
+ Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+ replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
+ Cmp->eraseFromParent();
+ BO->eraseFromParent();
+ return true;
+}
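+
+// A sketch of the rewrite performed above (value names are illustrative),
+// here for IID == Intrinsic::uadd_with_overflow: the pair
+//   %math = add i32 %x, %y
+//   %ov   = icmp ult i32 %math, %x
+// becomes
+//   %mo   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+//   %math = extractvalue { i32, i1 } %mo, 0
+//   %ov   = extractvalue { i32, i1 } %mo, 1
+// after which the original add and icmp are erased.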
+
+/// Match special-case patterns that check for unsigned add overflow.
+static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
+ BinaryOperator *&Add) {
+ // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
+ // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ if (isa<Constant>(A))
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
+ B = ConstantInt::get(B->getType(), 1);
+ else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
+ B = ConstantInt::get(B->getType(), -1);
+ else
+ return false;
+
+ // Check the users of the variable operand of the compare looking for an add
+ // with the adjusted constant.
+ for (User *U : A->users()) {
+ if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
+ Add = cast<BinaryOperator>(U);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Try to combine the compare into a call to the llvm.uadd.with.overflow
+/// intrinsic. Return true if any changes were made.
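+///
+/// As an illustrative sketch (value names are made up here), the rewrite turns
+///   %add = add i64 %a, %b
+///   %ov  = icmp ult i64 %add, %a
+/// into
+///   %m   = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+///   %add = extractvalue { i64, i1 } %m, 0
+///   %ov  = extractvalue { i64, i1 } %m, 1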
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
+ ModifyDT &ModifiedDT) {
+ bool EdgeCase = false;
+ Value *A, *B;
+ BinaryOperator *Add;
+ if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
+ if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
+ return false;
+    // A and B come from the add matched by
+    // matchUAddWithOverflowConstantEdgeCases above.
+ A = Add->getOperand(0);
+ B = Add->getOperand(1);
+ EdgeCase = true;
+ }
+
+ if (!TLI->shouldFormOverflowOp(ISD::UADDO,
+ TLI->getValueType(*DL, Add->getType()),
+ Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp.
+ if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
+ Intrinsic::uadd_with_overflow))
+ return false;
+
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = ModifyDT::ModifyInstDT;
+ return true;
+}
+
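+/// Try to combine the compare into a call to the llvm.usub.with.overflow
+/// intrinsic, mirroring the uadd case above. As an illustrative sketch (value
+/// names are made up here), the rewrite turns
+///   %sub = sub i32 %a, %b
+///   %ov  = icmp ult i32 %a, %b
+/// into
+///   %m   = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+///   %sub = extractvalue { i32, i1 } %m, 0
+///   %ov  = extractvalue { i32, i1 } %m, 1
+/// Return true if any changes were made.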
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
+ ModifyDT &ModifiedDT) {
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+ if (isa<Constant>(A) && isa<Constant>(B))
+ return false;
+
+ // Convert (A u> B) to (A u< B) to simplify pattern matching.
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_UGT) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A == 0) is the same as (A u< 1).
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+ B = ConstantInt::get(B->getType(), 1);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A != 0) is the same as (0 u< A).
+ if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ if (Pred != ICmpInst::ICMP_ULT)
+ return false;
+
+ // Walk the users of a variable operand of a compare looking for a subtract or
+ // add with that same operand. Also match the 2nd operand of the compare to
+ // the add/sub, but that may be a negated constant operand of an add.
+ Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+ BinaryOperator *Sub = nullptr;
+ for (User *U : CmpVariableOperand->users()) {
+ // A - B, A u< B --> usubo(A, B)
+ if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+
+ // A + (-C), A u< C (canonicalized form of (sub A, C))
+ const APInt *CmpC, *AddC;
+ if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+ match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+ }
+ if (!Sub)
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+ TLI->getValueType(*DL, Sub->getType()),
+ Sub->hasNUsesOrMore(1)))
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+ Cmp, Intrinsic::usub_with_overflow))
+ return false;
+
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = ModifyDT::ModifyInstDT;
+ return true;
+}
+
+/// Sink the given CmpInst into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced. This is a clear win except on
+/// targets with multiple condition code registers (PowerPC), where it might
+/// lose; some adjustment may be wanted there.
+///
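+/// As an illustrative sketch (block and value names are made up here), a
+/// compare defined in one block and used in another
+///   DefBB:
+///     %c = icmp eq i32 %x, %y
+///     ...
+///   UseBB:
+///     br i1 %c, label %T, label %F
+/// is duplicated next to its user
+///   UseBB:
+///     %c.sunk = icmp eq i32 %x, %y
+///     br i1 %c.sunk, label %T, label %F
+/// and the original compare is erased once all of its uses have been rewritten.
+///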
+/// Return true if any changes are made.
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+ if (TLI.hasMultipleConditionRegisters())
+ return false;
+
+ // Avoid sinking soft-FP comparisons, since this can move them into a loop.
+ if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
+ return false;
+
+ // Only insert a cmp in each block once.
+ DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+ BasicBlock *DefBB = Cmp->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB)
+ continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1), "",
+ &*InsertPt);
+ // Propagate the debug info.
+ InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ MadeChange = true;
+ ++NumCmpUses;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (Cmp->use_empty()) {
+ Cmp->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// For a pattern like:
+///
+/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
+/// ...
+/// DomBB:
+/// ...
+/// br DomCond, TrueBB, CmpBB
+/// CmpBB: (with DomBB being the single predecessor)
+/// ...
+/// Cmp = icmp eq CmpOp0, CmpOp1
+/// ...
+///
+/// On targets where the lowering of icmp sgt/slt differs from the lowering of
+/// icmp eq (PowerPC), this would require two comparisons. This function tries
+/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0,
+/// CmpOp1'. After that, DomCond and Cmp can share the same comparison,
+/// eliminating one of them.
+///
+/// Return true if any changes are made.
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
+ const TargetLowering &TLI) {
+ if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred != ICmpInst::ICMP_EQ)
+ return false;
+
+ // If icmp eq has users other than BranchInst and SelectInst, converting it to
+ // icmp slt/sgt would introduce more redundant LLVM IR.
+ for (User *U : Cmp->users()) {
+ if (isa<BranchInst>(U))
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
+ continue;
+ return false;
+ }
+
+ // This is a cheap/incomplete check for dominance - just match a single
+ // predecessor with a conditional branch.
+ BasicBlock *CmpBB = Cmp->getParent();
+ BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+ if (!DomBB)
+ return false;
+
+ // We want to ensure that the only way control gets to the comparison of
+ // interest is that a less/greater than comparison on the same operands is
+ // false.
+ Value *DomCond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+ return false;
+ if (CmpBB != FalseBB)
+ return false;
+
+ Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
+ ICmpInst::Predicate DomPred;
+ if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
+ return false;
+ if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
+ return false;
+
+ // Convert the equality comparison to the opposite of the dominating
+ // comparison and swap the direction for all branch/select users.
+ // We have conceptually converted:
+ // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
+ // to
+ // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
+ // And similarly for branches.
+ for (User *U : Cmp->users()) {
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ SI->swapValues();
+ SI->swapProfMetadata();
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
+ return true;
+}
+
+/// Many architectures use the same instruction for both subtract and cmp. Try
+/// to swap cmp operands to match subtract operations to allow for CSE.
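+///
+/// As an illustrative sketch (value names are made up here), given
+///   %d = sub i32 %b, %a
+///   %c = icmp ult i32 %a, %b
+/// swapping the compare into 'icmp ugt i32 %b, %a' gives it the same operand
+/// order as the subtract, so the two can be CSE'd on such targets.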
+static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
+ Value *Op0 = Cmp->getOperand(0);
+ Value *Op1 = Cmp->getOperand(1);
+ if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
+ isa<Constant>(Op1) || Op0 == Op1)
+ return false;
+
+ // If a subtract already has the same operands as a compare, swapping would be
+ // bad. If a subtract has the same operands as a compare but in reverse order,
+ // then swapping is good.
+ int GoodToSwap = 0;
+ unsigned NumInspected = 0;
+ for (const User *U : Op0->users()) {
+ // Avoid walking many users.
+ if (++NumInspected > 128)
+ return false;
+ if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
+ GoodToSwap++;
+ else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
+ GoodToSwap--;
+ }
+
+ if (GoodToSwap > 0) {
+ Cmp->swapOperands();
+ return true;
+ }
+ return false;
+}
+
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
+ if (sinkCmpExpression(Cmp, *TLI))
+ return true;
+
+ if (combineToUAddWithOverflow(Cmp, ModifiedDT))
+ return true;
+
+ if (combineToUSubWithOverflow(Cmp, ModifiedDT))
+ return true;
+
+ if (foldICmpWithDominatingICmp(Cmp, *TLI))
+ return true;
+
+ if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
+ return true;
+
+ return false;
+}
+
+/// Duplicate and sink the given 'and' instruction into user blocks where it is
+/// used in a compare to allow isel to generate better code for targets where
+/// this operation can be combined.
+///
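+/// As an illustrative sketch (block and value names are made up here), an
+/// 'and' feeding only zero-compares in other blocks
+///   DefBB:
+///     %m = and i32 %x, 255
+///   UseBB:
+///     %c = icmp eq i32 %m, 0
+/// is duplicated into each user block
+///   UseBB:
+///     %m.dup = and i32 %x, 255
+///     %c = icmp eq i32 %m.dup, 0
+/// so isel can fold the mask and the compare together.
+///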
+/// Return true if any changes are made.
+static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
+ SetOfInstrs &InsertedInsts) {
+ // Double-check that we're not trying to optimize an instruction that was
+ // already optimized by some other part of this pass.
+ assert(!InsertedInsts.count(AndI) &&
+ "Attempting to optimize already optimized and instruction");
+ (void)InsertedInsts;
+
+ // Nothing to do for single use in same basic block.
+ if (AndI->hasOneUse() &&
+ AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
+ return false;
+
+ // Try to avoid cases where sinking/duplicating is likely to increase register
+ // pressure.
+ if (!isa<ConstantInt>(AndI->getOperand(0)) &&
+ !isa<ConstantInt>(AndI->getOperand(1)) &&
+ AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
+ return false;
+
+ for (auto *U : AndI->users()) {
+ Instruction *User = cast<Instruction>(U);
+
+ // Only sink 'and' feeding icmp with 0.
+ if (!isa<ICmpInst>(User))
+ return false;
+
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
+ if (!CmpC || !CmpC->isZero())
+ return false;
+ }
+
+ if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+ LLVM_DEBUG(AndI->getParent()->dump());
+
+ // Push the 'and' into the same block as the icmp 0. There should only be
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
+ // others, so we don't need to keep track of which BBs we insert into.
+ for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+
+ // Keep the 'and' in the same place if the use is already in the same block.
+ Instruction *InsertPt =
+ User->getParent() == AndI->getParent() ? AndI : User;
+ Instruction *InsertedAnd =
+ BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
+ AndI->getOperand(1), "", InsertPt);
+ // Propagate the debug info.
+ InsertedAnd->setDebugLoc(AndI->getDebugLoc());
+
+ // Replace a use of the 'and' with a use of the new 'and'.
+ TheUse = InsertedAnd;
+ ++NumAndUses;
+ LLVM_DEBUG(User->getParent()->dump());
+ }
+
+ // We removed all uses, nuke the and.
+ AndI->eraseFromParent();
+ return true;
+}
+
+/// Check if the candidates could be combined with a shift instruction, which
+/// includes:
+/// 1. A truncate instruction
+/// 2. An 'and' instruction where the immediate is a mask of the low bits:
+///    imm & (imm+1) == 0
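+///    For example, 0x00ff qualifies as a low-bit mask (0x00ff & 0x0100 == 0),
+///    while 0x00f0 does not (0x00f0 & 0x00f1 == 0x00f0).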
+static bool isExtractBitsCandidateUse(Instruction *User) {
+ if (!isa<TruncInst>(User)) {
+ if (User->getOpcode() != Instruction::And ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return false;
+
+ const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
+
+ if ((Cimm & (Cimm + 1)).getBoolValue())
+ return false;
+ }
+ return true;
+}
+
+/// Sink both shift and truncate instruction to the use of truncate's BB.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+ DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+ const TargetLowering &TLI, const DataLayout &DL) {
+ BasicBlock *UserBB = User->getParent();
+ DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+ auto *TruncI = cast<TruncInst>(User);
+ bool MadeChange = false;
+
+ for (Value::user_iterator TruncUI = TruncI->user_begin(),
+ TruncE = TruncI->user_end();
+ TruncUI != TruncE;) {
+
+ Use &TruncTheUse = TruncUI.getUse();
+ Instruction *TruncUser = cast<Instruction>(*TruncUI);
+ // Preincrement use iterator so we don't invalidate it.
+
+ ++TruncUI;
+
+ int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+ if (!ISDOpcode)
+ continue;
+
+ // If the use is actually a legal node, there will not be an
+ // implicit truncate.
+ // FIXME: always querying the result type is just an
+ // approximation; some nodes' legality is determined by the
+ // operand or other means. There's no good way to find out though.
+ if (TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
+ continue;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(TruncUser))
+ continue;
+
+ BasicBlock *TruncUserBB = TruncUser->getParent();
+
+ if (UserBB == TruncUserBB)
+ continue;
+
+ BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+ CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+ if (!InsertedShift && !InsertedTrunc) {
+ BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+ assert(InsertPt != TruncUserBB->end());
+ // Sink the shift
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+ else
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
+
+ // Sink the trunc
+ BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+ TruncInsertPt++;
+ assert(TruncInsertPt != TruncUserBB->end());
+
+ InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+ TruncI->getType(), "", &*TruncInsertPt);
+ InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
+
+ MadeChange = true;
+
+ TruncTheUse = InsertedTrunc;
+ }
+ }
+ return MadeChange;
+}
+
+/// Sink the shift *right* instruction into user blocks if the uses could
+/// potentially be combined with this shift instruction to generate a
+/// BitExtract instruction. This is only applied if the architecture supports
+/// BitExtract instructions. Here is an example:
+/// BB1:
+/// %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+/// %x.extract.shift.1 = lshr i64 %arg1, 32
+/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate BitExtract
+/// instruction.
+/// Return true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+ const TargetLowering &TLI,
+ const DataLayout &DL) {
+ BasicBlock *DefBB = ShiftI->getParent();
+
+ /// Only insert instructions in each block once.
+ DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+ bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ if (!isExtractBitsCandidateUse(User))
+ continue;
+
+ BasicBlock *UserBB = User->getParent();
+
+ if (UserBB == DefBB) {
+      // If the shift and truncate instructions are in the same BB, the use of
+      // the truncate (TruncUse) may still introduce another truncate if its
+      // type is not legal. In this case, we would like to sink both the shift
+      // and the truncate into the BB of TruncUse.
+ // for example:
+ // BB1:
+ // i64 shift.result = lshr i64 opnd, imm
+ // trunc.result = trunc shift.result to i16
+ //
+ // BB2:
+ // ----> We will have an implicit truncate here if the architecture does
+ // not have i16 compare.
+ // cmp i16 trunc.result, opnd2
+ //
+ if (isa<TruncInst>(User) &&
+ shiftIsLegal
+ // If the type of the truncate is legal, no truncate will be
+ // introduced in other basic blocks.
+ && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
+ MadeChange =
+ SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
+
+ continue;
+ }
+ // If we have already inserted a shift into this block, use it.
+ BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+ if (!InsertedShift) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+ else
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
+
+ MadeChange = true;
+ }
+
+ // Replace a use of the shift with a use of the new shift.
+ TheUse = InsertedShift;
+ }
+
+ // If we removed all uses, or there are none, nuke the shift.
+ if (ShiftI->use_empty()) {
+ salvageDebugInfo(*ShiftI);
+ ShiftI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// If counting leading or trailing zeros is an expensive operation and a zero
+/// input is defined, add a check for zero to avoid calling the intrinsic.
+///
+/// We want to transform:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+///
+/// into:
+/// entry:
+/// %cmpz = icmp eq i64 %A, 0
+/// br i1 %cmpz, label %cond.end, label %cond.false
+/// cond.false:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+/// br label %cond.end
+/// cond.end:
+/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+///
+/// If the transform is performed, return true and set ModifiedDT to true.
+static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ LoopInfo &LI,
+ const TargetLowering *TLI,
+ const DataLayout *DL, ModifyDT &ModifiedDT,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHugeFunc) {
+ // If a zero input is undefined, it doesn't make sense to despeculate that.
+ if (match(CountZeros->getOperand(1), m_One()))
+ return false;
+
+ // If it's cheap to speculate, there's nothing to do.
+ Type *Ty = CountZeros->getType();
+ auto IntrinsicID = CountZeros->getIntrinsicID();
+ if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
+ (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
+ return false;
+
+ // Only handle legal scalar cases. Anything else requires too much work.
+ unsigned SizeInBits = Ty->getScalarSizeInBits();
+ if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
+ return false;
+
+ // Bail if the value is never zero.
+ Use &Op = CountZeros->getOperandUse(0);
+ if (isKnownNonZero(Op, *DL))
+ return false;
+
+ // The intrinsic will be sunk behind a compare against zero and branch.
+ BasicBlock *StartBlock = CountZeros->getParent();
+ BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+ if (IsHugeFunc)
+ FreshBBs.insert(CallBlock);
+
+ // Create another block after the count zero intrinsic. A PHI will be added
+ // in this block to select the result of the intrinsic or the bit-width
+ // constant if the input to the intrinsic is zero.
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
+ BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+ if (IsHugeFunc)
+ FreshBBs.insert(EndBlock);
+
+ // Update the LoopInfo. The new blocks are in the same loop as the start
+ // block.
+ if (Loop *L = LI.getLoopFor(StartBlock)) {
+ L->addBasicBlockToLoop(CallBlock, LI);
+ L->addBasicBlockToLoop(EndBlock, LI);
+ }
+
+ // Set up a builder to create a compare, conditional branch, and PHI.
+ IRBuilder<> Builder(CountZeros->getContext());
+ Builder.SetInsertPoint(StartBlock->getTerminator());
+ Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
+
+ // Replace the unconditional branch that was created by the first split with
+ // a compare against zero and a conditional branch.
+ Value *Zero = Constant::getNullValue(Ty);
+ // Avoid introducing branch on poison. This also replaces the ctz operand.
+ if (!isGuaranteedNotToBeUndefOrPoison(Op))
+ Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
+ Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
+ Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Create a PHI in the end block to select either the output of the intrinsic
+ // or the bit width of the operand.
+ Builder.SetInsertPoint(&EndBlock->front());
+ PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
+ replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
+ Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
+ PN->addIncoming(BitWidth, StartBlock);
+ PN->addIncoming(CountZeros, CallBlock);
+
+ // We are explicitly handling the zero case, so we can set the intrinsic's
+ // undefined zero argument to 'true'. This will also prevent reprocessing the
+ // intrinsic; we only despeculate when a zero input is defined.
+ CountZeros->setArgOperand(1, Builder.getTrue());
+ ModifiedDT = ModifyDT::ModifyBBDT;
+ return true;
+}
+
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+  // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (CI->isInlineAsm()) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (optimizeInlineAsmInst(CI))
+ return true;
+ }
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+  // idea.
+ unsigned MinSize;
+ Align PrefAlign;
+ if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->args()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(DL->getIndexSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()),
+ 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ if (!isAligned(PrefAlign, Offset2))
+ continue;
+ AllocaInst *AI;
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
+ DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
+ GV->getPointerAlignment(*DL) < PrefAlign &&
+ DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
+ }
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
+ MaybeAlign MIDestAlign = MI->getDestAlign();
+ if (!MIDestAlign || DestAlign > *MIDestAlign)
+ MI->setDestAlignment(DestAlign);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ MaybeAlign MTISrcAlign = MTI->getSourceAlign();
+ Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
+ if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
+ MTI->setSourceAlignment(SrcAlign);
+ }
+ }
+
+ // If we have a cold call site, try to sink addressing computation into the
+ // cold block. This interacts with our handling for loads and stores to
+ // ensure that we can fold all uses of a potential addressing computation
+ // into their uses. TODO: generalize this to work over profiling data
+ if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
+ !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+ for (auto &Arg : CI->args()) {
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned AS = Arg->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
+ return true;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::assume:
+ llvm_unreachable("llvm.assume should have been removed already");
+ case Intrinsic::experimental_widenable_condition: {
+      // Give up on future widening opportunities so that we can fold away dead
+ // paths and merge blocks before going into block-local instruction
+ // selection.
+ if (II->use_empty()) {
+ II->eraseFromParent();
+ return true;
+ }
+ Constant *RetVal = ConstantInt::getTrue(II->getContext());
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr: {
+ ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+ if (!ExtVal || !ExtVal->hasOneUse() ||
+ ExtVal->getParent() == CI->getParent())
+ return false;
+ // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+ ExtVal->moveBefore(CI);
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(ExtVal);
+ return true;
+ }
+
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group: {
+ Value *ArgVal = II->getArgOperand(0);
+ auto it = LargeOffsetGEPMap.find(II);
+ if (it != LargeOffsetGEPMap.end()) {
+ // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
+ // Make sure not to have to deal with iterator invalidation
+ // after possibly adding ArgVal to LargeOffsetGEPMap.
+ auto GEPs = std::move(it->second);
+ LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
+ LargeOffsetGEPMap.erase(II);
+ }
+
+ replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
+ II->eraseFromParent();
+ return true;
+ }
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ // If counting zeros is expensive, try to avoid it.
+ return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
+ IsHugeFunc);
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ return optimizeFunnelShift(II);
+ case Intrinsic::dbg_assign:
+ case Intrinsic::dbg_value:
+ return fixupDbgValue(II);
+ case Intrinsic::masked_gather:
+ return optimizeGatherScatterInst(II, II->getArgOperand(0));
+ case Intrinsic::masked_scatter:
+ return optimizeGatherScatterInst(II, II->getArgOperand(1));
+ }
+
+ SmallVector<Value *, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty()) {
+ Value *PtrVal = PtrOps.pop_back_val();
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
+ return true;
+ }
+ }
+
+ // From here on out we're working with named functions.
+ if (!CI->getCalledFunction())
+ return false;
+
+ // Lower all default uses of _chk calls. This is very similar
+ // to what InstCombineCalls does, but here we are only lowering calls
+ // to fortified library functions (e.g. __memcpy_chk) that have the default
+ // "don't know" as the objectsize. Anything else should be left alone.
+ FortifiedLibCallSimplifier Simplifier(TLInfo, true);
+ IRBuilder<> Builder(CI);
+ if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
+ replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+/// Look for opportunities to duplicate return instructions to the predecessor
+/// to enable tail call optimizations. The case it is currently looking for is:
+/// @code
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+/// @endcode
+///
+/// =>
+///
+/// @code
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+/// @endcode
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
+ ModifyDT &ModifiedDT) {
+ if (!BB->getTerminator())
+ return false;
+
+ ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RetI)
+ return false;
+
+ assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
+
+ PHINode *PN = nullptr;
+ ExtractValueInst *EVI = nullptr;
+ BitCastInst *BCI = nullptr;
+ Value *V = RetI->getReturnValue();
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ EVI = dyn_cast<ExtractValueInst>(V);
+ if (EVI) {
+ V = EVI->getOperand(0);
+ if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
+ return false;
+ }
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
+
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
+ const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
+ if (BC && BC->hasOneUse())
+ Inst = BC->user_back();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_end;
+ return false;
+ };
+
+ // Make sure there are no instructions between the first instruction
+ // and return.
+ const Instruction *BI = BB->getFirstNonPHI();
+ // Skip over debug and the bitcast.
+ while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
+ isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+ BI = BI->getNextNode();
+ if (BI != RetI)
+ return false;
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ /// call.
+ const Function *F = BB->getParent();
+ SmallVector<BasicBlock *, 4> TailCallBBs;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ // Look through bitcasts.
+ Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
+ CallInst *CI = dyn_cast<CallInst>(IncomingVal);
+ BasicBlock *PredBB = PN->getIncomingBlock(I);
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
+ TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
+ TailCallBBs.push_back(PredBB);
+ }
+ } else {
+ SmallPtrSet<BasicBlock *, 4> VisitedBBs;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (!VisitedBBs.insert(Pred).second)
+ continue;
+ if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
+ TailCallBBs.push_back(Pred);
+ }
+ }
+ }
+
+ bool Changed = false;
+ for (auto const &TailCallBB : TailCallBBs) {
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into TailCallBB.
+ (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
+ assert(!VerifyBFIUpdates ||
+ BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
+ BFI->setBlockFreq(
+ BB,
+ (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency());
+ ModifiedDT = ModifyDT::ModifyBBDT;
+ Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This is an extended version of TargetLowering::AddrMode
+/// which holds actual Value*'s for register values.
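+/// Conceptually the matched address has the form
+///   BaseGV + BaseReg + BaseOffs + ScaledReg * Scale
+/// where the Value* members below record the SSA values feeding the register
+/// operands.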
+struct ExtAddrMode : public TargetLowering::AddrMode {
+ Value *BaseReg = nullptr;
+ Value *ScaledReg = nullptr;
+ Value *OriginalValue = nullptr;
+ bool InBounds = true;
+
+ enum FieldName {
+ NoField = 0x00,
+ BaseRegField = 0x01,
+ BaseGVField = 0x02,
+ BaseOffsField = 0x04,
+ ScaledRegField = 0x08,
+ ScaleField = 0x10,
+ MultipleFields = 0xff
+ };
+
+ ExtAddrMode() = default;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+ FieldName compare(const ExtAddrMode &other) {
+    // First check that the types are the same on each field, as differing
+    // types are something we cannot cope with later on.
+ if (BaseReg && other.BaseReg &&
+ BaseReg->getType() != other.BaseReg->getType())
+ return MultipleFields;
+ if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
+ return MultipleFields;
+ if (ScaledReg && other.ScaledReg &&
+ ScaledReg->getType() != other.ScaledReg->getType())
+ return MultipleFields;
+
+ // Conservatively reject 'inbounds' mismatches.
+ if (InBounds != other.InBounds)
+ return MultipleFields;
+
+ // Check each field to see if it differs.
+ unsigned Result = NoField;
+ if (BaseReg != other.BaseReg)
+ Result |= BaseRegField;
+ if (BaseGV != other.BaseGV)
+ Result |= BaseGVField;
+ if (BaseOffs != other.BaseOffs)
+ Result |= BaseOffsField;
+ if (ScaledReg != other.ScaledReg)
+ Result |= ScaledRegField;
+ // Don't count 0 as being a different scale, because that actually means
+ // unscaled (which will already be counted by having no ScaledReg).
+ if (Scale && other.Scale && Scale != other.Scale)
+ Result |= ScaleField;
+
+ if (llvm::popcount(Result) > 1)
+ return MultipleFields;
+ else
+ return static_cast<FieldName>(Result);
+ }
+
+  // An AddrMode is trivial if it involves no calculation, i.e. it is just a
+  // base with no offset.
+ bool isTrivial() {
+ // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
+ // trivial if at most one of these terms is nonzero, except that BaseGV and
+ // BaseReg both being zero actually means a null pointer value, which we
+ // consider to be 'non-zero' here.
+ return !BaseOffs && !Scale && !(BaseGV && BaseReg);
+ }
+
+ Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
+ switch (Field) {
+ default:
+ return nullptr;
+ case BaseRegField:
+ return BaseReg;
+ case BaseGVField:
+ return BaseGV;
+ case ScaledRegField:
+ return ScaledReg;
+ case BaseOffsField:
+ return ConstantInt::get(IntPtrTy, BaseOffs);
+ }
+ }
+
+ void SetCombinedField(FieldName Field, Value *V,
+ const SmallVectorImpl<ExtAddrMode> &AddrModes) {
+ switch (Field) {
+ default:
+ llvm_unreachable("Unhandled fields are expected to be rejected earlier");
+ break;
+ case ExtAddrMode::BaseRegField:
+ BaseReg = V;
+ break;
+ case ExtAddrMode::BaseGVField:
+ // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
+ // in the BaseReg field.
+ assert(BaseReg == nullptr);
+ BaseReg = V;
+ BaseGV = nullptr;
+ break;
+ case ExtAddrMode::ScaledRegField:
+ ScaledReg = V;
+ // If we have a mix of scaled and unscaled addrmodes then we want scale
+ // to be the scale and not zero.
+ if (!Scale)
+ for (const ExtAddrMode &AM : AddrModes)
+ if (AM.Scale) {
+ Scale = AM.Scale;
+ break;
+ }
+ break;
+ case ExtAddrMode::BaseOffsField:
+ // The offset is no longer a constant, so it goes in ScaledReg with a
+ // scale of 1.
+ assert(ScaledReg == nullptr);
+ ScaledReg = V;
+ Scale = 1;
+ BaseOffs = 0;
+ break;
+ }
+ }
+};
+
+#ifndef NDEBUG
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+ AM.print(OS);
+ return OS;
+}
+#endif
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (InBounds)
+ OS << "inbounds ";
+ if (BaseGV) {
+ OS << "GV:";
+ BaseGV->printAsOperand(OS, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs) {
+ OS << (NeedPlus ? " + " : "") << BaseOffs;
+ NeedPlus = true;
+ }
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "") << "Base:";
+ BaseReg->printAsOperand(OS, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "") << Scale << "*";
+ ScaledReg->printAsOperand(OS, /*PrintType=*/false);
+ }
+
+ OS << ']';
+}
+
+LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+} // end anonymous namespace
+
+namespace {
+
+/// This class provides transaction based operation on the IR.
+/// Every change made through this class is recorded in the internal state and
+/// can be undone (rollback) until commit is called.
+/// CGP does not check if instructions could be speculatively executed when
+/// moved. Preserving the original location would pessimize the debugging
+/// experience, as well as negatively impact the quality of sample PGO.
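+///
+/// A minimal usage sketch (the variable names and the profitability check are
+/// made up here; the transaction API is the one declared below):
+///   TypePromotionTransaction TPT(RemovedInsts);
+///   auto RestorePt = TPT.getRestorationPoint();
+///   TPT.mutateType(Inst, PromotedTy);     // speculative IR change
+///   if (!WorthIt)
+///     TPT.rollback(RestorePt);            // undo everything since RestorePt
+///   else
+///     TPT.commit();                       // keep the recorded changes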
+class TypePromotionTransaction {
+ /// This represents the common interface of the individual transaction.
+ /// Each class implements the logic for doing one specific modification on
+ /// the IR via the TypePromotionTransaction.
+ class TypePromotionAction {
+ protected:
+ /// The Instruction modified.
+ Instruction *Inst;
+
+ public:
+ /// Constructor of the action.
+ /// The constructor performs the related action on the IR.
+ TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
+
+ virtual ~TypePromotionAction() = default;
+
+ /// Undo the modification done by this action.
+ /// When this method is called, the IR must be in the same state as it was
+ /// before this action was applied.
+ /// \pre Undoing the action works if and only if the IR is in the exact same
+ /// state as it was directly after this action was applied.
+ virtual void undo() = 0;
+
+    /// Commit every change made by this action.
+ /// When the results on the IR of the action are to be kept, it is important
+ /// to call this function, otherwise hidden information may be kept forever.
+ virtual void commit() {
+ // Nothing to be done, this action is not doing anything.
+ }
+ };
+
+ /// Utility to remember the position of an instruction.
+ class InsertionHandler {
+ /// Position of an instruction.
+    /// Either the instruction:
+    /// - is the first in its basic block, in which case BB is used, or
+    /// - has a previous instruction, in which case PrevInst is used.
+ union {
+ Instruction *PrevInst;
+ BasicBlock *BB;
+ } Point;
+
+ /// Remember whether or not the instruction had a previous instruction.
+ bool HasPrevInstruction;
+
+ public:
+ /// Record the position of \p Inst.
+ InsertionHandler(Instruction *Inst) {
+ BasicBlock::iterator It = Inst->getIterator();
+ HasPrevInstruction = (It != (Inst->getParent()->begin()));
+ if (HasPrevInstruction)
+ Point.PrevInst = &*--It;
+ else
+ Point.BB = Inst->getParent();
+ }
+
+ /// Insert \p Inst at the recorded position.
+ void insert(Instruction *Inst) {
+ if (HasPrevInstruction) {
+ if (Inst->getParent())
+ Inst->removeFromParent();
+ Inst->insertAfter(Point.PrevInst);
+ } else {
+ Instruction *Position = &*Point.BB->getFirstInsertionPt();
+ if (Inst->getParent())
+ Inst->moveBefore(Position);
+ else
+ Inst->insertBefore(Position);
+ }
+ }
+ };
+
+ /// Move an instruction before another.
+ class InstructionMoveBefore : public TypePromotionAction {
+ /// Original position of the instruction.
+ InsertionHandler Position;
+
+ public:
+ /// Move \p Inst before \p Before.
+ InstructionMoveBefore(Instruction *Inst, Instruction *Before)
+ : TypePromotionAction(Inst), Position(Inst) {
+ LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
+ << "\n");
+ Inst->moveBefore(Before);
+ }
+
+ /// Move the instruction back to its original position.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
+ Position.insert(Inst);
+ }
+ };
+
+ /// Set the operand of an instruction with a new value.
+ class OperandSetter : public TypePromotionAction {
+ /// Original operand of the instruction.
+ Value *Origin;
+
+ /// Index of the modified instruction.
+ unsigned Idx;
+
+ public:
+ /// Set \p Idx operand of \p Inst with \p NewVal.
+ OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
+ : TypePromotionAction(Inst), Idx(Idx) {
+ LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
+ << "for:" << *Inst << "\n"
+ << "with:" << *NewVal << "\n");
+ Origin = Inst->getOperand(Idx);
+ Inst->setOperand(Idx, NewVal);
+ }
+
+ /// Restore the original value of the instruction.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
+ << "for: " << *Inst << "\n"
+ << "with: " << *Origin << "\n");
+ Inst->setOperand(Idx, Origin);
+ }
+ };
+
+ /// Hide the operands of an instruction.
+  /// Pretend that this instruction is not using any of its operands.
+ class OperandsHider : public TypePromotionAction {
+ /// The list of original operands.
+ SmallVector<Value *, 4> OriginalValues;
+
+ public:
+ /// Remove \p Inst from the uses of the operands of \p Inst.
+ OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
+ LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
+ unsigned NumOpnds = Inst->getNumOperands();
+ OriginalValues.reserve(NumOpnds);
+ for (unsigned It = 0; It < NumOpnds; ++It) {
+ // Save the current operand.
+ Value *Val = Inst->getOperand(It);
+ OriginalValues.push_back(Val);
+ // Set a dummy one.
+ // We could use OperandSetter here, but that would imply an overhead
+ // that we are not willing to pay.
+ Inst->setOperand(It, UndefValue::get(Val->getType()));
+ }
+ }
+
+ /// Restore the original list of uses.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
+ for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
+ Inst->setOperand(It, OriginalValues[It]);
+ }
+ };
+
+ /// Build a truncate instruction.
+ class TruncBuilder : public TypePromotionAction {
+ Value *Val;
+
+ public:
+ /// Build a truncate instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// trunc Opnd to Ty.
+ TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
+ IRBuilder<> Builder(Opnd);
+ Builder.SetCurrentDebugLocation(DebugLoc());
+ Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
+ LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
+ }
+
+ /// Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// Remove the built instruction.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// Build a sign extension instruction.
+ class SExtBuilder : public TypePromotionAction {
+ Value *Val;
+
+ public:
+ /// Build a sign extension instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// sext Opnd to Ty.
+ SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+ : TypePromotionAction(InsertPt) {
+ IRBuilder<> Builder(InsertPt);
+ Val = Builder.CreateSExt(Opnd, Ty, "promoted");
+ LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
+ }
+
+ /// Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// Remove the built instruction.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// Build a zero extension instruction.
+ class ZExtBuilder : public TypePromotionAction {
+ Value *Val;
+
+ public:
+ /// Build a zero extension instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// zext Opnd to Ty.
+ ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+ : TypePromotionAction(InsertPt) {
+ IRBuilder<> Builder(InsertPt);
+ Builder.SetCurrentDebugLocation(DebugLoc());
+ Val = Builder.CreateZExt(Opnd, Ty, "promoted");
+ LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
+ }
+
+ /// Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// Remove the built instruction.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// Mutate an instruction to another type.
+ class TypeMutator : public TypePromotionAction {
+ /// Record the original type.
+ Type *OrigTy;
+
+ public:
+ /// Mutate the type of \p Inst into \p NewTy.
+ TypeMutator(Instruction *Inst, Type *NewTy)
+ : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
+ LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
+ << "\n");
+ Inst->mutateType(NewTy);
+ }
+
+ /// Mutate the instruction back to its original type.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
+ << "\n");
+ Inst->mutateType(OrigTy);
+ }
+ };
+
+ /// Replace the uses of an instruction by another instruction.
+ class UsesReplacer : public TypePromotionAction {
+ /// Helper structure to keep track of the replaced uses.
+ struct InstructionAndIdx {
+      /// The instruction that uses the original (replaced) instruction.
+ Instruction *Inst;
+
+      /// The operand index at which the original instruction is used by Inst.
+ unsigned Idx;
+
+ InstructionAndIdx(Instruction *Inst, unsigned Idx)
+ : Inst(Inst), Idx(Idx) {}
+ };
+
+ /// Keep track of the original uses (pair Instruction, Index).
+ SmallVector<InstructionAndIdx, 4> OriginalUses;
+ /// Keep track of the debug users.
+ SmallVector<DbgValueInst *, 1> DbgValues;
+
+ /// Keep track of the new value so that we can undo it by replacing
+ /// instances of the new value with the original value.
+ Value *New;
+
+ using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
+
+ public:
+ /// Replace all the use of \p Inst by \p New.
+ UsesReplacer(Instruction *Inst, Value *New)
+ : TypePromotionAction(Inst), New(New) {
+ LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
+ << "\n");
+ // Record the original uses.
+ for (Use &U : Inst->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
+ }
+ // Record the debug uses separately. They are not in the instruction's
+ // use list, but they are replaced by RAUW.
+ findDbgValues(DbgValues, Inst);
+
+ // Now, we can replace the uses.
+ Inst->replaceAllUsesWith(New);
+ }
+
+ /// Reassign the original uses of Inst to Inst.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
+ for (InstructionAndIdx &Use : OriginalUses)
+ Use.Inst->setOperand(Use.Idx, Inst);
+ // RAUW has replaced all original uses with references to the new value,
+ // including the debug uses. Since we are undoing the replacements,
+ // the original debug uses must also be reinstated to maintain the
+ // correctness and utility of debug value instructions.
+ for (auto *DVI : DbgValues)
+ DVI->replaceVariableLocationOp(New, Inst);
+ }
+ };
+
+ /// Remove an instruction from the IR.
+ class InstructionRemover : public TypePromotionAction {
+ /// Original position of the instruction.
+ InsertionHandler Inserter;
+
+    /// Helper structure to hide all the links to the instruction. In other
+    /// words, this helps to pretend that the instruction was removed.
+ OperandsHider Hider;
+
+ /// Keep track of the uses replaced, if any.
+ UsesReplacer *Replacer = nullptr;
+
+ /// Keep track of instructions removed.
+ SetOfInstrs &RemovedInsts;
+
+ public:
+    /// Remove all references to \p Inst and optionally replace all its
+ /// uses with New.
+ /// \p RemovedInsts Keep track of the instructions removed by this Action.
+ /// \pre If !Inst->use_empty(), then New != nullptr
+ InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+ Value *New = nullptr)
+ : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
+ RemovedInsts(RemovedInsts) {
+ if (New)
+ Replacer = new UsesReplacer(Inst, New);
+ LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ RemovedInsts.insert(Inst);
+ /// The instructions removed here will be freed after completing
+ /// optimizeBlock() for all blocks as we need to keep track of the
+ /// removed instructions during promotion.
+ Inst->removeFromParent();
+ }
+
+ ~InstructionRemover() override { delete Replacer; }
+
+ InstructionRemover &operator=(const InstructionRemover &other) = delete;
+ InstructionRemover(const InstructionRemover &other) = delete;
+
+    /// Resurrect the instruction and reassign it to the proper uses if a
+    /// new value was provided when building this action.
+ void undo() override {
+ LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
+ Inserter.insert(Inst);
+ if (Replacer)
+ Replacer->undo();
+ Hider.undo();
+ RemovedInsts.erase(Inst);
+ }
+ };
+
+public:
+ /// Restoration point.
+ /// The restoration point is a pointer to an action instead of an iterator
+ /// because the iterator may be invalidated but not the pointer.
+ using ConstRestorationPt = const TypePromotionAction *;
+
+ TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+ : RemovedInsts(RemovedInsts) {}
+
+  /// Commit every change made in this transaction. Return true if any change
+  /// happened.
+ bool commit();
+
+ /// Undo all the changes made after the given point.
+ void rollback(ConstRestorationPt Point);
+
+ /// Get the current restoration point.
+ ConstRestorationPt getRestorationPoint() const;
+
+ /// \name API for IR modification with state keeping to support rollback.
+ /// @{
+ /// Same as Instruction::setOperand.
+ void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
+
+ /// Same as Instruction::eraseFromParent.
+ void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
+
+ /// Same as Value::replaceAllUsesWith.
+ void replaceAllUsesWith(Instruction *Inst, Value *New);
+
+ /// Same as Value::mutateType.
+ void mutateType(Instruction *Inst, Type *NewTy);
+
+ /// Same as IRBuilder::createTrunc.
+ Value *createTrunc(Instruction *Opnd, Type *Ty);
+
+ /// Same as IRBuilder::createSExt.
+ Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+
+ /// Same as IRBuilder::createZExt.
+ Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
+
+ /// Same as Instruction::moveBefore.
+ void moveBefore(Instruction *Inst, Instruction *Before);
+ /// @}
+
+private:
+ /// The ordered list of actions made so far.
+ SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
+
+ using CommitPt =
+ SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
+
+ SetOfInstrs &RemovedInsts;
+};
+
+} // end anonymous namespace
+
+void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
+ Value *NewVal) {
+ Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
+ Inst, Idx, NewVal));
+}
+
+void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
+ Value *NewVal) {
+ Actions.push_back(
+ std::make_unique<TypePromotionTransaction::InstructionRemover>(
+ Inst, RemovedInsts, NewVal));
+}
+
+void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
+ Value *New) {
+ Actions.push_back(
+ std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
+}
+
+void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
+ Actions.push_back(
+ std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
+}
+
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
+ std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
+ Type *Ty) {
+ std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
+ Type *Ty) {
+ std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+void TypePromotionTransaction::moveBefore(Instruction *Inst,
+ Instruction *Before) {
+ Actions.push_back(
+ std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
+ Inst, Before));
+}
+
+TypePromotionTransaction::ConstRestorationPt
+TypePromotionTransaction::getRestorationPoint() const {
+ return !Actions.empty() ? Actions.back().get() : nullptr;
+}
+
+bool TypePromotionTransaction::commit() {
+ for (std::unique_ptr<TypePromotionAction> &Action : Actions)
+ Action->commit();
+ bool Modified = !Actions.empty();
+ Actions.clear();
+ return Modified;
+}
+
+void TypePromotionTransaction::rollback(
+ TypePromotionTransaction::ConstRestorationPt Point) {
+ while (!Actions.empty() && Point != Actions.back().get()) {
+ std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
+ Curr->undo();
+ }
+}
+
+namespace {
+
+/// A helper class for matching addressing modes.
+///
+/// This encapsulates the logic for matching the target-legal addressing modes.
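+///
+/// As an illustrative sketch (value names are made up here), for a load such
+/// as
+///   %p = getelementptr inbounds i32, ptr %base, i64 %idx
+///   %v = load i32, ptr %p
+/// the matcher may, if the target supports it, fold the GEP into the access
+/// roughly as the addressing mode [inbounds Base:%base + 4*%idx].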
+class AddressingModeMatcher {
+ SmallVectorImpl<Instruction *> &AddrModeInsts;
+ const TargetLowering &TLI;
+ const TargetRegisterInfo &TRI;
+ const DataLayout &DL;
+ const LoopInfo &LI;
+ const std::function<const DominatorTree &()> getDTFn;
+
+ /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
+ /// the memory instruction that we're computing this address for.
+ Type *AccessTy;
+ unsigned AddrSpace;
+ Instruction *MemoryInst;
+
+ /// This is the addressing mode that we're building up. This is
+ /// part of the return value of this addressing mode matching stuff.
+ ExtAddrMode &AddrMode;
+
+ /// The instructions inserted by other CodeGenPrepare optimizations.
+ const SetOfInstrs &InsertedInsts;
+
+ /// A map from the instructions to their type before promotion.
+ InstrToOrigTy &PromotedInsts;
+
+ /// The ongoing transaction where every action should be registered.
+ TypePromotionTransaction &TPT;
+
+ // A GEP which has too large offset to be folded into the addressing mode.
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
+
+ /// This is set to true when we should not do profitability checks.
+ /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
+ bool IgnoreProfitability;
+
+ /// True if we are optimizing for size.
+ bool OptSize = false;
+
+ ProfileSummaryInfo *PSI;
+ BlockFrequencyInfo *BFI;
+
+ AddressingModeMatcher(
+ SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI, const LoopInfo &LI,
+ const std::function<const DominatorTree &()> getDTFn, Type *AT,
+ unsigned AS, Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT,
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
+ bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
+ : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
+ DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
+ AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
+ InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
+ LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
+ IgnoreProfitability = false;
+ }
+
+public:
+ /// Find the maximal addressing mode that a load/store of V can fold,
+  /// given an access type of AccessTy. This returns a list of involved
+ /// instructions in AddrModeInsts.
+ /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
+ /// optimizations.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+  /// \p TPT The ongoing transaction where every action should be registered.
+ static ExtAddrMode
+ Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
+ SmallVectorImpl<Instruction *> &AddrModeInsts,
+ const TargetLowering &TLI, const LoopInfo &LI,
+ const std::function<const DominatorTree &()> getDTFn,
+ const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
+ bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ ExtAddrMode Result;
+
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
+ AccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT,
+ LargeOffsetGEP, OptSize, PSI, BFI)
+ .matchAddr(V, 0);
+ (void)Success;
+ assert(Success && "Couldn't select *anything*?");
+ return Result;
+ }
+
+private:
+ bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool matchAddr(Value *Addr, unsigned Depth);
+ bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
+ bool *MovedAway = nullptr);
+ bool isProfitableToFoldIntoAddressingMode(Instruction *I,
+ ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter);
+ bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+ bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
+ Value *PromotedOperand) const;
+};
+
+class PhiNodeSet;
+
+/// An iterator for PhiNodeSet.
+class PhiNodeSetIterator {
+ PhiNodeSet *const Set;
+ size_t CurrentIndex = 0;
+
+public:
+ /// The constructor. Start should point to either a valid element, or be equal
+ /// to the size of the underlying SmallVector of the PhiNodeSet.
+ PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
+ PHINode *operator*() const;
+ PhiNodeSetIterator &operator++();
+ bool operator==(const PhiNodeSetIterator &RHS) const;
+ bool operator!=(const PhiNodeSetIterator &RHS) const;
+};
+
+/// Keeps a set of PHINodes.
+///
+/// This is a minimal set implementation for a specific use case:
+/// It is very fast when there are very few elements, but also provides good
+/// performance when there are many. It is similar to SmallPtrSet, but also
+/// provides iteration by insertion order, which is deterministic and stable
+/// across runs. It is also similar to SmallSetVector, but it removes elements
+/// in O(1) time. This is achieved by not actually removing the element from
+/// the underlying vector, so it comes at the cost of using more memory, but
+/// that is fine, since PhiNodeSets are used as short-lived objects.
+class PhiNodeSet {
+ friend class PhiNodeSetIterator;
+
+ using MapType = SmallDenseMap<PHINode *, size_t, 32>;
+ using iterator = PhiNodeSetIterator;
+
+ /// Keeps the elements in the order of their insertion in the underlying
+ /// vector. To achieve constant time removal, it never deletes any element.
+ SmallVector<PHINode *, 32> NodeList;
+
+ /// Keeps the elements in the underlying set implementation. This (and not the
+ /// NodeList defined above) is the source of truth on whether an element
+ /// is actually in the collection.
+ MapType NodeMap;
+
+ /// Points to the first valid (not deleted) element when the set is not empty
+  /// and the value is not zero. Equals the size of the underlying vector
+ /// when the set is empty. When the value is 0, as in the beginning, the
+ /// first element may or may not be valid.
+ size_t FirstValidElement = 0;
+
+public:
+ /// Inserts a new element to the collection.
+ /// \returns true if the element is actually added, i.e. was not in the
+ /// collection before the operation.
+ bool insert(PHINode *Ptr) {
+ if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
+ NodeList.push_back(Ptr);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes the element from the collection.
+ /// \returns whether the element is actually removed, i.e. was in the
+ /// collection before the operation.
+ bool erase(PHINode *Ptr) {
+ if (NodeMap.erase(Ptr)) {
+ SkipRemovedElements(FirstValidElement);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes all elements and clears the collection.
+ void clear() {
+ NodeMap.clear();
+ NodeList.clear();
+ FirstValidElement = 0;
+ }
+
+ /// \returns an iterator that will iterate the elements in the order of
+ /// insertion.
+ iterator begin() {
+ if (FirstValidElement == 0)
+ SkipRemovedElements(FirstValidElement);
+ return PhiNodeSetIterator(this, FirstValidElement);
+ }
+
+ /// \returns an iterator that points to the end of the collection.
+ iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
+
+ /// Returns the number of elements in the collection.
+ size_t size() const { return NodeMap.size(); }
+
+  /// \returns 1 if the given element is in the collection, and 0 otherwise.
+ size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
+
+private:
+ /// Updates the CurrentIndex so that it will point to a valid element.
+ ///
+ /// If the element of NodeList at CurrentIndex is valid, it does not
+ /// change it. If there are no more valid elements, it updates CurrentIndex
+ /// to point to the end of the NodeList.
+ void SkipRemovedElements(size_t &CurrentIndex) {
+ while (CurrentIndex < NodeList.size()) {
+ auto it = NodeMap.find(NodeList[CurrentIndex]);
+ // If the element has been deleted and added again later, NodeMap will
+ // point to a different index, so CurrentIndex will still be invalid.
+ if (it != NodeMap.end() && it->second == CurrentIndex)
+ break;
+ ++CurrentIndex;
+ }
+ }
+};
+
+PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
+ : Set(Set), CurrentIndex(Start) {}
+
+PHINode *PhiNodeSetIterator::operator*() const {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ return Set->NodeList[CurrentIndex];
+}
+
+PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ ++CurrentIndex;
+ Set->SkipRemovedElements(CurrentIndex);
+ return *this;
+}
+
+bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
+ return CurrentIndex == RHS.CurrentIndex;
+}
+
+bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
+ return !((*this) == RHS);
+}
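+
+// For illustration, a minimal sketch of how PhiNodeSet is meant to be used
+// (P1 and P2 are hypothetical PHINode pointers, not values from this file):
+//   PhiNodeSet Phis;
+//   Phis.insert(P1);
+//   Phis.insert(P2);
+//   Phis.erase(P1);          // O(1): only NodeMap forgets the element
+//   for (PHINode *P : Phis)  // iterates the remaining elements in insertion
+//     P->dump();             // order, skipping erased slots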
+
+/// Keeps track of the simplification of Phi nodes.
+/// Accepts the set of all phi nodes and erases a phi node from this set
+/// if it is simplified.
+class SimplificationTracker {
+ DenseMap<Value *, Value *> Storage;
+ const SimplifyQuery &SQ;
+ // Tracks newly created Phi nodes. The elements are iterated by insertion
+ // order.
+ PhiNodeSet AllPhiNodes;
+ // Tracks newly created Select nodes.
+ SmallPtrSet<SelectInst *, 32> AllSelectNodes;
+
+public:
+ SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
+
+ Value *Get(Value *V) {
+ do {
+ auto SV = Storage.find(V);
+ if (SV == Storage.end())
+ return V;
+ V = SV->second;
+ } while (true);
+ }
+
+ Value *Simplify(Value *Val) {
+ SmallVector<Value *, 32> WorkList;
+ SmallPtrSet<Value *, 32> Visited;
+ WorkList.push_back(Val);
+ while (!WorkList.empty()) {
+ auto *P = WorkList.pop_back_val();
+ if (!Visited.insert(P).second)
+ continue;
+ if (auto *PI = dyn_cast<Instruction>(P))
+ if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
+ for (auto *U : PI->users())
+ WorkList.push_back(cast<Value>(U));
+ Put(PI, V);
+ PI->replaceAllUsesWith(V);
+ if (auto *PHI = dyn_cast<PHINode>(PI))
+ AllPhiNodes.erase(PHI);
+ if (auto *Select = dyn_cast<SelectInst>(PI))
+ AllSelectNodes.erase(Select);
+ PI->eraseFromParent();
+ }
+ }
+ return Get(Val);
+ }
+
+ void Put(Value *From, Value *To) { Storage.insert({From, To}); }
+
+ void ReplacePhi(PHINode *From, PHINode *To) {
+ Value *OldReplacement = Get(From);
+ while (OldReplacement != From) {
+ From = To;
+ To = dyn_cast<PHINode>(OldReplacement);
+ OldReplacement = Get(From);
+ }
+ assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
+ Put(From, To);
+ From->replaceAllUsesWith(To);
+ AllPhiNodes.erase(From);
+ From->eraseFromParent();
+ }
+
+ PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
+
+ void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
+
+ void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
+
+ unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
+
+ unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
+
+ void destroyNewNodes(Type *CommonType) {
+    // For safe erasing, replace the uses with a dummy value first.
+ auto *Dummy = PoisonValue::get(CommonType);
+ for (auto *I : AllPhiNodes) {
+ I->replaceAllUsesWith(Dummy);
+ I->eraseFromParent();
+ }
+ AllPhiNodes.clear();
+ for (auto *I : AllSelectNodes) {
+ I->replaceAllUsesWith(Dummy);
+ I->eraseFromParent();
+ }
+ AllSelectNodes.clear();
+ }
+};
+
+/// A helper class for combining addressing modes.
+class AddressingModeCombiner {
+ typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
+ typedef std::pair<PHINode *, PHINode *> PHIPair;
+
+private:
+ /// The addressing modes we've collected.
+ SmallVector<ExtAddrMode, 16> AddrModes;
+
+ /// The field in which the AddrModes differ, when we have more than one.
+ ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
+
+ /// Are the AddrModes that we have all just equal to their original values?
+ bool AllAddrModesTrivial = true;
+
+ /// Common Type for all different fields in addressing modes.
+ Type *CommonType = nullptr;
+
+ /// SimplifyQuery for simplifyInstruction utility.
+ const SimplifyQuery &SQ;
+
+ /// Original Address.
+ Value *Original;
+
+ /// Common value among addresses
+ Value *CommonValue = nullptr;
+
+public:
+ AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
+ : SQ(_SQ), Original(OriginalValue) {}
+
+ ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
+
+ /// Get the combined AddrMode
+ const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
+
+ /// Add a new AddrMode if it's compatible with the AddrModes we already
+ /// have.
+ /// \return True iff we succeeded in doing so.
+ bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
+    // Take note of whether we have any non-trivial AddrModes: we need to
+    // detect when all AddrModes are trivial, since then we would introduce a
+    // phi or select which just duplicates what's already there.
+ AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
+
+ // If this is the first addrmode then everything is fine.
+ if (AddrModes.empty()) {
+ AddrModes.emplace_back(NewAddrMode);
+ return true;
+ }
+
+ // Figure out how different this is from the other address modes, which we
+ // can do just by comparing against the first one given that we only care
+ // about the cumulative difference.
+ ExtAddrMode::FieldName ThisDifferentField =
+ AddrModes[0].compare(NewAddrMode);
+ if (DifferentField == ExtAddrMode::NoField)
+ DifferentField = ThisDifferentField;
+ else if (DifferentField != ThisDifferentField)
+ DifferentField = ExtAddrMode::MultipleFields;
+
+ // If NewAddrMode differs in more than one dimension we cannot handle it.
+ bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
+
+ // If Scale Field is different then we reject.
+ CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
+
+    // We also must reject the case when the base offset is different and the
+    // scale reg is not null: we cannot handle it because the merge of the
+    // different offsets would have to be used as the ScaleReg.
+ CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
+ !NewAddrMode.ScaledReg);
+
+    // We also must reject the case when the GV is different and a BaseReg is
+    // installed, because we want to use the base reg as a merge of the GV values.
+ CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
+ !NewAddrMode.HasBaseReg);
+
+    // Even if NewAddrMode is the same, we still need to collect it because the
+    // original value is different, and later we will need all original values
+    // as anchors when finding the common Phi node.
+ if (CanHandle)
+ AddrModes.emplace_back(NewAddrMode);
+ else
+ AddrModes.clear();
+
+ return CanHandle;
+ }
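+
+  // For illustration of the rules above, with hypothetical addressing modes
+  // (only the differing field is shown):
+  //  * {BaseReg=%a} then {BaseReg=%b}: accepted, only the BaseReg differs.
+  //  * {Scale=2, ScaledReg=%i} then {Scale=4, ScaledReg=%i}: rejected, the
+  //    ScaleField differs.
+  //  * A new mode differing in BaseOffs that also carries a ScaledReg:
+  //    rejected, since the merged offsets would have to go through the
+  //    ScaleReg.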
+
+ /// Combine the addressing modes we've collected into a single
+ /// addressing mode.
+ /// \return True iff we successfully combined them or we only had one so
+ /// didn't need to combine them anyway.
+ bool combineAddrModes() {
+ // If we have no AddrModes then they can't be combined.
+ if (AddrModes.size() == 0)
+ return false;
+
+ // A single AddrMode can trivially be combined.
+ if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
+ return true;
+
+ // If the AddrModes we collected are all just equal to the value they are
+ // derived from then combining them wouldn't do anything useful.
+ if (AllAddrModesTrivial)
+ return false;
+
+ if (!addrModeCombiningAllowed())
+ return false;
+
+    // Build a map from <original value, basic block where we saw it> to the
+    // value of the base register.
+ // Bail out if there is no common type.
+ FoldAddrToValueMapping Map;
+ if (!initializeMap(Map))
+ return false;
+
+ CommonValue = findCommon(Map);
+ if (CommonValue)
+ AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
+ return CommonValue != nullptr;
+ }
+
+private:
+ /// `CommonValue` may be a placeholder inserted by us.
+ /// If the placeholder is not used, we should remove this dead instruction.
+ void eraseCommonValueIfDead() {
+ if (CommonValue && CommonValue->getNumUses() == 0)
+ if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
+ CommonInst->eraseFromParent();
+ }
+
+  /// Initialize Map with anchor values. For each address seen,
+  /// we record the value of the differing field seen in that address.
+  /// At the same time we find a common type for the differing fields, which we
+  /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
+  /// Return false if no common type is found.
+ bool initializeMap(FoldAddrToValueMapping &Map) {
+ // Keep track of keys where the value is null. We will need to replace it
+ // with constant null when we know the common type.
+ SmallVector<Value *, 2> NullValue;
+ Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
+ for (auto &AM : AddrModes) {
+ Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
+ if (DV) {
+ auto *Type = DV->getType();
+ if (CommonType && CommonType != Type)
+ return false;
+ CommonType = Type;
+ Map[AM.OriginalValue] = DV;
+ } else {
+ NullValue.push_back(AM.OriginalValue);
+ }
+ }
+ assert(CommonType && "At least one non-null value must be!");
+ for (auto *V : NullValue)
+ Map[V] = Constant::getNullValue(CommonType);
+ return true;
+ }
+
+  /// We have a mapping from value A to value B, where B was a field in the
+  /// addressing mode represented by A. We also have an original value C
+  /// representing the address we start with. Traversing from C through phis
+  /// and selects, we ended up with the A's in the map. This utility function
+  /// tries to find a value V which is a field in addressing mode C such that,
+  /// traversing through phi nodes and selects, we end up in the corresponding
+  /// values B in the map. The utility will create new Phis/Selects if needed.
+ // The simple example looks as follows:
+ // BB1:
+ // p1 = b1 + 40
+ // br cond BB2, BB3
+ // BB2:
+ // p2 = b2 + 40
+ // br BB3
+ // BB3:
+ // p = phi [p1, BB1], [p2, BB2]
+ // v = load p
+ // Map is
+ // p1 -> b1
+ // p2 -> b2
+ // Request is
+ // p -> ?
+ // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
+ Value *findCommon(FoldAddrToValueMapping &Map) {
+    // Tracks the simplification of newly created phi nodes. We use this
+    // mapping because we will add newly created Phi nodes to AddrToBase.
+    // Simplification of Phi nodes is recursive, so some Phi node may
+    // be simplified after we add it to AddrToBase. In reality this
+    // simplification is possible only if the original phis/selects were not
+    // simplified yet.
+    // Using this mapping we can find the current value in AddrToBase.
+ SimplificationTracker ST(SQ);
+
+ // First step, DFS to create PHI nodes for all intermediate blocks.
+ // Also fill traverse order for the second step.
+ SmallVector<Value *, 32> TraverseOrder;
+ InsertPlaceholders(Map, TraverseOrder, ST);
+
+ // Second Step, fill new nodes by merged values and simplify if possible.
+ FillPlaceholders(Map, TraverseOrder, ST);
+
+ if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
+ ST.destroyNewNodes(CommonType);
+ return nullptr;
+ }
+
+    // Now we'd like to match the new Phi nodes to existing ones.
+ unsigned PhiNotMatchedCount = 0;
+ if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
+ ST.destroyNewNodes(CommonType);
+ return nullptr;
+ }
+
+ auto *Result = ST.Get(Map.find(Original)->second);
+ if (Result) {
+ NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
+ NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
+ }
+ return Result;
+ }
+
+ /// Try to match PHI node to Candidate.
+ /// Matcher tracks the matched Phi nodes.
+ bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
+ SmallSetVector<PHIPair, 8> &Matcher,
+ PhiNodeSet &PhiNodesToMatch) {
+ SmallVector<PHIPair, 8> WorkList;
+ Matcher.insert({PHI, Candidate});
+ SmallSet<PHINode *, 8> MatchedPHIs;
+ MatchedPHIs.insert(PHI);
+ WorkList.push_back({PHI, Candidate});
+ SmallSet<PHIPair, 8> Visited;
+ while (!WorkList.empty()) {
+ auto Item = WorkList.pop_back_val();
+ if (!Visited.insert(Item).second)
+ continue;
+      // We iterate over all incoming values of the Phi to compare them.
+      // If the values are different, both of them are Phis, the first one is
+      // a Phi we added (subject to match) and both of them are in the same
+      // basic block, then we can match our pair if the values match. So we
+      // state that these values match and add them to the work list to verify
+      // that.
+ for (auto *B : Item.first->blocks()) {
+ Value *FirstValue = Item.first->getIncomingValueForBlock(B);
+ Value *SecondValue = Item.second->getIncomingValueForBlock(B);
+ if (FirstValue == SecondValue)
+ continue;
+
+ PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
+ PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
+
+        // If one of them is not a Phi, or
+        // the first one is not a Phi node from the set we'd like to match, or
+        // the Phi nodes are from different basic blocks, then
+        // we will not be able to match.
+ if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
+ FirstPhi->getParent() != SecondPhi->getParent())
+ return false;
+
+ // If we already matched them then continue.
+ if (Matcher.count({FirstPhi, SecondPhi}))
+ continue;
+        // So the values are different and do not match. So we need them to
+ // match. (But we register no more than one match per PHI node, so that
+ // we won't later try to replace them twice.)
+ if (MatchedPHIs.insert(FirstPhi).second)
+ Matcher.insert({FirstPhi, SecondPhi});
+        // But we must check it.
+ WorkList.push_back({FirstPhi, SecondPhi});
+ }
+ }
+ return true;
+ }
+
+ /// For the given set of PHI nodes (in the SimplificationTracker) try
+ /// to find their equivalents.
+  /// Returns false if this matching fails and creation of new Phi nodes is disabled.
+ bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
+ unsigned &PhiNotMatchedCount) {
+ // Matched and PhiNodesToMatch iterate their elements in a deterministic
+ // order, so the replacements (ReplacePhi) are also done in a deterministic
+ // order.
+ SmallSetVector<PHIPair, 8> Matched;
+ SmallPtrSet<PHINode *, 8> WillNotMatch;
+ PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
+ while (PhiNodesToMatch.size()) {
+ PHINode *PHI = *PhiNodesToMatch.begin();
+
+      // Add ourselves; if no Phi node in the basic block matches us, we do not match.
+ WillNotMatch.clear();
+ WillNotMatch.insert(PHI);
+
+      // Traverse all Phis until we find an equivalent or fail to do so.
+ bool IsMatched = false;
+ for (auto &P : PHI->getParent()->phis()) {
+ // Skip new Phi nodes.
+ if (PhiNodesToMatch.count(&P))
+ continue;
+ if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
+ break;
+        // If it does not match, collect all Phi nodes from the matcher:
+        // if we end up with no match, then all these Phi nodes will not match
+        // later.
+ for (auto M : Matched)
+ WillNotMatch.insert(M.first);
+ Matched.clear();
+ }
+ if (IsMatched) {
+ // Replace all matched values and erase them.
+ for (auto MV : Matched)
+ ST.ReplacePhi(MV.first, MV.second);
+ Matched.clear();
+ continue;
+ }
+ // If we are not allowed to create new nodes then bail out.
+ if (!AllowNewPhiNodes)
+ return false;
+ // Just remove all seen values in matcher. They will not match anything.
+ PhiNotMatchedCount += WillNotMatch.size();
+ for (auto *P : WillNotMatch)
+ PhiNodesToMatch.erase(P);
+ }
+ return true;
+  }
+
+  /// Fill the placeholders with values from predecessors and simplify them.
+ void FillPlaceholders(FoldAddrToValueMapping &Map,
+ SmallVectorImpl<Value *> &TraverseOrder,
+ SimplificationTracker &ST) {
+ while (!TraverseOrder.empty()) {
+ Value *Current = TraverseOrder.pop_back_val();
+ assert(Map.contains(Current) && "No node to fill!!!");
+ Value *V = Map[Current];
+
+ if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
+ // CurrentValue also must be Select.
+ auto *CurrentSelect = cast<SelectInst>(Current);
+ auto *TrueValue = CurrentSelect->getTrueValue();
+ assert(Map.contains(TrueValue) && "No True Value!");
+ Select->setTrueValue(ST.Get(Map[TrueValue]));
+ auto *FalseValue = CurrentSelect->getFalseValue();
+ assert(Map.contains(FalseValue) && "No False Value!");
+ Select->setFalseValue(ST.Get(Map[FalseValue]));
+ } else {
+ // Must be a Phi node then.
+ auto *PHI = cast<PHINode>(V);
+ // Fill the Phi node with values from predecessors.
+ for (auto *B : predecessors(PHI->getParent())) {
+ Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
+ assert(Map.contains(PV) && "No predecessor Value!");
+ PHI->addIncoming(ST.Get(Map[PV]), B);
+ }
+ }
+ Map[Current] = ST.Simplify(V);
+ }
+ }
+
+  /// Starting from the original value, recursively iterates over the def-use
+  /// chain up to known ending values represented in the map. For each traversed
+  /// phi/select, inserts a placeholder Phi or Select.
+  /// Reports all newly created Phi/Select nodes by adding them to the set.
+  /// Also reports the order in which the values have been traversed.
+ void InsertPlaceholders(FoldAddrToValueMapping &Map,
+ SmallVectorImpl<Value *> &TraverseOrder,
+ SimplificationTracker &ST) {
+ SmallVector<Value *, 32> Worklist;
+ assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
+ "Address must be a Phi or Select node");
+ auto *Dummy = PoisonValue::get(CommonType);
+ Worklist.push_back(Original);
+ while (!Worklist.empty()) {
+ Value *Current = Worklist.pop_back_val();
+      // If it is already visited or it is an ending value, then skip it.
+ if (Map.contains(Current))
+ continue;
+ TraverseOrder.push_back(Current);
+
+ // CurrentValue must be a Phi node or select. All others must be covered
+ // by anchors.
+ if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
+ // Is it OK to get metadata from OrigSelect?!
+ // Create a Select placeholder with dummy value.
+ SelectInst *Select = SelectInst::Create(
+ CurrentSelect->getCondition(), Dummy, Dummy,
+ CurrentSelect->getName(), CurrentSelect, CurrentSelect);
+ Map[Current] = Select;
+ ST.insertNewSelect(Select);
+ // We are interested in True and False values.
+ Worklist.push_back(CurrentSelect->getTrueValue());
+ Worklist.push_back(CurrentSelect->getFalseValue());
+ } else {
+ // It must be a Phi node then.
+ PHINode *CurrentPhi = cast<PHINode>(Current);
+ unsigned PredCount = CurrentPhi->getNumIncomingValues();
+ PHINode *PHI =
+ PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
+ Map[Current] = PHI;
+ ST.insertNewPhi(PHI);
+ append_range(Worklist, CurrentPhi->incoming_values());
+ }
+ }
+ }
+
+ bool addrModeCombiningAllowed() {
+ if (DisableComplexAddrModes)
+ return false;
+ switch (DifferentField) {
+ default:
+ return false;
+ case ExtAddrMode::BaseRegField:
+ return AddrSinkCombineBaseReg;
+ case ExtAddrMode::BaseGVField:
+ return AddrSinkCombineBaseGV;
+ case ExtAddrMode::BaseOffsField:
+ return AddrSinkCombineBaseOffs;
+ case ExtAddrMode::ScaledRegField:
+ return AddrSinkCombineScaledReg;
+ }
+ }
+};
+} // end anonymous namespace
+
+/// Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return matchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+  // X*Scale + C*Scale to the addr mode. If we found an available IV increment, do not
+ // go any further: we can reuse it and cannot eliminate it.
+ ConstantInt *CI = nullptr;
+ Value *AddLHS = nullptr;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
+ !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
+ TestAddrMode.InBounds = false;
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+
+ // If this is an add recurrence with a constant step, return the increment
+ // instruction and the canonicalized step.
+ auto GetConstantStep =
+ [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
+ auto *PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return std::nullopt;
+ auto IVInc = getIVIncrement(PN, &LI);
+ if (!IVInc)
+ return std::nullopt;
+    // TODO: The result of the intrinsics above is two's complement. However,
+    // when the IV inc is expressed as an add or sub, iv.next is potentially a
+    // poison value. If it has nuw or nsw flags, we need to make sure that these
+    // flags are inferrable at the point of the memory instruction. Otherwise we
+    // would be replacing a well-defined two's complement computation with
+    // poison. Currently, to avoid the potentially complex analysis needed to
+    // prove this, we reject such cases.
+ if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
+ if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
+ return std::nullopt;
+ if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
+ return std::make_pair(IVInc->first, ConstantStep->getValue());
+ return std::nullopt;
+ };
+
+ // Try to account for the following special case:
+ // 1. ScaleReg is an inductive variable;
+ // 2. We use it with non-zero offset;
+ // 3. IV's increment is available at the point of memory instruction.
+ //
+ // In this case, we may reuse the IV increment instead of the IV Phi to
+ // achieve the following advantages:
+ // 1. If IV step matches the offset, we will have no need in the offset;
+ // 2. Even if they don't match, we will reduce the overlap of living IV
+ // and IV increment, that will potentially lead to better register
+ // assignment.
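+  //
+  // A hypothetical IR sketch of this case (Scale = 1, BaseOffs = 8):
+  //   %iv      = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
+  //   %iv.next = add i64 %iv, 8
+  //   ... memory access at (%base + 8 + %iv * 1) ...
+  // can instead address (%base + %iv.next * 1): the offset is absorbed into
+  // the already-available increment.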
+ if (AddrMode.BaseOffs) {
+ if (auto IVStep = GetConstantStep(ScaleReg)) {
+ Instruction *IVInc = IVStep->first;
+ // The following assert is important to ensure a lack of infinite loops.
+      // This transform is (intentionally) the inverse of the one just above.
+ // If they don't agree on the definition of an increment, we'd alternate
+ // back and forth indefinitely.
+ assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
+ APInt Step = IVStep->second;
+ APInt Offset = Step * AddrMode.Scale;
+ if (Offset.isSignedIntN(64)) {
+ TestAddrMode.InBounds = false;
+ TestAddrMode.ScaledReg = IVInc;
+ TestAddrMode.BaseOffs -= Offset.getLimitedValue();
+        // If this addressing mode is legal, commit it.
+        // (Note that we defer the (expensive) domtree-based legality check
+ // to the very last possible point.)
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
+ getDTFn().dominates(IVInc, MemoryInst)) {
+ AddrModeInsts.push_back(cast<Instruction>(IVInc));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+ }
+ }
+
+ // Otherwise, just return what we have.
+ return true;
+}
+
+/// This is a little filter, which returns true if an addressing computation
+/// involving I might be folded into a load/store accessing it.
+/// This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isIntOrPtrTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Check whether or not \p Val is a legal instruction for \p TLI.
+/// \note \p Val is assumed to be the product of some type promotion.
+/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
+/// to be legal, as the non-promoted value would have had the same state.
+static bool isPromotedInstructionLegal(const TargetLowering &TLI,
+ const DataLayout &DL, Value *Val) {
+ Instruction *PromotedInst = dyn_cast<Instruction>(Val);
+ if (!PromotedInst)
+ return false;
+ int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+ // If the ISDOpcode is undefined, it was undefined before the promotion.
+ if (!ISDOpcode)
+ return true;
+ // Otherwise, check if the promoted instruction is legal or not.
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
+}
+
+namespace {
+
+/// Helper class to perform type promotion.
+class TypePromotionHelper {
+ /// Utility function to add a promoted instruction \p ExtOpnd to
+ /// \p PromotedInsts and record the type of extension we have seen.
+ static void addPromotedInst(InstrToOrigTy &PromotedInsts,
+ Instruction *ExtOpnd, bool IsSExt) {
+ ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
+ InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
+ if (It != PromotedInsts.end()) {
+      // If the new extension is the same as the original, the information in
+ // PromotedInsts[ExtOpnd] is still correct.
+ if (It->second.getInt() == ExtTy)
+ return;
+
+      // Now that the new extension is different from the old extension, we
+      // make the type information invalid by setting the extension type to
+      // BothExtension.
+ ExtTy = BothExtension;
+ }
+ PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
+ }
+
+ /// Utility function to query the original type of instruction \p Opnd
+ /// with a matched extension type. If the extension doesn't match, we
+ /// cannot use the information we had on the original type.
+ /// BothExtension doesn't match any extension type.
+ static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
+ Instruction *Opnd, bool IsSExt) {
+ ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
+ InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
+ if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
+ return It->second.getPointer();
+ return nullptr;
+ }
+
+ /// Utility function to check whether or not a sign or zero extension
+ /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
+ /// either using the operands of \p Inst or promoting \p Inst.
+ /// The type of the extension is defined by \p IsSExt.
+ /// In other words, check if:
+ /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
+ /// #1 Promotion applies:
+ /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
+ /// #2 Operand reuses:
+ /// ext opnd1 to ConsideredExtType.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts, bool IsSExt);
+
+ /// Utility function to determine if \p OpIdx should be promoted when
+ /// promoting \p Inst.
+ static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
+ return !(isa<SelectInst>(Inst) && OpIdx == 0);
+ }
+
+ /// Utility function to promote the operand of \p Ext when this
+ /// operand is a promotable trunc or sext or zext.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
+ /// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
+ /// Should never be called directly.
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForTruncAndAnyExt(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
+
+ /// Utility function to promote the operand of \p Ext when this
+ /// operand is promotable and is not a supported trunc or sext.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
+ /// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
+ /// Should never be called directly.
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
+
+ /// \see promoteOperandForOther.
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
+ }
+
+ /// \see promoteOperandForOther.
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
+ }
+
+public:
+ /// Type for the utility function that promotes the operand of Ext.
+ using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
+
+ /// Given a sign/zero extend instruction \p Ext, return the appropriate
+ /// action to promote the operand of \p Ext instead of using Ext.
+ /// \return NULL if no promotable action is possible with the current
+ /// sign extension.
+ /// \p InsertedInsts keeps track of all the instructions inserted by the
+ /// other CodeGenPrepare optimizations. This information is important
+ /// because we do not want to promote these instructions as CodeGenPrepare
+ /// will reinsert them later. Thus creating an infinite loop: create/remove.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
+ const TargetLowering &TLI,
+ const InstrToOrigTy &PromotedInsts);
+};
+
+} // end anonymous namespace
+
+bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
+ Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts,
+ bool IsSExt) {
+ // The promotion helper does not know how to deal with vector types yet.
+ // To be able to fix that, we would need to fix the places where we
+ // statically extend, e.g., constants and such.
+ if (Inst->getType()->isVectorTy())
+ return false;
+
+ // We can always get through zext.
+ if (isa<ZExtInst>(Inst))
+ return true;
+
+ // sext(sext) is ok too.
+ if (IsSExt && isa<SExtInst>(Inst))
+ return true;
+
+  // We can get through a binary operator if it is legal, i.e. if the
+  // binary operator has a nuw or nsw flag.
+ if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
+ if (isa<OverflowingBinaryOperator>(BinOp) &&
+ ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+ (IsSExt && BinOp->hasNoSignedWrap())))
+ return true;
+
+ // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
+ if ((Inst->getOpcode() == Instruction::And ||
+ Inst->getOpcode() == Instruction::Or))
+ return true;
+
+ // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
+ if (Inst->getOpcode() == Instruction::Xor) {
+ // Make sure it is not a NOT.
+ if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
+ if (!Cst->getValue().isAllOnes())
+ return true;
+ }
+
+  // zext(lshr(opnd, cst)) --> lshr(zext(opnd), zext(cst))
+  // It may change a poisoned value into a regular value, like
+  //     zext i32 (lshr i8 %val, 12) --> lshr i32 (zext i8 %val), 12
+  //          poisoned value                 regular value
+  // It should be OK since undef covers a valid value.
+ if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
+ return true;
+
+ // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
+ // It may change a poisoned value into a regular value, like
+ // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
+ // poisoned value regular value
+ // It should be OK since undef covers valid value.
+ if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
+ const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
+ if (ExtInst->hasOneUse()) {
+ const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
+ if (AndInst && AndInst->getOpcode() == Instruction::And) {
+ const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+ if (Cst &&
+ Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
+ return true;
+ }
+ }
+ }
+
+ // Check if we can do the following simplification.
+ // ext(trunc(opnd)) --> ext(opnd)
+ if (!isa<TruncInst>(Inst))
+ return false;
+
+ Value *OpndVal = Inst->getOperand(0);
+ // Check if we can use this operand in the extension.
+ // If the type is larger than the result type of the extension, we cannot.
+ if (!OpndVal->getType()->isIntegerTy() ||
+ OpndVal->getType()->getIntegerBitWidth() >
+ ConsideredExtType->getIntegerBitWidth())
+ return false;
+
+ // If the operand of the truncate is not an instruction, we will not have
+ // any information on the dropped bits.
+  // (Actually we could for constants, but it is not worth the extra logic.)
+ Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
+ if (!Opnd)
+ return false;
+
+  // Check if the source of the truncate is narrow enough.
+  // I.e., check that the trunc just drops extended bits of the same kind as
+  // the extension.
+ // #1 get the type of the operand and check the kind of the extended bits.
+  const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
+  if (!OpndType) {
+    if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
+      OpndType = Opnd->getOperand(0)->getType();
+    else
+      return false;
+  }
+
+ // #2 check that the truncate just drops extended bits.
+ return Inst->getType()->getIntegerBitWidth() >=
+ OpndType->getIntegerBitWidth();
+}
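+
+// For illustration, a hypothetical case that canGetThrough accepts for a sign
+// extension (the nsw flag is what makes the promotion legal):
+//   %a = add nsw i32 %x, %y
+//   %e = sext i32 %a to i64
+// can be promoted to
+//   %x64 = sext i32 %x to i64
+//   %y64 = sext i32 %y to i64
+//   %a64 = add nsw i64 %x64, %y64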
+
+TypePromotionHelper::Action TypePromotionHelper::getAction(
+ Instruction *Ext, const SetOfInstrs &InsertedInsts,
+ const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
+ assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
+ "Unexpected instruction type");
+ Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
+ Type *ExtTy = Ext->getType();
+ bool IsSExt = isa<SExtInst>(Ext);
+ // If the operand of the extension is not an instruction, we cannot
+ // get through.
+  // If it is, check that we can get through.
+ if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
+ return nullptr;
+
+ // Do not promote if the operand has been added by codegenprepare.
+ // Otherwise, it means we are undoing an optimization that is likely to be
+ // redone, thus causing potential infinite loop.
+ if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
+ return nullptr;
+
+  // SExt, ZExt or Trunc instructions.
+ // Return the related handler.
+ if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
+ isa<ZExtInst>(ExtOpnd))
+ return promoteOperandForTruncAndAnyExt;
+
+ // Regular instruction.
+ // Abort early if we will have to insert non-free instructions.
+ if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
+ return nullptr;
+ return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
+}
+
+Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
+ Instruction *SExt, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ // By construction, the operand of SExt is an instruction. Otherwise we cannot
+ // get through it and this method should not be called.
+ Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+ Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
+ if (isa<ZExtInst>(SExtOpnd)) {
+ // Replace s|zext(zext(opnd))
+ // => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
+ Value *ZExt =
+ TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
+ TPT.replaceAllUsesWith(SExt, ZExt);
+ TPT.eraseInstruction(SExt);
+ ExtVal = ZExt;
+ } else {
+ // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
+ // => z|sext(opnd).
+ TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ }
+ CreatedInstsCost = 0;
+
+ // Remove dead code.
+ if (SExtOpnd->use_empty())
+ TPT.eraseInstruction(SExtOpnd);
+
+ // Check if the extension is still needed.
+ Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
+ return ExtVal;
+ }
+
+ // At this point we have: ext ty opnd to ty.
+ // Reassign the uses of ExtInst to the opnd and remove ExtInst.
+ Value *NextVal = ExtInst->getOperand(0);
+ TPT.eraseInstruction(ExtInst, NextVal);
+ return NextVal;
+}
+
+Value *TypePromotionHelper::promoteOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
+ // By construction, the operand of Ext is an instruction. Otherwise we cannot
+ // get through it and this method should not be called.
+ Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
+ CreatedInstsCost = 0;
+ if (!ExtOpnd->hasOneUse()) {
+ // ExtOpnd will be promoted.
+ // All its uses, but Ext, will need to use a truncated value of the
+ // promoted version.
+ // Create the truncate now.
+ Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
+ if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
+ // Insert it just after the definition.
+ ITrunc->moveAfter(ExtOpnd);
+ if (Truncs)
+ Truncs->push_back(ITrunc);
+ }
+
+ TPT.replaceAllUsesWith(ExtOpnd, Trunc);
+ // Restore the operand of Ext (which has been replaced by the previous call
+ // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
+ TPT.setOperand(Ext, 0, ExtOpnd);
+ }
+
+ // Get through the Instruction:
+ // 1. Update its type.
+ // 2. Replace the uses of Ext by Inst.
+ // 3. Extend each operand that needs to be extended.
+
+ // Remember the original type of the instruction before promotion.
+ // This is useful to know that the high bits are sign extended bits.
+ addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
+ // Step #1.
+ TPT.mutateType(ExtOpnd, Ext->getType());
+ // Step #2.
+ TPT.replaceAllUsesWith(Ext, ExtOpnd);
+ // Step #3.
+ Instruction *ExtForOpnd = Ext;
+
+ LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
+ for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+ ++OpIdx) {
+ LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+ if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
+ !shouldExtOperand(ExtOpnd, OpIdx)) {
+ LLVM_DEBUG(dbgs() << "No need to propagate\n");
+ continue;
+ }
+ // Check if we can statically extend the operand.
+ Value *Opnd = ExtOpnd->getOperand(OpIdx);
+ if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
+ LLVM_DEBUG(dbgs() << "Statically extend\n");
+ unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
+ APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
+ : Cst->getValue().zext(BitWidth);
+ TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
+ continue;
+ }
+    // UndefValues are typed, so we have to statically sign extend them.
+ if (isa<UndefValue>(Opnd)) {
+ LLVM_DEBUG(dbgs() << "Statically extend\n");
+ TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
+ continue;
+ }
+
+ // Otherwise we have to explicitly sign extend the operand.
+    // Check whether Ext has already been reused to extend another operand.
+    if (!ExtForOpnd) {
+      // If so, create a new extension.
+ LLVM_DEBUG(dbgs() << "More operands to ext\n");
+ Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
+ : TPT.createZExt(Ext, Opnd, Ext->getType());
+ if (!isa<Instruction>(ValForExtOpnd)) {
+ TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
+ continue;
+ }
+ ExtForOpnd = cast<Instruction>(ValForExtOpnd);
+ }
+ if (Exts)
+ Exts->push_back(ExtForOpnd);
+ TPT.setOperand(ExtForOpnd, 0, Opnd);
+
+ // Move the sign extension before the insertion point.
+ TPT.moveBefore(ExtForOpnd, ExtOpnd);
+ TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
+ // If more sext are required, new instructions will have to be created.
+ ExtForOpnd = nullptr;
+ }
+ if (ExtForOpnd == Ext) {
+ LLVM_DEBUG(dbgs() << "Extension is useless now\n");
+ TPT.eraseInstruction(Ext);
+ }
+ return ExtOpnd;
+}
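+
+// For illustration (hypothetical IR): when the promoted operand has users
+// other than the extension, promoteOperandForOther inserts a truncate for
+// them, roughly turning
+//   %a = add nsw i32 %x, 1   ; %a also has another i32 user
+//   %e = sext i32 %a to i64
+// into
+//   %x64 = sext i32 %x to i64
+//   %a   = add nsw i64 %x64, 1
+//   %t   = trunc i64 %a to i32   ; feeds the remaining i32 users of the old %a
+// while the former users of %e now use the widened %a directly.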
+
+/// Check whether or not promoting an instruction to a wider type is profitable.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been
+/// matched in the addressing mode thanks to the promotion.
+/// \p PromotedOperand is the value that has been promoted.
+/// \return True if the promotion is profitable, false otherwise.
+bool AddressingModeMatcher::isPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
+ << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
+ // This is not profitable.
+ if (NewCost > OldCost)
+ return false;
+ if (NewCost < OldCost)
+ return true;
+ // The promotion is neutral but it may help folding the sign extension in
+ // loads for instance.
+ // Check that we did not create an illegal instruction.
+ return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
+}
+
+/// Given an instruction or constant expr, see if we can fold the operation
+/// into the addressing mode. If so, update the addressing mode and return
+/// true, otherwise return false without modifying AddrMode.
+/// If \p MovedAway is not NULL, it contains information on whether or
+/// not AddrInst has to be folded into the addressing mode on success.
+/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
+/// mode because it has been moved away.
+/// Thus AddrInst must not be added to the matched instructions.
+/// This state can happen when AddrInst is a sext, since it may be moved away.
+/// Therefore, AddrInst may not be valid when MovedAway is true and it must
+/// not be referenced anymore.
+bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth,
+ bool *MovedAway) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5)
+ return false;
+
+ // By default, all matched instructions stay in place.
+ if (MovedAway)
+ *MovedAway = false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr: {
+ auto AS = AddrInst->getType()->getPointerAddressSpace();
+ auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ }
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::AddrSpaceCast: {
+ unsigned SrcAS =
+ AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
+ if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ }
+ case Instruction::Add: {
+ // Check to see if we can merge in one operand, then the other. If so, we
+ // win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ // Start a transaction at this point.
+ // The LHS may match but not the RHS.
+ // Therefore, we need a higher level restoration point to undo partially
+ // matched operation.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+
+ // Try to match an integer constant second to increase its chance of ending
+ // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
+ int First = 0, Second = 1;
+ if (isa<ConstantInt>(AddrInst->getOperand(First))
+ && !isa<ConstantInt>(AddrInst->getOperand(Second)))
+ std::swap(First, Second);
+ AddrMode.InBounds = false;
+ if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(Second), Depth + 1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+
+ // Otherwise this was over-aggressive. Try merging operands in the opposite
+ // order.
+ if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(First), Depth + 1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+ break;
+ }
+ // case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ // break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ AddrMode.InBounds = false;
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS || RHS->getBitWidth() > 64)
+ return false;
+ int64_t Scale = Opcode == Instruction::Shl
+ ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
+ : RHS->getSExtValue();
+
+ return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+    // Scan the GEP. We check whether it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (TS.isNonZero()) {
+ // The optimisations below currently only work for fixed offsets.
+ if (TS.isScalable())
+ return false;
+ int64_t TypeSize = TS.getFixedValue();
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ const APInt &CVal = CI->getValue();
+ if (CVal.getSignificantBits() <= 64) {
+ ConstantOffset += CVal.getSExtValue() * TypeSize;
+ continue;
+ }
+ }
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+
+ if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+ TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+ ConstantOffset > 0) {
+ // Record GEPs with non-zero offsets as candidates for splitting in
+ // the event that the offset cannot fit into the r+i addressing mode.
+ // Simple and common case that only one GEP is used in calculating the
+ // address for the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
+ BasicBlock *Parent = BaseI ? BaseI->getParent()
+ : &GEP->getFunction()->getEntryBlock();
+ if (!Parent->getTerminator()->isEHPad())
+ LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+ }
+ }
+
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
+
+ // Match the base operand of the GEP.
+ if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ Instruction *Ext = dyn_cast<Instruction>(AddrInst);
+ if (!Ext)
+ return false;
+
+ // Try to move this ext out of the way of the addressing mode.
+ // Ask for a method for doing so.
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
+ if (!TPH)
+ return false;
+
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
+ Value *PromotedOperand =
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
+ // SExt has been moved away.
+ // Thus either it will be rematched later in the recursive calls or it is
+ // gone. Anyway, we must not fold it into the addressing mode at this point.
+ // E.g.,
+ // op = add opnd, 1
+ // idx = ext op
+ // addr = gep base, idx
+ // is now:
+ // promotedOpnd = ext opnd <- no match here
+ // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
+ // addr = gep base, op <- match
+ if (MovedAway)
+ *MovedAway = true;
+
+ assert(PromotedOperand &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ if (!matchAddr(PromotedOperand, Depth) ||
+ // The total of the new cost is equal to the cost of the created
+ // instructions.
+ // The total of the old cost is equal to the cost of the extension plus
+ // what we have saved in the addressing mode.
+ !isPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
+ PromotedOperand)) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// If we can, try to add the value of 'Addr' into the current addressing mode.
+/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
+/// unmodified. This assumes that Addr is either a pointer type or intptr_t
+/// for the target.
+///
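+/// As a rough example (illustrative IR): for an address such as
+/// "getelementptr i8, ptr %p, i64 16", the constant 16 can be folded into
+/// AddrMode.BaseOffs with %p as the base register, provided the target
+/// reports [reg + 16] as a legal addressing mode.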
+bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
+ // Start a transaction at this point that we will rollback if the matching
+ // fails.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ if (CI->getValue().isSignedIntN(64)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ }
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (!AddrMode.BaseGV) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.BaseGV = nullptr;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ bool MovedAway = false;
+ if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+ // This instruction may have been moved away. If so, there is nothing
+ // to check here.
+ if (MovedAway)
+ return true;
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (matchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ TPT.rollback(LastKnownGood);
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = nullptr;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = nullptr;
+ }
+ // Couldn't match.
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+/// Check to see if all uses of OpVal by the specified inline asm call are due
+/// to memory operands. If so, return true, otherwise return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI) {
+ const Function *F = CI->getFunction();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
+
+ for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it! TODO: Also handle C_Address?
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+/// Recursively walk all the uses of I until we find a memory use.
+/// If we find an obviously non-foldable instruction, return true.
+/// Add accessed addresses and types to MemoryUses.
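+/// For instance (a hypothetical example): if I feeds both a load and the
+/// pointer operand of a store, both uses are recorded in MemoryUses; if I is
+/// instead stored as a value (rather than used as the address), the walk
+/// stops and reports a non-foldable use.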
+static bool FindAllMemoryUses(
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I).second)
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Use &U : I->uses()) {
+ // Conservatively return true if we're seeing a large number or a deep chain
+ // of users. This avoids excessive compilation times in pathological cases.
+ if (SeenInsts++ >= MaxAddressUsersToScan)
+ return true;
+
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
+ MemoryUses.push_back({&U, LI->getType()});
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
+ if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
+ continue;
+ }
+
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
+ if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
+ continue;
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
+ if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+ if (CI->hasFnAttr(Attribute::Cold)) {
+ // If this is a cold call, we can sink the addressing calculation into
+ // the cold path. See optimizeCallInst
+ bool OptForSize =
+ OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ if (!OptForSize)
+ continue;
+ }
+
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
+ if (!IA)
+ return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts))
+ return true;
+ }
+
+ return false;
+}
+
+static bool FindAllMemoryUses(
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ unsigned SeenInsts = 0;
+ SmallPtrSet<Instruction *, 16> ConsideredInsts;
+ return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts);
+}
+
+/// Return true if Val is already known to be live at the use site that we're
+/// folding it into. If so, there is no cost to include it in the addressing
+/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
+/// instruction already.
+bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
+ Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val))
+ return true;
+
+ // If Val is a constant-sized alloca in the entry block, it is live; this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
+}
+
+/// It is possible for the addressing mode of the machine to fold the specified
+/// instruction into a load or store that ultimately uses it.
+/// However, the specified instruction has multiple uses.
+/// Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
+ Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability)
+ return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = nullptr;
+ if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = nullptr;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (!BaseReg && !ScaledReg)
+ return true;
+
+ // If all uses of this instruction can have the address mode sunk into them,
+ // we can remove the addressing mode and effectively trade one live register
+ // for another (at worst). In this context, folding an addressing mode into
+ // the use is just a particularly nice way of sinking it.
+ SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
+ if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these memory operation uses and see
+ // if they could *actually* fold the instruction. The assumption is that
+ // addressing modes are cheap and that duplicating the computation involved
+ // many times is worthwhile, even on a fastpath. For sinking candidates
+ // (i.e. cold call sites), this serves as a way to prevent excessive code
+ // growth since most architectures have some reasonably small and fast way
+ // to compute an effective address (e.g., LEA on x86).
+ SmallVector<Instruction *, 32> MatchedAddrModeInsts;
+ for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first->get();
+ Instruction *UserI = cast<Instruction>(Pair.first->getUser());
+ Type *AddressAccessTy = Pair.second;
+ unsigned AS = Address->getType()->getPointerAddressSpace();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
+ 0);
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
+ AddressAccessTy, AS, UserI, Result,
+ InsertedInsts, PromotedInsts, TPT,
+ LargeOffsetGEP, OptSize, PSI, BFI);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.matchAddr(Address, 0);
+ (void)Success;
+ assert(Success && "Couldn't select *anything*?");
+
+ // The match was only done to check profitability; the changes made are not
+ // part of the original matcher. Therefore, they should be dropped,
+ // otherwise the original matcher will not be left in the right state.
+ TPT.rollback(LastKnownGood);
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (!is_contained(MatchedAddrModeInsts, I))
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
+
+/// Return true if the specified value is defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// Sink addressing mode computation immediately before MemoryInst if doing so
+/// can be done without increasing register pressure. The need for the
+/// register pressure constraint means this can end up being an all or nothing
+/// decision for all uses of the same addressing computation.
+///
+/// Load and Store Instructions often have addressing modes that can do
+/// significant amounts of computation. As such, instruction selection will try
+/// to get the load or store to do as much computation as possible for the
+/// program. The problem is that isel can only see within a single block. As
+/// such, we sink as much legal addressing mode work into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands. It's also used to sink addressing computations feeding into cold
+/// call sites into their (cold) basic block.
+///
+/// The motivation for handling sinking into cold blocks is that doing so can
+/// both enable other address mode sinking (by satisfying the register pressure
+/// constraint above), and reduce register pressure globally (by removing the
+/// addressing mode computation from the fast path entirely).
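+/// A minimal sketch of the effect (illustrative IR): if another block
+/// computes "%a = getelementptr i8, ptr %p, i64 42" and its only use is a
+/// load here, the offset computation is re-emitted as a "sunkaddr" value
+/// immediately before the load, the load is rewritten to use it, and the
+/// original GEP is deleted if it becomes dead.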
+bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ Type *AccessTy, unsigned AddrSpace) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value *, 8> worklist;
+ SmallPtrSet<Value *, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI and select nodes, and
+ // ensure that the addressing mode obtained from the non-PHI/select roots of
+ // the graph are compatible.
+ bool PhiOrSelectSeen = false;
+ SmallVector<Instruction *, 16> AddrModeInsts;
+ const SimplifyQuery SQ(*DL, TLInfo);
+ AddressingModeCombiner AddrModes(SQ, Addr);
+ TypePromotionTransaction TPT(RemovedInsts);
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ while (!worklist.empty()) {
+ Value *V = worklist.pop_back_val();
+
+ // We allow traversing cyclic Phi nodes.
+ // If this loop succeeds, then every path through the Phi nodes computes
+ // an address of the form
+ // BaseGV + Base + Scale * Index + Offset
+ // where Scale and Offset are constants and BaseGV, Base and Index are
+ // exactly the same Values in all cases.
+ // This means that BaseGV, Scale and Offset dominate our memory instruction
+ // and have the same values as they had in the address computation
+ // represented as a Phi, so we can safely sink the address computation to
+ // the memory instruction.
+ if (!Visited.insert(V).second)
+ continue;
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ append_range(worklist, P->incoming_values());
+ PhiOrSelectSeen = true;
+ continue;
+ }
+ // Similar for select.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ worklist.push_back(SI->getFalseValue());
+ worklist.push_back(SI->getTrueValue());
+ PhiOrSelectSeen = true;
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed. Note that
+ // the result may differ depending on what other uses our candidate
+ // addressing instructions might have.
+ AddrModeInsts.clear();
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
+ 0);
+ // Defer the query (and possible computation) of the dom tree to the point
+ // of actual use. It's expected that most address matches don't actually
+ // need the domtree.
+ auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
+ Function *F = MemoryInst->getParent()->getParent();
+ return this->getDT(*F);
+ };
+ ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
+ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
+ *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ BFI.get());
+
+ GetElementPtrInst *GEP = LargeOffsetGEP.first;
+ if (GEP && !NewGEPBases.count(GEP)) {
+ // If splitting the underlying data structure can reduce the offset of a
+ // GEP, collect the GEP. Skip the GEPs that are the new bases of
+ // previously split data structures.
+ LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
+ LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
+ }
+
+ NewAddrMode.OriginalValue = V;
+ if (!AddrModes.addNewAddrMode(NewAddrMode))
+ break;
+ }
+
+ // Try to combine the AddrModes we've collected. If we couldn't collect any,
+ // or we have multiple but either couldn't combine them or combining them
+ // wouldn't do anything useful, bail out now.
+ if (!AddrModes.combineAddrModes()) {
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+ bool Modified = TPT.commit();
+
+ // Get the combined AddrMode (or the only AddrMode, if we only had one).
+ ExtAddrMode AddrMode = AddrModes.getAddrMode();
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ // If we saw a Phi node then it is definitely not local, and if we saw a
+ // select then we want to push the address calculation past it even if it's
+ // already in this BB.
+ if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
+ return IsNonLocalValue(V, MemoryInst->getParent());
+ })) {
+ LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
+ << "\n");
+ return Modified;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+ // scanning from the top of the BB to the bottom, reuses of the expr are
+ // guaranteed to happen later.
+ IRBuilder<> Builder(MemoryInst);
+
+ // Now that we've determined the addressing expression we want to use and
+ // know that we have to sink it into this block, check to see if we have
+ // already done this for some other load/store instr in this block. If so,
+ // reuse the computation. Before attempting reuse, check if the address is
+ // valid, as it may have been erased.
+
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
+
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+ if (SunkAddr) {
+ LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
+ << " for " << *MemoryInst << "\n");
+ if (SunkAddr->getType() != Addr->getType()) {
+ if (SunkAddr->getType()->getPointerAddressSpace() !=
+ Addr->getType()->getPointerAddressSpace() &&
+ !DL->isNonIntegralPointerType(Addr->getType())) {
+ // There are two reasons the address spaces might not match: a no-op
+ // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+ // ptrtoint/inttoptr pair to ensure we match the original semantics.
+ // TODO: allow bitcast between different address space pointers with the
+ // same size.
+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+ SunkAddr =
+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+ } else
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ }
+ } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
+ SubtargetInfo->addrSinkUsingGEPs())) {
+ // By default, we use the GEP-based method when AA is used later. This
+ // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
+ LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
+ << " for " << *MemoryInst << "\n");
+ Value *ResultPtr = nullptr, *ResultIndex = nullptr;
+
+ // First, find the pointer.
+ if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
+ ResultPtr = AddrMode.BaseReg;
+ AddrMode.BaseReg = nullptr;
+ }
+
+ if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
+ // We can't add more than one pointer together, nor can we scale a
+ // pointer (both of which seem meaningless).
+ if (ResultPtr || AddrMode.Scale != 1)
+ return Modified;
+
+ ResultPtr = AddrMode.ScaledReg;
+ AddrMode.Scale = 0;
+ }
+
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+ // the AddrMode was created we need to bail out gracefully if widths
+ // do not match instead of extending it.
+ //
+ // (See below for code to add the scale.)
+ if (AddrMode.Scale) {
+ Type *ScaledRegTy = AddrMode.ScaledReg->getType();
+ if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
+ cast<IntegerType>(ScaledRegTy)->getBitWidth())
+ return Modified;
+ }
+
+ if (AddrMode.BaseGV) {
+ if (ResultPtr)
+ return Modified;
+
+ ResultPtr = AddrMode.BaseGV;
+ }
+
+ // If the real base value actually came from an inttoptr, then the matcher
+ // will look through it and provide only the integer value. In that case,
+ // use it here.
+ if (!DL->isNonIntegralPointerType(Addr->getType())) {
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.Scale = 0;
+ }
+ }
+
+ if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
+ !AddrMode.BaseOffs) {
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ } else if (!ResultPtr) {
+ return Modified;
+ } else {
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType() != IntPtrTy)
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+
+ ResultIndex = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else {
+ assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth() &&
+ "We can't transform if ScaledReg is too narrow");
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+ }
+
+ if (AddrMode.Scale != 1)
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
+ if (ResultIndex)
+ ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
+ else
+ ResultIndex = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (ResultIndex) {
+ // We need to add this separately from the scale above to help with
+ // SDAG consecutive load/store merging.
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr", AddrMode.InBounds);
+ }
+
+ ResultIndex = V;
+ }
+
+ if (!ResultIndex) {
+ SunkAddr = ResultPtr;
+ } else {
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr",
+ AddrMode.InBounds);
+ }
+
+ if (SunkAddr->getType() != Addr->getType()) {
+ if (SunkAddr->getType()->getPointerAddressSpace() !=
+ Addr->getType()->getPointerAddressSpace() &&
+ !DL->isNonIntegralPointerType(Addr->getType())) {
+ // There are two reasons the address spaces might not match: a no-op
+ // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+ // ptrtoint/inttoptr pair to ensure we match the original semantics.
+ // TODO: allow bitcast between different address space pointers with
+ // the same size.
+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+ SunkAddr =
+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+ } else
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ }
+ }
+ } else {
+ // We'd require a ptrtoint/inttoptr down the line, which we can't do for
+ // non-integral pointers, so in that case bail out now.
+ Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
+ Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
+ PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
+ PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
+ if (DL->isNonIntegralPointerType(Addr->getType()) ||
+ (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
+ (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
+ (AddrMode.BaseGV &&
+ DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
+ return Modified;
+
+ LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
+ << " for " << *MemoryInst << "\n");
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+ Value *Result = nullptr;
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType()->isPointerTy())
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+ if (V->getType() != IntPtrTy)
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+ Result = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (V->getType()->isPointerTy()) {
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+ } else {
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+ // the AddrMode was created we need to bail out gracefully if widths
+ // do not match instead of extending it.
+ Instruction *I = dyn_cast_or_null<Instruction>(Result);
+ if (I && (Result != AddrMode.BaseReg))
+ I->eraseFromParent();
+ return Modified;
+ }
+ if (AddrMode.Scale != 1)
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ if (!Result)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
+ }
+
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+ // Store the newly computed address into the cache. In the case we reused a
+ // value, this should be idempotent.
+ SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
+
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
+ if (Repl->use_empty()) {
+ resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Repl, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ });
+ }
+ ++NumMemoryInsts;
+ return true;
+}
+
+/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
+/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
+/// only handle a 2 operand GEP in the same basic block or a splat constant
+/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
+/// index.
+///
+/// If the existing GEP has a vector base pointer that is splat, we can look
+/// through the splat to find the scalar pointer. If we can't find a scalar
+/// pointer there's nothing we can do.
+///
+/// If we have a GEP with more than 2 indices where the middle indices are all
+/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
+///
+/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
+/// followed by a GEP with an all zeroes vector index. This will enable
+/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
+/// zero index.
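+/// For example (an illustrative sketch), a gather whose pointer operand is
+/// "getelementptr [4 x i32], ptr %base, i64 0, <8 x i64> %idx" can be split
+/// into a scalar GEP over the leading zero indices followed by a two-operand
+/// GEP with the vector index, giving SelectionDAGBuilder a uniform scalar
+/// base.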
+bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
+ Value *Ptr) {
+ Value *NewAddr;
+
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ // Don't optimize GEPs that don't have indices.
+ if (!GEP->hasIndices())
+ return false;
+
+ // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
+ // FIXME: We should support this by sinking the GEP.
+ if (MemoryInst->getParent() != GEP->getParent())
+ return false;
+
+ SmallVector<Value *, 2> Ops(GEP->operands());
+
+ bool RewriteGEP = false;
+
+ if (Ops[0]->getType()->isVectorTy()) {
+ Ops[0] = getSplatValue(Ops[0]);
+ if (!Ops[0])
+ return false;
+ RewriteGEP = true;
+ }
+
+ unsigned FinalIndex = Ops.size() - 1;
+
+ // Ensure all but the last index is 0.
+ // FIXME: This isn't strictly required. All that's required is that they are
+ // all scalars or splats.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<Constant>(Ops[i]);
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
+ return false;
+ // Scalarize the index if needed.
+ Ops[i] = CI;
+ }
+
+ // Try to scalarize the final index.
+ if (Ops[FinalIndex]->getType()->isVectorTy()) {
+ if (Value *V = getSplatValue(Ops[FinalIndex])) {
+ auto *C = dyn_cast<ConstantInt>(V);
+ // Don't scalarize an all-zeros vector.
+ if (!C || !C->isZero()) {
+ Ops[FinalIndex] = V;
+ RewriteGEP = true;
+ }
+ }
+ }
+
+ // If we made any changes or we have extra operands, we need to generate
+ // new instructions.
+ if (!RewriteGEP && Ops.size() == 2)
+ return false;
+
+ auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
+
+ IRBuilder<> Builder(MemoryInst);
+
+ Type *SourceTy = GEP->getSourceElementType();
+ Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
+
+ // If the final index isn't a vector, emit a scalar GEP containing all ops
+ // and a vector GEP with all zeroes final index.
+ if (!Ops[FinalIndex]->getType()->isVectorTy()) {
+ NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
+ auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
+ auto *SecondTy = GetElementPtrInst::getIndexedType(
+ SourceTy, ArrayRef(Ops).drop_front());
+ NewAddr =
+ Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
+ } else {
+ Value *Base = Ops[0];
+ Value *Index = Ops[FinalIndex];
+
+ // Create a scalar GEP if there are more than 2 operands.
+ if (Ops.size() != 2) {
+ // Replace the last index with 0.
+ Ops[FinalIndex] =
+ Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
+ Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
+ SourceTy = GetElementPtrInst::getIndexedType(
+ SourceTy, ArrayRef(Ops).drop_front());
+ }
+
+ // Now create the GEP with scalar pointer and vector index.
+ NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
+ }
+ } else if (!isa<Constant>(Ptr)) {
+ // Not a GEP, but maybe it's a splat and we can create a GEP to enable
+ // SelectionDAGBuilder to use it as a uniform base.
+ Value *V = getSplatValue(Ptr);
+ if (!V)
+ return false;
+
+ auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
+
+ IRBuilder<> Builder(MemoryInst);
+
+ // Emit a vector GEP with a scalar pointer and all 0s vector index.
+ Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
+ auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
+ Type *ScalarTy;
+ if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
+ Intrinsic::masked_gather) {
+ ScalarTy = MemoryInst->getType()->getScalarType();
+ } else {
+ assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
+ Intrinsic::masked_scatter);
+ ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
+ }
+ NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
+ } else {
+ // Constant; SelectionDAGBuilder knows to check if it's a splat.
+ return false;
+ }
+
+ MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
+
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
+ if (Ptr->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Ptr, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+
+ return true;
+}
+
+/// If there are any memory operands, use OptimizeMemoryInst to sink their
+/// address computing into the block when possible / profitable.
+bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
+ bool MadeChange = false;
+
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(*DL, TRI, *CS);
+ unsigned ArgNo = 0;
+ for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ // TODO: Also handle C_Address?
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
+ }
+
+ return MadeChange;
+}
+
+/// Check if all the uses of \p Val are equivalent (or free) zero or
+/// sign extensions.
+static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
+ assert(!Val->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
+ bool IsSExt = isa<SExtInst>(FirstUser);
+ Type *ExtTy = FirstUser->getType();
+ for (const User *U : Val->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
+ return false;
+ Type *CurTy = UI->getType();
+ // Same input and output types: Same instruction after CSE.
+ if (CurTy == ExtTy)
+ continue;
+
+ // If IsSExt is true, we are in this situation:
+ // a = Val
+ // b = sext ty1 a to ty2
+ // c = sext ty1 a to ty3
+ // Assuming ty2 is shorter than ty3, this could be turned into:
+ // a = Val
+ // b = sext ty1 a to ty2
+ // c = sext ty2 b to ty3
+ // However, the last sext is not free.
+ if (IsSExt)
+ return false;
+
+ // This is a ZExt; maybe it is free to extend from one type to another.
+ // In that case, we would not account for a different use.
+ Type *NarrowTy;
+ Type *LargeTy;
+ if (ExtTy->getScalarType()->getIntegerBitWidth() >
+ CurTy->getScalarType()->getIntegerBitWidth()) {
+ NarrowTy = CurTy;
+ LargeTy = ExtTy;
+ } else {
+ NarrowTy = ExtTy;
+ LargeTy = CurTy;
+ }
+
+ if (!TLI.isZExtFree(NarrowTy, LargeTy))
+ return false;
+ }
+ // All uses are the same or can be derived from one another for free.
+ return true;
+}
+
+/// Try to speculatively promote extensions in \p Exts and continue
+/// promoting through newly promoted operands recursively as far as doing so is
+/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
+/// When some promotion happened, \p TPT contains the proper state to revert
+/// them.
+///
+/// \return true if some promotion happened, false otherwise.
+bool CodeGenPrepare::tryToPromoteExts(
+ TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost) {
+ bool Promoted = false;
+
+ // Iterate over all the extensions to try to promote them.
+ for (auto *I : Exts) {
+ // Early check if we directly have ext(load).
+ if (isa<LoadInst>(I->getOperand(0))) {
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+
+ // Check whether or not we want to do any promotion. The reason we have
+ // this check inside the for loop is to catch the case where an extension
+ // is directly fed by a load, because in such a case the extension can be
+ // moved up without any promotion on its operands.
+ if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+ return false;
+
+ // Get the action to perform the promotion.
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
+ // Check if we can promote.
+ if (!TPH) {
+ // Save the current extension as we cannot move up through its operand.
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+
+ // Save the current state.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 4> NewExts;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
+ // Promote.
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
+ assert(PromotedVal &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ // We would be able to merge only one extension into a load.
+ // Therefore, if we have more than 1 new extension we heuristically
+ // cut this search path, because it means we degrade the code quality.
+ // With exactly 2, the transformation is neutral, because we will merge
+ // one extension but leave one. However, we optimistically keep going,
+ // because the new extension may be removed too.
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ // FIXME: It would be possible to propagate a negative value instead of
+ // conservatively ceiling it to 0.
+ TotalCreatedInstsCost =
+ std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
+ if (!StressExtLdPromotion &&
+ (TotalCreatedInstsCost > 1 ||
+ !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
+ // This promotion is not profitable, rollback to the previous state, and
+ // save the current extension in ProfitablyMovedExts as the latest
+ // speculative promotion turned out to be unprofitable.
+ TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+ // Continue promoting NewExts as far as doing so is profitable.
+ SmallVector<Instruction *, 2> NewlyMovedExts;
+ (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
+ bool NewPromoted = false;
+ for (auto *ExtInst : NewlyMovedExts) {
+ Instruction *MovedExt = cast<Instruction>(ExtInst);
+ Value *ExtOperand = MovedExt->getOperand(0);
+ // If we have reached to a load, we need this extra profitability check
+ // as it could potentially be merged into an ext(load).
+ if (isa<LoadInst>(ExtOperand) &&
+ !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+ (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
+ continue;
+
+ ProfitablyMovedExts.push_back(MovedExt);
+ NewPromoted = true;
+ }
+
+ // If none of speculative promotions for NewExts is profitable, rollback
+ // and save the current extension (I) as the last profitable extension.
+ if (!NewPromoted) {
+ TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+ // The promotion is profitable.
+ Promoted = true;
+ }
+ return Promoted;
+}
+
+/// Merge redundant sexts when one dominates the other.
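+/// For example (illustrative): if two copies of "sext i32 %x to i64" were
+/// recorded for %x and one dominates the other, the dominated copy is
+/// replaced by the dominating one and removed.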
+bool CodeGenPrepare::mergeSExts(Function &F) {
+ bool Changed = false;
+ for (auto &Entry : ValToSExtendedUses) {
+ SExts &Insts = Entry.second;
+ SExts CurPts;
+ for (Instruction *Inst : Insts) {
+ if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+ Inst->getOperand(0) != Entry.first)
+ continue;
+ bool inserted = false;
+ for (auto &Pt : CurPts) {
+ if (getDT(F).dominates(Inst, Pt)) {
+ replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
+ RemovedInsts.insert(Pt);
+ Pt->removeFromParent();
+ Pt = Inst;
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!getDT(F).dominates(Pt, Inst))
+ // Give up if we need to merge in a common dominator as the
+ // experiments show it is not profitable.
+ continue;
+ replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
+ RemovedInsts.insert(Inst);
+ Inst->removeFromParent();
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!inserted)
+ CurPts.push_back(Inst);
+ }
+ }
+ return Changed;
+}
+
+// Split large data structures so that the GEPs accessing them can have
+// smaller offsets, allowing them to be sunk to the same blocks as their users.
+// For example, a large struct starting from %base is split into two parts
+// where the second part starts from %new_base.
+//
+// Before:
+// BB0:
+// %base =
+//
+// BB1:
+// %gep0 = gep %base, off0
+// %gep1 = gep %base, off1
+// %gep2 = gep %base, off2
+//
+// BB2:
+// %load1 = load %gep0
+// %load2 = load %gep1
+// %load3 = load %gep2
+//
+// After:
+// BB0:
+// %base =
+// %new_base = gep %base, off0
+//
+// BB1:
+// %new_gep0 = %new_base
+// %new_gep1 = gep %new_base, off1 - off0
+// %new_gep2 = gep %new_base, off2 - off0
+//
+// BB2:
+// %load1 = load i32, i32* %new_gep0
+// %load2 = load i32, i32* %new_gep1
+// %load3 = load i32, i32* %new_gep2
+//
+// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
+// their offsets are small enough to fit into the addressing mode.
+bool CodeGenPrepare::splitLargeGEPOffsets() {
+ bool Changed = false;
+ for (auto &Entry : LargeOffsetGEPMap) {
+ Value *OldBase = Entry.first;
+ SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
+ &LargeOffsetGEPs = Entry.second;
+ auto compareGEPOffset =
+ [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
+ const std::pair<GetElementPtrInst *, int64_t> &RHS) {
+ if (LHS.first == RHS.first)
+ return false;
+ if (LHS.second != RHS.second)
+ return LHS.second < RHS.second;
+ return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
+ };
+ // Sort all the GEPs of the same data structure based on the offsets.
+ llvm::sort(LargeOffsetGEPs, compareGEPOffset);
+ LargeOffsetGEPs.erase(
+ std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
+ LargeOffsetGEPs.end());
+ // Skip if all the GEPs have the same offsets.
+ if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
+ continue;
+ GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
+ int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
+ Value *NewBaseGEP = nullptr;
+
+ auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
+ while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
+ GetElementPtrInst *GEP = LargeOffsetGEP->first;
+ int64_t Offset = LargeOffsetGEP->second;
+ if (Offset != BaseOffset) {
+ TargetLowering::AddrMode AddrMode;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseOffs = Offset - BaseOffset;
+ // The result type of the GEP might not be the type of the memory
+ // access.
+ if (!TLI->isLegalAddressingMode(*DL, AddrMode,
+ GEP->getResultElementType(),
+ GEP->getAddressSpace())) {
+ // We need to create a new base if the offset to the current base is
+ // too large to fit into the addressing mode. So, a very large struct
+ // may be split into several parts.
+ BaseGEP = GEP;
+ BaseOffset = Offset;
+ NewBaseGEP = nullptr;
+ }
+ }
+
+ // Generate a new GEP to replace the current one.
+ LLVMContext &Ctx = GEP->getContext();
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
+ Type *I8PtrTy =
+ Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Type::getInt8Ty(Ctx);
+
+ if (!NewBaseGEP) {
+ // Create a new base if we don't have one yet. Find the insertion
+ // point for the new base first.
+ BasicBlock::iterator NewBaseInsertPt;
+ BasicBlock *NewBaseInsertBB;
+ if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+ // If the base of the struct is an instruction, the new base will be
+ // inserted close to it.
+ NewBaseInsertBB = BaseI->getParent();
+ if (isa<PHINode>(BaseI))
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+ NewBaseInsertBB =
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ } else
+ NewBaseInsertPt = std::next(BaseI->getIterator());
+ } else {
+ // If the current base is an argument or global value, the new base
+ // will be inserted to the entry block.
+ NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ }
+ IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+ // Create a new base.
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
+ NewBaseGEP = OldBase;
+ if (NewBaseGEP->getType() != I8PtrTy)
+ NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+ NewBaseGEP =
+ NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+ NewGEPBases.insert(NewBaseGEP);
+ }
+
+ IRBuilder<> Builder(GEP);
+ Value *NewGEP = NewBaseGEP;
+ if (Offset == BaseOffset) {
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+ } else {
+ // Calculate the new offset for the new GEP.
+ Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
+ NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
+
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+ }
+ replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
+ LargeOffsetGEPID.erase(GEP);
+ LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+bool CodeGenPrepare::optimizePhiType(
+ PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
+ SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
+ // We are looking for a collection of interconnected phi nodes that together
+ // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
+ // are of the same type. Convert the whole set of nodes to the type of the
+ // bitcast.
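+ // For instance (illustrative IR): an i32 phi whose incoming values are
+ // "bitcast float ... to i32" and whose only uses are "bitcast ... to float"
+ // can be rewritten as a float phi, removing the bitcasts, provided the
+ // target's shouldConvertPhiType hook allows it.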
+ Type *PhiTy = I->getType();
+ Type *ConvertTy = nullptr;
+ if (Visited.count(I) ||
+ (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
+ return false;
+
+ SmallVector<Instruction *, 4> Worklist;
+ Worklist.push_back(cast<Instruction>(I));
+ SmallPtrSet<PHINode *, 4> PhiNodes;
+ SmallPtrSet<ConstantData *, 4> Constants;
+ PhiNodes.insert(I);
+ Visited.insert(I);
+ SmallPtrSet<Instruction *, 4> Defs;
+ SmallPtrSet<Instruction *, 4> Uses;
+ // This works by adding extra bitcasts between loads/stores and removing
+ // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
+ // we can get into the situation where we remove a bitcast in one iteration
+ // just to add it again in the next. We need to ensure that at least one
+ // bitcast we remove is anchored to something that will not change back.
+ bool AnyAnchored = false;
+
+ while (!Worklist.empty()) {
+ Instruction *II = Worklist.pop_back_val();
+
+ if (auto *Phi = dyn_cast<PHINode>(II)) {
+ // Handle Defs, which might also be PHIs.
+ for (Value *V : Phi->incoming_values()) {
+ if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+ if (!PhiNodes.count(OpPhi)) {
+ if (!Visited.insert(OpPhi).second)
+ return false;
+ PhiNodes.insert(OpPhi);
+ Worklist.push_back(OpPhi);
+ }
+ } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+ if (!OpLoad->isSimple())
+ return false;
+ if (Defs.insert(OpLoad).second)
+ Worklist.push_back(OpLoad);
+ } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
+ if (Defs.insert(OpEx).second)
+ Worklist.push_back(OpEx);
+ } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+ if (!ConvertTy)
+ ConvertTy = OpBC->getOperand(0)->getType();
+ if (OpBC->getOperand(0)->getType() != ConvertTy)
+ return false;
+ if (Defs.insert(OpBC).second) {
+ Worklist.push_back(OpBC);
+ AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
+ !isa<ExtractElementInst>(OpBC->getOperand(0));
+ }
+ } else if (auto *OpC = dyn_cast<ConstantData>(V))
+ Constants.insert(OpC);
+ else
+ return false;
+ }
+ }
+
+ // Handle uses, which might also be PHIs.
+ for (User *V : II->users()) {
+ if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+ if (!PhiNodes.count(OpPhi)) {
+ if (Visited.count(OpPhi))
+ return false;
+ PhiNodes.insert(OpPhi);
+ Visited.insert(OpPhi);
+ Worklist.push_back(OpPhi);
+ }
+ } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
+ if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
+ return false;
+ Uses.insert(OpStore);
+ } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+ if (!ConvertTy)
+ ConvertTy = OpBC->getType();
+ if (OpBC->getType() != ConvertTy)
+ return false;
+ Uses.insert(OpBC);
+ AnyAnchored |=
+ any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
+ } else {
+ return false;
+ }
+ }
+ }
+
+ if (!ConvertTy || !AnyAnchored ||
+ !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
+ << *ConvertTy << "\n");
+
+ // Create all the new phi nodes of the new type, and bitcast any loads to the
+ // correct type.
+ ValueToValueMap ValMap;
+ for (ConstantData *C : Constants)
+ ValMap[C] = ConstantExpr::getCast(Instruction::BitCast, C, ConvertTy);
+ for (Instruction *D : Defs) {
+ if (isa<BitCastInst>(D)) {
+ ValMap[D] = D->getOperand(0);
+ DeletedInstrs.insert(D);
+ } else {
+ ValMap[D] =
+ new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+ }
+ }
+ for (PHINode *Phi : PhiNodes)
+ ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
+ Phi->getName() + ".tc", Phi);
+ // Pipe together all the PhiNodes.
+ for (PHINode *Phi : PhiNodes) {
+ PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
+ for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
+ NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
+ Phi->getIncomingBlock(i));
+ Visited.insert(NewPhi);
+ }
+ // And finally pipe up the stores and bitcasts
+ for (Instruction *U : Uses) {
+ if (isa<BitCastInst>(U)) {
+ DeletedInstrs.insert(U);
+ replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
+ } else {
+ U->setOperand(0,
+ new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+ }
+ }
+
+ // Save the removed phis to be deleted later.
+ for (PHINode *Phi : PhiNodes)
+ DeletedInstrs.insert(Phi);
+ return true;
+}
+
+bool CodeGenPrepare::optimizePhiTypes(Function &F) {
+ if (!OptimizePhiTypes)
+ return false;
+
+ bool Changed = false;
+ SmallPtrSet<PHINode *, 4> Visited;
+ SmallPtrSet<Instruction *, 4> DeletedInstrs;
+
+ // Attempt to optimize all the phis in the functions to the correct type.
+ for (auto &BB : F)
+ for (auto &Phi : BB.phis())
+ Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
+
+ // Remove any old phis that have been converted.
+ for (auto *I : DeletedInstrs) {
+ replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
+ I->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+/// Return true, if an ext(load) can be formed from an extension in
+/// \p MovedExts.
+bool CodeGenPrepare::canFormExtLd(
+ const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
+ Instruction *&Inst, bool HasPromoted) {
+ for (auto *MovedExtInst : MovedExts) {
+ if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
+ LI = cast<LoadInst>(MovedExtInst->getOperand(0));
+ Inst = MovedExtInst;
+ break;
+ }
+ }
+ if (!LI)
+ return false;
+
+ // If they're already in the same block, there's nothing to do.
+ // Make the cheap checks first if we did not promote.
+ // If we promoted, we need to check if it is indeed profitable.
+ if (!HasPromoted && LI->getParent() == Inst->getParent())
+ return false;
+
+ return TLI->isExtLoad(LI, Inst, *DL);
+}
+
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
+///
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
+/// allows us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign extended
+/// value used in memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+ bool AllowPromotionWithoutCommonHeader = false;
+  /// See if it is an interesting sext operation for the address type
+ /// promotion before trying to promote it, e.g., the ones with the right
+ /// type and used in memory accesses.
+ bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+ *Inst, AllowPromotionWithoutCommonHeader);
+ TypePromotionTransaction TPT(RemovedInsts);
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+ Exts.push_back(Inst);
+
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
+
+ // Look for a load being extended.
+ LoadInst *LI = nullptr;
+ Instruction *ExtFedByLoad;
+
+  // Try to promote a chain of computation if it allows forming an extended
+  // load.
+ if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+ assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+ TPT.commit();
+ // Move the extend into the same block as the load.
+ ExtFedByLoad->moveAfter(LI);
+ ++NumExtsMoved;
+ Inst = ExtFedByLoad;
+ return true;
+ }
+
+  // Continue promoting SExts if the target reported this one as worth
+  // considering for address type promotion.
+ if (ATPConsiderable &&
+ performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+ HasPromoted, TPT, SpeculativelyMovedExts))
+ return true;
+
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign extended the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, we expect promoting the
+// extension to be profitable on its own.
+bool CodeGenPrepare::performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+ bool Promoted = false;
+ SmallPtrSet<Instruction *, 1> UnhandledExts;
+ bool AllSeenFirst = true;
+ for (auto *I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+ SeenChainsForSExt.find(HeadOfChain);
+ // If there is an unhandled SExt which has the same header, try to promote
+ // it as well.
+ if (AlreadySeen != SeenChainsForSExt.end()) {
+ if (AlreadySeen->second != nullptr)
+ UnhandledExts.insert(AlreadySeen->second);
+ AllSeenFirst = false;
+ }
+ }
+
+ if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+ SpeculativelyMovedExts.size() == 1)) {
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto *I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+    // Update Inst since a promotion happened.
+ Inst = SpeculativelyMovedExts.pop_back_val();
+ } else {
+    // This is the first chain visited from the header, keep the current chain
+    // as unhandled. Defer promoting it until we encounter another SExt
+    // chain derived from the same header.
+ for (auto *I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = Inst;
+ }
+ return false;
+ }
+
+ if (!AllSeenFirst && !UnhandledExts.empty())
+ for (auto *VisitedSExt : UnhandledExts) {
+ if (RemovedInsts.count(VisitedSExt))
+ continue;
+ TypePromotionTransaction TPT(RemovedInsts);
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> Chains;
+ Exts.push_back(VisitedSExt);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto *I : Chains) {
+ Value *HeadOfChain = I->getOperand(0);
+ // Mark this as handled.
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ }
+ return Promoted;
+}
+
+bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
+ // other uses of the source with result of extension.
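+  // For illustration (sketch): with a free truncate,
+  //   %src = add i32 ...            ; in DefBB
+  //   %x = zext i32 %src to i64     ; in DefBB, used in another block
+  //   use of i32 %src               ; in another block
+  // is rewritten so the other block uses "trunc i64 %x to i32" instead of
+  // %src, leaving only %x live across the blocks.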
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (!TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = UI->getParent();
+ if (UserBB == DefBB)
+ continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+ // Make sure none of the uses are PHI nodes.
+ for (User *U : Src->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ BasicBlock *UserBB = UI->getParent();
+ if (UserBB == DefBB)
+ continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
+ return false;
+ }
+
+  // InsertedTruncs - insert only one trunc in each block.
+ DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Use &U : Src->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB)
+ continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
+ InsertedInsts.insert(InsertedTrunc);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ U = InsertedTrunc;
+ ++NumExtUses;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+// Find loads whose uses only use some of the loaded value's bits. Add an "and"
+// just after the load if the target can fold this into one extload instruction,
+// with the hope of eliminating some of the other later "and" instructions using
+// the loaded value. "and"s that are made trivially redundant by the insertion
+// of the new "and" are removed by this function, while others (e.g. those whose
+// path from the load goes through a phi) are left for isel to potentially
+// remove.
+//
+// For example:
+//
+// b0:
+// x = load i32
+// ...
+// b1:
+// y = and x, 0xff
+// z = use y
+//
+// becomes:
+//
+// b0:
+// x = load i32
+// x' = and x, 0xff
+// ...
+// b1:
+// z = use x'
+//
+// whereas:
+//
+// b0:
+// x1 = load i32
+// ...
+// b1:
+// x2 = load i32
+// ...
+// b2:
+// x = phi x1, x2
+// y = and x, 0xff
+//
+// becomes (after a call to optimizeLoadExt for each load):
+//
+// b0:
+// x1 = load i32
+// x1' = and x1, 0xff
+// ...
+// b1:
+// x2 = load i32
+// x2' = and x2, 0xff
+// ...
+// b2:
+// x = phi x1', x2'
+// y = and x, 0xff
+bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
+ if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
+ return false;
+
+ // Skip loads we've already transformed.
+ if (Load->hasOneUse() &&
+ InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
+ return false;
+
+ // Look at all uses of Load, looking through phis, to determine how many bits
+ // of the loaded value are needed.
+ SmallVector<Instruction *, 8> WorkList;
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 8> AndsToMaybeRemove;
+ for (auto *U : Load->users())
+ WorkList.push_back(cast<Instruction>(U));
+
+ EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
+ unsigned BitWidth = LoadResultVT.getSizeInBits();
+ // If the BitWidth is 0, do not try to optimize the type
+ if (BitWidth == 0)
+ return false;
+
+ APInt DemandBits(BitWidth, 0);
+ APInt WidestAndBits(BitWidth, 0);
+
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(I).second)
+ continue;
+
+ // For a PHI node, push all of its users.
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ for (auto *U : Phi->users())
+ WorkList.push_back(cast<Instruction>(U));
+ continue;
+ }
+
+ switch (I->getOpcode()) {
+ case Instruction::And: {
+ auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!AndC)
+ return false;
+ APInt AndBits = AndC->getValue();
+ DemandBits |= AndBits;
+ // Keep track of the widest and mask we see.
+ if (AndBits.ugt(WidestAndBits))
+ WidestAndBits = AndBits;
+ if (AndBits == WidestAndBits && I->getOperand(0) == Load)
+ AndsToMaybeRemove.push_back(I);
+ break;
+ }
+
+ case Instruction::Shl: {
+ auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!ShlC)
+ return false;
+ uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
+ DemandBits.setLowBits(BitWidth - ShiftAmt);
+ break;
+ }
+
+ case Instruction::Trunc: {
+ EVT TruncVT = TLI->getValueType(*DL, I->getType());
+ unsigned TruncBitWidth = TruncVT.getSizeInBits();
+ DemandBits.setLowBits(TruncBitWidth);
+ break;
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ uint32_t ActiveBits = DemandBits.getActiveBits();
+ // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
+ // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
+ // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
+ // (and (load x) 1) is not matched as a single instruction, rather as a LDR
+ // followed by an AND.
+ // TODO: Look into removing this restriction by fixing backends to either
+ // return false for isLoadExtLegal for i1 or have them select this pattern to
+ // a single instruction.
+ //
+ // Also avoid hoisting if we didn't see any ands with the exact DemandBits
+ // mask, since these are the only ands that will be removed by isel.
+ if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
+ WidestAndBits != DemandBits)
+ return false;
+
+ LLVMContext &Ctx = Load->getType()->getContext();
+ Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
+ EVT TruncVT = TLI->getValueType(*DL, TruncTy);
+
+ // Reject cases that won't be matched as extloads.
+ if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
+ !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
+ return false;
+
+ IRBuilder<> Builder(Load->getNextNode());
+ auto *NewAnd = cast<Instruction>(
+ Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(NewAnd);
+
+ // Replace all uses of load with new and (except for the use of load in the
+ // new and itself).
+ replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
+ NewAnd->setOperand(0, Load);
+
+ // Remove any and instructions that are now redundant.
+ for (auto *And : AndsToMaybeRemove)
+ // Check that the and mask is the same as the one we decided to put on the
+ // new and.
+ if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
+ replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
+ if (&*CurInstIterator == And)
+ CurInstIterator = std::next(And->getIterator());
+ And->eraseFromParent();
+ ++NumAndUses;
+ }
+
+ ++NumAndsAdded;
+ return true;
+}
+
+/// Check if V (an operand of a select instruction) is an expensive instruction
+/// that is only used once.
+static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // If it's safe to speculatively execute, then it should not have side
+ // effects; therefore, it's safe to sink and possibly *not* execute.
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
+ TTI->isExpensiveToSpeculativelyExecute(I);
+}
+
+/// Returns true if a SelectInst should be turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ const TargetLowering *TLI,
+ SelectInst *SI) {
+ // If even a predictable select is cheap, then a branch can't be cheaper.
+ if (!TLI->isPredictableSelectExpensive())
+ return false;
+
+ // FIXME: This should use the same heuristics as IfConversion to determine
+ // whether a select is better represented as a branch.
+
+ // If metadata tells us that the select condition is obviously predictable,
+ // then we want to replace the select with a branch.
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TTI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+ // If a branch is predictable, an out-of-order CPU can avoid blocking on its
+ // comparison condition. If the compare has more than one use, there's
+ // probably another cmov or setcc around, so it's not worth emitting a branch.
+ if (!Cmp || !Cmp->hasOneUse())
+ return false;
+
+ // If either operand of the select is expensive and only needed on one side
+ // of the select, we should form a branch.
+ if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
+ sinkSelectOperand(TTI, SI->getFalseValue()))
+ return true;
+
+ return false;
+}
+
+/// If \p isTrue is true, return the true value of \p SI, otherwise return the
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
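+///
+/// For example (sketch; values invented), with both selects in \p Selects:
+///   %s1 = select i1 %c, i32 %a, i32 %b
+///   %s2 = select i1 %c, i32 %s1, i32 %d
+/// a query for the true value of %s2 looks through %s1 and returns %a.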
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
+ Value *V = nullptr;
+
+ for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+ DefSI = dyn_cast<SelectInst>(V)) {
+ assert(DefSI->getCondition() == SI->getCondition() &&
+ "The condition of DefSI does not match with SI");
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ }
+
+ assert(V && "Failed to get select true/false value");
+ return V;
+}
+
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+ assert(Shift->isShift() && "Expected a shift");
+
+ // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+ // general vector shifts, and (3) the shift amount is a select-of-splatted
+ // values, hoist the shifts before the select:
+ // shift Op0, (select Cond, TVal, FVal) -->
+ // select Cond, (shift Op0, TVal), (shift Op0, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Shift->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Shift->getOperand(1),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Shift);
+ BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+ Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+ Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
+ Shift->eraseFromParent();
+ return true;
+}
+
+bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
+ Intrinsic::ID Opcode = Fsh->getIntrinsicID();
+ assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
+ "Expected a funnel shift");
+
+ // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
+ // than general vector shifts, and (3) the shift amount is select-of-splatted
+ // values, hoist the funnel shifts before the select:
+ // fsh Op0, Op1, (select Cond, TVal, FVal) -->
+ // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Fsh->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Fsh->getOperand(2),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Fsh);
+ Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
+ Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
+ Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
+ Fsh->eraseFromParent();
+ return true;
+}
+
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
+bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ if (DisableSelectToBranch)
+ return false;
+
+ // If the SelectOptimize pass is enabled, selects have already been optimized.
+ if (!getCGPassBuilderOption().DisableSelectOptimize)
+ return false;
+
+ // Find all consecutive select instructions that share the same condition.
+ SmallVector<SelectInst *, 2> ASI;
+ ASI.push_back(SI);
+ for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
+ It != SI->getParent()->end(); ++It) {
+ SelectInst *I = dyn_cast<SelectInst>(&*It);
+ if (I && SI->getCondition() == I->getCondition()) {
+ ASI.push_back(I);
+ } else {
+ break;
+ }
+ }
+
+ SelectInst *LastSI = ASI.back();
+  // Increment the current iterator to skip the rest of the select
+  // instructions, because they will either all be lowered to branches or none
+  // of them will be.
+ CurInstIterator = std::next(LastSI->getIterator());
+
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+
+  // Can we convert the 'select' to control flow?
+ if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
+ return false;
+
+ TargetLowering::SelectSupportKind SelectKind;
+ if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+
+ if (TLI->isSelectSupported(SelectKind) &&
+ (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
+ return false;
+
+ // The DominatorTree needs to be rebuilt by any consumers after this
+ // transformation. We simply reset here rather than setting the ModifiedDT
+ // flag to avoid restarting the function walk in runOnFunction for each
+ // select optimized.
+ DT.reset();
+
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %cmp.frozen = freeze %cmp
+ // br i1 %cmp.frozen, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // %cmp should be frozen, otherwise it may introduce undefined behavior.
+ // In addition, we may sink instructions that produce %c or %d from
+ // the entry block into the destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
+ // Collect values that go on the true side and the values that go on the false
+ // side.
+ SmallVector<Instruction *> TrueInstrs, FalseInstrs;
+ for (SelectInst *SI : ASI) {
+ if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
+ TrueInstrs.push_back(cast<Instruction>(V));
+ if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
+ FalseInstrs.push_back(cast<Instruction>(V));
+ }
+
+ // Split the select block, according to how many (if any) values go on each
+ // side.
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+
+ IRBuilder<> IB(SI);
+ auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+
+ BasicBlock *TrueBlock = nullptr;
+ BasicBlock *FalseBlock = nullptr;
+ BasicBlock *EndBlock = nullptr;
+ BranchInst *TrueBranch = nullptr;
+ BranchInst *FalseBranch = nullptr;
+ if (TrueInstrs.size() == 0) {
+ FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
+ } else if (FalseInstrs.size() == 0) {
+ TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ TrueBlock = TrueBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ } else {
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+ nullptr, nullptr, LI);
+ TrueBranch = cast<BranchInst>(ThenTerm);
+ FalseBranch = cast<BranchInst>(ElseTerm);
+ TrueBlock = TrueBranch->getParent();
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ }
+
+ EndBlock->setName("select.end");
+ if (TrueBlock)
+ TrueBlock->setName("select.true.sink");
+ if (FalseBlock)
+ FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
+ : "select.false.sink");
+
+ if (IsHugeFunc) {
+ if (TrueBlock)
+ FreshBBs.insert(TrueBlock);
+ if (FalseBlock)
+ FreshBBs.insert(FalseBlock);
+ FreshBBs.insert(EndBlock);
+ }
+
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+
+ static const unsigned MD[] = {
+ LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+ LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
+ StartBlock->getTerminator()->copyMetadata(*SI, MD);
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ for (Instruction *I : TrueInstrs)
+ I->moveBefore(TrueBranch);
+ for (Instruction *I : FalseInstrs)
+ I->moveBefore(FalseBranch);
+
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ if (TrueBlock == nullptr)
+ TrueBlock = StartBlock;
+ else if (FalseBlock == nullptr)
+ FalseBlock = StartBlock;
+
+ SmallPtrSet<const Instruction *, 2> INS;
+ INS.insert(ASI.begin(), ASI.end());
+  // Use a reverse iterator because a later select may use the value of an
+  // earlier select, and we need to propagate the value through the earlier
+  // select to get the PHI operand.
+ for (SelectInst *SI : llvm::reverse(ASI)) {
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+ PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
+
+ replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
+ SI->eraseFromParent();
+ INS.erase(SI);
+ ++NumSelectsExpanded;
+ }
+
+ // Instruct OptimizeBlock to skip to the next block.
+ CurInstIterator = StartBlock->end();
+ return true;
+}
+
+/// Some targets only accept certain types for splat inputs. For example a VDUP
+/// in MVE takes a GPR (integer) register, and instructions that incorporate a
+/// VDUP (such as a VADD qd, qm, rm) also require a GPR register.
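+///
+/// As a rough sketch of the rewrite below (the actual scalar type comes from
+/// the target's shouldConvertSplatType), a splat such as
+///   %i = insertelement <4 x float> poison, float %v, i64 0
+///   %s = shufflevector <4 x float> %i, <4 x float> poison, zeroinitializer
+/// may be rebuilt as a bitcast of %v to i32, an i32 splat, and a bitcast of
+/// the splat back to <4 x float>.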
+bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+ // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
+ if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
+ m_Undef(), m_ZeroMask())))
+ return false;
+ Type *NewType = TLI->shouldConvertSplatType(SVI);
+ if (!NewType)
+ return false;
+
+ auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
+ assert(!NewType->isVectorTy() && "Expected a scalar type!");
+ assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
+ "Expected a type of the same size!");
+ auto *NewVecType =
+ FixedVectorType::get(NewType, SVIVecType->getNumElements());
+
+ // Create a bitcast (shuffle (insert (bitcast(..))))
+ IRBuilder<> Builder(SVI->getContext());
+ Builder.SetInsertPoint(SVI);
+ Value *BC1 = Builder.CreateBitCast(
+ cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
+ Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
+ Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
+
+ replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ SVI, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+
+  // Also hoist the bitcast up to its operand if they are not in the same
+  // block.
+ if (auto *BCI = dyn_cast<Instruction>(BC1))
+ if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
+ if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
+ !Op->isTerminator() && !Op->isEHPad())
+ BCI->moveAfter(Op);
+
+ return true;
+}
+
+bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
+ // If the operands of I can be folded into a target instruction together with
+ // I, duplicate and sink them.
+ SmallVector<Use *, 4> OpsToSink;
+ if (!TLI->shouldSinkOperands(I, OpsToSink))
+ return false;
+
+ // OpsToSink can contain multiple uses in a use chain (e.g.
+ // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
+ // uses must come first, so we process the ops in reverse order so as to not
+ // create invalid IR.
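+  // E.g. (sketch, assuming the target reports both uses as sinkable), for
+  //   %s = shufflevector ...   ; in another block
+  //   %z = zext ... %s ...     ; in another block
+  //   %a = add ... %z          ; I, in TargetBB
+  // the zext is cloned into TargetBB first, then the shufflevector, and the
+  // cloned zext is rewired to use the cloned shufflevector.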
+ BasicBlock *TargetBB = I->getParent();
+ bool Changed = false;
+ SmallVector<Use *, 4> ToReplace;
+ Instruction *InsertPoint = I;
+ DenseMap<const Instruction *, unsigned long> InstOrdering;
+ unsigned long InstNumber = 0;
+ for (const auto &I : *TargetBB)
+ InstOrdering[&I] = InstNumber++;
+
+ for (Use *U : reverse(OpsToSink)) {
+ auto *UI = cast<Instruction>(U->get());
+ if (isa<PHINode>(UI))
+ continue;
+ if (UI->getParent() == TargetBB) {
+ if (InstOrdering[UI] < InstOrdering[InsertPoint])
+ InsertPoint = UI;
+ continue;
+ }
+ ToReplace.push_back(U);
+ }
+
+ SetVector<Instruction *> MaybeDead;
+ DenseMap<Instruction *, Instruction *> NewInstructions;
+ for (Use *U : ToReplace) {
+ auto *UI = cast<Instruction>(U->get());
+ Instruction *NI = UI->clone();
+
+ if (IsHugeFunc) {
+      // Since we are cloning an instruction, the defs of its operands may now
+      // sink into this BB, so put those defs' BBs into FreshBBs for later
+      // optimization.
+ for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
+ auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
+ if (!OpDef)
+ continue;
+ FreshBBs.insert(OpDef->getParent());
+ }
+ }
+
+ NewInstructions[UI] = NI;
+ MaybeDead.insert(UI);
+ LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
+ NI->insertBefore(InsertPoint);
+ InsertPoint = NI;
+ InsertedInsts.insert(NI);
+
+ // Update the use for the new instruction, making sure that we update the
+ // sunk instruction uses, if it is part of a chain that has already been
+ // sunk.
+ Instruction *OldI = cast<Instruction>(U->getUser());
+ if (NewInstructions.count(OldI))
+ NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
+ else
+ U->set(NI);
+ Changed = true;
+ }
+
+ // Remove instructions that are dead after sinking.
+ for (auto *I : MaybeDead) {
+ if (!I->hasNUsesOrMore(1)) {
+ LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
+ I->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ Type *OldType = Cond->getType();
+ LLVMContext &Context = Cond->getContext();
+ EVT OldVT = TLI->getValueType(*DL, OldType);
+ MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
+ unsigned RegWidth = RegType.getSizeInBits();
+
+ if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+ return false;
+
+ // If the register width is greater than the type width, expand the condition
+ // of the switch instruction and each case constant to the width of the
+ // register. By widening the type of the switch condition, subsequent
+ // comparisons (for case comparisons) will not need to be extended to the
+ // preferred register width, so we will potentially eliminate N-1 extends,
+ // where N is the number of cases in the switch.
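+  // E.g. (illustrative; assuming a 32-bit preferred condition type), a
+  // "switch i8 %c" with cases i8 1 and i8 2 becomes "switch i32 %c.ext" with
+  // cases i32 1 and i32 2, where %c.ext = zext i8 %c to i32.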
+ auto *NewType = Type::getIntNTy(Context, RegWidth);
+
+ // Extend the switch condition and case constants using the target preferred
+ // extend unless the switch condition is a function argument with an extend
+ // attribute. In that case, we can avoid an unnecessary mask/extension by
+ // matching the argument extension instead.
+ Instruction::CastOps ExtType = Instruction::ZExt;
+ // Some targets prefer SExt over ZExt.
+ if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
+ ExtType = Instruction::SExt;
+
+ if (auto *Arg = dyn_cast<Argument>(Cond)) {
+ if (Arg->hasSExtAttr())
+ ExtType = Instruction::SExt;
+ if (Arg->hasZExtAttr())
+ ExtType = Instruction::ZExt;
+ }
+
+ auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
+ ExtInst->insertBefore(SI);
+ ExtInst->setDebugLoc(SI->getDebugLoc());
+ SI->setCondition(ExtInst);
+ for (auto Case : SI->cases()) {
+ const APInt &NarrowConst = Case.getCaseValue()->getValue();
+ APInt WideConst = (ExtType == Instruction::ZExt)
+ ? NarrowConst.zext(RegWidth)
+ : NarrowConst.sext(RegWidth);
+ Case.setValue(ConstantInt::get(Context, WideConst));
+ }
+
+ return true;
+}
+
+bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
+ // The SCCP optimization tends to produce code like this:
+ // switch(x) { case 42: phi(42, ...) }
+  // Materializing the constant for the phi-argument needs instructions, so we
+  // change the code to:
+ // switch(x) { case 42: phi(x, ...) }
+
+ Value *Condition = SI->getCondition();
+ // Avoid endless loop in degenerate case.
+ if (isa<ConstantInt>(*Condition))
+ return false;
+
+ bool Changed = false;
+ BasicBlock *SwitchBB = SI->getParent();
+ Type *ConditionType = Condition->getType();
+
+ for (const SwitchInst::CaseHandle &Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseBB = Case.getCaseSuccessor();
+ // Set to true if we previously checked that `CaseBB` is only reached by
+ // a single case from this switch.
+ bool CheckedForSinglePred = false;
+ for (PHINode &PHI : CaseBB->phis()) {
+ Type *PHIType = PHI.getType();
+ // If ZExt is free then we can also catch patterns like this:
+ // switch((i32)x) { case 42: phi((i64)42, ...); }
+ // and replace `(i64)42` with `zext i32 %x to i64`.
+ bool TryZExt =
+ PHIType->isIntegerTy() &&
+ PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
+ TLI->isZExtFree(ConditionType, PHIType);
+ if (PHIType == ConditionType || TryZExt) {
+ // Set to true to skip this case because of multiple preds.
+ bool SkipCase = false;
+ Value *Replacement = nullptr;
+ for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
+ Value *PHIValue = PHI.getIncomingValue(I);
+ if (PHIValue != CaseValue) {
+ if (!TryZExt)
+ continue;
+ ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
+ if (!PHIValueInt ||
+ PHIValueInt->getValue() !=
+ CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
+ continue;
+ }
+ if (PHI.getIncomingBlock(I) != SwitchBB)
+ continue;
+ // We cannot optimize if there are multiple case labels jumping to
+ // this block. This check may get expensive when there are many
+ // case labels so we test for it last.
+ if (!CheckedForSinglePred) {
+ CheckedForSinglePred = true;
+ if (SI->findCaseDest(CaseBB) == nullptr) {
+ SkipCase = true;
+ break;
+ }
+ }
+
+ if (Replacement == nullptr) {
+ if (PHIValue == CaseValue) {
+ Replacement = Condition;
+ } else {
+ IRBuilder<> Builder(SI);
+ Replacement = Builder.CreateZExt(Condition, PHIType);
+ }
+ }
+ PHI.setIncomingValue(I, Replacement);
+ Changed = true;
+ }
+ if (SkipCase)
+ break;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ bool Changed = optimizeSwitchType(SI);
+ Changed |= optimizeSwitchPhiConstants(SI);
+ return Changed;
+}
+
+namespace {
+
+/// Helper class to promote a scalar operation to a vector one.
+/// This class is used to move an extractelement transition downward.
+/// E.g.,
+/// a = vector_op <2 x i32>
+/// b = extractelement <2 x i32> a, i32 0
+/// c = scalar_op b
+/// store c
+///
+/// =>
+/// a = vector_op <2 x i32>
+/// c = vector_op a (equivalent to scalar_op on the related lane)
+/// * d = extractelement <2 x i32> c, i32 0
+/// * store d
+/// Assuming both extractelement and store can be combined, we get rid of the
+/// transition.
+class VectorPromoteHelper {
+ /// DataLayout associated with the current module.
+ const DataLayout &DL;
+
+ /// Used to perform some checks on the legality of vector operations.
+ const TargetLowering &TLI;
+
+  /// Used to estimate the cost of the promoted chain.
+ const TargetTransformInfo &TTI;
+
+ /// The transition being moved downwards.
+ Instruction *Transition;
+
+ /// The sequence of instructions to be promoted.
+ SmallVector<Instruction *, 4> InstsToBePromoted;
+
+ /// Cost of combining a store and an extract.
+ unsigned StoreExtractCombineCost;
+
+ /// Instruction that will be combined with the transition.
+ Instruction *CombineInst = nullptr;
+
+ /// The instruction that represents the current end of the transition.
+ /// Since we are faking the promotion until we reach the end of the chain
+ /// of computation, we need a way to get the current end of the transition.
+ Instruction *getEndOfTransition() const {
+ if (InstsToBePromoted.empty())
+ return Transition;
+ return InstsToBePromoted.back();
+ }
+
+ /// Return the index of the original value in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
+ /// c, is at index 0.
+ unsigned getTransitionOriginalValueIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 0;
+ }
+
+ /// Return the index of the index in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 0" the index
+ /// is at index 1.
+ unsigned getTransitionIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 1;
+ }
+
+ /// Get the type of the transition.
+ /// This is the type of the original value.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
+ /// transition is <2 x i32>.
+ Type *getTransitionType() const {
+ return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
+ }
+
+  /// Promote \p ToBePromoted by moving \p Def downward past it.
+ /// I.e., we have the following sequence:
+ /// Def = Transition <ty1> a to <ty2>
+ /// b = ToBePromoted <ty2> Def, ...
+ /// =>
+ /// b = ToBePromoted <ty1> a, ...
+ /// Def = Transition <ty1> ToBePromoted to <ty2>
+ void promoteImpl(Instruction *ToBePromoted);
+
+ /// Check whether or not it is profitable to promote all the
+ /// instructions enqueued to be promoted.
+ bool isProfitableToPromote() {
+ Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
+ unsigned Index = isa<ConstantInt>(ValIdx)
+ ? cast<ConstantInt>(ValIdx)->getZExtValue()
+ : -1;
+ Type *PromotedType = getTransitionType();
+
+ StoreInst *ST = cast<StoreInst>(CombineInst);
+ unsigned AS = ST->getPointerAddressSpace();
+ // Check if this store is supported.
+ if (!TLI.allowsMisalignedMemoryAccesses(
+ TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
+ ST->getAlign())) {
+ // If this is not supported, there is no way we can combine
+ // the extract with the store.
+ return false;
+ }
+
+ // The scalar chain of computation has to pay for the transition
+ // scalar to vector.
+ // The vector chain has to account for the combining cost.
+ enum TargetTransformInfo::TargetCostKind CostKind =
+ TargetTransformInfo::TCK_RecipThroughput;
+ InstructionCost ScalarCost =
+ TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
+ InstructionCost VectorCost = StoreExtractCombineCost;
+ for (const auto &Inst : InstsToBePromoted) {
+ // Compute the cost.
+ // By construction, all instructions being promoted are arithmetic ones.
+ // Moreover, one argument is a constant that can be viewed as a splat
+ // constant.
+ Value *Arg0 = Inst->getOperand(0);
+ bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
+ isa<ConstantFP>(Arg0);
+ TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
+ if (IsArg0Constant)
+ Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
+ else
+ Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
+
+ ScalarCost += TTI.getArithmeticInstrCost(
+ Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
+ VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+ CostKind, Arg0Info, Arg1Info);
+ }
+ LLVM_DEBUG(
+ dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+ << ScalarCost << "\nVector: " << VectorCost << '\n');
+ return ScalarCost > VectorCost;
+ }
+
+ /// Generate a constant vector with \p Val with the same
+ /// number of elements as the transition.
+ /// \p UseSplat defines whether or not \p Val should be replicated
+ /// across the whole vector.
+ /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
+ /// otherwise we generate a vector with as many undef as possible:
+ /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
+ /// used at the index of the extract.
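+  /// E.g. (sketch), for a <4 x i32> transition extracting index 1 and
+  /// \p Val == i32 7: UseSplat yields <7, 7, 7, 7>, otherwise the result is
+  /// <undef, 7, undef, undef>.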
+ Value *getConstantVector(Constant *Val, bool UseSplat) const {
+ unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
+ if (!UseSplat) {
+ // If we cannot determine where the constant must be, we have to
+ // use a splat constant.
+ Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
+ ExtractIdx = CstVal->getSExtValue();
+ else
+ UseSplat = true;
+ }
+
+ ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
+ if (UseSplat)
+ return ConstantVector::getSplat(EC, Val);
+
+ if (!EC.isScalable()) {
+ SmallVector<Constant *, 4> ConstVec;
+ UndefValue *UndefVal = UndefValue::get(Val->getType());
+ for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
+ if (Idx == ExtractIdx)
+ ConstVec.push_back(Val);
+ else
+ ConstVec.push_back(UndefVal);
+ }
+ return ConstantVector::get(ConstVec);
+ } else
+ llvm_unreachable(
+ "Generate scalable vector for non-splat is unimplemented");
+ }
+
+ /// Check if promoting to a vector type an operand at \p OperandIdx
+ /// in \p Use can trigger undefined behavior.
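+  /// For instance (sketch), promoting the divisor of a udiv by filling the
+  /// unused lanes with undef could introduce a division by undef/zero, so
+  /// such operands must use a splat constant instead.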
+ static bool canCauseUndefinedBehavior(const Instruction *Use,
+ unsigned OperandIdx) {
+    // It is not safe to introduce undef when the operand is on the
+    // right-hand side of a division-like instruction.
+ if (OperandIdx != 1)
+ return false;
+ switch (Use->getOpcode()) {
+ default:
+ return false;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return true;
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return !Use->hasNoNaNs();
+ }
+ llvm_unreachable(nullptr);
+ }
+
+public:
+ VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
+ const TargetTransformInfo &TTI, Instruction *Transition,
+ unsigned CombineCost)
+ : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
+ StoreExtractCombineCost(CombineCost) {
+ assert(Transition && "Do not know how to promote null");
+ }
+
+ /// Check if we can promote \p ToBePromoted to \p Type.
+ bool canPromote(const Instruction *ToBePromoted) const {
+ // We could support CastInst too.
+ return isa<BinaryOperator>(ToBePromoted);
+ }
+
+ /// Check if it is profitable to promote \p ToBePromoted
+ /// by moving downward the transition through.
+ bool shouldPromote(const Instruction *ToBePromoted) const {
+ // Promote only if all the operands can be statically expanded.
+ // Indeed, we do not want to introduce any new kind of transitions.
+ for (const Use &U : ToBePromoted->operands()) {
+ const Value *Val = U.get();
+ if (Val == getEndOfTransition()) {
+ // If the use is a division and the transition is on the rhs,
+ // we cannot promote the operation, otherwise we may create a
+ // division by zero.
+ if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
+ return false;
+ continue;
+ }
+ if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
+ !isa<ConstantFP>(Val))
+ return false;
+ }
+ // Check that the resulting operation is legal.
+ int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
+ if (!ISDOpcode)
+ return false;
+ return StressStoreExtract ||
+ TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
+ }
+
+ /// Check whether or not \p Use can be combined
+ /// with the transition.
+ /// I.e., is it possible to do Use(Transition) => AnotherUse?
+ bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
+
+ /// Record \p ToBePromoted as part of the chain to be promoted.
+ void enqueueForPromotion(Instruction *ToBePromoted) {
+ InstsToBePromoted.push_back(ToBePromoted);
+ }
+
+ /// Set the instruction that will be combined with the transition.
+ void recordCombineInstruction(Instruction *ToBeCombined) {
+ assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
+ CombineInst = ToBeCombined;
+ }
+
+  /// Promote all the instructions enqueued for promotion if it is
+  /// profitable.
+ /// \return True if the promotion happened, false otherwise.
+ bool promote() {
+ // Check if there is something to promote.
+ // Right now, if we do not have anything to combine with,
+ // we assume the promotion is not profitable.
+ if (InstsToBePromoted.empty() || !CombineInst)
+ return false;
+
+ // Check cost.
+ if (!StressStoreExtract && !isProfitableToPromote())
+ return false;
+
+ // Promote.
+ for (auto &ToBePromoted : InstsToBePromoted)
+ promoteImpl(ToBePromoted);
+ InstsToBePromoted.clear();
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
+ // At this point, we know that all the operands of ToBePromoted but Def
+ // can be statically promoted.
+ // For Def, we need to use its parameter in ToBePromoted:
+ // b = ToBePromoted ty1 a
+ // Def = Transition ty1 b to ty2
+ // Move the transition down.
+ // 1. Replace all uses of the promoted operation by the transition.
+ // = ... b => = ... Def.
+ assert(ToBePromoted->getType() == Transition->getType() &&
+ "The type of the result of the transition does not match "
+ "the final type");
+ ToBePromoted->replaceAllUsesWith(Transition);
+ // 2. Update the type of the uses.
+ // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
+ Type *TransitionTy = getTransitionType();
+ ToBePromoted->mutateType(TransitionTy);
+ // 3. Update all the operands of the promoted operation with promoted
+ // operands.
+ // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
+ for (Use &U : ToBePromoted->operands()) {
+ Value *Val = U.get();
+ Value *NewVal = nullptr;
+ if (Val == Transition)
+ NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
+ else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
+ isa<ConstantFP>(Val)) {
+ // Use a splat constant if it is not safe to use undef.
+ NewVal = getConstantVector(
+ cast<Constant>(Val),
+ isa<UndefValue>(Val) ||
+ canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
+ } else
+ llvm_unreachable("Did you modified shouldPromote and forgot to update "
+ "this?");
+ ToBePromoted->setOperand(U.getOperandNo(), NewVal);
+ }
+ Transition->moveAfter(ToBePromoted);
+ Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
+}
+
+/// Some targets can do store(extractelement) with one instruction.
+/// Try to push the extractelement towards the stores when the target
+/// has this feature and this is profitable.
+bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
+ unsigned CombineCost = std::numeric_limits<unsigned>::max();
+ if (DisableStoreExtract ||
+ (!StressStoreExtract &&
+ !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
+ Inst->getOperand(1), CombineCost)))
+ return false;
+
+ // At this point we know that Inst is a vector to scalar transition.
+ // Try to move it down the def-use chain, until:
+ // - We can combine the transition with its single use
+ // => we got rid of the transition.
+ // - We escape the current basic block
+  //     => we would need to check that we are moving it to a cheaper place,
+  //        and we do not do that for now.
+ BasicBlock *Parent = Inst->getParent();
+ LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
+ VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
+ // If the transition has more than one use, assume this is not going to be
+ // beneficial.
+ while (Inst->hasOneUse()) {
+ Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
+ LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
+
+ if (ToBePromoted->getParent() != Parent) {
+ LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
+ << ToBePromoted->getParent()->getName()
+ << ") than the transition (" << Parent->getName()
+ << ").\n");
+ return false;
+ }
+
+ if (VPH.canCombine(ToBePromoted)) {
+ LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
+ << "will be combined with: " << *ToBePromoted << '\n');
+ VPH.recordCombineInstruction(ToBePromoted);
+ bool Changed = VPH.promote();
+ NumStoreExtractExposed += Changed;
+ return Changed;
+ }
+
+ LLVM_DEBUG(dbgs() << "Try promoting.\n");
+ if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
+
+ VPH.enqueueForPromotion(ToBePromoted);
+ Inst = ToBePromoted;
+ }
+ return false;
+}
+
+/// In the store sequence below, the values F and I are bundled together as an
+/// i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting other merged stores can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// These store patterns are commonly seen in the simple code snippet below
+/// when only std::make_pair(...) is SROA-transformed before being inlined
+/// into hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+/// Although we already have similar splitting in DAG Combine, we duplicate
+/// it in CodeGenPrepare to catch the case in which the pattern spans
+/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
+/// during code expansion.
+static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
+ const TargetLowering &TLI) {
+ // Handle simple but common cases only.
+ Type *StoreType = SI.getValueOperand()->getType();
+
+ // The code below assumes shifting a value by <number of bits>,
+ // whereas scalable vectors would have to be shifted by
+ // <2log(vscale) + number of bits> in order to store the
+ // low/high parts. Bailing out for now.
+ if (StoreType->isScalableTy())
+ return false;
+
+ if (!DL.typeSizeEqualsStoreSize(StoreType) ||
+ DL.getTypeSizeInBits(StoreType) == 0)
+ return false;
+
+ unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
+ Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
+ if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
+ return false;
+
+ // Don't split the store if it is volatile.
+ if (SI.isVolatile())
+ return false;
+
+  // Match the following patterns:
+  // (store (or (zext LValue to i64),
+  //            (shl (zext HValue to i64), HalfValBitSize)), addr)
+  // or
+  // (store (or (shl (zext HValue to i64), HalfValBitSize),
+  //            (zext LValue to i64)), addr)
+  // Expect both operands of the OR and the first operand of the SHL to have
+  // only one use.
+ Value *LValue, *HValue;
+ if (!match(SI.getValueOperand(),
+ m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
+ m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
+ m_SpecificInt(HalfValBitSize))))))
+ return false;
+
+  // Check that LValue and HValue are integers with size less than or equal
+  // to HalfValBitSize.
+ if (!LValue->getType()->isIntegerTy() ||
+ DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
+ !HValue->getType()->isIntegerTy() ||
+ DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
+ return false;
+
+ // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
+ // as the input of target query.
+ auto *LBC = dyn_cast<BitCastInst>(LValue);
+ auto *HBC = dyn_cast<BitCastInst>(HValue);
+ EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
+ : EVT::getEVT(LValue->getType());
+ EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
+ : EVT::getEVT(HValue->getType());
+ if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return false;
+
+ // Start to split store.
+ IRBuilder<> Builder(SI.getContext());
+ Builder.SetInsertPoint(&SI);
+
+  // If LValue/HValue is a bitcast in another BB, create a new one in the
+  // current BB so it may be merged with the split stores by the DAG combiner.
+ if (LBC && LBC->getParent() != SI.getParent())
+ LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
+ if (HBC && HBC->getParent() != SI.getParent())
+ HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
+
+ bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
+ auto CreateSplitStore = [&](Value *V, bool Upper) {
+ V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
+ Value *Addr = Builder.CreateBitCast(
+ SI.getOperand(1),
+ SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
+ Align Alignment = SI.getAlign();
+ const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
+ if (IsOffsetStore) {
+ Addr = Builder.CreateGEP(
+ SplitStoreType, Addr,
+ ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+
+ // When splitting the store in half, naturally one half will retain the
+ // alignment of the original wider store, regardless of whether it was
+ // over-aligned or not, while the other will require adjustment.
+ Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
+ }
+ Builder.CreateAlignedStore(V, Addr, Alignment);
+ };
+
+ CreateSplitStore(LValue, false);
+ CreateSplitStore(HValue, true);
+
+ // Delete the old store.
+ SI.eraseFromParent();
+ return true;
+}
+
+// Return true if the GEP has two operands, the first operand is of a sequential
+// type, and the second operand is a constant.
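+// E.g. (sketch) "getelementptr i32, ptr %p, i64 7" qualifies, whereas a GEP
+// with more than one index or a non-constant index does not.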
+static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
+ gep_type_iterator I = gep_type_begin(*GEP);
+ return GEP->getNumOperands() == 2 && I.isSequential() &&
+ isa<ConstantInt>(GEP->getOperand(1));
+}
+
+// Try unmerging GEPs to reduce liveness interference (register pressure) across
+// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
+// reducing liveness interference across those edges benefits global register
+// allocation. Currently handles only certain cases.
+//
+// For example, unmerge %GEPI and %UGEPI as below.
+//
+// ---------- BEFORE ----------
+// SrcBlock:
+// ...
+// %GEPIOp = ...
+// ...
+// %GEPI = gep %GEPIOp, Idx
+// ...
+// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
+// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
+// (* %GEPIOp is alive on the indirectbr edges only because it is used by
+//    %UGEPI)
+//
+// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
+// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
+// ...
+//
+// DstBi:
+// ...
+// %UGEPI = gep %GEPIOp, UIdx
+// ...
+// ---------------------------
+//
+// ---------- AFTER ----------
+// SrcBlock:
+// ... (same as above)
+// (* %GEPI is still alive on the indirectbr edges)
+// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
+// unmerging)
+// ...
+//
+// DstBi:
+// ...
+// %UGEPI = gep %GEPI, (UIdx-Idx)
+// ...
+// ---------------------------
+//
+// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
+// no longer alive on them.
+//
+// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
+// of GEPs in the first place in InstCombiner::visitGetElementPtrInst(), so as
+// not to disable further simplifications and optimizations as a result of GEP
+// merging.
+//
+// Note this unmerging may increase the length of the data flow critical path
+// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
+// between the register pressure and the length of data-flow critical
+// path. Restricting this to the uncommon IndirectBr case would minimize the
+// impact of potentially longer critical path, if any, and the impact on compile
+// time.
+static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
+ const TargetTransformInfo *TTI) {
+ BasicBlock *SrcBlock = GEPI->getParent();
+ // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
+ // (non-IndirectBr) cases exit early here.
+ if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
+ return false;
+ // Check that GEPI is a simple gep with a single constant index.
+ if (!GEPSequentialConstIndexed(GEPI))
+ return false;
+ ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
+ // Check that GEPI is a cheap one.
+ if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency) >
+ TargetTransformInfo::TCC_Basic)
+ return false;
+ Value *GEPIOp = GEPI->getOperand(0);
+ // Check that GEPIOp is an instruction that's also defined in SrcBlock.
+ if (!isa<Instruction>(GEPIOp))
+ return false;
+ auto *GEPIOpI = cast<Instruction>(GEPIOp);
+ if (GEPIOpI->getParent() != SrcBlock)
+ return false;
+ // Check that GEP is used outside the block, meaning it's alive on the
+ // IndirectBr edge(s).
+ if (llvm::none_of(GEPI->users(), [&](User *Usr) {
+ if (auto *I = dyn_cast<Instruction>(Usr)) {
+ if (I->getParent() != SrcBlock) {
+ return true;
+ }
+ }
+ return false;
+ }))
+ return false;
+ // The second elements of the GEP chains to be unmerged.
+ std::vector<GetElementPtrInst *> UGEPIs;
+  // Check each user of GEPIOp to see whether unmerging would make GEPIOp not
+  // alive on IndirectBr edges.
+ for (User *Usr : GEPIOp->users()) {
+ if (Usr == GEPI)
+ continue;
+ // Check if Usr is an Instruction. If not, give up.
+ if (!isa<Instruction>(Usr))
+ return false;
+ auto *UI = cast<Instruction>(Usr);
+    // If Usr is in the same block as GEPIOp, that is fine; skip it.
+ if (UI->getParent() == SrcBlock)
+ continue;
+ // Check if Usr is a GEP. If not, give up.
+ if (!isa<GetElementPtrInst>(Usr))
+ return false;
+ auto *UGEPI = cast<GetElementPtrInst>(Usr);
+ // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
+ // the pointer operand to it. If so, record it in the vector. If not, give
+ // up.
+ if (!GEPSequentialConstIndexed(UGEPI))
+ return false;
+ if (UGEPI->getOperand(0) != GEPIOp)
+ return false;
+ if (GEPIIdx->getType() !=
+ cast<ConstantInt>(UGEPI->getOperand(1))->getType())
+ return false;
+ ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+ if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency) >
+ TargetTransformInfo::TCC_Basic)
+ return false;
+ UGEPIs.push_back(UGEPI);
+ }
+ if (UGEPIs.size() == 0)
+ return false;
+ // Check the materialization cost of (UIdx-Idx).
+ for (GetElementPtrInst *UGEPI : UGEPIs) {
+ ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+ APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
+ InstructionCost ImmCost = TTI->getIntImmCost(
+ NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
+ if (ImmCost > TargetTransformInfo::TCC_Basic)
+ return false;
+ }
+ // Now unmerge between GEPI and UGEPIs.
+ for (GetElementPtrInst *UGEPI : UGEPIs) {
+ UGEPI->setOperand(0, GEPI);
+ ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+ Constant *NewUGEPIIdx = ConstantInt::get(
+ GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
+ UGEPI->setOperand(1, NewUGEPIIdx);
+ // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
+ // inbounds to avoid UB.
+ if (!GEPI->isInBounds()) {
+ UGEPI->setIsInBounds(false);
+ }
+ }
+ // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
+ // alive on IndirectBr edges).
+ assert(llvm::none_of(GEPIOp->users(),
+ [&](User *Usr) {
+ return cast<Instruction>(Usr)->getParent() != SrcBlock;
+ }) &&
+ "GEPIOp is used outside SrcBlock");
+ return true;
+}
+
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHugeFunc) {
+ // Try and convert
+ // %c = icmp ult %x, 8
+ // br %c, bla, blb
+ // %tc = lshr %x, 3
+ // to
+ // %tc = lshr %x, 3
+ // %c = icmp eq %tc, 0
+ // br %c, bla, blb
+ // Creating the cmp to zero can be better for the backend, especially if the
+ // lshr produces flags that can be used automatically.
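+ // The equality form below is handled the same way; as a hypothetical
+ // example,
+ //   %c = icmp eq %x, 8
+ //   br %c, bla, blb
+ //   %t = sub %x, 8
+ // becomes
+ //   %t = sub %x, 8
+ //   %c = icmp eq %t, 0
+ //   br %c, bla, blb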
+ if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+ return false;
+
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
+ if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
+ return false;
+
+ Value *X = Cmp->getOperand(0);
+ APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
+
+ for (auto *U : X->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ // A quick dominance check
+ if (!UI ||
+ (UI->getParent() != Branch->getParent() &&
+ UI->getParent() != Branch->getSuccessor(0) &&
+ UI->getParent() != Branch->getSuccessor(1)) ||
+ (UI->getParent() != Branch->getParent() &&
+ !UI->getParent()->getSinglePredecessor()))
+ continue;
+
+ if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
+ match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
+ IRBuilder<> Builder(Branch);
+ if (UI->getParent() != Branch->getParent())
+ UI->moveBefore(Branch);
+ Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
+ ConstantInt::get(UI->getType(), 0));
+ LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+ LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
+ return true;
+ }
+ if (Cmp->isEquality() &&
+ (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
+ match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
+ IRBuilder<> Builder(Branch);
+ if (UI->getParent() != Branch->getParent())
+ UI->moveBefore(Branch);
+ Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
+ ConstantInt::get(UI->getType(), 0));
+ LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+ LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
+ // Bail out if we inserted the instruction to prevent optimizations from
+ // stepping on each other's toes.
+ if (InsertedInsts.count(I))
+ return false;
+
+ // TODO: Move into the switch on opcode below here.
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
+ LargeOffsetGEPMap.erase(P);
+ replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+ // evaluation in a block other than the one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
+ return true;
+
+ if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
+ TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent()), *TTI))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ /// Sink a zext or sext into its user blocks if the target type doesn't
+ /// fit in one register
+ if (TLI->getTypeAction(CI->getContext(),
+ TLI->getValueType(*DL, CI->getType())) ==
+ TargetLowering::TypeExpandInteger) {
+ return SinkCast(CI);
+ } else {
+ if (TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent()), *TTI))
+ return true;
+
+ bool MadeChange = optimizeExt(I);
+ return MadeChange | optimizeExtUses(I);
+ }
+ }
+ return false;
+ }
+
+ if (auto *Cmp = dyn_cast<CmpInst>(I))
+ if (optimizeCmp(Cmp, ModifiedDT))
+ return true;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
+ bool Modified = optimizeLoadExt(LI);
+ unsigned AS = LI->getPointerAddressSpace();
+ Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ return Modified;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (splitMergedValStore(*SI, *DL, *TLI))
+ return true;
+ SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
+ unsigned AS = SI->getPointerAddressSpace();
+ return optimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(), AS);
+ }
+
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ unsigned AS = RMW->getPointerAddressSpace();
+ return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
+ unsigned AS = CmpX->getPointerAddressSpace();
+ return optimizeMemoryInst(I, CmpX->getPointerOperand(),
+ CmpX->getCompareOperand()->getType(), AS);
+ }
+
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+ if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
+ sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
+ return true;
+
+ // TODO: Move this into the switch on opcode - it handles shifts already.
+ if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+ BinOp->getOpcode() == Instruction::LShr)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (CI && TLI->hasExtractBitsInsn())
+ if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+ return true;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ NC->setDebugLoc(GEPI->getDebugLoc());
+ replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ GEPI, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ ++NumGEPsElim;
+ optimizeInst(NC, ModifiedDT);
+ return true;
+ }
+ if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
+ return true;
+ }
+ return false;
+ }
+
+ if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
+ // freeze(icmp a, const) -> icmp (freeze a), const
+ // This helps generate efficient conditional jumps.
+ Instruction *CmpI = nullptr;
+ if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
+ CmpI = II;
+ else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
+ CmpI = F->getFastMathFlags().none() ? F : nullptr;
+
+ if (CmpI && CmpI->hasOneUse()) {
+ auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
+ bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
+ isa<ConstantPointerNull>(Op0);
+ bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
+ isa<ConstantPointerNull>(Op1);
+ if (Const0 || Const1) {
+ if (!Const0 || !Const1) {
+ auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
+ F->takeName(FI);
+ CmpI->setOperand(Const0 ? 1 : 0, F);
+ }
+ replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
+ FI->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ if (tryToSinkFreeOperands(I))
+ return true;
+
+ switch (I->getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return optimizeShiftInst(cast<BinaryOperator>(I));
+ case Instruction::Call:
+ return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+ case Instruction::Select:
+ return optimizeSelectInst(cast<SelectInst>(I));
+ case Instruction::ShuffleVector:
+ return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
+ case Instruction::Switch:
+ return optimizeSwitchInst(cast<SwitchInst>(I));
+ case Instruction::ExtractElement:
+ return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+ case Instruction::Br:
+ return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
+ }
+
+ return false;
+}
+
+/// Given an OR instruction, check to see if this is a bitreverse
+/// idiom. If so, insert the new intrinsic and return true.
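+///
+/// As a schematic illustration (whether this exact shape is matched and is
+/// profitable depends on the target's legal types), a hand-written 4-bit
+/// reversal such as
+///   %b0 = shl i4 %x, 3        ; bit 0 -> bit 3
+///   %t1 = and i4 %x, 2
+///   %b1 = shl i4 %t1, 1       ; bit 1 -> bit 2
+///   %t2 = lshr i4 %x, 1
+///   %b2 = and i4 %t2, 2       ; bit 2 -> bit 1
+///   %b3 = lshr i4 %x, 3       ; bit 3 -> bit 0
+///   %lo = or i4 %b0, %b1
+///   %hi = or i4 %b2, %b3
+///   %r  = or i4 %lo, %hi
+/// is equivalent to a single @llvm.bitreverse.i4(%x) call.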
+bool CodeGenPrepare::makeBitReverse(Instruction &I) {
+ if (!I.getType()->isIntegerTy() ||
+ !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
+ TLI->getValueType(*DL, I.getType(), true)))
+ return false;
+
+ SmallVector<Instruction *, 4> Insts;
+ if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
+ return false;
+ Instruction *LastInst = Insts.back();
+ replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ &I, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ return true;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
+ SunkAddrs.clear();
+ bool MadeChange = false;
+
+ do {
+ CurInstIterator = BB.begin();
+ ModifiedDT = ModifyDT::NotModifyDT;
+ while (CurInstIterator != BB.end()) {
+ MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
+ if (ModifiedDT != ModifyDT::NotModifyDT) {
+ // For a huge function we tend to quickly go through the inner optimization
+ // opportunities in the BB. So we go back to the BB head to re-optimize
+ // each instruction instead of going back to the function head.
+ if (IsHugeFunc) {
+ DT.reset();
+ getDT(*BB.getParent());
+ break;
+ } else {
+ return true;
+ }
+ }
+ }
+ } while (ModifiedDT == ModifyDT::ModifyInstDT);
+
+ bool MadeBitReverse = true;
+ while (MadeBitReverse) {
+ MadeBitReverse = false;
+ for (auto &I : reverse(BB)) {
+ if (makeBitReverse(I)) {
+ MadeBitReverse = MadeChange = true;
+ break;
+ }
+ }
+ }
+ MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
+
+ return MadeChange;
+}
+
+// Some CGP optimizations may move or alter what's computed in a block. Check
+// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
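+// For illustration (the names are hypothetical): if an address computation
+// %addr was sunk into this block as %sunkaddr by optimizeMemoryInst, then
+//   call void @llvm.dbg.value(metadata ptr %addr, ...)
+// is retargeted to use %sunkaddr as its location operand instead of %addr.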
+bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
+ assert(isa<DbgValueInst>(I));
+ DbgValueInst &DVI = *cast<DbgValueInst>(I);
+
+ // Does this dbg.value refer to a sunk address calculation?
+ bool AnyChange = false;
+ SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
+ DVI.location_ops().end());
+ for (Value *Location : LocationOps) {
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ if (SunkAddr) {
+ // Point dbg.value at locally computed address, which should give the best
+ // opportunity to be accurately lowered. This update may change the type
+ // of pointer being referred to; however this makes no difference to
+ // debugging information, and we can't generate bitcasts that may affect
+ // codegen.
+ DVI.replaceVariableLocationOp(Location, SunkAddr);
+ AnyChange = true;
+ }
+ }
+ return AnyChange;
+}
+
+// An llvm.dbg.value may be using a value before its definition, due to
+// optimizations in this pass and others. Scan for such dbg.values, and rescue
+// them by moving the dbg.value to immediately after the value definition.
+// FIXME: Ideally this should never be necessary, and this has the potential
+// to re-order dbg.value intrinsics.
+bool CodeGenPrepare::placeDbgValues(Function &F) {
+ bool MadeChange = false;
+ DominatorTree DT(F);
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
+ if (!DVI)
+ continue;
+
+ SmallVector<Instruction *, 4> VIs;
+ for (Value *V : DVI->getValues())
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
+ VIs.push_back(VI);
+
+ // This DVI may depend on multiple instructions, complicating any
+ // potential sink. This block takes the defensive approach, opting to
+ // "undef" the DVI if it has more than one instruction and any of them do
+ // not dominate DVI.
+ for (Instruction *VI : VIs) {
+ if (VI->isTerminator())
+ continue;
+
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
+
+ // If the defining instruction dominates the dbg.value, we do not need
+ // to move the dbg.value.
+ if (DT.dominates(VI, DVI))
+ continue;
+
+ // If we depend on multiple instructions and any of them doesn't
+ // dominate this DVI, we probably can't salvage it: moving it to
+ // after any of the instructions could cause us to lose the others.
+ if (VIs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Unable to find valid location for Debug Value, undefing:\n"
+ << *DVI);
+ DVI->setKillLocation();
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
+// probes can be chained dependencies of other regular DAG nodes and block DAG
+// combine optimizations.
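+// For illustration (schematic IR, probe operands elided), a block such as
+//   %x = add i32 %a, %b
+//   call void @llvm.pseudoprobe(...)   ; probe 1
+//   %y = mul i32 %x, %c
+//   call void @llvm.pseudoprobe(...)   ; probe 2
+// is rewritten so that both probes are placed in front of %x, leaving the
+// rest of the block free of interleaved probe dependencies.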
+bool CodeGenPrepare::placePseudoProbes(Function &F) {
+ bool MadeChange = false;
+ for (auto &Block : F) {
+ // Move the remaining probes to the beginning of the block.
+ auto FirstInst = Block.getFirstInsertionPt();
+ while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
+ ++FirstInst;
+ BasicBlock::iterator I(FirstInst);
+ I++;
+ while (I != Block.end()) {
+ if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
+ II->moveBefore(&*FirstInst);
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// Scale down both weights to fit into uint32_t.
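+/// For example (illustrative numbers): NewTrue = 6e9, NewFalse = 2e9 gives
+/// Scale = 6e9 / UINT32_MAX + 1 = 2, so the weights become 3e9 and 1e9, both
+/// of which fit in uint32_t while preserving the 3:1 ratio.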
+static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+ uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+ uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
+ NewTrue = NewTrue / Scale;
+ NewFalse = NewFalse / Scale;
+}
+
+/// Some targets prefer to split a conditional branch like:
+/// \code
+/// %0 = icmp ne i32 %a, 0
+/// %1 = icmp ne i32 %b, 0
+/// %or.cond = or i1 %0, %1
+/// br i1 %or.cond, label %TrueBB, label %FalseBB
+/// \endcode
+/// into multiple branch instructions like:
+/// \code
+/// bb1:
+/// %0 = icmp ne i32 %a, 0
+/// br i1 %0, label %TrueBB, label %bb2
+/// bb2:
+/// %1 = icmp ne i32 %b, 0
+/// br i1 %1, label %TrueBB, label %FalseBB
+/// \endcode
+/// This usually allows instruction selection to do even further optimizations
+/// and combine the compare with the branch instruction. Currently this is
+/// applied for targets which have "cheap" jump instructions.
+///
+/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
+///
+bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
+ if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
+ return false;
+
+ bool MadeChange = false;
+ for (auto &BB : F) {
+ // Does this BB end with the following?
+ // %cond1 = icmp|fcmp|binary instruction ...
+ // %cond2 = icmp|fcmp|binary instruction ...
+ // %cond.or = or|and i1 %cond1, %cond2
+ // br i1 %cond.or, label %dest1, label %dest2
+ Instruction *LogicOp;
+ BasicBlock *TBB, *FBB;
+ if (!match(BB.getTerminator(),
+ m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
+ continue;
+
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+ continue;
+
+ // The merging of mostly empty BBs can cause a degenerate branch.
+ if (TBB == FBB)
+ continue;
+
+ unsigned Opc;
+ Value *Cond1, *Cond2;
+ if (match(LogicOp,
+ m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::And;
+ else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::Or;
+ else
+ continue;
+
+ auto IsGoodCond = [](Value *Cond) {
+ return match(
+ Cond,
+ m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
+ m_LogicalOr(m_Value(), m_Value()))));
+ };
+ if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
+
+ // Create a new BB.
+ auto *TmpBB =
+ BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
+ BB.getParent(), BB.getNextNode());
+ if (IsHugeFunc)
+ FreshBBs.insert(TmpBB);
+
+ // Update the original basic block to use the first condition directly in the
+ // branch instruction, and remove the no-longer-needed and/or instruction.
+ Br1->setCondition(Cond1);
+ LogicOp->eraseFromParent();
+
+ // Depending on the condition we have to either replace the true or the
+ // false successor of the original branch instruction.
+ if (Opc == Instruction::And)
+ Br1->setSuccessor(0, TmpBB);
+ else
+ Br1->setSuccessor(1, TmpBB);
+
+ // Fill in the new basic block.
+ auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
+ if (auto *I = dyn_cast<Instruction>(Cond2)) {
+ I->removeFromParent();
+ I->insertBefore(Br2);
+ }
+
+ // Update PHI nodes in both successors. The original BB needs to be
+ // replaced in one successor's PHI nodes, because the branch now comes from
+ // the newly generated BB (TmpBB). In the other successor we need to add one
+ // incoming edge to the PHI nodes, because both branch instructions target
+ // now the same successor. Depending on the original branch condition
+ // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
+ // we perform the correct update for the PHI nodes.
+ // This doesn't change the successor order of the just created branch
+ // instruction (or any other instruction).
+ if (Opc == Instruction::Or)
+ std::swap(TBB, FBB);
+
+ // Replace the old BB with the new BB.
+ TBB->replacePhiUsesWith(&BB, TmpBB);
+
+ // Add another incoming edge from the new BB.
+ for (PHINode &PN : FBB->phis()) {
+ auto *Val = PN.getIncomingValueForBlock(&BB);
+ PN.addIncoming(Val, TmpBB);
+ }
+
+ // Update the branch weights (from SelectionDAGBuilder::
+ // FindMergedConditions).
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+ // assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
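+ // For example (illustrative weights): with original weights A = 1, B = 3,
+ // BB1 gets weights (1, 7) and TmpBB gets (1, 6), and indeed
+ //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
+ //     = 1/8 + (7/8) * (1/7) = 1/4 = TrueProb of the original block.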
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = TrueWeight;
+ uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br1->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+ NewTrueWeight = TrueWeight;
+ NewFalseWeight = 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br2->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+ }
+ } else {
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+ // assumes that
+ // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
+ uint64_t NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br1->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+ NewTrueWeight = 2 * TrueWeight;
+ NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br2->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+ }
+ }
+
+ ModifiedDT = ModifyDT::ModifyBBDT;
+ MadeChange = true;
+
+ LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
+ TmpBB->dump());
+ }
+ return MadeChange;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
new file mode 100644
index 000000000000..c34a52a6f2de
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -0,0 +1,727 @@
+//===-- CommandFlags.cpp - Command Line Flags Interface ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains codegen-specific flags that are shared between different
+// command line tools. The tools "llc" and "opt" both use this file to prevent
+// flag duplication.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
+#include <optional>
+
+using namespace llvm;
+
+#define CGOPT(TY, NAME) \
+ static cl::opt<TY> *NAME##View; \
+ TY codegen::get##NAME() { \
+ assert(NAME##View && "RegisterCodeGenFlags not created."); \
+ return *NAME##View; \
+ }
+
+#define CGLIST(TY, NAME) \
+ static cl::list<TY> *NAME##View; \
+ std::vector<TY> codegen::get##NAME() { \
+ assert(NAME##View && "RegisterCodeGenFlags not created."); \
+ return *NAME##View; \
+ }
+
+// Temporary macro for incremental transition to std::optional.
+#define CGOPT_EXP(TY, NAME) \
+ CGOPT(TY, NAME) \
+ std::optional<TY> codegen::getExplicit##NAME() { \
+ if (NAME##View->getNumOccurrences()) { \
+ TY res = *NAME##View; \
+ return res; \
+ } \
+ return std::nullopt; \
+ }
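+
+// For illustration, CGOPT(bool, StackRealign) expands to roughly:
+//   static cl::opt<bool> *StackRealignView;
+//   bool codegen::getStackRealign() {
+//     assert(StackRealignView && "RegisterCodeGenFlags not created.");
+//     return *StackRealignView;
+//   }
+// i.e. the cl::opt itself is a local static inside RegisterCodeGenFlags()
+// below, reachable only through the *View pointer bound by CGBINDOPT.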
+
+CGOPT(std::string, MArch)
+CGOPT(std::string, MCPU)
+CGLIST(std::string, MAttrs)
+CGOPT_EXP(Reloc::Model, RelocModel)
+CGOPT(ThreadModel::Model, ThreadModel)
+CGOPT_EXP(CodeModel::Model, CodeModel)
+CGOPT(ExceptionHandling, ExceptionModel)
+CGOPT_EXP(CodeGenFileType, FileType)
+CGOPT(FramePointerKind, FramePointerUsage)
+CGOPT(bool, EnableUnsafeFPMath)
+CGOPT(bool, EnableNoInfsFPMath)
+CGOPT(bool, EnableNoNaNsFPMath)
+CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableApproxFuncFPMath)
+CGOPT(bool, EnableNoTrappingFPMath)
+CGOPT(bool, EnableAIXExtendedAltivecABI)
+CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
+CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
+CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
+CGOPT(FloatABI::ABIType, FloatABIForCalls)
+CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
+CGOPT(SwiftAsyncFramePointerMode, SwiftAsyncFramePointer)
+CGOPT(bool, DontPlaceZerosInBSS)
+CGOPT(bool, EnableGuaranteedTailCallOpt)
+CGOPT(bool, DisableTailCalls)
+CGOPT(bool, StackSymbolOrdering)
+CGOPT(bool, StackRealign)
+CGOPT(std::string, TrapFuncName)
+CGOPT(bool, UseCtors)
+CGOPT(bool, DisableIntegratedAS)
+CGOPT(bool, RelaxELFRelocations)
+CGOPT_EXP(bool, DataSections)
+CGOPT_EXP(bool, FunctionSections)
+CGOPT(bool, IgnoreXCOFFVisibility)
+CGOPT(bool, XCOFFTracebackTable)
+CGOPT(std::string, BBSections)
+CGOPT(unsigned, TLSSize)
+CGOPT_EXP(bool, EmulatedTLS)
+CGOPT(bool, UniqueSectionNames)
+CGOPT(bool, UniqueBasicBlockSectionNames)
+CGOPT(EABI, EABIVersion)
+CGOPT(DebuggerKind, DebuggerTuningOpt)
+CGOPT(bool, EnableStackSizeSection)
+CGOPT(bool, EnableAddrsig)
+CGOPT(bool, EmitCallSiteInfo)
+CGOPT(bool, EnableMachineFunctionSplitter)
+CGOPT(bool, EnableDebugEntryValues)
+CGOPT(bool, ForceDwarfFrameSection)
+CGOPT(bool, XRayFunctionIndex)
+CGOPT(bool, DebugStrictDwarf)
+CGOPT(unsigned, AlignLoops)
+CGOPT(bool, JMCInstrument)
+CGOPT(bool, XCOFFReadOnlyPointers)
+
+codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
+#define CGBINDOPT(NAME) \
+ do { \
+ NAME##View = std::addressof(NAME); \
+ } while (0)
+
+ static cl::opt<std::string> MArch(
+ "march", cl::desc("Architecture to generate code for (see --version)"));
+ CGBINDOPT(MArch);
+
+ static cl::opt<std::string> MCPU(
+ "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"), cl::init(""));
+ CGBINDOPT(MCPU);
+
+ static cl::list<std::string> MAttrs(
+ "mattr", cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."));
+ CGBINDOPT(MAttrs);
+
+ static cl::opt<Reloc::Model> RelocModel(
+ "relocation-model", cl::desc("Choose relocation model"),
+ cl::values(
+ clEnumValN(Reloc::Static, "static", "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValN(
+ Reloc::ROPI, "ropi",
+ "Code and read-only data relocatable, accessed PC-relative"),
+ clEnumValN(
+ Reloc::RWPI, "rwpi",
+ "Read-write data relocatable, accessed relative to static base"),
+ clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi",
+ "Combination of ropi and rwpi")));
+ CGBINDOPT(RelocModel);
+
+ static cl::opt<ThreadModel::Model> ThreadModel(
+ "thread-model", cl::desc("Choose threading model"),
+ cl::init(ThreadModel::POSIX),
+ cl::values(
+ clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"),
+ clEnumValN(ThreadModel::Single, "single", "Single thread model")));
+ CGBINDOPT(ThreadModel);
+
+ static cl::opt<CodeModel::Model> CodeModel(
+ "code-model", cl::desc("Choose code model"),
+ cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"),
+ clEnumValN(CodeModel::Small, "small", "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium", "Medium code model"),
+ clEnumValN(CodeModel::Large, "large", "Large code model")));
+ CGBINDOPT(CodeModel);
+
+ static cl::opt<ExceptionHandling> ExceptionModel(
+ "exception-model", cl::desc("exception model"),
+ cl::init(ExceptionHandling::None),
+ cl::values(
+ clEnumValN(ExceptionHandling::None, "default",
+ "default exception handling model"),
+ clEnumValN(ExceptionHandling::DwarfCFI, "dwarf",
+ "DWARF-like CFI based exception handling"),
+ clEnumValN(ExceptionHandling::SjLj, "sjlj",
+ "SjLj exception handling"),
+ clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"),
+ clEnumValN(ExceptionHandling::WinEH, "wineh",
+ "Windows exception model"),
+ clEnumValN(ExceptionHandling::Wasm, "wasm",
+ "WebAssembly exception handling")));
+ CGBINDOPT(ExceptionModel);
+
+ static cl::opt<CodeGenFileType> FileType(
+ "filetype", cl::init(CGFT_AssemblyFile),
+ cl::desc(
+ "Choose a file type (not all types are supported by all targets):"),
+ cl::values(
+ clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"),
+ clEnumValN(CGFT_ObjectFile, "obj",
+ "Emit a native object ('.o') file"),
+ clEnumValN(CGFT_Null, "null",
+ "Emit nothing, for performance testing")));
+ CGBINDOPT(FileType);
+
+ static cl::opt<FramePointerKind> FramePointerUsage(
+ "frame-pointer",
+ cl::desc("Specify frame pointer elimination optimization"),
+ cl::init(FramePointerKind::None),
+ cl::values(
+ clEnumValN(FramePointerKind::All, "all",
+ "Disable frame pointer elimination"),
+ clEnumValN(FramePointerKind::NonLeaf, "non-leaf",
+ "Disable frame pointer elimination for non-leaf frame"),
+ clEnumValN(FramePointerKind::None, "none",
+ "Enable frame pointer elimination")));
+ CGBINDOPT(FramePointerUsage);
+
+ static cl::opt<bool> EnableUnsafeFPMath(
+ "enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::init(false));
+ CGBINDOPT(EnableUnsafeFPMath);
+
+ static cl::opt<bool> EnableNoInfsFPMath(
+ "enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::init(false));
+ CGBINDOPT(EnableNoInfsFPMath);
+
+ static cl::opt<bool> EnableNoNaNsFPMath(
+ "enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::init(false));
+ CGBINDOPT(EnableNoNaNsFPMath);
+
+ static cl::opt<bool> EnableNoSignedZerosFPMath(
+ "enable-no-signed-zeros-fp-math",
+ cl::desc("Enable FP math optimizations that assume "
+ "the sign of 0 is insignificant"),
+ cl::init(false));
+ CGBINDOPT(EnableNoSignedZerosFPMath);
+
+ static cl::opt<bool> EnableApproxFuncFPMath(
+ "enable-approx-func-fp-math",
+ cl::desc("Enable FP math optimizations that assume approx func"),
+ cl::init(false));
+ CGBINDOPT(EnableApproxFuncFPMath);
+
+ static cl::opt<bool> EnableNoTrappingFPMath(
+ "enable-no-trapping-fp-math",
+ cl::desc("Enable setting the FP exceptions build "
+ "attribute not to use exceptions"),
+ cl::init(false));
+ CGBINDOPT(EnableNoTrappingFPMath);
+
+ static const auto DenormFlagEnumOptions = cl::values(
+ clEnumValN(DenormalMode::IEEE, "ieee", "IEEE 754 denormal numbers"),
+ clEnumValN(DenormalMode::PreserveSign, "preserve-sign",
+ "the sign of a flushed-to-zero number is preserved "
+ "in the sign of 0"),
+ clEnumValN(DenormalMode::PositiveZero, "positive-zero",
+ "denormals are flushed to positive zero"),
+ clEnumValN(DenormalMode::Dynamic, "dynamic",
+ "denormals have unknown treatment"));
+
+ // FIXME: Doesn't have a way to specify separate input and output modes.
+ static cl::opt<DenormalMode::DenormalModeKind> DenormalFPMath(
+ "denormal-fp-math",
+ cl::desc("Select which denormal numbers the code is permitted to require"),
+ cl::init(DenormalMode::IEEE),
+ DenormFlagEnumOptions);
+ CGBINDOPT(DenormalFPMath);
+
+ static cl::opt<DenormalMode::DenormalModeKind> DenormalFP32Math(
+ "denormal-fp-math-f32",
+ cl::desc("Select which denormal numbers the code is permitted to require for float"),
+ cl::init(DenormalMode::Invalid),
+ DenormFlagEnumOptions);
+ CGBINDOPT(DenormalFP32Math);
+
+ static cl::opt<bool> EnableHonorSignDependentRoundingFPMath(
+ "enable-sign-dependent-rounding-fp-math", cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::init(false));
+ CGBINDOPT(EnableHonorSignDependentRoundingFPMath);
+
+ static cl::opt<FloatABI::ABIType> FloatABIForCalls(
+ "float-abi", cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)")));
+ CGBINDOPT(FloatABIForCalls);
+
+ static cl::opt<FPOpFusion::FPOpFusionMode> FuseFPOps(
+ "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"),
+ cl::init(FPOpFusion::Standard),
+ cl::values(
+ clEnumValN(FPOpFusion::Fast, "fast",
+ "Fuse FP ops whenever profitable"),
+ clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."),
+ clEnumValN(FPOpFusion::Strict, "off",
+ "Only fuse FP ops when the result won't be affected.")));
+ CGBINDOPT(FuseFPOps);
+
+ static cl::opt<SwiftAsyncFramePointerMode> SwiftAsyncFramePointer(
+ "swift-async-fp",
+ cl::desc("Determine when the Swift async frame pointer should be set"),
+ cl::init(SwiftAsyncFramePointerMode::Always),
+ cl::values(clEnumValN(SwiftAsyncFramePointerMode::DeploymentBased, "auto",
+ "Determine based on deployment target"),
+ clEnumValN(SwiftAsyncFramePointerMode::Always, "always",
+ "Always set the bit"),
+ clEnumValN(SwiftAsyncFramePointerMode::Never, "never",
+ "Never set the bit")));
+ CGBINDOPT(SwiftAsyncFramePointer);
+
+ static cl::opt<bool> DontPlaceZerosInBSS(
+ "nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::init(false));
+ CGBINDOPT(DontPlaceZerosInBSS);
+
+ static cl::opt<bool> EnableAIXExtendedAltivecABI(
+ "vec-extabi", cl::desc("Enable the AIX Extended Altivec ABI."),
+ cl::init(false));
+ CGBINDOPT(EnableAIXExtendedAltivecABI);
+
+ static cl::opt<bool> EnableGuaranteedTailCallOpt(
+ "tailcallopt",
+ cl::desc(
+ "Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::init(false));
+ CGBINDOPT(EnableGuaranteedTailCallOpt);
+
+ static cl::opt<bool> DisableTailCalls(
+ "disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false));
+ CGBINDOPT(DisableTailCalls);
+
+ static cl::opt<bool> StackSymbolOrdering(
+ "stack-symbol-ordering", cl::desc("Order local stack symbols."),
+ cl::init(true));
+ CGBINDOPT(StackSymbolOrdering);
+
+ static cl::opt<bool> StackRealign(
+ "stackrealign",
+ cl::desc("Force align the stack to the minimum alignment"),
+ cl::init(false));
+ CGBINDOPT(StackRealign);
+
+ static cl::opt<std::string> TrapFuncName(
+ "trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+ CGBINDOPT(TrapFuncName);
+
+ static cl::opt<bool> UseCtors("use-ctors",
+ cl::desc("Use .ctors instead of .init_array."),
+ cl::init(false));
+ CGBINDOPT(UseCtors);
+
+ static cl::opt<bool> RelaxELFRelocations(
+ "relax-elf-relocations",
+ cl::desc(
+ "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
+ cl::init(true));
+ CGBINDOPT(RelaxELFRelocations);
+
+ static cl::opt<bool> DataSections(
+ "data-sections", cl::desc("Emit data into separate sections"),
+ cl::init(false));
+ CGBINDOPT(DataSections);
+
+ static cl::opt<bool> FunctionSections(
+ "function-sections", cl::desc("Emit functions into separate sections"),
+ cl::init(false));
+ CGBINDOPT(FunctionSections);
+
+ static cl::opt<bool> IgnoreXCOFFVisibility(
+ "ignore-xcoff-visibility",
+ cl::desc("Not emit the visibility attribute for asm in AIX OS or give "
+ "all symbols 'unspecified' visibility in XCOFF object file"),
+ cl::init(false));
+ CGBINDOPT(IgnoreXCOFFVisibility);
+
+ static cl::opt<bool> XCOFFTracebackTable(
+ "xcoff-traceback-table", cl::desc("Emit the XCOFF traceback table"),
+ cl::init(true));
+ CGBINDOPT(XCOFFTracebackTable);
+
+ static cl::opt<std::string> BBSections(
+ "basic-block-sections",
+ cl::desc("Emit basic blocks into separate sections"),
+ cl::value_desc("all | <function list (file)> | labels | none"),
+ cl::init("none"));
+ CGBINDOPT(BBSections);
+
+ static cl::opt<unsigned> TLSSize(
+ "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0));
+ CGBINDOPT(TLSSize);
+
+ static cl::opt<bool> EmulatedTLS(
+ "emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false));
+ CGBINDOPT(EmulatedTLS);
+
+ static cl::opt<bool> UniqueSectionNames(
+ "unique-section-names", cl::desc("Give unique names to every section"),
+ cl::init(true));
+ CGBINDOPT(UniqueSectionNames);
+
+ static cl::opt<bool> UniqueBasicBlockSectionNames(
+ "unique-basic-block-section-names",
+ cl::desc("Give unique names to every basic block section"),
+ cl::init(false));
+ CGBINDOPT(UniqueBasicBlockSectionNames);
+
+ static cl::opt<EABI> EABIVersion(
+ "meabi", cl::desc("Set EABI type (default depends on triple):"),
+ cl::init(EABI::Default),
+ cl::values(
+ clEnumValN(EABI::Default, "default", "Triple default EABI version"),
+ clEnumValN(EABI::EABI4, "4", "EABI version 4"),
+ clEnumValN(EABI::EABI5, "5", "EABI version 5"),
+ clEnumValN(EABI::GNU, "gnu", "EABI GNU")));
+ CGBINDOPT(EABIVersion);
+
+ static cl::opt<DebuggerKind> DebuggerTuningOpt(
+ "debugger-tune", cl::desc("Tune debug info for a particular debugger"),
+ cl::init(DebuggerKind::Default),
+ cl::values(
+ clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
+ clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
+ clEnumValN(DebuggerKind::DBX, "dbx", "dbx"),
+ clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)")));
+ CGBINDOPT(DebuggerTuningOpt);
+
+ static cl::opt<bool> EnableStackSizeSection(
+ "stack-size-section",
+ cl::desc("Emit a section containing stack size metadata"),
+ cl::init(false));
+ CGBINDOPT(EnableStackSizeSection);
+
+ static cl::opt<bool> EnableAddrsig(
+ "addrsig", cl::desc("Emit an address-significance table"),
+ cl::init(false));
+ CGBINDOPT(EnableAddrsig);
+
+ static cl::opt<bool> EmitCallSiteInfo(
+ "emit-call-site-info",
+ cl::desc(
+ "Emit call site debug information, if debug information is enabled."),
+ cl::init(false));
+ CGBINDOPT(EmitCallSiteInfo);
+
+ static cl::opt<bool> EnableDebugEntryValues(
+ "debug-entry-values",
+ cl::desc("Enable debug info for the debug entry values."),
+ cl::init(false));
+ CGBINDOPT(EnableDebugEntryValues);
+
+ static cl::opt<bool> EnableMachineFunctionSplitter(
+ "split-machine-functions",
+ cl::desc("Split out cold basic blocks from machine functions based on "
+ "profile information"),
+ cl::init(false));
+ CGBINDOPT(EnableMachineFunctionSplitter);
+
+ static cl::opt<bool> ForceDwarfFrameSection(
+ "force-dwarf-frame-section",
+ cl::desc("Always emit a debug frame section."), cl::init(false));
+ CGBINDOPT(ForceDwarfFrameSection);
+
+ static cl::opt<bool> XRayFunctionIndex("xray-function-index",
+ cl::desc("Emit xray_fn_idx section"),
+ cl::init(true));
+ CGBINDOPT(XRayFunctionIndex);
+
+ static cl::opt<bool> DebugStrictDwarf(
+ "strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
+ CGBINDOPT(DebugStrictDwarf);
+
+ static cl::opt<unsigned> AlignLoops("align-loops",
+ cl::desc("Default alignment for loops"));
+ CGBINDOPT(AlignLoops);
+
+ static cl::opt<bool> JMCInstrument(
+ "enable-jmc-instrument",
+ cl::desc("Instrument functions with a call to __CheckForDebuggerJustMyCode"),
+ cl::init(false));
+ CGBINDOPT(JMCInstrument);
+
+ static cl::opt<bool> XCOFFReadOnlyPointers(
+ "mxcoff-roptr",
+ cl::desc("When set to true, const objects with relocatable address "
+ "values are put into the RO data section."),
+ cl::init(false));
+ CGBINDOPT(XCOFFReadOnlyPointers);
+
+ static cl::opt<bool> DisableIntegratedAS(
+ "no-integrated-as", cl::desc("Disable integrated assembler"),
+ cl::init(false));
+ CGBINDOPT(DisableIntegratedAS);
+
+#undef CGBINDOPT
+
+ mc::RegisterMCTargetOptionsFlags();
+}
+
+llvm::BasicBlockSection
+codegen::getBBSectionsMode(llvm::TargetOptions &Options) {
+ if (getBBSections() == "all")
+ return BasicBlockSection::All;
+ else if (getBBSections() == "labels")
+ return BasicBlockSection::Labels;
+ else if (getBBSections() == "none")
+ return BasicBlockSection::None;
+ else {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(getBBSections());
+ if (!MBOrErr) {
+ errs() << "Error loading basic block sections function list file: "
+ << MBOrErr.getError().message() << "\n";
+ } else {
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
+ }
+ return BasicBlockSection::List;
+ }
+}
+
+// Common utility function tightly tied to the options listed here. Initializes
+// a TargetOptions object with CodeGen flags and returns it.
+TargetOptions
+codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
+ TargetOptions Options;
+ Options.AllowFPOpFusion = getFuseFPOps();
+ Options.UnsafeFPMath = getEnableUnsafeFPMath();
+ Options.NoInfsFPMath = getEnableNoInfsFPMath();
+ Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
+ Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+ Options.ApproxFuncFPMath = getEnableApproxFuncFPMath();
+ Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
+
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
+
+ // FIXME: Should have separate input and output flags
+ Options.setFPDenormalMode(DenormalMode(DenormKind, DenormKind));
+
+ Options.HonorSignDependentRoundingFPMathOption =
+ getEnableHonorSignDependentRoundingFPMath();
+ if (getFloatABIForCalls() != FloatABI::Default)
+ Options.FloatABIType = getFloatABIForCalls();
+ Options.EnableAIXExtendedAltivecABI = getEnableAIXExtendedAltivecABI();
+ Options.NoZerosInBSS = getDontPlaceZerosInBSS();
+ Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
+ Options.StackSymbolOrdering = getStackSymbolOrdering();
+ Options.UseInitArray = !getUseCtors();
+ Options.DisableIntegratedAS = getDisableIntegratedAS();
+ Options.RelaxELFRelocations = getRelaxELFRelocations();
+ Options.DataSections =
+ getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
+ Options.FunctionSections = getFunctionSections();
+ Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
+ Options.XCOFFTracebackTable = getXCOFFTracebackTable();
+ Options.BBSections = getBBSectionsMode(Options);
+ Options.UniqueSectionNames = getUniqueSectionNames();
+ Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
+ Options.TLSSize = getTLSSize();
+ Options.EmulatedTLS =
+ getExplicitEmulatedTLS().value_or(TheTriple.hasDefaultEmulatedTLS());
+ Options.ExceptionModel = getExceptionModel();
+ Options.EmitStackSizeSection = getEnableStackSizeSection();
+ Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
+ Options.EmitAddrsig = getEnableAddrsig();
+ Options.EmitCallSiteInfo = getEmitCallSiteInfo();
+ Options.EnableDebugEntryValues = getEnableDebugEntryValues();
+ Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
+ Options.XRayFunctionIndex = getXRayFunctionIndex();
+ Options.DebugStrictDwarf = getDebugStrictDwarf();
+ Options.LoopAlignment = getAlignLoops();
+ Options.JMCInstrument = getJMCInstrument();
+ Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
+
+ Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
+
+ Options.ThreadModel = getThreadModel();
+ Options.EABIVersion = getEABIVersion();
+ Options.DebuggerTuning = getDebuggerTuningOpt();
+ Options.SwiftAsyncFramePointer = getSwiftAsyncFramePointer();
+ return Options;
+}
+
+std::string codegen::getCPUStr() {
+ // If user asked for the 'native' CPU, autodetect here. If autodetection
+ // fails, this will set the CPU to an empty string which tells the target to
+ // pick a basic default.
+ if (getMCPU() == "native")
+ return std::string(sys::getHostCPUName());
+
+ return getMCPU();
+}
+
+std::string codegen::getFeaturesStr() {
+ SubtargetFeatures Features;
+
+ // If user asked for the 'native' CPU, we need to autodetect features.
+ // This is necessary for x86 where the CPU might not support all the
+ // features the autodetected CPU name lists in the target. For example,
+ // not all Sandybridge processors support AVX.
+ if (getMCPU() == "native") {
+ StringMap<bool> HostFeatures;
+ if (sys::getHostCPUFeatures(HostFeatures))
+ for (const auto &[Feature, IsEnabled] : HostFeatures)
+ Features.AddFeature(Feature, IsEnabled);
+ }
+
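+ // For illustration: with -mcpu=native -mattr=+avx2,-sse4.2 the returned
+ // string is the autodetected host feature list with ",+avx2,-sse4.2"
+ // appended at the end.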
+ for (auto const &MAttr : getMAttrs())
+ Features.AddFeature(MAttr);
+
+ return Features.getString();
+}
+
+std::vector<std::string> codegen::getFeatureList() {
+ SubtargetFeatures Features;
+
+ // If user asked for the 'native' CPU, we need to autodetect features.
+ // This is necessary for x86 where the CPU might not support all the
+ // features the autodetected CPU name lists in the target. For example,
+ // not all Sandybridge processors support AVX.
+ if (getMCPU() == "native") {
+ StringMap<bool> HostFeatures;
+ if (sys::getHostCPUFeatures(HostFeatures))
+ for (const auto &[Feature, IsEnabled] : HostFeatures)
+ Features.AddFeature(Feature, IsEnabled);
+ }
+
+ for (auto const &MAttr : getMAttrs())
+ Features.AddFeature(MAttr);
+
+ return Features.getFeatures();
+}
+
+void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) {
+ B.addAttribute(Name, Val ? "true" : "false");
+}
+
+#define HANDLE_BOOL_ATTR(CL, AttrName) \
+ do { \
+ if (CL->getNumOccurrences() > 0 && !F.hasFnAttribute(AttrName)) \
+ renderBoolStringAttr(NewAttrs, AttrName, *CL); \
+ } while (0)
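+
+// For example, HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math")
+// below records "unsafe-fp-math"="true" (or "false") on the function, but
+// only when the flag was given on the command line and the function does not
+// already carry that attribute.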
+
+/// Set function attributes of function \p F based on CPU, Features, and command
+/// line flags.
+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
+ Function &F) {
+ auto &Ctx = F.getContext();
+ AttributeList Attrs = F.getAttributes();
+ AttrBuilder NewAttrs(Ctx);
+
+ if (!CPU.empty() && !F.hasFnAttribute("target-cpu"))
+ NewAttrs.addAttribute("target-cpu", CPU);
+ if (!Features.empty()) {
+ // Append the command line features to any that are already on the function.
+ StringRef OldFeatures =
+ F.getFnAttribute("target-features").getValueAsString();
+ if (OldFeatures.empty())
+ NewAttrs.addAttribute("target-features", Features);
+ else {
+ SmallString<256> Appended(OldFeatures);
+ Appended.push_back(',');
+ Appended.append(Features);
+ NewAttrs.addAttribute("target-features", Appended);
+ }
+ }
+ if (FramePointerUsageView->getNumOccurrences() > 0 &&
+ !F.hasFnAttribute("frame-pointer")) {
+ if (getFramePointerUsage() == FramePointerKind::All)
+ NewAttrs.addAttribute("frame-pointer", "all");
+ else if (getFramePointerUsage() == FramePointerKind::NonLeaf)
+ NewAttrs.addAttribute("frame-pointer", "non-leaf");
+ else if (getFramePointerUsage() == FramePointerKind::None)
+ NewAttrs.addAttribute("frame-pointer", "none");
+ }
+ if (DisableTailCallsView->getNumOccurrences() > 0)
+ NewAttrs.addAttribute("disable-tail-calls",
+ toStringRef(getDisableTailCalls()));
+ if (getStackRealign())
+ NewAttrs.addAttribute("stackrealign");
+
+ HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math");
+ HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
+ HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
+ HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+ HANDLE_BOOL_ATTR(EnableApproxFuncFPMathView, "approx-func-fp-math");
+
+ if (DenormalFPMathView->getNumOccurrences() > 0 &&
+ !F.hasFnAttribute("denormal-fp-math")) {
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
+
+ // FIXME: Command line flag should expose separate input/output modes.
+ NewAttrs.addAttribute("denormal-fp-math",
+ DenormalMode(DenormKind, DenormKind).str());
+ }
+
+ if (DenormalFP32MathView->getNumOccurrences() > 0 &&
+ !F.hasFnAttribute("denormal-fp-math-f32")) {
+ // FIXME: Command line flag should expose separate input/output modes.
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFP32Math();
+
+ NewAttrs.addAttribute(
+ "denormal-fp-math-f32",
+ DenormalMode(DenormKind, DenormKind).str());
+ }
+
+ if (TrapFuncNameView->getNumOccurrences() > 0)
+ for (auto &B : F)
+ for (auto &I : B)
+ if (auto *Call = dyn_cast<CallInst>(&I))
+ if (const auto *F = Call->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::debugtrap ||
+ F->getIntrinsicID() == Intrinsic::trap)
+ Call->addFnAttr(
+ Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));
+
+ // Let NewAttrs override Attrs.
+ F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs));
+}
+
+/// Set function attributes of functions in Module M based on CPU,
+/// Features, and command line flags.
+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
+ Module &M) {
+ for (Function &F : M)
+ setFunctionAttributes(CPU, Features, F);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
new file mode 100644
index 000000000000..7979ac9a5fb7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -0,0 +1,2077 @@
+//===- ComplexDeinterleavingPass.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Identification:
+// This step is responsible for finding the patterns that can be lowered to
+// complex instructions, and building a graph to represent the complex
+// structures. Starting from the "Converging Shuffle" (a shuffle that
+// reinterleaves the complex components, with a mask of <0, 2, 1, 3>), the
+// operands are evaluated and identified as "Composite Nodes" (collections of
+// instructions that can potentially be lowered to a single complex
+// instruction). This is performed by checking the real and imaginary components
+// and tracking the data flow for each component while following the operand
+// pairs. Validity of each node is expected to be done upon creation, and any
+// validation errors should halt traversal and prevent further graph
+// construction.
+// Instead of relying on Shuffle operations, vector interleaving and
+// deinterleaving can be represented by vector.interleave2 and
+// vector.deinterleave2 intrinsics. Scalable vectors can be represented only by
+// these intrinsics, whereas fixed-width vectors are recognized via both the
+// shufflevector instruction and the intrinsics.
+//
+// Replacement:
+// This step traverses the graph built up by identification, delegating to the
+// target to validate and generate the correct intrinsics, and plumbs them
+// together connecting each end of the new intrinsics graph to the existing
+// use-def chain. This step is assumed to finish successfully, as all
+// information is expected to be correct by this point.
+//
+//
+// Internal data structure:
+// ComplexDeinterleavingGraph:
+// Keeps references to all the valid CompositeNodes formed as part of the
+// transformation, and every Instruction contained within said nodes. It also
+// holds onto a reference to the root Instruction, and the root node that should
+// replace it.
+//
+// ComplexDeinterleavingCompositeNode:
+// A CompositeNode represents a single transformation point; each node should
+// transform into a single complex instruction (ignoring vector splitting, which
+// would generate more instructions per node). They are identified in a
+// depth-first manner, traversing and identifying the operands of each
+// instruction in the order they appear in the IR.
+// Each node maintains a reference to its Real and Imaginary instructions,
+// as well as any additional instructions that make up the identified operation
+// (Internal instructions should only have uses within their containing node).
+// A Node also contains the rotation and operation type that it represents.
+// Operands contains pointers to other CompositeNodes, acting as the edges in
+// the graph. ReplacementValue is the transformed Value* that has been emitted
+// to the IR.
+//
+// Note: If the operation of a Node is Shuffle, only the Real, Imaginary, and
+// ReplacementValue fields of that Node are relevant, where the ReplacementValue
+// should be pre-populated.
+//
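+// As a schematic fixed-width example (illustrative IR), the pass looks for
+// patterns of the shape:
+//   %real = shufflevector <4 x float> %v, <4 x float> poison,
+//                         <2 x i32> <i32 0, i32 2>
+//   %imag = shufflevector <4 x float> %v, <4 x float> poison,
+//                         <2 x i32> <i32 1, i32 3>
+//   ... complex arithmetic on %real / %imag ...
+//   %out = shufflevector <2 x float> %re, <2 x float> %im,
+//                        <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+// where %out is the "Converging Shuffle" from which identification starts.
+//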
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "complex-deinterleaving"
+
+STATISTIC(NumComplexTransformations, "Amount of complex patterns transformed");
+
+static cl::opt<bool> ComplexDeinterleavingEnabled(
+ "enable-complex-deinterleaving",
+ cl::desc("Enable generation of complex instructions"), cl::init(true),
+ cl::Hidden);
+
+/// Checks the given mask, and determines whether said mask is interleaving.
+///
+/// To be interleaving, a mask must alternate between `i` and `i + (Length /
+/// 2)`, and must contain all numbers within the range of `[0..Length)` (e.g. a
+/// 4x vector interleaving mask would be <0, 2, 1, 3>).
+static bool isInterleavingMask(ArrayRef<int> Mask);
+
+/// Checks the given mask, and determines whether said mask is deinterleaving.
+///
+/// To be deinterleaving, a mask must increment in steps of 2, and either start
+/// with 0 or 1.
+/// (e.g. an 8x vector deinterleaving mask would be either <0, 2, 4, 6> or
+/// <1, 3, 5, 7>).
+static bool isDeinterleavingMask(ArrayRef<int> Mask);
+
+/// Returns true if V is a negation operation; this works for both integers
+/// and floats.
+static bool isNeg(Value *V);
+
+/// Returns the operand for negation operation.
+static Value *getNegOperand(Value *V);
+
+namespace {
+
+class ComplexDeinterleavingLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ComplexDeinterleavingLegacyPass(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {
+ initializeComplexDeinterleavingLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Complex Deinterleaving Pass";
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ const TargetMachine *TM;
+};
+
+class ComplexDeinterleavingGraph;
+struct ComplexDeinterleavingCompositeNode {
+
+ ComplexDeinterleavingCompositeNode(ComplexDeinterleavingOperation Op,
+ Value *R, Value *I)
+ : Operation(Op), Real(R), Imag(I) {}
+
+private:
+ friend class ComplexDeinterleavingGraph;
+ using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>;
+ using RawNodePtr = ComplexDeinterleavingCompositeNode *;
+
+public:
+ ComplexDeinterleavingOperation Operation;
+ Value *Real;
+ Value *Imag;
+
+ // These two members are required exclusively for generating
+ // ComplexDeinterleavingOperation::Symmetric operations.
+ unsigned Opcode;
+ std::optional<FastMathFlags> Flags;
+
+ ComplexDeinterleavingRotation Rotation =
+ ComplexDeinterleavingRotation::Rotation_0;
+ SmallVector<RawNodePtr> Operands;
+ Value *ReplacementNode = nullptr;
+
+ void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
+
+ void dump() { dump(dbgs()); }
+ void dump(raw_ostream &OS) {
+ auto PrintValue = [&](Value *V) {
+ if (V) {
+ OS << "\"";
+ V->print(OS, true);
+ OS << "\"\n";
+ } else
+ OS << "nullptr\n";
+ };
+ auto PrintNodeRef = [&](RawNodePtr Ptr) {
+ if (Ptr)
+ OS << Ptr << "\n";
+ else
+ OS << "nullptr\n";
+ };
+
+ OS << "- CompositeNode: " << this << "\n";
+ OS << " Real: ";
+ PrintValue(Real);
+ OS << " Imag: ";
+ PrintValue(Imag);
+ OS << " ReplacementNode: ";
+ PrintValue(ReplacementNode);
+ OS << " Operation: " << (int)Operation << "\n";
+ OS << " Rotation: " << ((int)Rotation * 90) << "\n";
+ OS << " Operands: \n";
+ for (const auto &Op : Operands) {
+ OS << " - ";
+ PrintNodeRef(Op);
+ }
+ }
+};
+
+class ComplexDeinterleavingGraph {
+public:
+ struct Product {
+ Value *Multiplier;
+ Value *Multiplicand;
+ bool IsPositive;
+ };
+
+ using Addend = std::pair<Value *, bool>;
+ using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr;
+ using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr;
+
+ // Helper struct for holding info about potential partial multiplication
+ // candidates
+ struct PartialMulCandidate {
+ Value *Common;
+ NodePtr Node;
+ unsigned RealIdx;
+ unsigned ImagIdx;
+ bool IsNodeInverted;
+ };
+
+ explicit ComplexDeinterleavingGraph(const TargetLowering *TL,
+ const TargetLibraryInfo *TLI)
+ : TL(TL), TLI(TLI) {}
+
+private:
+ const TargetLowering *TL = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+ SmallVector<NodePtr> CompositeNodes;
+ DenseMap<std::pair<Value *, Value *>, NodePtr> CachedResult;
+
+ SmallPtrSet<Instruction *, 16> FinalInstructions;
+
+ /// Root instructions are instructions from which the complex computation
+ /// starts.
+ std::map<Instruction *, NodePtr> RootToNode;
+
+ /// Topologically sorted root instructions
+ SmallVector<Instruction *, 1> OrderedRoots;
+
+ /// When examining a basic block for complex deinterleaving, if it is a
+ /// simple one-block loop, then the only incoming block is 'Incoming' and the
+ /// 'BackEdge' block is the block itself.
+ BasicBlock *BackEdge = nullptr;
+ BasicBlock *Incoming = nullptr;
+
+ /// ReductionInfo maps from %ReductionOp to %PHInode and Instruction
+ /// %OutsideUser as it is shown in the IR:
+ ///
+ /// vector.body:
+ /// %PHInode = phi <vector type> [ zeroinitializer, %entry ],
+ /// [ %ReductionOp, %vector.body ]
+ /// ...
+ /// %ReductionOp = fadd i64 ...
+ /// ...
+ /// br i1 %condition, label %vector.body, %middle.block
+ ///
+ /// middle.block:
+ /// %OutsideUser = llvm.vector.reduce.fadd(..., %ReductionOp)
+ ///
+ /// %OutsideUser can be `llvm.vector.reduce.fadd` or `fadd` preceding
+ /// `llvm.vector.reduce.fadd` when the unroll factor isn't one.
+ std::map<Instruction *, std::pair<PHINode *, Instruction *>> ReductionInfo;
+
+ /// In the process of detecting a reduction, we consider a pair of
+ /// %ReductionOPs, which we refer to as real and imag (or vice versa), and
+ /// traverse the use-tree to detect complex operations. As this is a
+ /// reduction operation, it will eventually reach the RealPHI and ImagPHI,
+ /// which correspond to the %ReductionOPs that we suspect to be complex.
+ /// RealPHI and ImagPHI are used by the identifyPHINode method.
+ PHINode *RealPHI = nullptr;
+ PHINode *ImagPHI = nullptr;
+
+ /// Set this flag to true if RealPHI and ImagPHI were reached during reduction
+ /// detection.
+ bool PHIsFound = false;
+
+ /// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode.
+ /// The new PHINode corresponds to a vector of deinterleaved complex numbers.
+ /// This mapping is populated during
+ /// ComplexDeinterleavingOperation::ReductionPHI node replacement. It is then
+ /// used in the ComplexDeinterleavingOperation::ReductionOperation node
+ /// replacement process.
+ std::map<PHINode *, PHINode *> OldToNewPHI;
+
+ NodePtr prepareCompositeNode(ComplexDeinterleavingOperation Operation,
+ Value *R, Value *I) {
+ assert(((Operation != ComplexDeinterleavingOperation::ReductionPHI &&
+ Operation != ComplexDeinterleavingOperation::ReductionOperation) ||
+ (R && I)) &&
+ "Reduction related nodes must have Real and Imaginary parts");
+ return std::make_shared<ComplexDeinterleavingCompositeNode>(Operation, R,
+ I);
+ }
+
+ NodePtr submitCompositeNode(NodePtr Node) {
+ CompositeNodes.push_back(Node);
+ if (Node->Real && Node->Imag)
+ CachedResult[{Node->Real, Node->Imag}] = Node;
+ return Node;
+ }
+
+ /// Identifies a complex partial multiply pattern and its rotation, based on
+ /// the following patterns
+ ///
+ /// 0: r: cr + ar * br
+ /// i: ci + ar * bi
+ /// 90: r: cr - ai * bi
+ /// i: ci + ai * br
+ /// 180: r: cr - ar * br
+ /// i: ci - ar * bi
+ /// 270: r: cr + ai * bi
+ /// i: ci - ai * br
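+ ///
+ /// For illustration (example IR, not from the upstream comment), the
+ /// rotation 0 pattern for the real part could look like:
+ ///   %ar_br = fmul contract <4 x float> %ar, %br
+ ///   %real  = fadd contract <4 x float> %cr, %ar_br
+ /// For floating point, the `contract` fast-math flag is required on the
+ /// add/sub instructions (checked in the implementation below).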
+ NodePtr identifyPartialMul(Instruction *Real, Instruction *Imag);
+
+ /// Identify the other branch of a Partial Mul, taking the CommonOperandI that
+ /// is partially known from identifyPartialMul, filling in the other half of
+ /// the complex pair.
+ NodePtr
+ identifyNodeWithImplicitAdd(Instruction *I, Instruction *J,
+ std::pair<Value *, Value *> &CommonOperandI);
+
+ /// Identifies a complex add pattern and its rotation, based on the following
+ /// patterns.
+ ///
+ /// 90: r: ar - bi
+ /// i: ai + br
+ /// 270: r: ar + bi
+ /// i: ai - br
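+ ///
+ /// As an illustration (not part of the upstream comment), the 90 case is
+ /// simply a + i*b: with a = ar + i*ai and b = br + i*bi,
+ ///   a + i*b = (ar - bi) + i*(ai + br)
+ /// while the 270 case is a - i*b = (ar + bi) + i*(ai - br).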
+ NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
+ NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag);
+
+ NodePtr identifyNode(Value *R, Value *I);
+
+ /// Determine if a sum of complex numbers can be formed from \p RealAddends
+ /// and \p ImagAddends. If \p Accumulator is not null, add the result to it.
+ /// Return nullptr if it is not possible to construct a complex number.
+ /// \p Flags are needed to generate symmetric Add and Sub operations.
+ NodePtr identifyAdditions(std::list<Addend> &RealAddends,
+ std::list<Addend> &ImagAddends,
+ std::optional<FastMathFlags> Flags,
+ NodePtr Accumulator);
+
+ /// Extract one addend that has both real and imaginary parts positive.
+ NodePtr extractPositiveAddend(std::list<Addend> &RealAddends,
+ std::list<Addend> &ImagAddends);
+
+ /// Determine if sum of multiplications of complex numbers can be formed from
+ /// \p RealMuls and \p ImagMuls. If \p Accumulator is not null, add the result
+ /// to it. Return nullptr if it is not possible to construct a complex number.
+ NodePtr identifyMultiplications(std::vector<Product> &RealMuls,
+ std::vector<Product> &ImagMuls,
+ NodePtr Accumulator);
+
+ /// Go through pairs of multiplications (one Real and one Imag) and find all
+ /// possible candidates for partial multiplication, putting them into \p
+ /// Candidates. Returns true if every Product has a pair with a common
+ /// operand.
+ bool collectPartialMuls(const std::vector<Product> &RealMuls,
+ const std::vector<Product> &ImagMuls,
+ std::vector<PartialMulCandidate> &Candidates);
+
+ /// If the code is compiled with -Ofast or expressions have `reassoc` flag,
+ /// the order of complex computation operations may be significantly altered,
+ /// and the real and imaginary parts may not be executed in parallel. This
+ /// function takes this into consideration and employs a more general approach
+ /// to identify complex computations. Initially, it gathers all the addends
+ /// and multiplicands and then constructs a complex expression from them.
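+ ///
+ /// For illustration (example IR, not from the upstream comment), such
+ /// reassociation-friendly expressions carry the `reassoc` fast-math flag:
+ ///   %t = fadd reassoc contract <4 x float> %a, %b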
+ NodePtr identifyReassocNodes(Instruction *I, Instruction *J);
+
+ NodePtr identifyRoot(Instruction *I);
+
+ /// Identifies the Deinterleave operation applied to a vector containing
+ /// complex numbers. There are two ways to represent the Deinterleave
+ /// operation:
+ /// * Using two shufflevectors with even indices for the \p Real instruction
+ ///   and odd indices for the \p Imag instruction (only for fixed-width
+ ///   vectors)
+ /// * Using two extractvalue instructions applied to `vector.deinterleave2`
+ /// intrinsic (for both fixed and scalable vectors)
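+ ///
+ /// For illustration (example IR with assumed element types, not from the
+ /// upstream comment), the intrinsic form looks like:
+ ///   %de   = call { <4 x float>, <4 x float> }
+ ///           @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %vec)
+ ///   %real = extractvalue { <4 x float>, <4 x float> } %de, 0
+ ///   %imag = extractvalue { <4 x float>, <4 x float> } %de, 1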
+ NodePtr identifyDeinterleave(Instruction *Real, Instruction *Imag);
+
+ /// Identifies the operation that represents a complex number repeated in a
+ /// splat vector. There are two possible types of splats: a ConstantExpr with
+ /// the ShuffleVector opcode and a ShuffleVectorInst. Both should have an
+ /// initialization mask with all values set to zero.
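+ ///
+ /// For illustration (example IR, not from the upstream comment), a
+ /// non-constant splat typically looks like:
+ ///   %ins   = insertelement <4 x float> poison, float %v, i64 0
+ ///   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
+ ///            <4 x i32> zeroinitializer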
+ NodePtr identifySplat(Value *Real, Value *Imag);
+
+ NodePtr identifyPHINode(Instruction *Real, Instruction *Imag);
+
+ /// Identifies SelectInsts in a loop that has a reduction with predication
+ /// masks and/or predicated tail folding.
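+ ///
+ /// For illustration (example IR, not from the upstream comment), the pattern
+ /// being matched is a pair of selects on the same predicate mask:
+ ///   %real = select <4 x i1> %mask, <4 x float> %a.re, <4 x float> %b.re
+ ///   %imag = select <4 x i1> %mask, <4 x float> %a.im, <4 x float> %b.im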
+ NodePtr identifySelectNode(Instruction *Real, Instruction *Imag);
+
+ Value *replaceNode(IRBuilderBase &Builder, RawNodePtr Node);
+
+ /// Complete IR modifications after producing new reduction operation:
+ /// * Populate the PHINode generated for
+ /// ComplexDeinterleavingOperation::ReductionPHI
+ /// * Deinterleave the final value outside of the loop and repurpose original
+ /// reduction users
+ void processReductionOperation(Value *OperationReplacement, RawNodePtr Node);
+
+public:
+ void dump() { dump(dbgs()); }
+ void dump(raw_ostream &OS) {
+ for (const auto &Node : CompositeNodes)
+ Node->dump(OS);
+ }
+
+ /// Returns false if the deinterleaving operation should be cancelled for the
+ /// current graph.
+ bool identifyNodes(Instruction *RootI);
+
+ /// In case \p B is a one-block loop, this function seeks potential reductions
+ /// and populates ReductionInfo. Returns true if any reductions were
+ /// identified.
+ bool collectPotentialReductions(BasicBlock *B);
+
+ void identifyReductionNodes();
+
+ /// Check that every instruction, from the roots to the leaves, has internal
+ /// uses.
+ bool checkNodes();
+
+ /// Perform the actual replacement of the underlying instruction graph.
+ void replaceNodes();
+};
+
+class ComplexDeinterleaving {
+public:
+ ComplexDeinterleaving(const TargetLowering *tl, const TargetLibraryInfo *tli)
+ : TL(tl), TLI(tli) {}
+ bool runOnFunction(Function &F);
+
+private:
+ bool evaluateBasicBlock(BasicBlock *B);
+
+ const TargetLowering *TL = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+};
+
+} // namespace
+
+char ComplexDeinterleavingLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ComplexDeinterleavingLegacyPass, DEBUG_TYPE,
+ "Complex Deinterleaving", false, false)
+INITIALIZE_PASS_END(ComplexDeinterleavingLegacyPass, DEBUG_TYPE,
+ "Complex Deinterleaving", false, false)
+
+PreservedAnalyses ComplexDeinterleavingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ auto &TLI = AM.getResult<llvm::TargetLibraryAnalysis>(F);
+ if (!ComplexDeinterleaving(TL, &TLI).runOnFunction(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
+}
+
+FunctionPass *llvm::createComplexDeinterleavingPass(const TargetMachine *TM) {
+ return new ComplexDeinterleavingLegacyPass(TM);
+}
+
+bool ComplexDeinterleavingLegacyPass::runOnFunction(Function &F) {
+ const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ auto TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return ComplexDeinterleaving(TL, &TLI).runOnFunction(F);
+}
+
+bool ComplexDeinterleaving::runOnFunction(Function &F) {
+ if (!ComplexDeinterleavingEnabled) {
+ LLVM_DEBUG(
+ dbgs() << "Complex deinterleaving has been explicitly disabled.\n");
+ return false;
+ }
+
+ if (!TL->isComplexDeinterleavingSupported()) {
+ LLVM_DEBUG(
+ dbgs() << "Complex deinterleaving has been disabled, target does "
+ "not support lowering of complex number operations.\n");
+ return false;
+ }
+
+ bool Changed = false;
+ for (auto &B : F)
+ Changed |= evaluateBasicBlock(&B);
+
+ return Changed;
+}
+
+static bool isInterleavingMask(ArrayRef<int> Mask) {
+ // If the size is not even, it's not an interleaving mask
+ if ((Mask.size() & 1))
+ return false;
+
+ int HalfNumElements = Mask.size() / 2;
+ for (int Idx = 0; Idx < HalfNumElements; ++Idx) {
+ int MaskIdx = Idx * 2;
+ if (Mask[MaskIdx] != Idx || Mask[MaskIdx + 1] != (Idx + HalfNumElements))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isDeinterleavingMask(ArrayRef<int> Mask) {
+ int Offset = Mask[0];
+ int HalfNumElements = Mask.size() / 2;
+
+ for (int Idx = 1; Idx < HalfNumElements; ++Idx) {
+ if (Mask[Idx] != (Idx * 2) + Offset)
+ return false;
+ }
+
+ return true;
+}
+
+bool isNeg(Value *V) {
+ return match(V, m_FNeg(m_Value())) || match(V, m_Neg(m_Value()));
+}
+
+Value *getNegOperand(Value *V) {
+ assert(isNeg(V));
+ auto *I = cast<Instruction>(V);
+ if (I->getOpcode() == Instruction::FNeg)
+ return I->getOperand(0);
+
+ return I->getOperand(1);
+}
+
+bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
+ ComplexDeinterleavingGraph Graph(TL, TLI);
+ if (Graph.collectPotentialReductions(B))
+ Graph.identifyReductionNodes();
+
+ for (auto &I : *B)
+ Graph.identifyNodes(&I);
+
+ if (Graph.checkNodes()) {
+ Graph.replaceNodes();
+ return true;
+ }
+
+ return false;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
+ Instruction *Real, Instruction *Imag,
+ std::pair<Value *, Value *> &PartialMatch) {
+ LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << *Real << " / " << *Imag
+ << "\n");
+
+ if (!Real->hasOneUse() || !Imag->hasOneUse()) {
+ LLVM_DEBUG(dbgs() << " - Mul operand has multiple uses.\n");
+ return nullptr;
+ }
+
+ if ((Real->getOpcode() != Instruction::FMul &&
+ Real->getOpcode() != Instruction::Mul) ||
+ (Imag->getOpcode() != Instruction::FMul &&
+ Imag->getOpcode() != Instruction::Mul)) {
+ LLVM_DEBUG(
+ dbgs() << " - Real or imaginary instruction is not fmul or mul\n");
+ return nullptr;
+ }
+
+ Value *R0 = Real->getOperand(0);
+ Value *R1 = Real->getOperand(1);
+ Value *I0 = Imag->getOperand(0);
+ Value *I1 = Imag->getOperand(1);
+
+ // A +/+ has a rotation of 0. If any of the operands are fneg, we flip the
+ // rotations and use the operand.
+ unsigned Negs = 0;
+ Value *Op;
+ if (match(R0, m_Neg(m_Value(Op)))) {
+ Negs |= 1;
+ R0 = Op;
+ } else if (match(R1, m_Neg(m_Value(Op)))) {
+ Negs |= 1;
+ R1 = Op;
+ }
+
+ if (isNeg(I0)) {
+ Negs |= 2;
+ Negs ^= 1;
+ I0 = getNegOperand(I0);
+ } else if (match(I1, m_Neg(m_Value(Op)))) {
+ Negs |= 2;
+ Negs ^= 1;
+ I1 = Op;
+ }
+
+ ComplexDeinterleavingRotation Rotation = (ComplexDeinterleavingRotation)Negs;
+
+ Value *CommonOperand;
+ Value *UncommonRealOp;
+ Value *UncommonImagOp;
+
+ if (R0 == I0 || R0 == I1) {
+ CommonOperand = R0;
+ UncommonRealOp = R1;
+ } else if (R1 == I0 || R1 == I1) {
+ CommonOperand = R1;
+ UncommonRealOp = R0;
+ } else {
+ LLVM_DEBUG(dbgs() << " - No equal operand\n");
+ return nullptr;
+ }
+
+ UncommonImagOp = (CommonOperand == I0) ? I1 : I0;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270)
+ std::swap(UncommonRealOp, UncommonImagOp);
+
+ // Between identifyPartialMul and here we need to have found a complete valid
+ // pair from the CommonOperand of each part.
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180)
+ PartialMatch.first = CommonOperand;
+ else
+ PartialMatch.second = CommonOperand;
+
+ if (!PartialMatch.first || !PartialMatch.second) {
+ LLVM_DEBUG(dbgs() << " - Incomplete partial match\n");
+ return nullptr;
+ }
+
+ NodePtr CommonNode = identifyNode(PartialMatch.first, PartialMatch.second);
+ if (!CommonNode) {
+ LLVM_DEBUG(dbgs() << " - No CommonNode identified\n");
+ return nullptr;
+ }
+
+ NodePtr UncommonNode = identifyNode(UncommonRealOp, UncommonImagOp);
+ if (!UncommonNode) {
+ LLVM_DEBUG(dbgs() << " - No UncommonNode identified\n");
+ return nullptr;
+ }
+
+ NodePtr Node = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
+ Node->Rotation = Rotation;
+ Node->addOperand(CommonNode);
+ Node->addOperand(UncommonNode);
+ return submitCompositeNode(Node);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
+ Instruction *Imag) {
+ LLVM_DEBUG(dbgs() << "identifyPartialMul " << *Real << " / " << *Imag
+ << "\n");
+ // Determine rotation
+ auto IsAdd = [](unsigned Op) {
+ return Op == Instruction::FAdd || Op == Instruction::Add;
+ };
+ auto IsSub = [](unsigned Op) {
+ return Op == Instruction::FSub || Op == Instruction::Sub;
+ };
+ ComplexDeinterleavingRotation Rotation;
+ if (IsAdd(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
+ Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ else if (IsSub(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if (IsSub(Real->getOpcode()) && IsSub(Imag->getOpcode()))
+ Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ else if (IsAdd(Real->getOpcode()) && IsSub(Imag->getOpcode()))
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+ else {
+ LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n");
+ return nullptr;
+ }
+
+ if (isa<FPMathOperator>(Real) &&
+ (!Real->getFastMathFlags().allowContract() ||
+ !Imag->getFastMathFlags().allowContract())) {
+ LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n");
+ return nullptr;
+ }
+
+ Value *CR = Real->getOperand(0);
+ Instruction *RealMulI = dyn_cast<Instruction>(Real->getOperand(1));
+ if (!RealMulI)
+ return nullptr;
+ Value *CI = Imag->getOperand(0);
+ Instruction *ImagMulI = dyn_cast<Instruction>(Imag->getOperand(1));
+ if (!ImagMulI)
+ return nullptr;
+
+ if (!RealMulI->hasOneUse() || !ImagMulI->hasOneUse()) {
+ LLVM_DEBUG(dbgs() << " - Mul instruction has multiple uses\n");
+ return nullptr;
+ }
+
+ Value *R0 = RealMulI->getOperand(0);
+ Value *R1 = RealMulI->getOperand(1);
+ Value *I0 = ImagMulI->getOperand(0);
+ Value *I1 = ImagMulI->getOperand(1);
+
+ Value *CommonOperand;
+ Value *UncommonRealOp;
+ Value *UncommonImagOp;
+
+ if (R0 == I0 || R0 == I1) {
+ CommonOperand = R0;
+ UncommonRealOp = R1;
+ } else if (R1 == I0 || R1 == I1) {
+ CommonOperand = R1;
+ UncommonRealOp = R0;
+ } else {
+ LLVM_DEBUG(dbgs() << " - No equal operand\n");
+ return nullptr;
+ }
+
+ UncommonImagOp = (CommonOperand == I0) ? I1 : I0;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270)
+ std::swap(UncommonRealOp, UncommonImagOp);
+
+ std::pair<Value *, Value *> PartialMatch(
+ (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180)
+ ? CommonOperand
+ : nullptr,
+ (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270)
+ ? CommonOperand
+ : nullptr);
+
+ auto *CRInst = dyn_cast<Instruction>(CR);
+ auto *CIInst = dyn_cast<Instruction>(CI);
+
+ if (!CRInst || !CIInst) {
+ LLVM_DEBUG(dbgs() << " - Common operands are not instructions.\n");
+ return nullptr;
+ }
+
+ NodePtr CNode = identifyNodeWithImplicitAdd(CRInst, CIInst, PartialMatch);
+ if (!CNode) {
+ LLVM_DEBUG(dbgs() << " - No cnode identified\n");
+ return nullptr;
+ }
+
+ NodePtr UncommonRes = identifyNode(UncommonRealOp, UncommonImagOp);
+ if (!UncommonRes) {
+ LLVM_DEBUG(dbgs() << " - No UncommonRes identified\n");
+ return nullptr;
+ }
+
+ assert(PartialMatch.first && PartialMatch.second);
+ NodePtr CommonRes = identifyNode(PartialMatch.first, PartialMatch.second);
+ if (!CommonRes) {
+ LLVM_DEBUG(dbgs() << " - No CommonRes identified\n");
+ return nullptr;
+ }
+
+ NodePtr Node = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
+ Node->Rotation = Rotation;
+ Node->addOperand(CommonRes);
+ Node->addOperand(UncommonRes);
+ Node->addOperand(CNode);
+ return submitCompositeNode(Node);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyAdd(Instruction *Real, Instruction *Imag) {
+ LLVM_DEBUG(dbgs() << "identifyAdd " << *Real << " / " << *Imag << "\n");
+
+ // Determine rotation
+ ComplexDeinterleavingRotation Rotation;
+ if ((Real->getOpcode() == Instruction::FSub &&
+ Imag->getOpcode() == Instruction::FAdd) ||
+ (Real->getOpcode() == Instruction::Sub &&
+ Imag->getOpcode() == Instruction::Add))
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if ((Real->getOpcode() == Instruction::FAdd &&
+ Imag->getOpcode() == Instruction::FSub) ||
+ (Real->getOpcode() == Instruction::Add &&
+ Imag->getOpcode() == Instruction::Sub))
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+ else {
+ LLVM_DEBUG(dbgs() << " - Unhandled case, rotation is not assigned.\n");
+ return nullptr;
+ }
+
+ auto *AR = dyn_cast<Instruction>(Real->getOperand(0));
+ auto *BI = dyn_cast<Instruction>(Real->getOperand(1));
+ auto *AI = dyn_cast<Instruction>(Imag->getOperand(0));
+ auto *BR = dyn_cast<Instruction>(Imag->getOperand(1));
+
+ if (!AR || !AI || !BR || !BI) {
+ LLVM_DEBUG(dbgs() << " - Not all operands are instructions.\n");
+ return nullptr;
+ }
+
+ NodePtr ResA = identifyNode(AR, AI);
+ if (!ResA) {
+ LLVM_DEBUG(dbgs() << " - AR/AI is not identified as a composite node.\n");
+ return nullptr;
+ }
+ NodePtr ResB = identifyNode(BR, BI);
+ if (!ResB) {
+ LLVM_DEBUG(dbgs() << " - BR/BI is not identified as a composite node.\n");
+ return nullptr;
+ }
+
+ NodePtr Node =
+ prepareCompositeNode(ComplexDeinterleavingOperation::CAdd, Real, Imag);
+ Node->Rotation = Rotation;
+ Node->addOperand(ResA);
+ Node->addOperand(ResB);
+ return submitCompositeNode(Node);
+}
+
+static bool isInstructionPairAdd(Instruction *A, Instruction *B) {
+ unsigned OpcA = A->getOpcode();
+ unsigned OpcB = B->getOpcode();
+
+ return (OpcA == Instruction::FSub && OpcB == Instruction::FAdd) ||
+ (OpcA == Instruction::FAdd && OpcB == Instruction::FSub) ||
+ (OpcA == Instruction::Sub && OpcB == Instruction::Add) ||
+ (OpcA == Instruction::Add && OpcB == Instruction::Sub);
+}
+
+static bool isInstructionPairMul(Instruction *A, Instruction *B) {
+ auto Pattern =
+ m_BinOp(m_FMul(m_Value(), m_Value()), m_FMul(m_Value(), m_Value()));
+
+ return match(A, Pattern) && match(B, Pattern);
+}
+
+static bool isInstructionPotentiallySymmetric(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
+ Instruction *Imag) {
+ if (Real->getOpcode() != Imag->getOpcode())
+ return nullptr;
+
+ if (!isInstructionPotentiallySymmetric(Real) ||
+ !isInstructionPotentiallySymmetric(Imag))
+ return nullptr;
+
+ auto *R0 = Real->getOperand(0);
+ auto *I0 = Imag->getOperand(0);
+
+ NodePtr Op0 = identifyNode(R0, I0);
+ NodePtr Op1 = nullptr;
+ if (Op0 == nullptr)
+ return nullptr;
+
+ if (Real->isBinaryOp()) {
+ auto *R1 = Real->getOperand(1);
+ auto *I1 = Imag->getOperand(1);
+ Op1 = identifyNode(R1, I1);
+ if (Op1 == nullptr)
+ return nullptr;
+ }
+
+ if (isa<FPMathOperator>(Real) &&
+ Real->getFastMathFlags() != Imag->getFastMathFlags())
+ return nullptr;
+
+ auto Node = prepareCompositeNode(ComplexDeinterleavingOperation::Symmetric,
+ Real, Imag);
+ Node->Opcode = Real->getOpcode();
+ if (isa<FPMathOperator>(Real))
+ Node->Flags = Real->getFastMathFlags();
+
+ Node->addOperand(Op0);
+ if (Real->isBinaryOp())
+ Node->addOperand(Op1);
+
+ return submitCompositeNode(Node);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
+ LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
+ assert(R->getType() == I->getType() &&
+ "Real and imaginary parts should not have different types");
+
+ auto It = CachedResult.find({R, I});
+ if (It != CachedResult.end()) {
+ LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
+ return It->second;
+ }
+
+ if (NodePtr CN = identifySplat(R, I))
+ return CN;
+
+ auto *Real = dyn_cast<Instruction>(R);
+ auto *Imag = dyn_cast<Instruction>(I);
+ if (!Real || !Imag)
+ return nullptr;
+
+ if (NodePtr CN = identifyDeinterleave(Real, Imag))
+ return CN;
+
+ if (NodePtr CN = identifyPHINode(Real, Imag))
+ return CN;
+
+ if (NodePtr CN = identifySelectNode(Real, Imag))
+ return CN;
+
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ bool HasCMulSupport = TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CMulPartial, NewVTy);
+ bool HasCAddSupport = TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CAdd, NewVTy);
+
+ if (HasCMulSupport && isInstructionPairMul(Real, Imag)) {
+ if (NodePtr CN = identifyPartialMul(Real, Imag))
+ return CN;
+ }
+
+ if (HasCAddSupport && isInstructionPairAdd(Real, Imag)) {
+ if (NodePtr CN = identifyAdd(Real, Imag))
+ return CN;
+ }
+
+ if (HasCMulSupport && HasCAddSupport) {
+ if (NodePtr CN = identifyReassocNodes(Real, Imag))
+ return CN;
+ }
+
+ if (NodePtr CN = identifySymmetricOperation(Real, Imag))
+ return CN;
+
+ LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n");
+ CachedResult[{R, I}] = nullptr;
+ return nullptr;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyReassocNodes(Instruction *Real,
+ Instruction *Imag) {
+ auto IsOperationSupported = [](unsigned Opcode) -> bool {
+ return Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub;
+ };
+
+ if (!IsOperationSupported(Real->getOpcode()) ||
+ !IsOperationSupported(Imag->getOpcode()))
+ return nullptr;
+
+ std::optional<FastMathFlags> Flags;
+ if (isa<FPMathOperator>(Real)) {
+ if (Real->getFastMathFlags() != Imag->getFastMathFlags()) {
+ LLVM_DEBUG(dbgs() << "The flags in Real and Imaginary instructions are "
+ "not identical\n");
+ return nullptr;
+ }
+
+ Flags = Real->getFastMathFlags();
+ if (!Flags->allowReassoc()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "the 'Reassoc' attribute is missing in the FastMath flags\n");
+ return nullptr;
+ }
+ }
+
+ // Collect multiplications and addend instructions from the given instruction
+ // while traversing its operands. Additionally, verify that all instructions
+ // have the same fast math flags.
+ auto Collect = [&Flags](Instruction *Insn, std::vector<Product> &Muls,
+ std::list<Addend> &Addends) -> bool {
+ SmallVector<PointerIntPair<Value *, 1, bool>> Worklist = {{Insn, true}};
+ SmallPtrSet<Value *, 8> Visited;
+ while (!Worklist.empty()) {
+ auto [V, IsPositive] = Worklist.back();
+ Worklist.pop_back();
+ if (!Visited.insert(V).second)
+ continue;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ Addends.emplace_back(V, IsPositive);
+ continue;
+ }
+
+ // If an instruction has more than one user, it indicates that it either
+ // has an external user, which will be later checked by the checkNodes
+ // function, or it is a subexpression utilized by multiple expressions. In
+ // the latter case, we will attempt to separately identify the complex
+ // operation from here in order to create a shared
+ // ComplexDeinterleavingCompositeNode.
+ if (I != Insn && I->getNumUses() > 1) {
+ LLVM_DEBUG(dbgs() << "Found potential sub-expression: " << *I << "\n");
+ Addends.emplace_back(I, IsPositive);
+ continue;
+ }
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::Add:
+ Worklist.emplace_back(I->getOperand(1), IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ break;
+ case Instruction::FSub:
+ Worklist.emplace_back(I->getOperand(1), !IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ break;
+ case Instruction::Sub:
+ if (isNeg(I)) {
+ Worklist.emplace_back(getNegOperand(I), !IsPositive);
+ } else {
+ Worklist.emplace_back(I->getOperand(1), !IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ }
+ break;
+ case Instruction::FMul:
+ case Instruction::Mul: {
+ Value *A, *B;
+ if (isNeg(I->getOperand(0))) {
+ A = getNegOperand(I->getOperand(0));
+ IsPositive = !IsPositive;
+ } else {
+ A = I->getOperand(0);
+ }
+
+ if (isNeg(I->getOperand(1))) {
+ B = getNegOperand(I->getOperand(1));
+ IsPositive = !IsPositive;
+ } else {
+ B = I->getOperand(1);
+ }
+ Muls.push_back(Product{A, B, IsPositive});
+ break;
+ }
+ case Instruction::FNeg:
+ Worklist.emplace_back(I->getOperand(0), !IsPositive);
+ break;
+ default:
+ Addends.emplace_back(I, IsPositive);
+ continue;
+ }
+
+ if (Flags && I->getFastMathFlags() != *Flags) {
+ LLVM_DEBUG(dbgs() << "The instruction's fast math flags are "
+ "inconsistent with the root instructions' flags: "
+ << *I << "\n");
+ return false;
+ }
+ }
+ return true;
+ };
+
+ std::vector<Product> RealMuls, ImagMuls;
+ std::list<Addend> RealAddends, ImagAddends;
+ if (!Collect(Real, RealMuls, RealAddends) ||
+ !Collect(Imag, ImagMuls, ImagAddends))
+ return nullptr;
+
+ if (RealAddends.size() != ImagAddends.size())
+ return nullptr;
+
+ NodePtr FinalNode;
+ if (!RealMuls.empty() || !ImagMuls.empty()) {
+ // If there are multiplicands, extract a positive addend and use it as an
+ // accumulator.
+ FinalNode = extractPositiveAddend(RealAddends, ImagAddends);
+ FinalNode = identifyMultiplications(RealMuls, ImagMuls, FinalNode);
+ if (!FinalNode)
+ return nullptr;
+ }
+
+ // Identify and process remaining additions
+ if (!RealAddends.empty() || !ImagAddends.empty()) {
+ FinalNode = identifyAdditions(RealAddends, ImagAddends, Flags, FinalNode);
+ if (!FinalNode)
+ return nullptr;
+ }
+ assert(FinalNode && "FinalNode can not be nullptr here");
+ // Set the Real and Imag fields of the final node and submit it
+ FinalNode->Real = Real;
+ FinalNode->Imag = Imag;
+ submitCompositeNode(FinalNode);
+ return FinalNode;
+}
+
+bool ComplexDeinterleavingGraph::collectPartialMuls(
+ const std::vector<Product> &RealMuls, const std::vector<Product> &ImagMuls,
+ std::vector<PartialMulCandidate> &PartialMulCandidates) {
+ // Helper function to extract a common operand from two products
+ auto FindCommonInstruction = [](const Product &Real,
+ const Product &Imag) -> Value * {
+ if (Real.Multiplicand == Imag.Multiplicand ||
+ Real.Multiplicand == Imag.Multiplier)
+ return Real.Multiplicand;
+
+ if (Real.Multiplier == Imag.Multiplicand ||
+ Real.Multiplier == Imag.Multiplier)
+ return Real.Multiplier;
+
+ return nullptr;
+ };
+
+ // Iterate over real and imaginary multiplications to find common operands.
+ // If a common operand is found, a partial multiplication candidate is
+ // created and added to the candidates vector. The function returns false if
+ // no common operands are found for any product.
+ for (unsigned i = 0; i < RealMuls.size(); ++i) {
+ bool FoundCommon = false;
+ for (unsigned j = 0; j < ImagMuls.size(); ++j) {
+ auto *Common = FindCommonInstruction(RealMuls[i], ImagMuls[j]);
+ if (!Common)
+ continue;
+
+ auto *A = RealMuls[i].Multiplicand == Common ? RealMuls[i].Multiplier
+ : RealMuls[i].Multiplicand;
+ auto *B = ImagMuls[j].Multiplicand == Common ? ImagMuls[j].Multiplier
+ : ImagMuls[j].Multiplicand;
+
+ auto Node = identifyNode(A, B);
+ if (Node) {
+ FoundCommon = true;
+ PartialMulCandidates.push_back({Common, Node, i, j, false});
+ }
+
+ Node = identifyNode(B, A);
+ if (Node) {
+ FoundCommon = true;
+ PartialMulCandidates.push_back({Common, Node, i, j, true});
+ }
+ }
+ if (!FoundCommon)
+ return false;
+ }
+ return true;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyMultiplications(
+ std::vector<Product> &RealMuls, std::vector<Product> &ImagMuls,
+ NodePtr Accumulator = nullptr) {
+ if (RealMuls.size() != ImagMuls.size())
+ return nullptr;
+
+ std::vector<PartialMulCandidate> Info;
+ if (!collectPartialMuls(RealMuls, ImagMuls, Info))
+ return nullptr;
+
+ // Map to store common instruction to node pointers
+ std::map<Value *, NodePtr> CommonToNode;
+ std::vector<bool> Processed(Info.size(), false);
+ for (unsigned I = 0; I < Info.size(); ++I) {
+ if (Processed[I])
+ continue;
+
+ PartialMulCandidate &InfoA = Info[I];
+ for (unsigned J = I + 1; J < Info.size(); ++J) {
+ if (Processed[J])
+ continue;
+
+ PartialMulCandidate &InfoB = Info[J];
+ auto *InfoReal = &InfoA;
+ auto *InfoImag = &InfoB;
+
+ auto NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common);
+ if (!NodeFromCommon) {
+ std::swap(InfoReal, InfoImag);
+ NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common);
+ }
+ if (!NodeFromCommon)
+ continue;
+
+ CommonToNode[InfoReal->Common] = NodeFromCommon;
+ CommonToNode[InfoImag->Common] = NodeFromCommon;
+ Processed[I] = true;
+ Processed[J] = true;
+ }
+ }
+
+ std::vector<bool> ProcessedReal(RealMuls.size(), false);
+ std::vector<bool> ProcessedImag(ImagMuls.size(), false);
+ NodePtr Result = Accumulator;
+ for (auto &PMI : Info) {
+ if (ProcessedReal[PMI.RealIdx] || ProcessedImag[PMI.ImagIdx])
+ continue;
+
+ auto It = CommonToNode.find(PMI.Common);
+ // TODO: Process independent complex multiplications. Cases like this:
+ // A.real() * B where both A and B are complex numbers.
+ if (It == CommonToNode.end()) {
+ LLVM_DEBUG({
+ dbgs() << "Unprocessed independent partial multiplication:\n";
+ for (auto *Mul : {&RealMuls[PMI.RealIdx], &ImagMuls[PMI.ImagIdx]})
+ dbgs().indent(4) << (Mul->IsPositive ? "+" : "-") << *Mul->Multiplier
+ << " multiplied by " << *Mul->Multiplicand << "\n";
+ });
+ return nullptr;
+ }
+
+ auto &RealMul = RealMuls[PMI.RealIdx];
+ auto &ImagMul = ImagMuls[PMI.ImagIdx];
+
+ auto NodeA = It->second;
+ auto NodeB = PMI.Node;
+ auto IsMultiplicandReal = PMI.Common == NodeA->Real;
+ // The following table illustrates the relationship between multiplications
+ // and rotations. If we consider the multiplication (X + iY) * (U + iV), we
+ // can see:
+ //
+ // Rotation | Real | Imag |
+ // ---------+--------+--------+
+ // 0 | x * u | x * v |
+ // 90 | -y * v | y * u |
+ // 180 | -x * u | -x * v |
+ // 270 | y * v | -y * u |
+ //
+ // Check if the candidate can indeed be represented by partial
+ // multiplication
+ // TODO: Add support for multiplication by complex one
+ if ((IsMultiplicandReal && PMI.IsNodeInverted) ||
+ (!IsMultiplicandReal && !PMI.IsNodeInverted))
+ continue;
+
+ // Determine the rotation based on the multiplications
+ ComplexDeinterleavingRotation Rotation;
+ if (IsMultiplicandReal) {
+ // Detect 0 and 180 degrees rotation
+ if (RealMul.IsPositive && ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_0;
+ else if (!RealMul.IsPositive && !ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_180;
+ else
+ continue;
+
+ } else {
+ // Detect 90 and 270 degrees rotation
+ if (!RealMul.IsPositive && ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_90;
+ else if (RealMul.IsPositive && !ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_270;
+ else
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Identified partial multiplication (X, Y) * (U, V):\n";
+ dbgs().indent(4) << "X: " << *NodeA->Real << "\n";
+ dbgs().indent(4) << "Y: " << *NodeA->Imag << "\n";
+ dbgs().indent(4) << "U: " << *NodeB->Real << "\n";
+ dbgs().indent(4) << "V: " << *NodeB->Imag << "\n";
+ dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
+ });
+
+ NodePtr NodeMul = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, nullptr, nullptr);
+ NodeMul->Rotation = Rotation;
+ NodeMul->addOperand(NodeA);
+ NodeMul->addOperand(NodeB);
+ if (Result)
+ NodeMul->addOperand(Result);
+ submitCompositeNode(NodeMul);
+ Result = NodeMul;
+ ProcessedReal[PMI.RealIdx] = true;
+ ProcessedImag[PMI.ImagIdx] = true;
+ }
+
+ // Ensure all products have been processed; if not, return nullptr.
+ if (!all_of(ProcessedReal, [](bool V) { return V; }) ||
+ !all_of(ProcessedImag, [](bool V) { return V; })) {
+
+ // Dump debug information about which partial multiplications are not
+ // processed.
+ LLVM_DEBUG({
+ dbgs() << "Unprocessed products (Real):\n";
+ for (size_t i = 0; i < ProcessedReal.size(); ++i) {
+ if (!ProcessedReal[i])
+ dbgs().indent(4) << (RealMuls[i].IsPositive ? "+" : "-")
+ << *RealMuls[i].Multiplier << " multiplied by "
+ << *RealMuls[i].Multiplicand << "\n";
+ }
+ dbgs() << "Unprocessed products (Imag):\n";
+ for (size_t i = 0; i < ProcessedImag.size(); ++i) {
+ if (!ProcessedImag[i])
+ dbgs().indent(4) << (ImagMuls[i].IsPositive ? "+" : "-")
+ << *ImagMuls[i].Multiplier << " multiplied by "
+ << *ImagMuls[i].Multiplicand << "\n";
+ }
+ });
+ return nullptr;
+ }
+
+ return Result;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyAdditions(
+ std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends,
+ std::optional<FastMathFlags> Flags, NodePtr Accumulator = nullptr) {
+ if (RealAddends.size() != ImagAddends.size())
+ return nullptr;
+
+ NodePtr Result;
+ // If we have an accumulator, use it as the first addend.
+ if (Accumulator)
+ Result = Accumulator;
+ // Otherwise find an element with both positive real and imaginary parts.
+ else
+ Result = extractPositiveAddend(RealAddends, ImagAddends);
+
+ if (!Result)
+ return nullptr;
+
+ while (!RealAddends.empty()) {
+ auto ItR = RealAddends.begin();
+ auto [R, IsPositiveR] = *ItR;
+
+ bool FoundImag = false;
+ for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
+ auto [I, IsPositiveI] = *ItI;
+ ComplexDeinterleavingRotation Rotation;
+ if (IsPositiveR && IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ else if (!IsPositiveR && IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if (!IsPositiveR && !IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ else
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+
+ NodePtr AddNode;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180) {
+ AddNode = identifyNode(R, I);
+ } else {
+ AddNode = identifyNode(I, R);
+ }
+ if (AddNode) {
+ LLVM_DEBUG({
+ dbgs() << "Identified addition:\n";
+ dbgs().indent(4) << "X: " << *R << "\n";
+ dbgs().indent(4) << "Y: " << *I << "\n";
+ dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
+ });
+
+ NodePtr TmpNode;
+ if (Rotation == llvm::ComplexDeinterleavingRotation::Rotation_0) {
+ TmpNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
+ if (Flags) {
+ TmpNode->Opcode = Instruction::FAdd;
+ TmpNode->Flags = *Flags;
+ } else {
+ TmpNode->Opcode = Instruction::Add;
+ }
+ } else if (Rotation ==
+ llvm::ComplexDeinterleavingRotation::Rotation_180) {
+ TmpNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
+ if (Flags) {
+ TmpNode->Opcode = Instruction::FSub;
+ TmpNode->Flags = *Flags;
+ } else {
+ TmpNode->Opcode = Instruction::Sub;
+ }
+ } else {
+ TmpNode = prepareCompositeNode(ComplexDeinterleavingOperation::CAdd,
+ nullptr, nullptr);
+ TmpNode->Rotation = Rotation;
+ }
+
+ TmpNode->addOperand(Result);
+ TmpNode->addOperand(AddNode);
+ submitCompositeNode(TmpNode);
+ Result = TmpNode;
+ RealAddends.erase(ItR);
+ ImagAddends.erase(ItI);
+ FoundImag = true;
+ break;
+ }
+ }
+ if (!FoundImag)
+ return nullptr;
+ }
+ return Result;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::extractPositiveAddend(
+ std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends) {
+ for (auto ItR = RealAddends.begin(); ItR != RealAddends.end(); ++ItR) {
+ for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
+ auto [R, IsPositiveR] = *ItR;
+ auto [I, IsPositiveI] = *ItI;
+ if (IsPositiveR && IsPositiveI) {
+ auto Result = identifyNode(R, I);
+ if (Result) {
+ RealAddends.erase(ItR);
+ ImagAddends.erase(ItI);
+ return Result;
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
+bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
+ // This potential root instruction might already have been recognized as a
+ // reduction. Because RootToNode maps both the Real and Imaginary parts to
+ // the same CompositeNode, we should choose only one of them, either the Real
+ // or the Imag instruction, to use as an anchor for generating the complex
+ // instruction.
+ auto It = RootToNode.find(RootI);
+ if (It != RootToNode.end()) {
+ auto RootNode = It->second;
+ assert(RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionOperation);
+ // Find out which part, Real or Imag, comes later, and only when we reach
+ // the later part, add it to OrderedRoots.
+ auto *R = cast<Instruction>(RootNode->Real);
+ auto *I = cast<Instruction>(RootNode->Imag);
+ auto *ReplacementAnchor = R->comesBefore(I) ? I : R;
+ if (ReplacementAnchor != RootI)
+ return false;
+ OrderedRoots.push_back(RootI);
+ return true;
+ }
+
+ auto RootNode = identifyRoot(RootI);
+ if (!RootNode)
+ return false;
+
+ LLVM_DEBUG({
+ Function *F = RootI->getFunction();
+ BasicBlock *B = RootI->getParent();
+ dbgs() << "Complex deinterleaving graph for " << F->getName()
+ << "::" << B->getName() << ".\n";
+ dump(dbgs());
+ dbgs() << "\n";
+ });
+ RootToNode[RootI] = RootNode;
+ OrderedRoots.push_back(RootI);
+ return true;
+}
+
+bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) {
+ bool FoundPotentialReduction = false;
+
+ auto *Br = dyn_cast<BranchInst>(B->getTerminator());
+ if (!Br || Br->getNumSuccessors() != 2)
+ return false;
+
+ // Identify simple one-block loop
+ if (Br->getSuccessor(0) != B && Br->getSuccessor(1) != B)
+ return false;
+
+ SmallVector<PHINode *> PHIs;
+ for (auto &PHI : B->phis()) {
+ if (PHI.getNumIncomingValues() != 2)
+ continue;
+
+ if (!PHI.getType()->isVectorTy())
+ continue;
+
+ auto *ReductionOp = dyn_cast<Instruction>(PHI.getIncomingValueForBlock(B));
+ if (!ReductionOp)
+ continue;
+
+ // Check if the final instruction is reduced outside of the current block.
+ Instruction *FinalReduction = nullptr;
+ auto NumUsers = 0u;
+ for (auto *U : ReductionOp->users()) {
+ ++NumUsers;
+ if (U == &PHI)
+ continue;
+ FinalReduction = dyn_cast<Instruction>(U);
+ }
+
+ if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B ||
+ isa<PHINode>(FinalReduction))
+ continue;
+
+ ReductionInfo[ReductionOp] = {&PHI, FinalReduction};
+ BackEdge = B;
+ auto BackEdgeIdx = PHI.getBasicBlockIndex(B);
+ auto IncomingIdx = BackEdgeIdx == 0 ? 1 : 0;
+ Incoming = PHI.getIncomingBlock(IncomingIdx);
+ FoundPotentialReduction = true;
+
+ // If the initial value of PHINode is an Instruction, consider it a leaf
+ // value of a complex deinterleaving graph.
+ if (auto *InitPHI =
+ dyn_cast<Instruction>(PHI.getIncomingValueForBlock(Incoming)))
+ FinalInstructions.insert(InitPHI);
+ }
+ return FoundPotentialReduction;
+}
+
+void ComplexDeinterleavingGraph::identifyReductionNodes() {
+ SmallVector<bool> Processed(ReductionInfo.size(), false);
+ SmallVector<Instruction *> OperationInstruction;
+ for (auto &P : ReductionInfo)
+ OperationInstruction.push_back(P.first);
+
+ // Identify a complex computation by evaluating two reduction operations that
+ // could potentially be involved.
+ for (size_t i = 0; i < OperationInstruction.size(); ++i) {
+ if (Processed[i])
+ continue;
+ for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {
+ if (Processed[j])
+ continue;
+
+ auto *Real = OperationInstruction[i];
+ auto *Imag = OperationInstruction[j];
+ if (Real->getType() != Imag->getType())
+ continue;
+
+ RealPHI = ReductionInfo[Real].first;
+ ImagPHI = ReductionInfo[Imag].first;
+ PHIsFound = false;
+ auto Node = identifyNode(Real, Imag);
+ if (!Node) {
+ std::swap(Real, Imag);
+ std::swap(RealPHI, ImagPHI);
+ Node = identifyNode(Real, Imag);
+ }
+
+ // If a node is identified and the reduction PHINode is used in the chain
+ // of operations, mark its operation instructions as used to prevent
+ // re-identification and attach the node to the real part.
+ if (Node && PHIsFound) {
+ LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: "
+ << *Real << " / " << *Imag << "\n");
+ Processed[i] = true;
+ Processed[j] = true;
+ auto RootNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionOperation, Real, Imag);
+ RootNode->addOperand(Node);
+ RootToNode[Real] = RootNode;
+ RootToNode[Imag] = RootNode;
+ submitCompositeNode(RootNode);
+ break;
+ }
+ }
+ }
+
+ RealPHI = nullptr;
+ ImagPHI = nullptr;
+}
+
+bool ComplexDeinterleavingGraph::checkNodes() {
+ // Collect all instructions from roots to leaves
+ SmallPtrSet<Instruction *, 16> AllInstructions;
+ SmallVector<Instruction *, 8> Worklist;
+ for (auto &Pair : RootToNode)
+ Worklist.push_back(Pair.first);
+
+ // Extract all instructions that are used by all XCMLA/XCADD/ADD/SUB/NEG
+ // chains
+ while (!Worklist.empty()) {
+ auto *I = Worklist.back();
+ Worklist.pop_back();
+
+ if (!AllInstructions.insert(I).second)
+ continue;
+
+ for (Value *Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (!FinalInstructions.count(I))
+ Worklist.emplace_back(OpI);
+ }
+ }
+ }
+
+ // Find instructions that have users outside of chain
+ SmallVector<Instruction *, 2> OuterInstructions;
+ for (auto *I : AllInstructions) {
+ // Skip root nodes
+ if (RootToNode.count(I))
+ continue;
+
+ for (User *U : I->users()) {
+ if (AllInstructions.count(cast<Instruction>(U)))
+ continue;
+
+ // Found an instruction that is not used by XCMLA/XCADD chain
+ Worklist.emplace_back(I);
+ break;
+ }
+ }
+
+ // If any instructions are found to be used outside, find and remove roots
+ // that somehow connect to those instructions.
+ SmallPtrSet<Instruction *, 16> Visited;
+ while (!Worklist.empty()) {
+ auto *I = Worklist.back();
+ Worklist.pop_back();
+ if (!Visited.insert(I).second)
+ continue;
+
+ // Found an impacted root node. Removing it from the nodes to be
+ // deinterleaved
+ if (RootToNode.count(I)) {
+ LLVM_DEBUG(dbgs() << "Instruction " << *I
+ << " could be deinterleaved but its chain of complex "
+ "operations have an outside user\n");
+ RootToNode.erase(I);
+ }
+
+ if (!AllInstructions.count(I) || FinalInstructions.count(I))
+ continue;
+
+ for (User *U : I->users())
+ Worklist.emplace_back(cast<Instruction>(U));
+
+ for (Value *Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Worklist.emplace_back(OpI);
+ }
+ }
+ return !RootToNode.empty();
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyRoot(Instruction *RootI) {
+ if (auto *Intrinsic = dyn_cast<IntrinsicInst>(RootI)) {
+ if (Intrinsic->getIntrinsicID() !=
+ Intrinsic::experimental_vector_interleave2)
+ return nullptr;
+
+ auto *Real = dyn_cast<Instruction>(Intrinsic->getOperand(0));
+ auto *Imag = dyn_cast<Instruction>(Intrinsic->getOperand(1));
+ if (!Real || !Imag)
+ return nullptr;
+
+ return identifyNode(Real, Imag);
+ }
+
+ auto *SVI = dyn_cast<ShuffleVectorInst>(RootI);
+ if (!SVI)
+ return nullptr;
+
+ // Look for a shufflevector that takes separate vectors of the real and
+ // imaginary components and recombines them into a single vector.
+ if (!isInterleavingMask(SVI->getShuffleMask()))
+ return nullptr;
+
+ Instruction *Real;
+ Instruction *Imag;
+ if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag))))
+ return nullptr;
+
+ return identifyNode(Real, Imag);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyDeinterleave(Instruction *Real,
+ Instruction *Imag) {
+ Instruction *I = nullptr;
+ Value *FinalValue = nullptr;
+ if (match(Real, m_ExtractValue<0>(m_Instruction(I))) &&
+ match(Imag, m_ExtractValue<1>(m_Specific(I))) &&
+ match(I, m_Intrinsic<Intrinsic::experimental_vector_deinterleave2>(
+ m_Value(FinalValue)))) {
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ llvm::ComplexDeinterleavingOperation::Deinterleave, Real, Imag);
+ PlaceholderNode->ReplacementNode = FinalValue;
+ FinalInstructions.insert(Real);
+ FinalInstructions.insert(Imag);
+ return submitCompositeNode(PlaceholderNode);
+ }
+
+ auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
+ auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
+ if (!RealShuffle || !ImagShuffle) {
+ if (RealShuffle || ImagShuffle)
+ LLVM_DEBUG(dbgs() << " - There's a shuffle where there shouldn't be.\n");
+ return nullptr;
+ }
+
+ Value *RealOp1 = RealShuffle->getOperand(1);
+ if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
+ LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+ Value *ImagOp1 = ImagShuffle->getOperand(1);
+ if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
+ LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+
+ Value *RealOp0 = RealShuffle->getOperand(0);
+ Value *ImagOp0 = ImagShuffle->getOperand(0);
+
+ if (RealOp0 != ImagOp0) {
+ LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n");
+ return nullptr;
+ }
+
+ ArrayRef<int> RealMask = RealShuffle->getShuffleMask();
+ ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask();
+ if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) {
+ LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n");
+ return nullptr;
+ }
+
+ if (RealMask[0] != 0 || ImagMask[0] != 1) {
+ LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n");
+ return nullptr;
+ }
+
+ // Type checking: the shuffle type should be a vector type with the same
+ // scalar type, but half the size.
+ auto CheckType = [&](ShuffleVectorInst *Shuffle) {
+ Value *Op = Shuffle->getOperand(0);
+ auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType());
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+
+ if (OpTy->getScalarType() != ShuffleTy->getScalarType())
+ return false;
+ if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
+ return false;
+
+ return true;
+ };
+
+ auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool {
+ if (!CheckType(Shuffle))
+ return false;
+
+ ArrayRef<int> Mask = Shuffle->getShuffleMask();
+ int Last = *Mask.rbegin();
+
+ Value *Op = Shuffle->getOperand(0);
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+ int NumElements = OpTy->getNumElements();
+
+ // Ensure that the deinterleaving shuffle only pulls from the first
+ // shuffle operand.
+ return Last < NumElements;
+ };
+
+ if (RealShuffle->getType() != ImagShuffle->getType()) {
+ LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n");
+ return nullptr;
+ }
+ if (!CheckDeinterleavingShuffle(RealShuffle)) {
+ LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n");
+ return nullptr;
+ }
+ if (!CheckDeinterleavingShuffle(ImagShuffle)) {
+ LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n");
+ return nullptr;
+ }
+
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Deinterleave,
+ RealShuffle, ImagShuffle);
+ PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
+ FinalInstructions.insert(RealShuffle);
+ FinalInstructions.insert(ImagShuffle);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
+ auto IsSplat = [](Value *V) -> bool {
+ // Fixed-width vector with constants
+ if (isa<ConstantDataVector>(V))
+ return true;
+
+ VectorType *VTy;
+ ArrayRef<int> Mask;
+ // Splats are represented differently depending on whether the repeated
+ // value is a constant or an Instruction
+ if (auto *Const = dyn_cast<ConstantExpr>(V)) {
+ if (Const->getOpcode() != Instruction::ShuffleVector)
+ return false;
+ VTy = cast<VectorType>(Const->getType());
+ Mask = Const->getShuffleMask();
+ } else if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
+ VTy = Shuf->getType();
+ Mask = Shuf->getShuffleMask();
+ } else {
+ return false;
+ }
+
+ // When the data type is <1 x Type>, it's not possible to differentiate
+ // between the ComplexDeinterleavingOperation::Deinterleave and
+ // ComplexDeinterleavingOperation::Splat operations.
+ if (!VTy->isScalableTy() && VTy->getElementCount().getKnownMinValue() == 1)
+ return false;
+
+ return all_equal(Mask) && Mask[0] == 0;
+ };
+
+ if (!IsSplat(R) || !IsSplat(I))
+ return nullptr;
+
+ auto *Real = dyn_cast<Instruction>(R);
+ auto *Imag = dyn_cast<Instruction>(I);
+ if ((!Real && Imag) || (Real && !Imag))
+ return nullptr;
+
+ if (Real && Imag) {
+ // Non-constant splats should be in the same basic block
+ if (Real->getParent() != Imag->getParent())
+ return nullptr;
+
+ FinalInstructions.insert(Real);
+ FinalInstructions.insert(Imag);
+ }
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(ComplexDeinterleavingOperation::Splat, R, I);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
+ Instruction *Imag) {
+ if (Real != RealPHI || Imag != ImagPHI)
+ return nullptr;
+
+ PHIsFound = true;
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionPHI, Real, Imag);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real,
+ Instruction *Imag) {
+ auto *SelectReal = dyn_cast<SelectInst>(Real);
+ auto *SelectImag = dyn_cast<SelectInst>(Imag);
+ if (!SelectReal || !SelectImag)
+ return nullptr;
+
+ Instruction *MaskA, *MaskB;
+ Instruction *AR, *AI, *RA, *BI;
+ if (!match(Real, m_Select(m_Instruction(MaskA), m_Instruction(AR),
+ m_Instruction(RA))) ||
+ !match(Imag, m_Select(m_Instruction(MaskB), m_Instruction(AI),
+ m_Instruction(BI))))
+ return nullptr;
+
+ if (MaskA != MaskB && !MaskA->isIdenticalTo(MaskB))
+ return nullptr;
+
+ if (!MaskA->getType()->isVectorTy())
+ return nullptr;
+
+ auto NodeA = identifyNode(AR, AI);
+ if (!NodeA)
+ return nullptr;
+
+ auto NodeB = identifyNode(RA, BI);
+ if (!NodeB)
+ return nullptr;
+
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionSelect, Real, Imag);
+ PlaceholderNode->addOperand(NodeA);
+ PlaceholderNode->addOperand(NodeB);
+ FinalInstructions.insert(MaskA);
+ FinalInstructions.insert(MaskB);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+static Value *replaceSymmetricNode(IRBuilderBase &B, unsigned Opcode,
+ std::optional<FastMathFlags> Flags,
+ Value *InputA, Value *InputB) {
+ Value *I;
+ switch (Opcode) {
+ case Instruction::FNeg:
+ I = B.CreateFNeg(InputA);
+ break;
+ case Instruction::FAdd:
+ I = B.CreateFAdd(InputA, InputB);
+ break;
+ case Instruction::Add:
+ I = B.CreateAdd(InputA, InputB);
+ break;
+ case Instruction::FSub:
+ I = B.CreateFSub(InputA, InputB);
+ break;
+ case Instruction::Sub:
+ I = B.CreateSub(InputA, InputB);
+ break;
+ case Instruction::FMul:
+ I = B.CreateFMul(InputA, InputB);
+ break;
+ case Instruction::Mul:
+ I = B.CreateMul(InputA, InputB);
+ break;
+ default:
+ llvm_unreachable("Incorrect symmetric opcode");
+ }
+ if (Flags)
+ cast<Instruction>(I)->setFastMathFlags(*Flags);
+ return I;
+}
+
+Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
+ RawNodePtr Node) {
+ if (Node->ReplacementNode)
+ return Node->ReplacementNode;
+
+ auto ReplaceOperandIfExist = [&](RawNodePtr &Node, unsigned Idx) -> Value * {
+ return Node->Operands.size() > Idx
+ ? replaceNode(Builder, Node->Operands[Idx])
+ : nullptr;
+ };
+
+ Value *ReplacementNode;
+ switch (Node->Operation) {
+ case ComplexDeinterleavingOperation::CAdd:
+ case ComplexDeinterleavingOperation::CMulPartial:
+ case ComplexDeinterleavingOperation::Symmetric: {
+ Value *Input0 = ReplaceOperandIfExist(Node, 0);
+ Value *Input1 = ReplaceOperandIfExist(Node, 1);
+ Value *Accumulator = ReplaceOperandIfExist(Node, 2);
+ assert(!Input1 || (Input0->getType() == Input1->getType() &&
+ "Node inputs need to be of the same type"));
+ assert(!Accumulator ||
+ (Input0->getType() == Accumulator->getType() &&
+ "Accumulator and input need to be of the same type"));
+ if (Node->Operation == ComplexDeinterleavingOperation::Symmetric)
+ ReplacementNode = replaceSymmetricNode(Builder, Node->Opcode, Node->Flags,
+ Input0, Input1);
+ else
+ ReplacementNode = TL->createComplexDeinterleavingIR(
+ Builder, Node->Operation, Node->Rotation, Input0, Input1,
+ Accumulator);
+ break;
+ }
+ case ComplexDeinterleavingOperation::Deinterleave:
+ llvm_unreachable("Deinterleave node should already have ReplacementNode");
+ break;
+ case ComplexDeinterleavingOperation::Splat: {
+ auto *NewTy = VectorType::getDoubleElementsVectorType(
+ cast<VectorType>(Node->Real->getType()));
+ auto *R = dyn_cast<Instruction>(Node->Real);
+ auto *I = dyn_cast<Instruction>(Node->Imag);
+ if (R && I) {
+ // Splats that are not constant are interleaved where they are located
+ Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode();
+ IRBuilder<> IRB(InsertPoint);
+ ReplacementNode =
+ IRB.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, NewTy,
+ {Node->Real, Node->Imag});
+ } else {
+ ReplacementNode =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
+ NewTy, {Node->Real, Node->Imag});
+ }
+ break;
+ }
+ case ComplexDeinterleavingOperation::ReductionPHI: {
+ // If Operation is ReductionPHI, a new empty PHINode is created.
+ // It is filled later when the ReductionOperation is processed.
+ auto *VTy = cast<VectorType>(Node->Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+ auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHI());
+ OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
+ ReplacementNode = NewPHI;
+ break;
+ }
+ case ComplexDeinterleavingOperation::ReductionOperation:
+ ReplacementNode = replaceNode(Builder, Node->Operands[0]);
+ processReductionOperation(ReplacementNode, Node);
+ break;
+ case ComplexDeinterleavingOperation::ReductionSelect: {
+ auto *MaskReal = cast<Instruction>(Node->Real)->getOperand(0);
+ auto *MaskImag = cast<Instruction>(Node->Imag)->getOperand(0);
+ auto *A = replaceNode(Builder, Node->Operands[0]);
+ auto *B = replaceNode(Builder, Node->Operands[1]);
+ auto *NewMaskTy = VectorType::getDoubleElementsVectorType(
+ cast<VectorType>(MaskReal->getType()));
+ auto *NewMask =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
+ NewMaskTy, {MaskReal, MaskImag});
+ ReplacementNode = Builder.CreateSelect(NewMask, A, B);
+ break;
+ }
+ }
+
+ assert(ReplacementNode && "Target failed to create Intrinsic call.");
+ NumComplexTransformations += 1;
+ Node->ReplacementNode = ReplacementNode;
+ return ReplacementNode;
+}
+
+void ComplexDeinterleavingGraph::processReductionOperation(
+ Value *OperationReplacement, RawNodePtr Node) {
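+ // The rewritten reduction has roughly the following shape (names here are
+ // illustrative only, not taken from the actual IR):
+ //   incoming block:  %init  = interleave2(InitReal, InitImag)
+ //   loop:            %phi   = phi [ %init, %incoming ], [ <new op>, %backedge ]
+ //   exit block:      %deint = deinterleave2(<new op>)
+ //                    the final real/imag reductions then read
+ //                    extractvalue %deint, 0 and extractvalue %deint, 1.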
+ auto *Real = cast<Instruction>(Node->Real);
+ auto *Imag = cast<Instruction>(Node->Imag);
+ auto *OldPHIReal = ReductionInfo[Real].first;
+ auto *OldPHIImag = ReductionInfo[Imag].first;
+ auto *NewPHI = OldToNewPHI[OldPHIReal];
+
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ // We have to interleave initial origin values coming from IncomingBlock
+ Value *InitReal = OldPHIReal->getIncomingValueForBlock(Incoming);
+ Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming);
+
+ IRBuilder<> Builder(Incoming->getTerminator());
+ auto *NewInit = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_interleave2, NewVTy, {InitReal, InitImag});
+
+ NewPHI->addIncoming(NewInit, Incoming);
+ NewPHI->addIncoming(OperationReplacement, BackEdge);
+
+ // Deinterleave the complex vector outside the loop so that it can finally
+ // be reduced
+ auto *FinalReductionReal = ReductionInfo[Real].second;
+ auto *FinalReductionImag = ReductionInfo[Imag].second;
+
+ Builder.SetInsertPoint(
+ &*FinalReductionReal->getParent()->getFirstInsertionPt());
+ auto *Deinterleave = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_deinterleave2,
+ OperationReplacement->getType(), OperationReplacement);
+
+ auto *NewReal = Builder.CreateExtractValue(Deinterleave, (uint64_t)0);
+ FinalReductionReal->replaceUsesOfWith(Real, NewReal);
+
+ Builder.SetInsertPoint(FinalReductionImag);
+ auto *NewImag = Builder.CreateExtractValue(Deinterleave, 1);
+ FinalReductionImag->replaceUsesOfWith(Imag, NewImag);
+}
+
+void ComplexDeinterleavingGraph::replaceNodes() {
+ SmallVector<Instruction *, 16> DeadInstrRoots;
+ for (auto *RootInstruction : OrderedRoots) {
+ // Check whether this potential root made it through the checking process
+ // and can be deinterleaved
+ if (!RootToNode.count(RootInstruction))
+ continue;
+
+ IRBuilder<> Builder(RootInstruction);
+ auto RootNode = RootToNode[RootInstruction];
+ Value *R = replaceNode(Builder, RootNode.get());
+
+ if (RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionOperation) {
+ auto *RootReal = cast<Instruction>(RootNode->Real);
+ auto *RootImag = cast<Instruction>(RootNode->Imag);
+ ReductionInfo[RootReal].first->removeIncomingValue(BackEdge);
+ ReductionInfo[RootImag].first->removeIncomingValue(BackEdge);
+ DeadInstrRoots.push_back(cast<Instruction>(RootReal));
+ DeadInstrRoots.push_back(cast<Instruction>(RootImag));
+ } else {
+ assert(R && "Unable to find replacement for RootInstruction");
+ DeadInstrRoots.push_back(RootInstruction);
+ RootInstruction->replaceAllUsesWith(R);
+ }
+ }
+
+ for (auto *I : DeadInstrRoots)
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 000000000000..106db7c51f27
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,698 @@
+//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during the post-RA scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI)
+ : MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = nullptr;
+
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.reset();
+
+ bool IsReturnBlock = BB->isReturnBlock();
+
+ // Examine the live-in regs of all successors.
+ for (const MachineBasicBlock *Succ : BB->successors())
+ for (const auto &LI : Succ->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI.getPristineRegs(MF);
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg))
+ continue;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.reset();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ // Kill instructions can define registers but are really nops, and there might
+ // be a real definition earlier that needs to be paired with uses dominated by
+ // this kill.
+
+ // FIXME: It may be possible to remove the isKill() restriction once PR18663
+ // has been properly fixed. There can be value in processing kills as seen in
+ // the AggressiveAntiDepBreaker class.
+ if (MI.isDebugInstr() || MI.isKill())
+ return;
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = nullptr;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (const SDep &P : SU->Preds) {
+ const SUnit *PredSU = P.getSUnit();
+ unsigned PredLatency = P.getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P.getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &P;
+ }
+ }
+ return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
+ // It's not safe to change register allocation for source operands of
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instruction is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395]
+ // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12]
+ // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special =
+ MI.isCall() || MI.hasExtraSrcRegAllocReq() || TII->isPredicated(MI);
+
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = nullptr;
+
+ if (i < MI.getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *AI;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ KeepRegs.set(SubReg);
+ }
+ }
+ }
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isValid())
+ continue;
+ // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
+ // it or any of its sub or super regs. We need to use KeepRegs to mark the
+ // reg because not all uses of the same reg within an instruction are
+ // necessarily tagged as tied.
+ // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
+ // def register but not the second (see PR20020 for details).
+ // FIXME: can this check be relaxed to account for undef uses
+ // of a register? In the above 'xor' example, the uses of %eax are undef, so
+ // earlier instructions could still replace %eax even though the 'xor'
+ // itself can't be changed.
+ if (MI.isRegTiedToUseOperand(I) &&
+ Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ KeepRegs.set(SubReg);
+ }
+ for (MCPhysReg SuperReg : TRI->superregs(Reg)) {
+ KeepRegs.set(SuperReg);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defined but not used in this
+ // instruction are now dead.
+ assert(!MI.isKill() && "Attempting to scan a kill instruction");
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to two
+ // address updates.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+
+ if (MO.isRegMask()) {
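+ // A register is treated as fully clobbered (and thus dead from here on)
+ // only if the mask clobbers it and all of its sub-registers.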
+ auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
+ return all_of(TRI->subregs_inclusive(PhysReg),
+ [&](MCPhysReg SR) { return MO.clobbersPhysReg(SR); });
+ };
+
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) {
+ if (ClobbersPhysRegAndSubRegs(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = nullptr;
+ RegRefs.erase(i);
+ }
+ }
+ }
+
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+
+ // Ignore two-addr defs.
+ if (MI.isRegTiedToUseOperand(i))
+ continue;
+
+ // If we've already marked this reg as unchangeable, don't remove
+ // it or any of its subregs from KeepRegs.
+ bool Keep = KeepRegs.test(Reg);
+
+ // For the reg itself and all subregs: update the def to current;
+ // reset the kill state, any restrictions, and references.
+ for (MCPhysReg SubregReg : TRI->subregs_inclusive(Reg)) {
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ Classes[SubregReg] = nullptr;
+ RegRefs.erase(SubregReg);
+ if (!Keep)
+ KeepRegs.reset(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (MCPhysReg SR : TRI->superregs(Reg))
+ Classes[SR] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = nullptr;
+ if (i < MI.getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // If it wasn't previously live but now is, this is a kill.
+ // Repeat for all aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+// Check all machine operands that reference the anti-dependent register, all
+// of which would have to be replaced by NewReg. Return true if any of their
+// parent instructions may clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg) {
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (const MachineOperand &CheckOper : MI->operands()) {
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg.
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned CriticalAntiDepBreaker::
+findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &Forbid) {
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
+ for (unsigned NewReg : Order) {
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If any instruction that defines AntiDepReg also defines NewReg, it's
+ // not suitable. For example, instructions with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ // If NewReg overlaps any of the forbidden registers, we can't use it.
+ bool Forbidden = false;
+ for (unsigned R : Forbid)
+ if (TRI->regsOverlap(NewReg, R)) {
+ Forbidden = true;
+ break;
+ }
+ if (Forbidden) continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit> &SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
+ DenseMap<MachineInstr *, const SUnit *> MISUnitMap;
+
+ // Find the node at the bottom of the critical path.
+ const SUnit *Max = nullptr;
+ for (const SUnit &SU : SUnits) {
+ MISUnitMap[SU.getInstr()] = &SU;
+ if (!Max || SU.getDepth() + SU.Latency > Max->getDepth() + Max->Latency)
+ Max = &SU;
+ }
+ assert(Max && "Failed to find bottom of the critical path");
+
+#ifndef NDEBUG
+ {
+ LLVM_DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ LLVM_DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ const SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
+ MachineInstr &MI = *--I;
+ // Kill instructions can define registers but are really nops, and there
+ // might be a real definition earlier that needs to be paired with uses
+ // dominated by this kill.
+
+ // FIXME: It may be possible to remove the isKill() restriction once PR18663
+ // has been properly fixed. There can be value in processing kills as seen
+ // in the AggressiveAntiDepBreaker class.
+ if (MI.isDebugInstr() || MI.isKill())
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (&MI == CriticalPathMI) {
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!MRI.isAllocatable(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.test(AntiDepReg))
+ // Don't break anti-dependencies if a use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (const SDep &P : CriticalPathSU->Preds)
+ if (P.getSUnit() == NextSU
+ ? (P.getKind() != SDep::Anti || P.getReg() != AntiDepReg)
+ : (P.getKind() == SDep::Data &&
+ P.getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = nullptr;
+ CriticalPathMI = nullptr;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ SmallVector<unsigned, 2> ForbidRegs;
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI))
+ // If this instruction's defs have special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid. If the instruction defines other registers,
+ // save a list of them so that we don't pick a new register
+ // that overlaps any of them.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ if (MO.isDef() && Reg != AntiDepReg)
+ ForbidRegs.push_back(Reg);
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg]
+ : nullptr;
+ assert((AntiDepReg == 0 || RC != nullptr) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC, ForbidRegs)) {
+ LLVM_DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << printReg(AntiDepReg, TRI) << " with "
+ << RegRefs.count(AntiDepReg) << " references"
+ << " using " << printReg(NewReg, TRI) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ UpdateDbgValues(DbgValues, Q->second->getParent(),
+ AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = nullptr;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
+
+AntiDepBreaker *
+llvm::createCriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI) {
+ return new CriticalAntiDepBreaker(MFi, RCI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 000000000000..640506b6e9ed
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,112 @@
+//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during the post-RA scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/AntiDepBreaker.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 cast to a
+ /// pointer.
+ std::vector<const TargetRegisterClass *> Classes;
+
+ /// Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+
+ using RegRefIter =
+ std::multimap<unsigned, MachineOperand *>::const_iterator;
+
+ /// The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// The index of the most recent complete def (proceeding
+ /// bottom up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
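+ // Note: for each register, exactly one of KillIndices[Reg] and
+ // DefIndices[Reg] is ~0u at any point; live registers have a valid kill
+ // index and dead registers a valid def index. The breaker asserts this
+ // invariant for the registers it examines.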
+
+ /// A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ BitVector KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI);
+ ~CriticalAntiDepBreaker() override;
+
+ /// Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB) override;
+
+ /// Identify anti-dependencies along the critical path
+ /// of the ScheduleDAG and break them by renaming registers.
+ unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) override;
+
+ /// Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) override;
+
+ /// Finish anti-dep breaking for a basic block.
+ void FinishBlock() override;
+
+ private:
+ void PrescanInstruction(MachineInstr &MI);
+ void ScanInstruction(MachineInstr &MI, unsigned Count);
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &Forbid);
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 000000000000..48bb4a07662e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,288 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
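+//
+// A typical client drives the DFA roughly as follows (sketch only; the
+// variable names are illustrative):
+//
+//   DFAPacketizer *RT = TII->CreateTargetScheduleState(MF.getSubtarget());
+//   for (MachineInstr &MI : Candidates)
+//     if (RT->canReserveResources(MI))
+//       RT->reserveResources(MI);  // MI joins the current packet.
+//     else
+//       RT->clearResources();      // Start a new packet.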
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "packets"
+
+static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
+ cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
+
+static unsigned InstrCount = 0;
+
+// Check if the resources occupied by a MCInstrDesc are available in the
+// current state.
+bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
+ unsigned Action = ItinActions[MID->getSchedClass()];
+ if (MID->getSchedClass() == 0 || Action == 0)
+ return false;
+ return A.canAdd(Action);
+}
+
+// Reserve the resources occupied by a MCInstrDesc and change the current
+// state to reflect that change.
+void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
+ unsigned Action = ItinActions[MID->getSchedClass()];
+ if (MID->getSchedClass() == 0 || Action == 0)
+ return;
+ A.add(Action);
+}
+
+// Check if the resources occupied by a machine instruction are available
+// in the current state.
+bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
+ const MCInstrDesc &MID = MI.getDesc();
+ return canReserveResources(&MID);
+}
+
+// Reserve the resources occupied by a machine instruction and change the
+// current state to reflect that change.
+void DFAPacketizer::reserveResources(MachineInstr &MI) {
+ const MCInstrDesc &MID = MI.getDesc();
+ reserveResources(&MID);
+}
+
+unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
+ ArrayRef<NfaPath> NfaPaths = A.getNfaPaths();
+ assert(!NfaPaths.empty() && "Invalid bundle!");
+ const NfaPath &RS = NfaPaths.front();
+
+ // RS stores the cumulative resources used up to and including the I'th
+ // instruction. The 0th instruction is the base case.
+ if (InstIdx == 0)
+ return RS[0];
+ // Return the difference between the cumulative resources used by InstIdx and
+ // its predecessor.
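+ // For example (values purely illustrative), with cumulative resources
+ // RS = {0x1, 0x3, 0x7}, instruction 1 used 0x3 ^ 0x1 = 0x2 and
+ // instruction 2 used 0x7 ^ 0x3 = 0x4.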
+ return RS[InstIdx] ^ RS[InstIdx - 1];
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
+ MachineLoopInfo &MLI,
+ AAResults *AA)
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
+ CanHandleTerminators = true;
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void DefaultVLIWScheduler::postProcessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(AA);
+ postProcessDAG();
+}
+
+VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
+ MachineLoopInfo &mli, AAResults *aa)
+ : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
+ ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
+ ResourceTracker->setTrackResources(true);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
+}
+
+VLIWPacketizerList::~VLIWPacketizerList() {
+ delete VLIWScheduler;
+ delete ResourceTracker;
+}
+
+// End the current packet, bundle packet instructions and reset DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI) {
+ LLVM_DEBUG({
+ if (!CurrentPacketMIs.empty()) {
+ dbgs() << "Finalizing packet:\n";
+ unsigned Idx = 0;
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ unsigned R = ResourceTracker->getUsedResources(Idx++);
+ dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
+ }
+ }
+ });
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr &MIFirst = *CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+ LLVM_DEBUG(dbgs() << "End packet\n");
+}
+
+// Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->startBlock(MBB);
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
+ std::distance(BeginItr, EndItr));
+ VLIWScheduler->schedule();
+
+ LLVM_DEBUG({
+ dbgs() << "Scheduling DAG of the packetize region\n";
+ VLIWScheduler->dump();
+ });
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (SUnit &SU : VLIWScheduler->SUnits)
+ MIToSUnit[SU.getInstr()] = &SU;
+
+ bool LimitPresent = InstrLimit.getPosition();
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ if (LimitPresent) {
+ if (InstrCount >= InstrLimit) {
+ EndItr = BeginItr;
+ break;
+ }
+ InstrCount++;
+ }
+ MachineInstr &MI = *BeginItr;
+ initPacketizerState();
+
+ // End the current packet if needed.
+ if (isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[&MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask the DFA whether machine resources are available for MI.
+ LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
+
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ LLVM_DEBUG({
+ if (ResourceAvail)
+ dbgs() << " Resources are available for adding MI to packet\n";
+ else
+ dbgs() << " Resources NOT available\n";
+ });
+ if (ResourceAvail && shouldAddToPacket(MI)) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (auto *MJ : CurrentPacketMIs) {
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ);
+ // Is it legal to packetize SUI and SUJ together?
+ if (!isLegalToPacketizeTogether(SUI, SUJ)) {
+ LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
+ // Allow packetization if dependency can be pruned.
+ if (!isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ LLVM_DEBUG(dbgs()
+ << " Could not prune dependencies for adding MI\n");
+ endPacket(MBB, MI);
+ break;
+ }
+ LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n");
+ }
+ }
+ } else {
+ LLVM_DEBUG(if (ResourceAvail) dbgs()
+ << "Resources are available, but instruction should not be "
+ "added to packet\n "
+ << MI);
+ // End the packet if resource is not available, or if the instruction
+ // should not be added to the current packet.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
+ BeginItr = addToPacket(MI);
+ } // For all instructions in the packetization range.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+ VLIWScheduler->exitRegion();
+ VLIWScheduler->finishBlock();
+}
+
+bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
+ const MachineMemOperand &Op2,
+ bool UseTBAA) const {
+ if (!Op1.getValue() || !Op2.getValue())
+ return true;
+
+ int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
+ int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
+ int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
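+ // For example (illustrative numbers): Op1 at offset 8 with size 4 and Op2
+ // at offset 4 with size 8 give MinOffset = 4, Overlapa = 4 + 8 - 4 = 8 and
+ // Overlapb = 8 + 4 - 4 = 8, i.e. each location is widened back to the
+ // common minimum offset.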
+
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
+ UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op2.getValue(), Overlapb,
+ UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
+
+ return AAResult != AliasResult::NoAlias;
+}
+
+bool VLIWPacketizerList::alias(const MachineInstr &MI1,
+ const MachineInstr &MI2,
+ bool UseTBAA) const {
+ if (MI1.memoperands_empty() || MI2.memoperands_empty())
+ return true;
+
+ for (const MachineMemOperand *Op1 : MI1.memoperands())
+ for (const MachineMemOperand *Op2 : MI2.memoperands())
+ if (alias(*Op1, *Op2, UseTBAA))
+ return true;
+ return false;
+}
+
+// Add a DAG mutation object to the ordered list.
+void VLIWPacketizerList::addMutation(
+ std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ VLIWScheduler->addMutation(std::move(Mutation));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 000000000000..6a7de3b241fe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,151 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dead-mi-elimination"
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ LiveRegUnits LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+
+ bool eliminateDeadMI(MachineFunction &MF);
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE,
+ "Remove dead machine instructions", false, false)
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm with no side effects and no defs can
+ // still be deleted. But there is so much bad inline asm code out there,
+ // we should just leave it alone.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Don't delete frame allocation labels.
+ if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+ return false;
+
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI())
+ return false;
+
+ // Examine each operand.
+ for (const MachineOperand &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
+ return false;
+ } else {
+ if (MO.isDead()) {
+#ifndef NDEBUG
+ // Basic check on the register. All of its uses should be 'undef'.
+ for (auto &U : MRI->use_nodbg_operands(Reg))
+ assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
+#endif
+ continue;
+ }
+ for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+ if (&Use != MI)
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ LivePhysRegs.init(*ST.getRegisterInfo());
+
+ bool AnyChanges = eliminateDeadMI(MF);
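+ // Deleting an instruction can make the instructions that feed it dead as
+ // well, so iterate until a pass over the function makes no further changes.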
+ while (AnyChanges && eliminateDeadMI(MF))
+ ;
+ return AnyChanges;
+}
+
+bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
+ bool AnyChanges = false;
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ LivePhysRegs.addLiveOuts(*MBB);
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineInstr &MI : make_early_inc_range(reverse(*MBB))) {
+ // If the instruction is dead, delete it!
+ if (isDead(&MI)) {
+ LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. They will be deleted in the live debug variable
+ // analysis.
+ MI.eraseFromParent();
+ AnyChanges = true;
+ ++NumDeletes;
+ continue;
+ }
+
+ LivePhysRegs.stepBackward(MI);
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
new file mode 100644
index 000000000000..86e9f3abe010
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -0,0 +1,566 @@
+//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Analysis that tracks defined/used subregister lanes across COPY instructions
+/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
+/// INSERT_SUBREG, EXTRACT_SUBREG).
+/// The information is used to detect dead definitions and the usage of
+/// (completely) undefined values and mark the operands as such.
+/// This pass is necessary because the dead/undef status is not obvious anymore
+/// when subregisters are involved.
+///
+/// Example:
+/// %0 = some definition
+/// %1 = IMPLICIT_DEF
+/// %2 = REG_SEQUENCE %0, sub0, %1, sub1
+/// %3 = EXTRACT_SUBREG %2, sub1
+/// = use %3
+/// The %0 definition is dead and %3 contains an undefined value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DetectDeadLanes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "detect-dead-lanes"
+
+DeadLaneDetector::DeadLaneDetector(const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI)
+ : MRI(MRI), TRI(TRI) {
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ VRegInfos = std::unique_ptr<VRegInfo[]>(new VRegInfo[NumVirtRegs]);
+ WorklistMembers.resize(NumVirtRegs);
+ DefinedByCopy.resize(NumVirtRegs);
+}
+
+/// Returns true if \p MI will get lowered to a series of COPY instructions.
+/// We call this a COPY-like instruction.
+static bool lowersToCopies(const MachineInstr &MI) {
+ // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(),
+ // isExtractSubRegLike(), isInsertSubregLike() in the future even though they
+ // are not lowered to a COPY.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::EXTRACT_SUBREG:
+ return true;
+ }
+ return false;
+}
+
+static bool isCrossCopy(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ const TargetRegisterClass *DstRC,
+ const MachineOperand &MO) {
+ assert(lowersToCopies(MI));
+ Register SrcReg = MO.getReg();
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ if (DstRC == SrcRC)
+ return false;
+
+ unsigned SrcSubIdx = MO.getSubReg();
+
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned DstSubIdx = 0;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::INSERT_SUBREG:
+ if (MO.getOperandNo() == 2)
+ DstSubIdx = MI.getOperand(3).getImm();
+ break;
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned OpNum = MO.getOperandNo();
+ DstSubIdx = MI.getOperand(OpNum+1).getImm();
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubReg = MI.getOperand(2).getImm();
+ SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx);
+ }
+ }
+
+ unsigned PreA, PreB; // Unused.
+ if (SrcSubIdx && DstSubIdx)
+ return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA,
+ PreB);
+ if (SrcSubIdx)
+ return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx);
+ if (DstSubIdx)
+ return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx);
+ return !TRI.getCommonSubClass(SrcRC, DstRC);
+}
+
+void DeadLaneDetector::addUsedLanesOnOperand(const MachineOperand &MO,
+ LaneBitmask UsedLanes) {
+ if (!MO.readsReg())
+ return;
+ Register MOReg = MO.getReg();
+ if (!MOReg.isVirtual())
+ return;
+
+ unsigned MOSubReg = MO.getSubReg();
+ if (MOSubReg != 0)
+ UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
+ UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
+
+ unsigned MORegIdx = Register::virtReg2Index(MOReg);
+ DeadLaneDetector::VRegInfo &MORegInfo = VRegInfos[MORegIdx];
+ LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
+ // Any change at all?
+ if ((UsedLanes & ~PrevUsedLanes).none())
+ return;
+
+ // Set UsedLanes and remember instruction for further propagation.
+ MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes;
+ if (DefinedByCopy.test(MORegIdx))
+ PutInWorklist(MORegIdx);
+}
+
+void DeadLaneDetector::transferUsedLanesStep(const MachineInstr &MI,
+ LaneBitmask UsedLanes) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
+ addUsedLanesOnOperand(MO, UsedOnMO);
+ }
+}
+
+LaneBitmask
+DeadLaneDetector::transferUsedLanes(const MachineInstr &MI,
+ LaneBitmask UsedLanes,
+ const MachineOperand &MO) const {
+ unsigned OpNum = MO.getOperandNo();
+ assert(lowersToCopies(MI) &&
+ DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ return UsedLanes;
+ case TargetOpcode::REG_SEQUENCE: {
+ assert(OpNum % 2 == 1);
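+ // E.g. for %2 = REG_SEQUENCE %0, sub0, %1, sub1, a use of only the sub1
+ // lanes of %2 maps back to a (full) use of the operand placed at sub1
+ // (here %1); the reverse composition below performs that mapping.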
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ return TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ LaneBitmask MO2UsedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ if (OpNum == 2)
+ return MO2UsedLanes;
+
+ const MachineOperand &Def = MI.getOperand(0);
+ Register DefReg = Def.getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LaneBitmask MO1UsedLanes;
+ if (RC->CoveredBySubRegs)
+ MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx);
+ else
+ MO1UsedLanes = RC->LaneMask;
+
+ assert(OpNum == 1);
+ return MO1UsedLanes;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ assert(OpNum == 1);
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ return TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ }
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+}
+
+void DeadLaneDetector::transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes) {
+ if (!Use.readsReg())
+ return;
+ // Check whether the operand writes a vreg and is part of a COPY-like
+ // instruction.
+ const MachineInstr &MI = *Use.getParent();
+ if (MI.getDesc().getNumDefs() != 1)
+ return;
+ // FIXME: PATCHPOINT instructions announce a Def that does not always exist,
+ // they really need to be modeled differently!
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ return;
+ const MachineOperand &Def = *MI.defs().begin();
+ Register DefReg = Def.getReg();
+ if (!DefReg.isVirtual())
+ return;
+ unsigned DefRegIdx = Register::virtReg2Index(DefReg);
+ if (!DefinedByCopy.test(DefRegIdx))
+ return;
+
+ unsigned OpNum = Use.getOperandNo();
+ DefinedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
+ DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
+
+ VRegInfo &RegInfo = VRegInfos[DefRegIdx];
+ LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
+ // Any change at all?
+ if ((DefinedLanes & ~PrevDefinedLanes).none())
+ return;
+
+ RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
+ PutInWorklist(DefRegIdx);
+}
+
+LaneBitmask DeadLaneDetector::transferDefinedLanes(
+ const MachineOperand &Def, unsigned OpNum, LaneBitmask DefinedLanes) const {
+ const MachineInstr &MI = *Def.getParent();
+ // Translate DefinedLanes if necessary.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::REG_SEQUENCE: {
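+ // Lanes defined through a REG_SEQUENCE input end up in the subregister
+ // named by the accompanying index operand, so compose into that
+ // subregister and clip to its lane mask.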
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ break;
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ if (OpNum == 2) {
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ assert(OpNum == 1 && "INSERT_SUBREG must have two operands");
+ // Ignore lanes defined by operand 2.
+ DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx);
+ }
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only");
+ DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ break;
+ }
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ break;
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+
+ assert(Def.getSubReg() == 0 &&
+ "Should not have subregister defs in machine SSA phase");
+ DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg());
+ return DefinedLanes;
+}
+
+LaneBitmask DeadLaneDetector::determineInitialDefinedLanes(unsigned Reg) {
+ // Live-In or unused registers have no definition but are considered fully
+ // defined.
+ if (!MRI->hasOneDef(Reg))
+ return LaneBitmask::getAll();
+
+ const MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &DefMI = *Def.getParent();
+ if (lowersToCopies(DefMI)) {
+ // Start optimistically with no used or defined lanes for copy
+ // instructions. The following dataflow analysis will add more bits.
+ unsigned RegIdx = Register::virtReg2Index(Reg);
+ DefinedByCopy.set(RegIdx);
+ PutInWorklist(RegIdx);
+
+ if (Def.isDead())
+ return LaneBitmask::getNone();
+
+ // COPY/PHI can copy across unrelated register classes (example: float/int)
+ // with incompatible subregister structure. Do not include these in the
+ // dataflow analysis since we cannot transfer lanemasks in a meaningful way.
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+
+ // Determine initially DefinedLanes.
+ LaneBitmask DefinedLanes;
+ for (const MachineOperand &MO : DefMI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ Register MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+
+ LaneBitmask MODefinedLanes;
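+ // Physical registers and copies across incompatible register classes are
+ // conservatively treated as fully defined.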
+ if (MOReg.isPhysical()) {
+ MODefinedLanes = LaneBitmask::getAll();
+ } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
+ MODefinedLanes = LaneBitmask::getAll();
+ } else {
+ assert(MOReg.isVirtual());
+ if (MRI->hasOneDef(MOReg)) {
+ const MachineOperand &MODef = *MRI->def_begin(MOReg);
+ const MachineInstr &MODefMI = *MODef.getParent();
+ // Bits from copy-like operations will be added later.
+ if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef())
+ continue;
+ }
+ unsigned MOSubReg = MO.getSubReg();
+ MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg);
+ MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(
+ MOSubReg, MODefinedLanes);
+ }
+
+ unsigned OpNum = MO.getOperandNo();
+ DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
+ }
+ return DefinedLanes;
+ }
+ if (DefMI.isImplicitDef() || Def.isDead())
+ return LaneBitmask::getNone();
+
+ assert(Def.getSubReg() == 0 &&
+ "Should not have subregister defs in machine SSA phase");
+ return MRI->getMaxLaneMaskForVReg(Reg);
+}
+
+LaneBitmask DeadLaneDetector::determineInitialUsedLanes(unsigned Reg) {
+ LaneBitmask UsedLanes = LaneBitmask::getNone();
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ if (!MO.readsReg())
+ continue;
+
+ const MachineInstr &UseMI = *MO.getParent();
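+ // A KILL pseudo-instruction is not a real read of any lanes.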
+ if (UseMI.isKill())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (lowersToCopies(UseMI)) {
+ assert(UseMI.getDesc().getNumDefs() == 1);
+ const MachineOperand &Def = *UseMI.defs().begin();
+ Register DefReg = Def.getReg();
+ // The used lanes of COPY-like instruction operands are determined by the
+ // following dataflow analysis.
+ if (DefReg.isVirtual()) {
+ // But ignore copies across incompatible register classes.
+ bool CrossCopy = false;
+ if (lowersToCopies(UseMI)) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
+ if (CrossCopy)
+ LLVM_DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
+ }
+
+ if (!CrossCopy)
+ continue;
+ }
+ }
+
+ // Shortcut: All lanes are used.
+ if (SubReg == 0)
+ return MRI->getMaxLaneMaskForVReg(Reg);
+
+ UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg);
+ }
+ return UsedLanes;
+}
+
+namespace {
+
+class DetectDeadLanes : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ DetectDeadLanes() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "Detect Dead Lanes"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ /// Update the operand status.
+ /// The first return value shows whether MF has been changed.
+ /// The second return value indicates that we need to call
+ /// DeadLaneDetector::computeSubRegisterLaneBitInfo and this function again
+ /// to propagate changes.
+ std::pair<bool, bool>
+ modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
+ MachineFunction &MF);
+
+ bool isUndefRegAtInput(const MachineOperand &MO,
+ const DeadLaneDetector::VRegInfo &RegInfo) const;
+
+ bool isUndefInput(const DeadLaneDetector &DLD, const MachineOperand &MO,
+ bool *CrossCopy) const;
+
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+};
+
+} // end anonymous namespace
+
+char DetectDeadLanes::ID = 0;
+char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
+
+INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
+
+bool DetectDeadLanes::isUndefRegAtInput(
+ const MachineOperand &MO, const DeadLaneDetector::VRegInfo &RegInfo) const {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none();
+}
+
+bool DetectDeadLanes::isUndefInput(const DeadLaneDetector &DLD,
+ const MachineOperand &MO,
+ bool *CrossCopy) const {
+ if (!MO.isUse())
+ return false;
+ const MachineInstr &MI = *MO.getParent();
+ if (!lowersToCopies(MI))
+ return false;
+ const MachineOperand &Def = MI.getOperand(0);
+ Register DefReg = Def.getReg();
+ if (!DefReg.isVirtual())
+ return false;
+ unsigned DefRegIdx = Register::virtReg2Index(DefReg);
+ if (!DLD.isDefinedByCopy(DefRegIdx))
+ return false;
+
+ const DeadLaneDetector::VRegInfo &DefRegInfo = DLD.getVRegInfo(DefRegIdx);
+ LaneBitmask UsedLanes = DLD.transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
+ if (UsedLanes.any())
+ return false;
+
+ Register MOReg = MO.getReg();
+ if (MOReg.isVirtual()) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
+ }
+ return true;
+}
+
+void DeadLaneDetector::computeSubRegisterLaneBitInfo() {
+ // First pass: Populate defs/uses of vregs with initial values
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ Register Reg = Register::index2VirtReg(RegIdx);
+
+ // Determine used/defined lanes and add copy instructions to worklist.
+ VRegInfo &Info = VRegInfos[RegIdx];
+ Info.DefinedLanes = determineInitialDefinedLanes(Reg);
+ Info.UsedLanes = determineInitialUsedLanes(Reg);
+ }
+
+ // Iterate as long as defined lanes/used lanes keep changing.
+ while (!Worklist.empty()) {
+ unsigned RegIdx = Worklist.front();
+ Worklist.pop_front();
+ WorklistMembers.reset(RegIdx);
+ VRegInfo &Info = VRegInfos[RegIdx];
+ Register Reg = Register::index2VirtReg(RegIdx);
+
+ // Transfer UsedLanes to operands of DefMI (backwards dataflow).
+ MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &MI = *Def.getParent();
+ transferUsedLanesStep(MI, Info.UsedLanes);
+ // Transfer DefinedLanes to users of Reg (forward dataflow).
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg))
+ transferDefinedLanesStep(MO, Info.DefinedLanes);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Defined/Used lanes:\n";
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ Register Reg = Register::index2VirtReg(RegIdx);
+ const VRegInfo &Info = VRegInfos[RegIdx];
+ dbgs() << printReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
+ }
+ dbgs() << "\n";
+ });
+}
+
+std::pair<bool, bool>
+DetectDeadLanes::modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
+ MachineFunction &MF) {
+ bool Changed = false;
+ bool Again = false;
+ // Mark operands as dead/unused.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ unsigned RegIdx = Register::virtReg2Index(Reg);
+ const DeadLaneDetector::VRegInfo &RegInfo = DLD.getVRegInfo(RegIdx);
+ if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
+ LLVM_DEBUG(dbgs()
+ << "Marking operand '" << MO << "' as dead in " << MI);
+ MO.setIsDead();
+ Changed = true;
+ }
+ if (MO.readsReg()) {
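+ // A read is undef either when none of its lanes are both defined and
+ // used, or when a COPY-like user needs none of the lanes this operand
+ // provides.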
+ bool CrossCopy = false;
+ if (isUndefRegAtInput(MO, RegInfo)) {
+ LLVM_DEBUG(dbgs()
+ << "Marking operand '" << MO << "' as undef in " << MI);
+ MO.setIsUndef();
+ Changed = true;
+ } else if (isUndefInput(DLD, MO, &CrossCopy)) {
+ LLVM_DEBUG(dbgs()
+ << "Marking operand '" << MO << "' as undef in " << MI);
+ MO.setIsUndef();
+ Changed = true;
+ if (CrossCopy)
+ Again = true;
+ }
+ }
+ }
+ }
+ }
+
+ return std::make_pair(Changed, Again);
+}
+
+bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
+ // Don't bother if we won't track subregister liveness later. This pass is
+ // required for correctness if subregister liveness is enabled because the
+ // register coalescer cannot deal with hidden dead defs. However without
+ // subregister liveness enabled, the expected benefits of this pass are small
+ // so we save compile time.
+ MRI = &MF.getRegInfo();
+ if (!MRI->subRegLivenessEnabled()) {
+ LLVM_DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
+ return false;
+ }
+
+ TRI = MRI->getTargetRegisterInfo();
+
+ DeadLaneDetector DLD(MRI, TRI);
+
+ bool Changed = false;
+ bool Again;
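+ // Marking cross-class copy inputs undef can expose further dead lanes, so
+ // recompute the lane info and rescan until nothing changes.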
+ do {
+ DLD.computeSubRegisterLaneBitInfo();
+ bool LocalChanged;
+ std::tie(LocalChanged, Again) = modifySubRegisterOperandStatus(DLD, MF);
+ Changed |= LocalChanged;
+ } while (Again);
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 000000000000..32c94de7280c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,380 @@
+//===- DwarfEHPrepare - Prepare exception handling for code generation ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cstddef>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfehprepare"
+
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
+STATISTIC(NumCleanupLandingPadsUnreachable,
+ "Number of cleanup landing pads found unreachable");
+STATISTIC(NumCleanupLandingPadsRemaining,
+ "Number of cleanup landing pads remaining");
+STATISTIC(NumNoUnwind, "Number of functions with nounwind");
+STATISTIC(NumUnwind, "Number of functions with unwind");
+
+namespace {
+
+class DwarfEHPrepare {
+ CodeGenOpt::Level OptLevel;
+
+ Function &F;
+ const TargetLowering &TLI;
+ DomTreeUpdater *DTU;
+ const TargetTransformInfo *TTI;
+ const Triple &TargetTriple;
+
+ /// Return the exception object from the value passed into
+ /// the 'resume' instruction (typically an aggregate). Clean up any dead
+ /// instructions, including the 'resume' instruction.
+ Value *GetExceptionObject(ResumeInst *RI);
+
+ /// Replace resumes that are not reachable from a cleanup landing pad with
+ /// unreachable and then simplify those blocks.
+ size_t
+ pruneUnreachableResumes(SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
+
+ /// Convert the ResumeInsts that are still present
+ /// into calls to the appropriate _Unwind_Resume function.
+ bool InsertUnwindResumeCalls();
+
+public:
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_,
+ const TargetLowering &TLI_, DomTreeUpdater *DTU_,
+ const TargetTransformInfo *TTI_, const Triple &TargetTriple_)
+ : OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_),
+ TargetTriple(TargetTriple_) {}
+
+ bool run();
+};
+
+} // namespace
+
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Value *ExnObj = nullptr;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = nullptr;
+ InsertValueInst *ExcIVI = nullptr;
+ bool EraseIVIs = false;
+
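+ // Recognize the common pattern where the resume operand was built by two
+ // insertvalue instructions: the exception object inserted at index 0 and
+ // the selector at index 1. If matched, take the exception object directly
+ // and erase the now-dead insertvalues (and selector load) below.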
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = ExcIVI->getOperand(1);
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
+ }
+ }
+ }
+
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
+
+ RI->eraseFromParent();
+
+ if (EraseIVIs) {
+ if (SelIVI->use_empty())
+ SelIVI->eraseFromParent();
+ if (ExcIVI->use_empty())
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->use_empty())
+ SelLoad->eraseFromParent();
+ }
+
+ return ExnObj;
+}
+
+size_t DwarfEHPrepare::pruneUnreachableResumes(
+ SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ assert(DTU && "Should have DomTreeUpdater here.");
+
+ BitVector ResumeReachable(Resumes.size());
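+ // Record which resumes are reachable from at least one cleanup landing pad.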
+ size_t ResumeIndex = 0;
+ for (auto *RI : Resumes) {
+ for (auto *LP : CleanupLPads) {
+ if (isPotentiallyReachable(LP, RI, nullptr, &DTU->getDomTree())) {
+ ResumeReachable.set(ResumeIndex);
+ break;
+ }
+ }
+ ++ResumeIndex;
+ }
+
+ // If everything is reachable, there is no change.
+ if (ResumeReachable.all())
+ return Resumes.size();
+
+ LLVMContext &Ctx = F.getContext();
+
+ // Otherwise, insert unreachable instructions and call simplifycfg.
+ size_t ResumesLeft = 0;
+ for (size_t I = 0, E = Resumes.size(); I < E; ++I) {
+ ResumeInst *RI = Resumes[I];
+ if (ResumeReachable[I]) {
+ Resumes[ResumesLeft++] = RI;
+ } else {
+ BasicBlock *BB = RI->getParent();
+ new UnreachableInst(Ctx, RI);
+ RI->eraseFromParent();
+ simplifyCFG(BB, *TTI, DTU);
+ }
+ }
+ Resumes.resize(ResumesLeft);
+ return ResumesLeft;
+}
+
+bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+ SmallVector<ResumeInst *, 16> Resumes;
+ SmallVector<LandingPadInst *, 16> CleanupLPads;
+ if (F.doesNotThrow())
+ NumNoUnwind++;
+ else
+ NumUnwind++;
+ for (BasicBlock &BB : F) {
+ if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
+ Resumes.push_back(RI);
+ if (auto *LP = BB.getLandingPadInst())
+ if (LP->isCleanup())
+ CleanupLPads.push_back(LP);
+ }
+
+ NumCleanupLandingPadsRemaining += CleanupLPads.size();
+
+ if (Resumes.empty())
+ return false;
+
+ // Check the personality, don't do anything if it's scope-based.
+ EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
+ if (isScopedEHPersonality(Pers))
+ return false;
+
+ LLVMContext &Ctx = F.getContext();
+
+ size_t ResumesLeft = Resumes.size();
+ if (OptLevel != CodeGenOpt::None) {
+ ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads);
+#if LLVM_ENABLE_STATS
+ unsigned NumRemainingLPs = 0;
+ for (BasicBlock &BB : F) {
+ if (auto *LP = BB.getLandingPadInst())
+ if (LP->isCleanup())
+ NumRemainingLPs++;
+ }
+ NumCleanupLandingPadsUnreachable += CleanupLPads.size() - NumRemainingLPs;
+ NumCleanupLandingPadsRemaining -= CleanupLPads.size() - NumRemainingLPs;
+#endif
+ }
+
+ if (ResumesLeft == 0)
+ return true; // We pruned them all.
+
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee RewindFunction;
+ CallingConv::ID RewindFunctionCallingConv;
+ FunctionType *FTy;
+ const char *RewindName;
+ bool DoesRewindFunctionNeedExceptionObject;
+
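+ // EHABI-compatible targets with a GNU C++ personality end the cleanup path
+ // with the CXA_END_CLEANUP libcall, which takes no arguments; every other
+ // configuration calls the UNWIND_RESUME libcall with the exception object.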
+ if ((Pers == EHPersonality::GNU_CXX || Pers == EHPersonality::GNU_CXX_SjLj) &&
+ TargetTriple.isTargetEHABICompatible()) {
+ RewindName = TLI.getLibcallName(RTLIB::CXA_END_CLEANUP);
+ FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+ RewindFunctionCallingConv =
+ TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP);
+ DoesRewindFunctionNeedExceptionObject = false;
+ } else {
+ RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
+ FTy =
+ FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
+ RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME);
+ DoesRewindFunctionNeedExceptionObject = true;
+ }
+ RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
+
+ // Create the basic block where the _Unwind_Resume call will live.
+ if (ResumesLeft == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Value *ExnObj = GetExceptionObject(RI);
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(ExnObj);
+
+ // Call the rewind function.
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ // The verifier requires that all calls of debug-info-bearing functions
+ // from debug-info-bearing functions have a debug location (for inlining
+ // purposes). Assign a dummy location to satisfy the constraint.
+ Function *RewindFn = dyn_cast<Function>(RewindFunction.getCallee());
+ if (RewindFn && RewindFn->getSubprogram())
+ if (DISubprogram *SP = F.getSubprogram())
+ CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP));
+ CI->setCallingConv(RewindFunctionCallingConv);
+
+ // We never expect _Unwind_Resume to return.
+ CI->setDoesNotReturn();
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(Resumes.size());
+
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
+ UnwindBB);
+
+ // Extract the exception object from the ResumeInst and add it to the PHI node
+ // that feeds the _Unwind_Resume call.
+ for (ResumeInst *RI : Resumes) {
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+ Updates.push_back({DominatorTree::Insert, Parent, UnwindBB});
+
+ Value *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
+ }
+
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(PN);
+
+ // Call the function.
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
+
+ // We never expect _Unwind_Resume to return.
+ CI->setDoesNotReturn();
+ new UnreachableInst(Ctx, UnwindBB);
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ return true;
+}
+
+bool DwarfEHPrepare::run() {
+ bool Changed = InsertUnwindResumeCalls();
+
+ return Changed;
+}
+
+static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F,
+ const TargetLowering &TLI, DominatorTree *DT,
+ const TargetTransformInfo *TTI,
+ const Triple &TargetTriple) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ return DwarfEHPrepare(OptLevel, F, TLI, DT ? &DTU : nullptr, TTI,
+ TargetTriple)
+ .run();
+}
+
+namespace {
+
+class DwarfEHPrepareLegacyPass : public FunctionPass {
+
+ CodeGenOpt::Level OptLevel;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ DwarfEHPrepareLegacyPass(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
+ : FunctionPass(ID), OptLevel(OptLevel) {}
+
+ bool runOnFunction(Function &F) override {
+ const TargetMachine &TM =
+ getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering();
+ DominatorTree *DT = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ if (OptLevel != CodeGenOpt::None) {
+ if (!DT)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ }
+ return prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM.getTargetTriple());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (OptLevel != CodeGenOpt::None) {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ StringRef getPassName() const override {
+ return "Exception handling preparation";
+ }
+};
+
+} // end anonymous namespace
+
+char DwarfEHPrepareLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
+
+FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
+ return new DwarfEHPrepareLegacyPass(OptLevel);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
new file mode 100644
index 000000000000..b26aa792bb93
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -0,0 +1,82 @@
+//===-- EHContGuardCatchret.cpp - Catchret target symbols -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a machine function pass to insert a symbol before each
+/// valid catchret target and store this in the MachineFunction's
+/// CatchRetTargets vector. This will be used to emit the table of valid targets
+/// used by EHCont Guard.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ehcontguard-catchret"
+
+STATISTIC(EHContGuardCatchretTargets,
+ "Number of EHCont Guard catchret targets");
+
+namespace {
+
+/// MachineFunction pass to insert a symbol before each valid catchret target
+/// and store these in the MachineFunction's CatchRetTargets vector.
+class EHContGuardCatchret : public MachineFunctionPass {
+public:
+ static char ID;
+
+ EHContGuardCatchret() : MachineFunctionPass(ID) {
+ initializeEHContGuardCatchretPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "EH Cont Guard catchret targets";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char EHContGuardCatchret::ID = 0;
+
+INITIALIZE_PASS(EHContGuardCatchret, "EHContGuardCatchret",
+ "Insert symbols at valid catchret targets for /guard:ehcont",
+ false, false)
+FunctionPass *llvm::createEHContGuardCatchretPass() {
+ return new EHContGuardCatchret();
+}
+
+bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) {
+
+ // Skip modules for which the ehcontguard flag is not set.
+ if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard"))
+ return false;
+
+ // Skip functions that do not have catchret
+ if (!MF.hasEHCatchret())
+ return false;
+
+ bool Result = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHCatchretTarget()) {
+ MF.addCatchretTarget(MBB.getEHCatchretSymbol());
+ EHContGuardCatchretTargets++;
+ Result = true;
+ }
+ }
+
+ return Result;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 000000000000..61867d74bfa2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,1244 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "early-ifcvt"
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+// Triangle: Head Diamond: Head
+// | \ / \_
+// | \ / |
+// | [TF]BB FBB TBB
+// | / \ /
+// | / \ /
+// Tail Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
+//
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by analyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by analyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg = 0, FReg = 0;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles = 0, TCycles = 0, FCycles = 0;
+
+ PHIInfo(MachineInstr *phi) : PHI(phi) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+ /// The branch condition determined by analyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+private:
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+ /// Insertion point in Head for speculatively executed instructions from TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// predicated.
+ bool canPredicateInstrs(MachineBasicBlock *MBB);
+
+ /// Scan through instruction dependencies and update InsertAfter array.
+ /// Return false if any dependency is incompatible with if conversion.
+ bool InstrDependenciesAllowIfConv(MachineInstr *I);
+
+ /// Predicate all instructions of the basic block with current condition
+ /// except for terminators. Reverse the condition if ReversePredicate is set.
+ void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ /// If predicate is set try to predicate the block otherwise try to
+ /// speculatively execute it.
+ bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+ bool Predicate = false);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) {
+ if (MI.isDebugInstr())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (MI.isPHI()) {
+ LLVM_DEBUG(dbgs() << "Can't hoist: " << MI);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (MI.mayLoad()) {
+ LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!MI.isSafeToMove(nullptr, DontMoveAcrossStore)) {
+ LLVM_DEBUG(dbgs() << "Can't speculate: " << MI);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ if (!InstrDependenciesAllowIfConv(&MI))
+ return false;
+ }
+ return true;
+}
+
+/// Check that there are no dependencies preventing if-conversion.
+///
+/// If the instruction uses any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask()) {
+ LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+
+ // Remember clobbered regunits.
+ if (MO.isDef() && Reg.isPhysical())
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ ClobberedRegUnits.set(Unit);
+
+ if (!MO.readsReg() || !Reg.isVirtual())
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI).second)
+ LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on "
+ << *DefMI);
+ if (DefMI->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/// canPredicateInstrs - Returns true if all the instructions in MBB can safely
+/// be predicates. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ LLVM_DEBUG(dbgs() << "Can't predicate: " << *I);
+ return false;
+ }
+
+ // Check that instruction is predicable
+ if (!TII->isPredicable(*I)) {
+ LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I);
+ return false;
+ }
+
+ // Check that instruction is not already predicated.
+ if (TII->isPredicated(*I) && !TII->canPredicatePredicatedInstr(*I)) {
+ LLVM_DEBUG(dbgs() << "Is already predicated: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ if (!InstrDependenciesAllowIfConv(&(*I)))
+ return false;
+ }
+ return true;
+}
+
+// Apply predicate to all instructions in the machine block.
+void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {
+ auto Condition = Cond;
+ if (ReversePredicate) {
+ bool CanRevCond = !TII->reverseBranchCondition(Condition);
+ assert(CanRevCond && "Reversed predicate is not supported");
+ (void)CanRevCond;
+ }
+ // Terminators don't need to be predicated as they will be removed.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+ TII->PredicateInstruction(*I, Condition);
+ }
+}
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful, it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<MCRegister, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+ // Some of the conditional code depends on I.
+ if (InsertAfter.count(&*I)) {
+ LLVM_DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (const MachineOperand &MO : I->operands()) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO.isDef())
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ LiveRegUnits.erase(Unit);
+ // Unless I reads Reg.
+ if (MO.readsReg())
+ Reads.push_back(Reg.asMCReg());
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnit Unit : TRI->regunits(Reads.pop_back_val()))
+ if (ClobberedRegUnits.test(Unit))
+ LiveRegUnits.insert(Unit);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+ // Some of the clobbered registers are live before I, not a valid insertion
+ // point.
+ if (!LiveRegUnits.empty()) {
+ LLVM_DEBUG({
+ dbgs() << "Would clobber";
+ for (unsigned LRU : LiveRegUnits)
+ dbgs() << ' ' << printRegUnit(LRU, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ LLVM_DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
+ Head = MBB;
+ TBB = FBB = Tail = nullptr;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
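+ // The conditional block on the Succ0 side must have Head as its only
+ // predecessor and a single successor, the prospective Tail.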
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ Tail = Succ0->succ_begin()[0];
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*Succ0) << "/"
+ << printMBBReference(*Succ1) << " -> "
+ << printMBBReference(*Tail) << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ LLVM_DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*Succ0) << " -> "
+ << printMBBReference(*Tail) << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If we cannot predicate and there are no phis, skip: any side effects
+ // could only be handled with predication.
+ if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) {
+ LLVM_DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {
+ LLVM_DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // Make sure the analyzed branch is conditional; one of the successors
+ // could be a landing pad. (Empty landing pads can be generated on Windows.)
+ if (Cond.empty()) {
+ LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n");
+ return false;
+ }
+
+ // analyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
+ PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
+ PI.FCycles)) {
+ LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (Predicate) {
+ if (TBB != Tail && !canPredicateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canPredicateInstrs(FBB))
+ return false;
+ } else {
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+ }
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
+ return true;
+}
+
+/// \return true iff the two registers are known to have the same value.
+static bool hasSameValue(const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, Register TReg,
+ Register FReg) {
+ if (TReg == FReg)
+ return true;
+
+ if (!TReg.isVirtual() || !FReg.isVirtual())
+ return false;
+
+ const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg);
+ const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg);
+ if (!TDef || !FDef)
+ return false;
+
+ // If there are side-effects, all bets are off.
+ if (TDef->hasUnmodeledSideEffects())
+ return false;
+
+ // If the instruction could modify memory, or there may be some intervening
+ // store between the two, we can't consider them to be equal.
+ if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad())
+ return false;
+
+ // We also can't guarantee that they are the same if, for example, the
+ // instructions are both a copy from a physical reg, because some other
+ // instruction may have modified the value in that reg between the two
+ // defining insts.
+ if (any_of(TDef->uses(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isPhysical();
+ }))
+ return false;
+
+ // Check whether the two defining instructions produce the same value(s).
+ if (!TII->produceSameValue(*TDef, *FDef, &MRI))
+ return false;
+
+ // Further, check that the two defs come from corresponding operands.
+ int TIdx = TDef->findRegisterDefOperandIdx(TReg);
+ int FIdx = FDef->findRegisterDefOperandIdx(FReg);
+ if (TIdx == -1 || FIdx == -1)
+ return false;
+
+ return TIdx == FIdx;
+}
+
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ Register DstReg = PI.PHI->getOperand(0).getReg();
+ if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
+ // We do not need the select instruction if both incoming values are
+ // equal, but we do need a COPY.
+ BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(PI.TReg);
+ } else {
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg,
+ PI.FReg);
+ }
+ LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = nullptr;
+ }
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ unsigned DstReg = 0;
+
+ LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
+ // We do not need the select instruction if both incoming values are
+ // equal.
+ DstReg = PI.TReg;
+ } else {
+ Register PHIDst = PI.PHI->getOperand(0).getReg();
+ DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL,
+ DstReg, Cond, PI.TReg, PI.FReg);
+ LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ }
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->removeOperand(i-1);
+ PI.PHI->removeOperand(i-2);
+ }
+ }
+ LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+ bool Predicate) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail) {
+ if (Predicate)
+ PredicateBlock(TBB, /*ReversePredicate=*/false);
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ }
+ if (FBB != Tail) {
+ if (Predicate)
+ PredicateBlock(FBB, /*ReversePredicate=*/true);
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+ }
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
+
+ // Fix up the CFG, temporarily leave Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB, true);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail, true);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail, true);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+ TII->removeBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail)
+ << " into head " << printMBBReference(*Head) << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail, let code placement work it out later.
+ LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ LLVM_DEBUG(dbgs() << *Head);
+}
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ MachineTraceMetrics *Traces = nullptr;
+ MachineTraceMetrics::Ensemble *MinInstr = nullptr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "Early If-Conversion"; }
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void invalidateTraces();
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
+ "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
+ "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+namespace {
+/// Update the dominator tree after if-conversion erased some blocks.
+void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
+ ArrayRef<MachineBasicBlock *> Removed) {
+ // convertIf can remove TBB and FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (auto *B : Removed) {
+ MachineDomTreeNode *Node = DomTree->getNode(B);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->back(), HeadNode);
+ }
+ DomTree->eraseNode(B);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void updateLoops(MachineLoopInfo *Loops,
+ ArrayRef<MachineBasicBlock *> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (auto *B : Removed)
+ Loops->removeBlock(B);
+}
+} // namespace
+
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
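+// If Delta is negative and larger in magnitude than Cyc, the unsigned
+// addition would wrap around; clamp the result to 0 instead.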
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
+
+namespace {
+/// Helper class to simplify emission of cycle counts into optimization remarks.
+struct Cycles {
+ const char *Key;
+ unsigned Value;
+};
+template <typename Remark> Remark &operator<<(Remark &R, Cycles C) {
+ return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles");
+}
+} // anonymous namespace
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ // Do not try to if-convert if the condition has a high chance of being
+ // predictable.
+ MachineLoop *CurrentLoop = Loops->getLoopFor(IfConv.Head);
+ // If the condition is in a loop, consider it predictable if the condition
+ // itself or all its operands are loop-invariant. E.g. this considers a load
+ // from a loop-invariant address predictable; we were unable to prove that it
+ // doesn't alias any of the memory-writes in the loop, but it is likely to
+ // read the same value multiple times.
+ if (CurrentLoop && any_of(IfConv.Cond, [&](MachineOperand &MO) {
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ return CurrentLoop->isLoopInvariant(*Def) ||
+ all_of(Def->operands(), [&](MachineOperand &Op) {
+ if (Op.isImm())
+ return true;
+ if (!Op.isReg() || !Op.isUse())
+ return false;
+ Register Reg = Op.getReg();
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ return CurrentLoop->isLoopInvariant(*Def);
+ });
+ }))
+ return false;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
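+ // For example, a 16-cycle misprediction penalty allows extending the critical
+ // path by up to 8 cycles.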
+ unsigned CritLimit = SchedModel.MispredictPenalty/2;
+
+ MachineBasicBlock &MBB = *IfConv.Head;
+ MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr);
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ LLVM_DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
+ MBB.findDebugLoc(MBB.back()), &MBB);
+ R << "did not if-convert branch: the resulting critical path ("
+ << Cycles{"ResLength", ResLength}
+ << ") would extend the shorter leg's critical path ("
+ << Cycles{"MinCrit", MinCrit} << ") by more than the threshold of "
+ << Cycles{"CritLimit", CritLimit}
+ << ", which cannot be hidden by available ILP.";
+ return R;
+ });
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;
+ LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ struct CriticalPathInfo {
+ unsigned Extra; // Count of extra cycles that the component adds.
+ unsigned Depth; // Absolute depth of the component in cycles.
+ };
+ CriticalPathInfo Cond{};
+ CriticalPathInfo TBlock{};
+ CriticalPathInfo FBlock{};
+ bool ShouldConvert = true;
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
+ LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > Cond.Extra)
+ Cond = {Extra, CondDepth};
+ if (Extra > CritLimit) {
+ LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ ShouldConvert = false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > TBlock.Extra)
+ TBlock = {Extra, TDepth};
+ if (Extra > CritLimit) {
+ LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ ShouldConvert = false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > FBlock.Extra)
+ FBlock = {Extra, FDepth};
+ if (Extra > CritLimit) {
+ LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ ShouldConvert = false;
+ }
+ }
+ }
+
+ // Organize by "short" and "long" legs, since the diagnostics get confusing
+ // when referring to the "true" and "false" sides of the branch, given that
+ // those don't always correlate with what the user wrote in source-terms.
+ const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock;
+ const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock;
+
+ if (ShouldConvert) {
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion",
+ MBB.back().getDebugLoc(), &MBB);
+ R << "performing if-conversion on branch: the condition adds "
+ << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
+ if (Short.Extra > 0)
+ R << ", and the short leg adds another "
+ << Cycles{"ShortCycles", Short.Extra};
+ if (Long.Extra > 0)
+ R << ", and the long leg adds another "
+ << Cycles{"LongCycles", Long.Extra};
+ R << ", each staying under the threshold of "
+ << Cycles{"CritLimit", CritLimit} << ".";
+ return R;
+ });
+ } else {
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
+ MBB.back().getDebugLoc(), &MBB);
+ R << "did not if-convert branch: the condition would add "
+ << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
+ if (Cond.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ if (Short.Extra > 0) {
+ R << ", and the short leg would add another "
+ << Cycles{"ShortCycles", Short.Extra};
+ if (Short.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ }
+ if (Long.Extra > 0) {
+ R << ", and the long leg would add another "
+ << Cycles{"LongCycles", Long.Extra};
+ if (Long.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ }
+ R << ".";
+ return R;
+ });
+ }
+
+ return ShouldConvert;
+}
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ invalidateTraces();
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(DomTree, IfConv, RemovedBlocks);
+ updateLoops(Loops, RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ // Only run if-conversion if the target wants it.
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ if (!STI.enableEarlyIfConversion())
+ return false;
+
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = nullptr;
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (auto *DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// EarlyIfPredicator Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfPredicator : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ TargetSchedModel SchedModel;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineBranchProbabilityInfo *MBPI = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfPredicator() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "Early If-predicator"; }
+
+protected:
+ bool tryConvertIf(MachineBasicBlock *);
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "early-if-predicator"
+
+char EarlyIfPredicator::ID = 0;
+char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
+ false)
+
+void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Apply the target heuristic to decide if the transformation is profitable.
+bool EarlyIfPredicator::shouldConvertIf() {
+ auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB);
+ if (IfConv.isTriangle()) {
+ MachineBasicBlock &IfBlock =
+ (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;
+
+ unsigned ExtraPredCost = 0;
+ unsigned Cycles = 0;
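+ // Sum the latency beyond one cycle per instruction, plus the target's cost of
+ // predicating each instruction in the conditional block.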
+ for (MachineInstr &I : IfBlock) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ Cycles += NumCycles - 1;
+ ExtraPredCost += TII->getPredicationCost(I);
+ }
+
+ return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,
+ TrueProbability);
+ }
+ unsigned TExtra = 0;
+ unsigned FExtra = 0;
+ unsigned TCycle = 0;
+ unsigned FCycle = 0;
+ for (MachineInstr &I : *IfConv.TBB) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ TCycle += NumCycles - 1;
+ TExtra += TII->getPredicationCost(I);
+ }
+ for (MachineInstr &I : *IfConv.FBB) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ FCycle += NumCycles - 1;
+ FExtra += TII->getPredicationCost(I);
+ }
+ return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,
+ FCycle, FExtra, TrueProbability);
+}
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks, /*Predicate*/ true);
+ Changed = true;
+ updateDomTree(DomTree, IfConv, RemovedBlocks);
+ updateLoops(Loops, RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(&STI);
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (auto *DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..3dd354e8ab7e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,101 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* is_analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
+
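+ // Each block N owns two equivalence-class nodes: 2*N for the bundle of edges
+ // entering the block and 2*N+1 for the bundle of edges leaving it.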
+ for (const auto &MBB : *MF) {
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ EC.join(OutE, 2 * Succ->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, false);
+ unsigned b1 = getBundle(i, true);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
+
+namespace llvm {
+
+/// Specialize WriteGraph; the standard implementation won't work.
+template<>
+raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const Twine &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (const auto &MBB : *MF) {
+ unsigned BB = MBB.getNumber();
+ O << "\t\"" << printMBBReference(MBB) << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"" << printMBBReference(MBB)
+ << "\"\n"
+ << "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true)
+ << '\n';
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ O << "\t\"" << printMBBReference(MBB) << "\" -> \""
+ << printMBBReference(*Succ) << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
+
+} // end namespace llvm
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExecutionDomainFix.cpp
new file mode 100644
index 000000000000..21a7d02a320c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExecutionDomainFix.cpp
@@ -0,0 +1,470 @@
+//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExecutionDomainFix.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "execution-deps-fix"
+
+iterator_range<SmallVectorImpl<int>::const_iterator>
+ExecutionDomainFix::regIndices(unsigned Reg) const {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ const auto &Entry = AliasMap[Reg];
+ return make_range(Entry.begin(), Entry.end());
+}
+
+DomainValue *ExecutionDomainFix::alloc(int domain) {
+ DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
+ : Avail.pop_back_val();
+ if (domain >= 0)
+ dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
+ return dv;
+}
+
+void ExecutionDomainFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do
+ DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
+}
+
+void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx])
+ release(LiveRegs[rx]);
+ LiveRegs[rx] = retain(dv);
+}
+
+void ExecutionDomainFix::kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ if (!LiveRegs[rx])
+ return;
+
+ release(LiveRegs[rx]);
+ LiveRegs[rx] = nullptr;
+}
+
+void ExecutionDomainFix::force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx]) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to whatever and
+ // force the new value into domain. This costs a domain crossing.
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx] && "Not live after collapse?");
+ LiveRegs[rx]->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ setLiveReg(rx, alloc(domain));
+ }
+}
+
+void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (!LiveRegs.empty() && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv)
+ setLiveReg(rx, alloc(domain));
+}
+
+bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+ // All uses of B are referred to A.
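+ // resolve() follows this Next chain, so stale references to B are lazily
+ // redirected to A.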
+ B->Next = retain(A);
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ assert(!LiveRegs.empty() && "no space allocated for live registers");
+ if (LiveRegs[rx] == B)
+ setLiveReg(rx, A);
+ }
+ return true;
+}
+
+void ExecutionDomainFix::enterBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+
+ MachineBasicBlock *MBB = TraversedMBB.MBB;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ // Set default domain values to 'no domain' (nullptr)
+ if (LiveRegs.empty())
+ LiveRegs.assign(NumRegs, nullptr);
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock *pred : MBB->predecessors()) {
+ assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
+ // Incoming is empty if this is a backedge from a BB
+ // we haven't processed yet.
+ if (Incoming.empty())
+ continue;
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ DomainValue *pdv = resolve(Incoming[rx]);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx]) {
+ setLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->isCollapsed()) {
+ // We are already collapsed, but predecessor is not. Force it.
+ unsigned Domain = LiveRegs[rx]->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ merge(LiveRegs[rx], pdv);
+ else
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
+ << (!TraversedMBB.IsDone ? ": incomplete\n"
+ : ": all preds known\n"));
+}
+
+void ExecutionDomainFix::leaveBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ unsigned MBBNumber = TraversedMBB.MBB->getNumber();
+ assert(MBBNumber < MBBOutRegsInfos.size() &&
+ "Unexpected basic block number.");
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
+ release(OldLiveReg);
+ }
+ MBBOutRegsInfos[MBBNumber] = LiveRegs;
+ LiveRegs.clear();
+}
+
+bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ return !DomP.first;
+}
+
+void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugInstr() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse())
+ continue;
+ for (int rx : regIndices(MO.getReg())) {
+ // This instruction explicitly defines rx.
+ LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI);
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+ }
+ }
+}
+
+void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands();
+ i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ force(rx, domain);
+ }
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ kill(rx);
+ force(rx, domain);
+ }
+ }
+}
+
+void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (!LiveRegs.empty())
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands();
+ i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ DomainValue *dv = LiveRegs[rx];
+ if (dv == nullptr)
+ continue;
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common)
+ available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = llvm::countr_zero(available);
+ TII->setExecutionDomain(*mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<int, 4> Regs;
+ for (int rx : used) {
+ assert(!LiveRegs.empty() && "no space allocated for live registers");
+ DomainValue *&LR = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!LR->getCommonDomains(available)) {
+ kill(rx);
+ continue;
+ }
+ // Sorted insertion.
+ // Enables giving priority to the latest domains during merging.
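+ // Keeping Regs ordered by reaching def means pop_back_val() below visits the
+ // most recently defined registers first.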
+ const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
+ auto I = partition_point(Regs, [&](int I) {
+ return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
+ });
+ Regs.insert(I, rx);
+ }
+
+ // Regs is now sorted in order of appearance. Try to merge the corresponding
+ // DomainValues, giving priority to the latest ones.
+ DomainValue *dv = nullptr;
+ while (!Regs.empty()) {
+ if (!dv) {
+ dv = LiveRegs[Regs.pop_back_val()];
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
+ continue;
+ }
+
+ DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (int i : used) {
+ assert(!LiveRegs.empty() && "no space allocated for live registers");
+ if (LiveRegs[i] == Latest)
+ kill(i);
+ }
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operands, including imp-def ones.
+ for (const MachineOperand &mo : mi->operands()) {
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
+ }
+ }
+}
+
+void ExecutionDomainFix::processBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ enterBasicBlock(TraversedMBB);
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+ // to try and break dependencies now.
+ for (MachineInstr &MI : *TraversedMBB.MBB) {
+ if (!MI.isDebugInstr()) {
+ bool Kill = false;
+ if (TraversedMBB.PrimaryPass)
+ Kill = visitInstr(&MI);
+ processDefs(&MI, Kill);
+ }
+ }
+ leaveBasicBlock(TraversedMBB);
+}
+
+bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+ MF = &mf;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ LiveRegs.clear();
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "
+ << TRI->getRegClassName(RC) << " **********\n");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ const MachineRegisterInfo &MRI = mf.getRegInfo();
+ for (unsigned Reg : *RC) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ anyregs = true;
+ break;
+ }
+ }
+ if (!anyregs)
+ return false;
+
+ RDA = &getAnalysis<ReachingDefAnalysis>();
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
+ // therefore the LiveRegs array.
+ AliasMap.resize(TRI->getNumRegs());
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
+ ++AI)
+ AliasMap[*AI].push_back(i);
+ }
+
+ // Initialize the MBBOutRegsInfos
+ MBBOutRegsInfos.resize(mf.getNumBlockIDs());
+
+ // Traverse the basic blocks.
+ LoopTraversal Traversal;
+ LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
+ for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder)
+ processBasicBlock(TraversedMBB);
+
+ for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos)
+ for (DomainValue *OutLiveReg : OutLiveRegs)
+ if (OutLiveReg)
+ release(OutLiveReg);
+
+ MBBOutRegsInfos.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
new file mode 100644
index 000000000000..057b5311db70
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -0,0 +1,139 @@
+//===--- ExpandLargeDivRem.cpp - Expand large div/rem ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands div/rem instructions with a bitwidth above a threshold
+// into a call to auto-generated functions.
+// This is useful for targets like x86_64 that cannot lower divisions
+// with more than 128 bits or targets like x86_32 that cannot lower divisions
+// with more than 64 bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
+ cl::init(llvm::IntegerType::MAX_INT_BITS),
+ cl::desc("div and rem instructions on integers with "
+ "more than <N> bits are expanded."));
+
+static bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
+ auto *C = dyn_cast<ConstantInt>(V);
+ if (!C)
+ return false;
+
+ APInt Val = C->getValue();
+ if (SignedOp && Val.isNegative())
+ Val = -Val;
+ return Val.isPowerOf2();
+}
+
+static bool isSigned(unsigned int Opcode) {
+ return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+}
+
+static bool runImpl(Function &F, const TargetLowering &TLI) {
+ SmallVector<BinaryOperator *, 4> Replace;
+ bool Modified = false;
+
+ unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
+ if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
+ MaxLegalDivRemBitWidth = ExpandDivRemBits;
+
+ if (MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+ return false;
+
+ for (auto &I : instructions(F)) {
+ switch (I.getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem: {
+ // TODO: This doesn't handle vectors.
+ auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth)
+ continue;
+
+ // The backend has peephole optimizations for powers of two.
+ if (isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode())))
+ continue;
+
+ Replace.push_back(&cast<BinaryOperator>(I));
+ Modified = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (Replace.empty())
+ return false;
+
+ while (!Replace.empty()) {
+ BinaryOperator *I = Replace.pop_back_val();
+
+ if (I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv) {
+ expandDivision(I);
+ } else {
+ expandRemainder(I);
+ }
+ }
+
+ return Modified;
+}
+
+namespace {
+class ExpandLargeDivRemLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ExpandLargeDivRemLegacyPass() : FunctionPass(ID) {
+ initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ return runImpl(F, *TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+char ExpandLargeDivRemLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+ "Expand large div/rem", false, false)
+INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+ "Expand large div/rem", false, false)
+
+FunctionPass *llvm::createExpandLargeDivRemPass() {
+ return new ExpandLargeDivRemLegacyPass();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
new file mode 100644
index 000000000000..ca8056a53139
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -0,0 +1,664 @@
+//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
+// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
+// auto-generated functions. This is useful for targets like x86_64 that cannot
+// lower fp conversions with more than 128 bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
+ cl::init(llvm::IntegerType::MAX_INT_BITS),
+ cl::desc("fp convert instructions on integers with "
+ "more than <N> bits are expanded."));
+
+/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
+/// the generated code. This currently generates code similarly to compiler-rt's
+/// implementations.
+///
+/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
+/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
+/// entry:
+/// %0 = bitcast float %a to i32
+/// %conv.i = zext i32 %0 to i64
+/// %tobool.not = icmp sgt i32 %0, -1
+/// %conv = select i1 %tobool.not, i64 1, i64 -1
+/// %and = lshr i64 %conv.i, 23
+/// %shr = and i64 %and, 255
+/// %and2 = and i64 %conv.i, 8388607
+/// %or = or i64 %and2, 8388608
+/// %cmp = icmp ult i64 %shr, 127
+/// br i1 %cmp, label %cleanup, label %if.end
+///
+/// if.end: ; preds = %entry
+/// %sub = add nuw nsw i64 %shr, 4294967169
+/// %conv5 = and i64 %sub, 4294967232
+/// %cmp6.not = icmp eq i64 %conv5, 0
+/// br i1 %cmp6.not, label %if.end12, label %if.then8
+///
+/// if.then8: ; preds = %if.end
+/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
+/// br label %cleanup
+///
+/// if.end12: ; preds = %if.end
+/// %cmp13 = icmp ult i64 %shr, 150
+/// br i1 %cmp13, label %if.then15, label %if.else
+///
+/// if.then15: ; preds = %if.end12
+/// %sub16 = sub nuw nsw i64 150, %shr
+/// %shr17 = lshr i64 %or, %sub16
+/// %mul = mul nsw i64 %shr17, %conv
+/// br label %cleanup
+///
+/// if.else: ; preds = %if.end12
+/// %sub18 = add nsw i64 %shr, -150
+/// %shl = shl i64 %or, %sub18
+/// %mul19 = mul nsw i64 %shl, %conv
+/// br label %cleanup
+///
+/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
+/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
+/// ret i64 %retval.0
+/// }
+///
+/// Replace fp to integer with generated code.
+static void expandFPToI(Instruction *FPToI) {
+ IRBuilder<> Builder(FPToI);
+ auto *FloatVal = FPToI->getOperand(0);
+ IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
+
+ unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
+ unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
+
+ // FIXME: fp16's range is covered by i32. So `fptoi half` can be converted
+ // to i32 first, followed by a sext/zext to the target integer type.
+ Value *A1 = nullptr;
+ if (FloatVal->getType()->isHalfTy()) {
+ if (FPToI->getOpcode() == Instruction::FPToUI) {
+ Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
+ A1 = Builder.CreateZExt(A0, IntTy);
+ } else { // FPToSI
+ Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
+ A1 = Builder.CreateSExt(A0, IntTy);
+ }
+ FPToI->replaceAllUsesWith(A1);
+ FPToI->dropAllReferences();
+ FPToI->eraseFromParent();
+ return;
+ }
+
+ // fp80 conversion is implemented by first extending to fp128 and then doing
+ // the conversion.
+ FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
+ unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
+ unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
+ unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
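+ // E.g. for an IEEE single-precision source: FPMantissaWidth = 23,
+ // FloatWidth = 32, ExponentWidth = 8, ExponentBias = 127.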
+ Value *ImplicitBit = Builder.CreateShl(
+ Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
+ Value *SignificandMask =
+ Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
+ Value *NegOne = Builder.CreateSExt(
+ ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
+ Value *NegInf =
+ Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
+ ConstantInt::getSigned(IntTy, BitWidth - 1));
+
+ BasicBlock *Entry = Builder.GetInsertBlock();
+ Function *F = Entry->getParent();
+ Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
+ BasicBlock *End =
+ Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
+ BasicBlock *IfEnd =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
+ BasicBlock *IfThen5 =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
+ BasicBlock *IfEnd9 =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
+ BasicBlock *IfThen12 =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
+ BasicBlock *IfElse =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
+
+ Entry->getTerminator()->eraseFromParent();
+
+ // entry:
+ Builder.SetInsertPoint(Entry);
+ Value *FloatVal0 = FloatVal;
+ // fp80 conversion is implemented by first extending to fp128 and then doing
+ // the conversion.
+ if (FloatVal->getType()->isX86_FP80Ty())
+ FloatVal0 =
+ Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
+ Value *ARep0 =
+ Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
+ Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
+ Value *PosOrNeg = Builder.CreateICmpSGT(
+ ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
+ Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
+ ConstantInt::getSigned(IntTy, -1));
+ Value *And =
+ Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
+ Value *And2 = Builder.CreateAnd(
+ And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
+ Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
+ Value *Or = Builder.CreateOr(Abs, ImplicitBit);
+ Value *Cmp =
+ Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
+ Builder.CreateCondBr(Cmp, End, IfEnd);
+
+ // if.end:
+ Builder.SetInsertPoint(IfEnd);
+ Value *Add1 = Builder.CreateAdd(
+ And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
+ Value *Cmp3 =
+ Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
+ Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+
+ // if.then5:
+ Builder.SetInsertPoint(IfThen5);
+ Value *PosInf = Builder.CreateXor(NegOne, NegInf);
+ Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
+ Builder.CreateBr(End);
+
+ // if.end9:
+ Builder.SetInsertPoint(IfEnd9);
+ Value *Cmp10 = Builder.CreateICmpULT(
+ And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
+ Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+
+ // if.then12:
+ Builder.SetInsertPoint(IfThen12);
+ Value *Sub13 = Builder.CreateSub(
+ Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
+ Value *Shr14 = Builder.CreateLShr(Or, Sub13);
+ Value *Mul = Builder.CreateMul(Shr14, Sign);
+ Builder.CreateBr(End);
+
+ // if.else:
+ Builder.SetInsertPoint(IfElse);
+ Value *Sub15 = Builder.CreateAdd(
+ And2,
+ ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
+ Value *Shl = Builder.CreateShl(Or, Sub15);
+ Value *Mul16 = Builder.CreateMul(Shl, Sign);
+ Builder.CreateBr(End);
+
+ // cleanup:
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
+
+ Retval0->addIncoming(Cond8, IfThen5);
+ Retval0->addIncoming(Mul, IfThen12);
+ Retval0->addIncoming(Mul16, IfElse);
+ Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
+
+ FPToI->replaceAllUsesWith(Retval0);
+ FPToI->dropAllReferences();
+ FPToI->eraseFromParent();
+}
+
+/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
+/// the generated code. This currently generates code similarly to compiler-rt's
+/// implementations. This implementation has an implicit assumption that the
+/// integer width is larger than the fp width.
+///
+/// An example IR generated from compiler-rt/floatdisf.c looks like below:
+/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
+/// entry:
+/// %cmp = icmp eq i64 %a, 0
+/// br i1 %cmp, label %return, label %if.end
+///
+/// if.end: ; preds = %entry
+/// %shr = ashr i64 %a, 63
+/// %xor = xor i64 %shr, %a
+/// %sub = sub nsw i64 %xor, %shr
+/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
+/// %cast = trunc i64 %0 to i32
+/// %sub1 = sub nuw nsw i32 64, %cast
+/// %sub2 = xor i32 %cast, 63
+/// %cmp3 = icmp ult i32 %cast, 40
+/// br i1 %cmp3, label %if.then4, label %if.else
+///
+/// if.then4: ; preds = %if.end
+/// switch i32 %sub1, label %sw.default [
+/// i32 25, label %sw.bb
+/// i32 26, label %sw.epilog
+/// ]
+///
+/// sw.bb: ; preds = %if.then4
+/// %shl = shl i64 %sub, 1
+/// br label %sw.epilog
+///
+/// sw.default: ; preds = %if.then4
+/// %sub5 = sub nsw i64 38, %0
+/// %sh_prom = and i64 %sub5, 4294967295
+/// %shr6 = lshr i64 %sub, %sh_prom
+/// %shr9 = lshr i64 274877906943, %0
+/// %and = and i64 %shr9, %sub
+/// %cmp10 = icmp ne i64 %and, 0
+/// %conv11 = zext i1 %cmp10 to i64
+/// %or = or i64 %shr6, %conv11
+/// br label %sw.epilog
+///
+/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
+/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
+/// %1 = lshr i64 %a.addr.0, 2
+/// %2 = and i64 %1, 1
+/// %or16 = or i64 %2, %a.addr.0
+/// %inc = add nsw i64 %or16, 1
+/// %3 = and i64 %inc, 67108864
+/// %tobool.not = icmp eq i64 %3, 0
+/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
+/// %spec.select = ashr i64 %inc, %spec.select.v
+/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
+/// br label %if.end26
+///
+/// if.else: ; preds = %if.end
+/// %sub23 = add nuw nsw i64 %0, 4294967256
+/// %sh_prom24 = and i64 %sub23, 4294967295
+/// %shl25 = shl i64 %sub, %sh_prom24
+/// br label %if.end26
+///
+/// if.end26: ; preds = %sw.epilog, %if.else
+/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
+/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
+/// %conv27 = trunc i64 %shr to i32
+/// %and28 = and i32 %conv27, -2147483648
+/// %add = shl nuw nsw i32 %e.0, 23
+/// %shl29 = add nuw nsw i32 %add, 1065353216
+/// %conv31 = trunc i64 %a.addr.1 to i32
+/// %and32 = and i32 %conv31, 8388607
+/// %or30 = or i32 %and32, %and28
+/// %or33 = or i32 %or30, %shl29
+/// %4 = bitcast i32 %or33 to float
+/// br label %return
+///
+/// return: ; preds = %entry, %if.end26
+/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
+/// ret float %retval.0
+/// }
+///
+/// Replace integer to fp with generated code.
+static void expandIToFP(Instruction *IToFP) {
+ IRBuilder<> Builder(IToFP);
+ auto *IntVal = IToFP->getOperand(0);
+ IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
+
+ unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
+ unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
+ // fp80 conversion is implemented by first converting to fp128, followed by
+ // a fptrunc to fp80.
+ FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
+ // FIXME: As there are no related builtins added in compiler-rt, we currently
+ // use the fp32 <-> fp16 lib calls to implement this.
+ FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
+ unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
+ bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
+
+ assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
+ "assumes integer width is larger than fp.");
+
+ Value *Temp1 =
+ Builder.CreateShl(Builder.getIntN(BitWidth, 1),
+ Builder.getIntN(BitWidth, FPMantissaWidth + 3));
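+ // Temp1 is the bit just above the kept significand bits; if the rounding
+ // increment below carries into it, the itofp-if-then20 path shifts by one
+ // extra bit.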
+
+ BasicBlock *Entry = Builder.GetInsertBlock();
+ Function *F = Entry->getParent();
+ Entry->setName(Twine(Entry->getName(), "itofp-entry"));
+ BasicBlock *End =
+ Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
+ BasicBlock *IfEnd =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
+ BasicBlock *IfThen4 =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
+ BasicBlock *SwBB =
+ BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
+ BasicBlock *SwDefault =
+ BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
+ BasicBlock *SwEpilog =
+ BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
+ BasicBlock *IfThen20 =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
+ BasicBlock *IfElse =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
+ BasicBlock *IfEnd26 =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
+
+ Entry->getTerminator()->eraseFromParent();
+
+ Function *CTLZ =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
+ ConstantInt *True = Builder.getTrue();
+
+ // entry:
+ Builder.SetInsertPoint(Entry);
+ Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
+ Builder.CreateCondBr(Cmp, End, IfEnd);
+
+ // if.end:
+ Builder.SetInsertPoint(IfEnd);
+ Value *Shr =
+ Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
+ Value *Xor = Builder.CreateXor(Shr, IntVal);
+ Value *Sub = Builder.CreateSub(Xor, Shr);
+ Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
+ Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
+ int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
+ Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
+ FloatWidth == 128 ? Call : Cast);
+ Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
+ FloatWidth == 128 ? Call : Cast);
+ Value *Cmp3 = Builder.CreateICmpSGT(
+ Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
+ Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
+
+ // if.then4:
+ Builder.SetInsertPoint(IfThen4);
+ llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
+ SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
+ SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
+
+ // sw.bb:
+ Builder.SetInsertPoint(SwBB);
+ Value *Shl =
+ Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
+ Builder.CreateBr(SwEpilog);
+
+ // sw.default:
+ Builder.SetInsertPoint(SwDefault);
+ Value *Sub5 = Builder.CreateSub(
+ Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
+ FloatWidth == 128 ? Call : Cast);
+ Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
+ Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
+ FloatWidth == 128 ? Sub5 : ShProm);
+ Value *Sub8 =
+ Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
+ Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
+ Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
+ Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
+ FloatWidth == 128 ? Sub8 : ShProm9);
+ Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
+ Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
+ Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
+ Value *Or = Builder.CreateOr(Shr6, Conv11);
+ Builder.CreateBr(SwEpilog);
+
+ // sw.epilog:
+ Builder.SetInsertPoint(SwEpilog);
+ PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
+ AAddr0->addIncoming(Or, SwDefault);
+ AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
+ AAddr0->addIncoming(Shl, SwBB);
+ Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
+ Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
+ Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
+ Value *Conv16 = Builder.CreateZExt(A2, IntTy);
+ Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
+ Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
+ Value *Shr18 = nullptr;
+ if (IsSigned)
+ Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
+ else
+ Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
+ Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
+ Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
+ Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
+ Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
+ Value *ExtractT64 = nullptr;
+ if (FloatWidth > 80)
+ ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
+ else
+ ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
+ Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
+
+ // if.then20
+ Builder.SetInsertPoint(IfThen20);
+ Value *Shr21 = nullptr;
+ if (IsSigned)
+ Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
+ else
+ Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
+ Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
+ Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
+ Value *ExtractT62 = nullptr;
+ if (FloatWidth > 80)
+ ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
+ else
+ ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
+ Builder.CreateBr(IfEnd26);
+
+ // if.else:
+ Builder.SetInsertPoint(IfElse);
+ Value *Sub24 = Builder.CreateAdd(
+ FloatWidth == 128 ? Call : Cast,
+ ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
+ -(BitWidth - FPMantissaWidth - 1)));
+ Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
+ Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
+ FloatWidth == 128 ? Sub24 : ShProm25);
+ Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
+ Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
+ Value *ExtractT66 = nullptr;
+ if (FloatWidth > 80)
+ ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
+ else
+ ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
+ Builder.CreateBr(IfEnd26);
+
+ // if.end26:
+ Builder.SetInsertPoint(IfEnd26);
+ PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
+ AAddr1Off0->addIncoming(ExtractT, IfThen20);
+ AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
+ AAddr1Off0->addIncoming(ExtractT61, IfElse);
+ PHINode *AAddr1Off32 = nullptr;
+ if (FloatWidth > 32) {
+ AAddr1Off32 =
+ Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
+ AAddr1Off32->addIncoming(ExtractT62, IfThen20);
+ AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
+ AAddr1Off32->addIncoming(ExtractT66, IfElse);
+ }
+ PHINode *E0 = nullptr;
+ if (FloatWidth <= 80) {
+ E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
+ E0->addIncoming(Sub1, IfThen20);
+ E0->addIncoming(Sub2, SwEpilog);
+ E0->addIncoming(Sub2, IfElse);
+ }
+ Value *And29 = nullptr;
+ if (FloatWidth > 80) {
+ Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
+ Builder.getIntN(BitWidth, 63));
+ And29 = Builder.CreateAnd(Shr, Temp2, "and29");
+ } else {
+ Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
+ And29 = Builder.CreateAnd(
+ Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
+ }
+ unsigned TempMod = FPMantissaWidth % 32;
+ Value *And34 = nullptr;
+ Value *Shl30 = nullptr;
+ if (FloatWidth > 80) {
+ TempMod += 32;
+ Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
+ Shl30 = Builder.CreateAdd(
+ Add,
+ Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
+ And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
+ } else {
+ Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
+ Shl30 = Builder.CreateAdd(
+ Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
+ And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
+ Builder.getIntN(32, (1 << TempMod) - 1));
+ }
+ Value *Or35 = nullptr;
+ if (FloatWidth > 80) {
+ Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
+ Value *Or31 = Builder.CreateOr(And29Trunc, And34);
+ Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
+ Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
+ Builder.getIntN(128, FPMantissaWidth));
+ Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
+ Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
+ Or35 = Builder.CreateOr(Or34, A6);
+ } else {
+ Value *Or31 = Builder.CreateOr(And34, And29);
+ Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
+ }
+ Value *A4 = nullptr;
+ if (IToFP->getType()->isDoubleTy()) {
+ Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
+ Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
+ Value *And1 =
+ Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
+ Value *Or1 = Builder.CreateOr(Shl1, And1);
+ A4 = Builder.CreateBitCast(Or1, IToFP->getType());
+ } else if (IToFP->getType()->isX86_FP80Ty()) {
+ Value *A40 =
+ Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
+ A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
+ } else if (IToFP->getType()->isHalfTy()) {
+ // Handle the "half" case. This is a workaround, since we don't currently
+ // have a floattihf.c reference implementation to follow.
+ Value *A40 =
+ Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
+ A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
+ } else // float type
+ A4 = Builder.CreateBitCast(Or35, IToFP->getType());
+ Builder.CreateBr(End);
+
+ // return:
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
+ Retval0->addIncoming(A4, IfEnd26);
+ Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
+
+ IToFP->replaceAllUsesWith(Retval0);
+ IToFP->dropAllReferences();
+ IToFP->eraseFromParent();
+}
+
+static bool runImpl(Function &F, const TargetLowering &TLI) {
+ SmallVector<Instruction *, 4> Replace;
+ bool Modified = false;
+
+ unsigned MaxLegalFpConvertBitWidth =
+ TLI.getMaxLargeFPConvertBitWidthSupported();
+ if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
+ MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
+
+ if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+ return false;
+
+ for (auto &I : instructions(F)) {
+ switch (I.getOpcode()) {
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ // TODO: This pass doesn't handle vectors.
+ if (I.getOperand(0)->getType()->isVectorTy())
+ continue;
+
+ auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
+ continue;
+
+ Replace.push_back(&I);
+ Modified = true;
+ break;
+ }
+ case Instruction::UIToFP:
+ case Instruction::SIToFP: {
+ // TODO: This pass doesn't handle vectors.
+ if (I.getOperand(0)->getType()->isVectorTy())
+ continue;
+
+ auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
+ if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
+ continue;
+
+ Replace.push_back(&I);
+ Modified = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (Replace.empty())
+ return false;
+
+ while (!Replace.empty()) {
+ Instruction *I = Replace.pop_back_val();
+ if (I->getOpcode() == Instruction::FPToUI ||
+ I->getOpcode() == Instruction::FPToSI) {
+ expandFPToI(I);
+ } else {
+ expandIToFP(I);
+ }
+ }
+
+ return Modified;
+}
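+// For illustration (a hypothetical module, not part of this file): with a
+// target whose getMaxLargeFPConvertBitWidthSupported() returns 128,
+//   %a = fptoui fp128 %x to i129
+//   %b = uitofp i256 %y to double
+// would both be queued for expansion above, while a plain
+//   %c = fptoui double %d to i64
+// is left alone because its integer width does not exceed the maximum.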
+
+namespace {
+class ExpandLargeFpConvertLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
+ initializeExpandLargeFpConvertLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ return runImpl(F, *TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+char ExpandLargeFpConvertLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
+ "Expand large fp convert", false, false)
+INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
+ "Expand large fp convert", false, false)
+
+FunctionPass *llvm::createExpandLargeFpConvertPass() {
+ return new ExpandLargeFpConvertLegacyPass();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
new file mode 100644
index 000000000000..500f31bd8e89
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -0,0 +1,916 @@
+//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to expand memcmp() calls into optimally-sized loads and
+// compares for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <optional>
+
+using namespace llvm;
+
+namespace llvm {
+class TargetLowering;
+}
+
+#define DEBUG_TYPE "expandmemcmp"
+
+STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
+STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
+STATISTIC(NumMemCmpGreaterThanMax,
+ "Number of memcmp calls with size greater than max size");
+STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
+
+static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
+ "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
+ cl::desc("The number of loads per basic block for inline expansion of "
+ "memcmp that is only being compared against zero."));
+
+static cl::opt<unsigned> MaxLoadsPerMemcmp(
+ "max-loads-per-memcmp", cl::Hidden,
+ cl::desc("Set maximum number of loads used in expanded memcmp"));
+
+static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
+ "max-loads-per-memcmp-opt-size", cl::Hidden,
+ cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
+
+namespace {
+
+
+// This class provides helper functions to expand a memcmp library call into an
+// inline expansion.
+class MemCmpExpansion {
+ struct ResultBlock {
+ BasicBlock *BB = nullptr;
+ PHINode *PhiSrc1 = nullptr;
+ PHINode *PhiSrc2 = nullptr;
+
+ ResultBlock() = default;
+ };
+
+ CallInst *const CI = nullptr;
+ ResultBlock ResBlock;
+ const uint64_t Size;
+ unsigned MaxLoadSize = 0;
+ uint64_t NumLoadsNonOneByte = 0;
+ const uint64_t NumLoadsPerBlockForZeroCmp;
+ std::vector<BasicBlock *> LoadCmpBlocks;
+ BasicBlock *EndBlock = nullptr;
+ PHINode *PhiRes = nullptr;
+ const bool IsUsedForZeroCmp;
+ const DataLayout &DL;
+ DomTreeUpdater *DTU = nullptr;
+ IRBuilder<> Builder;
+ // Represents the decomposition in blocks of the expansion. For example,
+ // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
+ // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
+ struct LoadEntry {
+ LoadEntry(unsigned LoadSize, uint64_t Offset)
+ : LoadSize(LoadSize), Offset(Offset) {
+ }
+
+ // The size of the load for this block, in bytes.
+ unsigned LoadSize;
+ // The offset of this load from the base pointer, in bytes.
+ uint64_t Offset;
+ };
+ using LoadEntryVector = SmallVector<LoadEntry, 8>;
+ LoadEntryVector LoadSequence;
+
+ void createLoadCmpBlocks();
+ void createResultBlock();
+ void setupResultBlockPHINodes();
+ void setupEndBlockPHINodes();
+ Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
+ void emitLoadCompareBlock(unsigned BlockIndex);
+ void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
+ unsigned &LoadIndex);
+ void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
+ void emitMemCmpResultBlock();
+ Value *getMemCmpExpansionZeroCase();
+ Value *getMemCmpEqZeroOneBlock();
+ Value *getMemCmpOneBlock();
+ struct LoadPair {
+ Value *Lhs = nullptr;
+ Value *Rhs = nullptr;
+ };
+ LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
+ unsigned OffsetBytes);
+
+ static LoadEntryVector
+ computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
+ static LoadEntryVector
+ computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
+ unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte);
+
+public:
+ MemCmpExpansion(CallInst *CI, uint64_t Size,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+ DomTreeUpdater *DTU);
+
+ unsigned getNumBlocks();
+ uint64_t getNumLoads() const { return LoadSequence.size(); }
+
+ Value *getMemCmpExpansion();
+};
+
+MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
+ uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
+ NumLoadsNonOneByte = 0;
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ while (Size && !LoadSizes.empty()) {
+ const unsigned LoadSize = LoadSizes.front();
+ const uint64_t NumLoadsForThisSize = Size / LoadSize;
+ if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+ // Do not expand if the total number of loads is larger than what the
+ // target allows. Note that it's important that we exit before completing
+ // the expansion to avoid using a ton of memory to store the expansion for
+ // large sizes.
+ return {};
+ }
+ if (NumLoadsForThisSize > 0) {
+ for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+ LoadSequence.push_back({LoadSize, Offset});
+ Offset += LoadSize;
+ }
+ if (LoadSize > 1)
+ ++NumLoadsNonOneByte;
+ Size = Size % LoadSize;
+ }
+ LoadSizes = LoadSizes.drop_front();
+ }
+ return LoadSequence;
+}
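+// A worked example (assuming MaxNumLoads is large enough): for Size = 15 and
+// LoadSizes = {8, 4, 2, 1}, the greedy decomposition is
+//   [{8, 0}, {4, 8}, {2, 12}, {1, 14}]
+// with NumLoadsNonOneByte = 3 (the 8-, 4- and 2-byte loads).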
+
+MemCmpExpansion::LoadEntryVector
+MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
+ const unsigned MaxLoadSize,
+ const unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte) {
+ // These are already handled by the greedy approach.
+ if (Size < 2 || MaxLoadSize < 2)
+ return {};
+
+ // We try to do as many non-overlapping loads as possible starting from the
+ // beginning.
+ const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
+ assert(NumNonOverlappingLoads && "there must be at least one load");
+ // There remain 0 to (MaxLoadSize - 1) bytes to load; this will be done with
+ // an overlapping load.
+ Size = Size - NumNonOverlappingLoads * MaxLoadSize;
+ // Bail if we do not need an overlapping load; this is already handled by
+ // the greedy approach.
+ if (Size == 0)
+ return {};
+ // Bail if the number of loads (non-overlapping + potential overlapping one)
+ // is larger than the max allowed.
+ if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
+ return {};
+
+ // Add non-overlapping loads.
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
+ LoadSequence.push_back({MaxLoadSize, Offset});
+ Offset += MaxLoadSize;
+ }
+
+ // Add the last overlapping load.
+ assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
+ LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
+ NumLoadsNonOneByte = 1;
+ return LoadSequence;
+}
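+// A worked example: for Size = 15 and MaxLoadSize = 8 this returns
+//   [{8, 0}, {8, 7}]
+// i.e. one full-width load at offset 0 plus one overlapping full-width load
+// ending exactly at byte 15, instead of the greedy 8+4+2+1 split.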
+
+// Initialize the basic block structure required for expansion of memcmp call
+// with given maximum load size and memcmp size parameter.
+// This structure includes:
+// 1. A list of load compare blocks - LoadCmpBlocks.
+// 2. An EndBlock, split from original instruction point, which is the block to
+// return from.
+// 3. ResultBlock, block to branch to for early exit when a
+// LoadCmpBlock finds a difference.
+MemCmpExpansion::MemCmpExpansion(
+ CallInst *const CI, uint64_t Size,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+ DomTreeUpdater *DTU)
+ : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+ IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
+ Builder(CI) {
+ assert(Size > 0 && "zero blocks");
+ // Scale the max size down if the target can load more bytes than we need.
+ llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
+ while (!LoadSizes.empty() && LoadSizes.front() > Size) {
+ LoadSizes = LoadSizes.drop_front();
+ }
+ assert(!LoadSizes.empty() && "cannot load Size bytes");
+ MaxLoadSize = LoadSizes.front();
+ // Compute the decomposition.
+ unsigned GreedyNumLoadsNonOneByte = 0;
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
+ GreedyNumLoadsNonOneByte);
+ NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
+ // If we allow overlapping loads and the load sequence is not already optimal,
+ // use overlapping loads.
+ if (Options.AllowOverlappingLoads &&
+ (LoadSequence.empty() || LoadSequence.size() > 2)) {
+ unsigned OverlappingNumLoadsNonOneByte = 0;
+ auto OverlappingLoads = computeOverlappingLoadSequence(
+ Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ if (!OverlappingLoads.empty() &&
+ (LoadSequence.empty() ||
+ OverlappingLoads.size() < LoadSequence.size())) {
+ LoadSequence = OverlappingLoads;
+ NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
+ }
+ }
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
+}
+
+unsigned MemCmpExpansion::getNumBlocks() {
+ if (IsUsedForZeroCmp)
+ return getNumLoads() / NumLoadsPerBlockForZeroCmp +
+ (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
+ return getNumLoads();
+}
+
+void MemCmpExpansion::createLoadCmpBlocks() {
+ for (unsigned i = 0; i < getNumBlocks(); i++) {
+ BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
+ EndBlock->getParent(), EndBlock);
+ LoadCmpBlocks.push_back(BB);
+ }
+}
+
+void MemCmpExpansion::createResultBlock() {
+ ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
+ EndBlock->getParent(), EndBlock);
+}
+
+MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
+ bool NeedsBSwap,
+ Type *CmpSizeType,
+ unsigned OffsetBytes) {
+ // Get the memory source at offset `OffsetBytes`.
+ Value *LhsSource = CI->getArgOperand(0);
+ Value *RhsSource = CI->getArgOperand(1);
+ Align LhsAlign = LhsSource->getPointerAlignment(DL);
+ Align RhsAlign = RhsSource->getPointerAlignment(DL);
+ if (OffsetBytes > 0) {
+ auto *ByteType = Type::getInt8Ty(CI->getContext());
+ LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
+ RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
+ LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
+ RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
+ }
+
+ // Create a constant or a load from the source.
+ Value *Lhs = nullptr;
+ if (auto *C = dyn_cast<Constant>(LhsSource))
+ Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
+ if (!Lhs)
+ Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
+
+ Value *Rhs = nullptr;
+ if (auto *C = dyn_cast<Constant>(RhsSource))
+ Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
+ if (!Rhs)
+ Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
+
+ // Swap bytes if required.
+ if (NeedsBSwap) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ Lhs = Builder.CreateCall(Bswap, Lhs);
+ Rhs = Builder.CreateCall(Bswap, Rhs);
+ }
+
+ // Zero extend if required.
+ if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
+ Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
+ Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
+ }
+ return {Lhs, Rhs};
+}
+
+// This function creates the IR instructions for loading and comparing 1 byte.
+// It loads 1 byte from each source of the memcmp parameters with the given
+// GEPIndex. It then subtracts the two loaded values and adds this result to the
+// final phi node for selecting the memcmp result.
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
+ unsigned OffsetBytes) {
+ BasicBlock *BB = LoadCmpBlocks[BlockIndex];
+ Builder.SetInsertPoint(BB);
+ const LoadPair Loads =
+ getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
+ Type::getInt32Ty(CI->getContext()), OffsetBytes);
+ Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
+
+ PhiRes->addIncoming(Diff, BB);
+
+ if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
+ // Branch to EndBlock for an early exit if a difference is found. Otherwise,
+ // continue to the next LoadCmpBlock.
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
+ ConstantInt::get(Diff->getType(), 0));
+ BranchInst *CmpBr =
+ BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BB, EndBlock},
+ {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
+ } else {
+ // The last block has an unconditional branch to EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
+ }
+}
+
+/// Generate an equality comparison for one or more pairs of loaded values.
+/// This is used in the case where the memcmp() call is compared equal or not
+/// equal to zero.
+Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
+ unsigned &LoadIndex) {
+ assert(LoadIndex < getNumLoads() &&
+ "getCompareLoadPairs() called with no remaining loads");
+ std::vector<Value *> XorList, OrList;
+ Value *Diff = nullptr;
+
+ const unsigned NumLoads =
+ std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
+
+ // For a single-block expansion, start inserting before the memcmp call.
+ if (LoadCmpBlocks.empty())
+ Builder.SetInsertPoint(CI);
+ else
+ Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+
+ Value *Cmp = nullptr;
+ // If we have multiple loads per block, we need to generate a composite
+ // comparison using xor+or. The type for the combinations is the largest load
+ // type.
+ IntegerType *const MaxLoadType =
+ NumLoads == 1 ? nullptr
+ : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
+ const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
+ const LoadPair Loads = getLoadPair(
+ IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
+ /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
+
+ if (NumLoads != 1) {
+ // If we have multiple loads per block, we need to generate a composite
+ // comparison using xor+or.
+ Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
+ Diff = Builder.CreateZExt(Diff, MaxLoadType);
+ XorList.push_back(Diff);
+ } else {
+ // If there's only one load per block, we just compare the loaded values.
+ Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
+ }
+ }
+
+ auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
+ std::vector<Value *> OutList;
+ for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
+ Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
+ OutList.push_back(Or);
+ }
+ if (InList.size() % 2 != 0)
+ OutList.push_back(InList.back());
+ return OutList;
+ };
+
+ if (!Cmp) {
+ // Pairwise OR the XOR results.
+ OrList = pairWiseOr(XorList);
+
+ // Pairwise OR the OR results until one result left.
+ while (OrList.size() != 1) {
+ OrList = pairWiseOr(OrList);
+ }
+
+ assert(Diff && "Failed to find comparison diff");
+ Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
+ }
+
+ return Cmp;
+}
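+// Sketch of the zero-equality comparison built above for a block with three
+// loads (value names are illustrative only):
+//   %x0  = xor %lhs0, %rhs0   ; each xor zext'd to the widest load type
+//   %x1  = xor %lhs1, %rhs1
+//   %x2  = xor %lhs2, %rhs2
+//   %o0  = or  %x0, %x1       ; first pairwise OR round
+//   %o1  = or  %o0, %x2       ; second round, down to a single value
+//   %cmp = icmp ne %o1, 0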
+
+void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
+ unsigned &LoadIndex) {
+ Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
+
+ BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[BlockIndex + 1];
+ // Early exit branch if difference found to ResultBlock. Otherwise,
+ // continue to next LoadCmpBlock or EndBlock.
+ BasicBlock *BB = Builder.GetInsertBlock();
+ BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
+ {DominatorTree::Insert, BB, NextBB}});
+
+ // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes).
+ if (BlockIndex == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
+ }
+}
+
+// This function creates the IR instructions for loading and comparing using the
+// given LoadSize. It loads the number of bytes specified by LoadSize from each
+// source of the memcmp parameters. It then does a subtract to see if there was
+// a difference in the loaded values. If a difference is found, it branches
+// with an early exit to the ResultBlock for calculating which source was
+// larger. Otherwise, it falls through to either the next LoadCmpBlock or
+// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled as
+// a special case through emitLoadCompareByteBlock, which simply subtracts
+// the loaded values and adds the difference to the result phi node.
+void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
+ // There is one load per block in this case, BlockIndex == LoadIndex.
+ const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
+
+ if (CurLoadEntry.LoadSize == 1) {
+ MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
+ return;
+ }
+
+ Type *LoadSizeType =
+ IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
+
+ Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+
+ const LoadPair Loads =
+ getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
+ CurLoadEntry.Offset);
+
+ // Add the loaded values to the phi nodes for calculating memcmp result only
+ // if result is not used in a zero equality.
+ if (!IsUsedForZeroCmp) {
+ ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
+ ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
+ }
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
+ BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[BlockIndex + 1];
+ // Early exit branch if difference found to ResultBlock. Otherwise, continue
+ // to next LoadCmpBlock or EndBlock.
+ BasicBlock *BB = Builder.GetInsertBlock();
+ BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
+ Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
+ {DominatorTree::Insert, BB, ResBlock.BB}});
+
+ // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes).
+ if (BlockIndex == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
+ }
+}
+
+// This function populates the ResultBlock with a sequence to calculate the
+// memcmp result. It compares the two loaded source values and returns -1 if
+// src1 < src2 and 1 if src1 > src2.
+void MemCmpExpansion::emitMemCmpResultBlock() {
+ // Special case: if memcmp result is used in a zero equality, result does not
+ // need to be calculated and can simply return 1.
+ if (IsUsedForZeroCmp) {
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+ Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
+ return;
+ }
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
+ ResBlock.PhiSrc2);
+
+ Value *Res =
+ Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+
+ PhiRes->addIncoming(Res, ResBlock.BB);
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
+}
+
+void MemCmpExpansion::setupResultBlockPHINodes() {
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ Builder.SetInsertPoint(ResBlock.BB);
+ // Note: this assumes one load per block.
+ ResBlock.PhiSrc1 =
+ Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
+ ResBlock.PhiSrc2 =
+ Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
+}
+
+void MemCmpExpansion::setupEndBlockPHINodes() {
+ Builder.SetInsertPoint(&EndBlock->front());
+ PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
+}
+
+Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
+ unsigned LoadIndex = 0;
+ // This loop populates each of the LoadCmpBlocks with the IR sequence to
+ // handle multiple loads per block.
+ for (unsigned I = 0; I < getNumBlocks(); ++I) {
+ emitLoadCompareBlockMultipleLoads(I, LoadIndex);
+ }
+
+ emitMemCmpResultBlock();
+ return PhiRes;
+}
+
+/// A memcmp expansion that compares equality with 0 and only has one block of
+/// load and compare can bypass the compare, branch, and phi IR that is required
+/// in the general case.
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
+ unsigned LoadIndex = 0;
+ Value *Cmp = getCompareLoadPairs(0, LoadIndex);
+ assert(LoadIndex == getNumLoads() && "some entries were not consumed");
+ return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
+}
+
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock() {
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
+
+ // The i8 and i16 cases don't need compares. We zext the loaded values and
+ // subtract them to get the suitable negative, zero, or positive i32 result.
+ if (Size < 4) {
+ const LoadPair Loads =
+ getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
+ /*Offset*/ 0);
+ return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
+ }
+
+ const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
+ /*Offset*/ 0);
+ // The result of memcmp is negative, zero, or positive, so produce that by
+ // subtracting 2 extended compare bits: sub (ugt, ult).
+ // If a target prefers to use selects to get -1/0/1, they should be able
+ // to transform this later. The inverse transform (going from selects to math)
+ // may not be possible in the DAG because the selects got converted into
+ // branches before we got there.
+ Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
+ Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
+ Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
+ Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
+ return Builder.CreateSub(ZextUGT, ZextULT);
+}
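+// The sub(zext(ugt), zext(ult)) computation above produces the expected sign:
+//   Lhs <  Rhs : 0 - 1 = -1
+//   Lhs == Rhs : 0 - 0 =  0
+//   Lhs >  Rhs : 1 - 0 =  1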
+
+// This function expands the memcmp call into an inline expansion and returns
+// the memcmp result.
+Value *MemCmpExpansion::getMemCmpExpansion() {
+ // Create the basic block framework for a multi-block expansion.
+ if (getNumBlocks() != 1) {
+ BasicBlock *StartBlock = CI->getParent();
+ EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
+ /*MSSAU=*/nullptr, "endblock");
+ setupEndBlockPHINodes();
+ createResultBlock();
+
+ // If return value of memcmp is not used in a zero equality, we need to
+ // calculate which source was larger. The calculation requires the
+ // two loaded source values of each load compare block.
+ // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+ if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
+
+ // Create the number of required load compare basic blocks.
+ createLoadCmpBlocks();
+
+ // Update the terminator added by SplitBlock to branch to the first
+ // LoadCmpBlock.
+ StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
+ {DominatorTree::Delete, StartBlock, EndBlock}});
+ }
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ if (IsUsedForZeroCmp)
+ return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
+ : getMemCmpExpansionZeroCase();
+
+ if (getNumBlocks() == 1)
+ return getMemCmpOneBlock();
+
+ for (unsigned I = 0; I < getNumBlocks(); ++I) {
+ emitLoadCompareBlock(I);
+ }
+
+ emitMemCmpResultBlock();
+ return PhiRes;
+}
+
+// This function checks to see if an expansion of memcmp can be generated.
+// It checks for constant compare size that is less than the max inline size.
+// If an expansion cannot occur, returns false to leave as a library call.
+// Otherwise, the library call is replaced with a new IR instruction sequence.
+/// We want to transform:
+/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
+/// To:
+/// loadbb:
+/// %0 = bitcast i32* %buffer2 to i8*
+/// %1 = bitcast i32* %buffer1 to i8*
+/// %2 = bitcast i8* %1 to i64*
+/// %3 = bitcast i8* %0 to i64*
+/// %4 = load i64, i64* %2
+/// %5 = load i64, i64* %3
+/// %6 = call i64 @llvm.bswap.i64(i64 %4)
+/// %7 = call i64 @llvm.bswap.i64(i64 %5)
+/// %8 = sub i64 %6, %7
+/// %9 = icmp ne i64 %8, 0
+/// br i1 %9, label %res_block, label %loadbb1
+/// res_block: ; preds = %loadbb2,
+/// %loadbb1, %loadbb
+/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
+/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
+/// %10 = icmp ult i64 %phi.src1, %phi.src2
+/// %11 = select i1 %10, i32 -1, i32 1
+/// br label %endblock
+/// loadbb1: ; preds = %loadbb
+/// %12 = bitcast i32* %buffer2 to i8*
+/// %13 = bitcast i32* %buffer1 to i8*
+/// %14 = bitcast i8* %13 to i32*
+/// %15 = bitcast i8* %12 to i32*
+/// %16 = getelementptr i32, i32* %14, i32 2
+/// %17 = getelementptr i32, i32* %15, i32 2
+/// %18 = load i32, i32* %16
+/// %19 = load i32, i32* %17
+/// %20 = call i32 @llvm.bswap.i32(i32 %18)
+/// %21 = call i32 @llvm.bswap.i32(i32 %19)
+/// %22 = zext i32 %20 to i64
+/// %23 = zext i32 %21 to i64
+/// %24 = sub i64 %22, %23
+/// %25 = icmp ne i64 %24, 0
+/// br i1 %25, label %res_block, label %loadbb2
+/// loadbb2: ; preds = %loadbb1
+/// %26 = bitcast i32* %buffer2 to i8*
+/// %27 = bitcast i32* %buffer1 to i8*
+/// %28 = bitcast i8* %27 to i16*
+/// %29 = bitcast i8* %26 to i16*
+/// %30 = getelementptr i16, i16* %28, i16 6
+/// %31 = getelementptr i16, i16* %29, i16 6
+/// %32 = load i16, i16* %30
+/// %33 = load i16, i16* %31
+/// %34 = call i16 @llvm.bswap.i16(i16 %32)
+/// %35 = call i16 @llvm.bswap.i16(i16 %33)
+/// %36 = zext i16 %34 to i64
+/// %37 = zext i16 %35 to i64
+/// %38 = sub i64 %36, %37
+/// %39 = icmp ne i64 %38, 0
+/// br i1 %39, label %res_block, label %loadbb3
+/// loadbb3: ; preds = %loadbb2
+/// %40 = bitcast i32* %buffer2 to i8*
+/// %41 = bitcast i32* %buffer1 to i8*
+/// %42 = getelementptr i8, i8* %41, i8 14
+/// %43 = getelementptr i8, i8* %40, i8 14
+/// %44 = load i8, i8* %42
+/// %45 = load i8, i8* %43
+/// %46 = zext i8 %44 to i32
+/// %47 = zext i8 %45 to i32
+/// %48 = sub i32 %46, %47
+/// br label %endblock
+/// endblock: ; preds = %res_block,
+/// %loadbb3
+/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
+/// ret i32 %phi.res
+static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
+ const TargetLowering *TLI, const DataLayout *DL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU, const bool IsBCmp) {
+ NumMemCmpCalls++;
+
+ // Early exit from expansion if -Oz.
+ if (CI->getFunction()->hasMinSize())
+ return false;
+
+ // Early exit from expansion if size is not a constant.
+ ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeCast) {
+ NumMemCmpNotConstant++;
+ return false;
+ }
+ const uint64_t SizeVal = SizeCast->getZExtValue();
+
+ if (SizeVal == 0) {
+ return false;
+ }
+ // TTI call to check if target would like to expand memcmp. Also, get the
+ // available load sizes.
+ const bool IsUsedForZeroCmp =
+ IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ auto Options = TTI->enableMemCmpExpansion(OptForSize,
+ IsUsedForZeroCmp);
+ if (!Options) return false;
+
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
+
+ if (OptForSize &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
+
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
+
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
+
+ // Don't expand if this will require more loads than desired by the target.
+ if (Expansion.getNumLoads() == 0) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
+
+ NumMemCmpInlined++;
+
+ Value *Res = Expansion.getMemCmpExpansion();
+
+ // Replace call with result of expansion and erase call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+
+ return true;
+}
+
+class ExpandMemCmpPass : public FunctionPass {
+public:
+ static char ID;
+
+ ExpandMemCmpPass() : FunctionPass(ID) {
+ initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F)) return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC) {
+ return false;
+ }
+ const TargetLowering* TL =
+ TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
+
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ DominatorTree *DT = nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
+ return !PA.areAllPreserved();
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT);
+ // Returns true if a change was made.
+ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
+};
+
+bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU) {
+ for (Instruction& I : BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI) {
+ continue;
+ }
+ LibFunc Func;
+ if (TLI->getLibFunc(*CI, Func) &&
+ (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+PreservedAnalyses
+ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT) {
+ std::optional<DomTreeUpdater> DTU;
+ if (DT)
+ DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ const DataLayout& DL = F.getParent()->getDataLayout();
+ bool MadeChanges = false;
+ for (auto BBIt = F.begin(); BBIt != F.end();) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
+ MadeChanges = true;
+ // If changes were made, restart the function from the beginning, since
+ // the structure of the function was changed.
+ BBIt = F.begin();
+ } else {
+ ++BBIt;
+ }
+ }
+ if (MadeChanges)
+ for (BasicBlock &BB : F)
+ SimplifyInstructionsInBlock(&BB);
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+} // namespace
+
+char ExpandMemCmpPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
+ "Expand memcmp() to load/stores", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
+ "Expand memcmp() to load/stores", false, false)
+
+FunctionPass *llvm::createExpandMemCmpPass() {
+ return new ExpandMemCmpPass();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
new file mode 100644
index 000000000000..3a79f20f4732
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -0,0 +1,161 @@
+//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo
+// instructions after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "postrapseudos"
+
+namespace {
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&) override;
+
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+};
+} // end anonymous namespace
+
+char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
+
+INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE,
+ "Post-RA pseudo instruction expansion pass", false, false)
+
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ Register DstReg = MI->getOperand(0).getReg();
+ Register InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ Register DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(DstReg.isPhysical() &&
+ "Insert destination must be in a physical register");
+ assert(InsReg.isPhysical() &&
+ "Inserted value must be in a physical register");
+
+ LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->removeOperand(3); // SubIdx
+ MI->removeOperand(1); // Imm
+ LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ // Watch out for case like this:
+ // %rax = SUBREG_TO_REG 0, killed %eax, 3
+ // We must leave %rax live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->removeOperand(3); // SubIdx
+ MI->removeOperand(1); // Imm
+ LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+ LLVM_DEBUG(dbgs() << "subreg: " << *CopyMI);
+ }
+
+ LLVM_DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
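+// Illustrative sketch (hypothetical X86 registers): a pseudo such as
+//   $rax = SUBREG_TO_REG 0, $ecx, %subreg.sub_32bit
+// is lowered above to a copy into the destination's sub-register, with an
+// implicit def added so the full register stays live:
+//   $eax = COPY $ecx, implicit-def $rax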
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "Machine Function\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ // Only expand pseudos.
+ if (!MI.isPseudo())
+ continue;
+
+ // Give targets a chance to expand even standard pseudos.
+ if (TII->expandPostRAPseudo(MI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Expand standard pseudos.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::SUBREG_TO_REG:
+ MadeChange |= LowerSubregToReg(&MI);
+ break;
+ case TargetOpcode::COPY:
+ TII->lowerCopy(&MI, TRI);
+ MadeChange = true;
+ break;
+ case TargetOpcode::DBG_VALUE:
+ continue;
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::EXTRACT_SUBREG:
+ llvm_unreachable("Sub-register pseudos should have been eliminated.");
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
new file mode 100644
index 000000000000..79b6dc9154b3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -0,0 +1,240 @@
+//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for reduction intrinsics, allowing targets
+// to enable the intrinsics until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+unsigned getOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::vector_reduce_fadd:
+ return Instruction::FAdd;
+ case Intrinsic::vector_reduce_fmul:
+ return Instruction::FMul;
+ case Intrinsic::vector_reduce_add:
+ return Instruction::Add;
+ case Intrinsic::vector_reduce_mul:
+ return Instruction::Mul;
+ case Intrinsic::vector_reduce_and:
+ return Instruction::And;
+ case Intrinsic::vector_reduce_or:
+ return Instruction::Or;
+ case Intrinsic::vector_reduce_xor:
+ return Instruction::Xor;
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ return Instruction::ICmp;
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unexpected ID");
+ }
+}
+
+RecurKind getRK(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::vector_reduce_smax:
+ return RecurKind::SMax;
+ case Intrinsic::vector_reduce_smin:
+ return RecurKind::SMin;
+ case Intrinsic::vector_reduce_umax:
+ return RecurKind::UMax;
+ case Intrinsic::vector_reduce_umin:
+ return RecurKind::UMin;
+ case Intrinsic::vector_reduce_fmax:
+ return RecurKind::FMax;
+ case Intrinsic::vector_reduce_fmin:
+ return RecurKind::FMin;
+ default:
+ return RecurKind::None;
+ }
+}
+
+bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
+ bool Changed = false;
+ SmallVector<IntrinsicInst *, 4> Worklist;
+ for (auto &I : instructions(F)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ if (TTI->shouldExpandReduction(II))
+ Worklist.push_back(II);
+
+ break;
+ }
+ }
+ }
+
+ for (auto *II : Worklist) {
+ FastMathFlags FMF =
+ isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+ Intrinsic::ID ID = II->getIntrinsicID();
+ RecurKind RK = getRK(ID);
+
+ Value *Rdx = nullptr;
+ IRBuilder<> Builder(II);
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
+ switch (ID) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ // Unless the call carries the 'reassoc' fast-math flag, this is an ordered
+ // reduction and cannot be handled by generating a shuffle sequence.
+ Value *Acc = II->getArgOperand(0);
+ Value *Vec = II->getArgOperand(1);
+ if (!FMF.allowReassoc())
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
+ else {
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()))
+ continue;
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+ Acc, Rdx, "bin.rdx");
+ }
+ break;
+ }
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or: {
+ // Canonicalize logical or/and reductions:
+ // Or reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp ne iReduxWidth %val, 0
+ // And reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp eq iReduxWidth %val, -1 (all bits set)
+ Value *Vec = II->getArgOperand(0);
+ auto *FTy = cast<FixedVectorType>(Vec->getType());
+ unsigned NumElts = FTy->getNumElements();
+ if (!isPowerOf2_32(NumElts))
+ continue;
+
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
+ if (ID == Intrinsic::vector_reduce_and) {
+ Rdx = Builder.CreateICmpEQ(
+ Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
+ } else {
+ assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
+ Rdx = Builder.CreateIsNotNull(Rdx);
+ }
+ break;
+ }
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin: {
+ Value *Vec = II->getArgOperand(0);
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()))
+ continue;
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin: {
+ // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
+ // semantics of the reduction.
+ Value *Vec = II->getArgOperand(0);
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
+ !FMF.noNaNs())
+ continue;
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
+ }
+ II->replaceAllUsesWith(Rdx);
+ II->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+}
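+// Rough sketch of what the shuffle-reduction expansion looks like for
+//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
+// (log2(N) shuffle/op steps followed by an element extract):
+//   %s1 = shufflevector <4 x i32> %v,  <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+//   %a1 = add <4 x i32> %v, %s1
+//   %s2 = shufflevector <4 x i32> %a1, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+//   %a2 = add <4 x i32> %a1, %s2
+//   %r  = extractelement <4 x i32> %a2, i32 0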
+
+class ExpandReductions : public FunctionPass {
+public:
+ static char ID;
+ ExpandReductions() : FunctionPass(ID) {
+ initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return expandReductions(F, TTI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char ExpandReductions::ID;
+INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+
+FunctionPass *llvm::createExpandReductionsPass() {
+ return new ExpandReductions();
+}
+
+PreservedAnalyses ExpandReductionsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!expandReductions(F, &TTI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
new file mode 100644
index 000000000000..9807be0bea39
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -0,0 +1,769 @@
+//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for vector predication intrinsics, allowing
+// targets to enable vector predication until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandVectorPredication.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <optional>
+
+using namespace llvm;
+
+using VPLegalization = TargetTransformInfo::VPLegalization;
+using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
+
+// Keep this in sync with TargetTransformInfo::VPLegalization.
+#define VPINTERNAL_VPLEGAL_CASES \
+ VPINTERNAL_CASE(Legal) \
+ VPINTERNAL_CASE(Discard) \
+ VPINTERNAL_CASE(Convert)
+
+#define VPINTERNAL_CASE(X) "|" #X
+
+// Override options.
+static cl::opt<std::string> EVLTransformOverride(
+ "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
+ cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
+ ". If non-empty, ignore "
+ "TargetTransformInfo and "
+ "always use this transformation for the %evl parameter (Used in "
+ "testing)."));
+
+static cl::opt<std::string> MaskTransformOverride(
+ "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
+ cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
+ ". If non-empty, Ignore "
+ "TargetTransformInfo and "
+ "always use this transformation for the %mask parameter (Used in "
+ "testing)."));
+
+#undef VPINTERNAL_CASE
+#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
+
+static VPTransform parseOverrideOption(const std::string &TextOpt) {
+ return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
+}
+
+#undef VPINTERNAL_VPLEGAL_CASES
+
+// Whether any override options are set.
+static bool anyExpandVPOverridesSet() {
+ return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
+}
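+// For example, a hypothetical testing invocation that forces every %evl
+// parameter to be folded into the mask while leaving the mask itself alone:
+//   llc -expandvp-override-evl-transform=Convert \
+//       -expandvp-override-mask-transform=Legal input.ll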
+
+#define DEBUG_TYPE "expandvp"
+
+STATISTIC(NumFoldedVL, "Number of folded vector length params");
+STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
+
+///// Helpers {
+
+/// \returns Whether the vector mask \p MaskVal has all lane bits set.
+static bool isAllTrueMask(Value *MaskVal) {
+ if (Value *SplattedVal = getSplatValue(MaskVal))
+ if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+ return ConstValue->isAllOnesValue();
+
+ return false;
+}
+
+/// \returns A non-excepting divisor constant for this type.
+static Constant *getSafeDivisor(Type *DivTy) {
+ assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
+ return ConstantInt::get(DivTy, 1u, false);
+}
+
+/// Transfer operation properties from \p OldVPI to \p NewVal.
+static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
+ auto *NewInst = dyn_cast<Instruction>(&NewVal);
+ if (!NewInst || !isa<FPMathOperator>(NewVal))
+ return;
+
+ auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
+ if (!OldFMOp)
+ return;
+
+ NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
+}
+
+/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
+/// OldVP gets erased.
+static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
+ transferDecorations(NewOp, OldOp);
+ OldOp.replaceAllUsesWith(&NewOp);
+ OldOp.eraseFromParent();
+}
+
+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+ // The result of VP reductions depends on the mask and evl.
+ if (isa<VPReductionIntrinsic>(VPI))
+ return false;
+ // Fallback to whether the intrinsic is speculatable.
+ std::optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
+ unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
+ return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
+}
+
+//// } Helpers
+
+namespace {
+
+// Expansion pass state at function scope.
+struct CachingVPExpander {
+ Function &F;
+ const TargetTransformInfo &TTI;
+
+ /// \returns A (fixed length) vector with ascending integer indices
+ /// (<0, 1, ..., NumElems-1>).
+ /// \p Builder
+ /// Used for instruction creation.
+ /// \p LaneTy
+ /// Integer element type of the result vector.
+ /// \p NumElems
+ /// Number of vector elements.
+ Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
+ unsigned NumElems);
+
+ /// \returns A bitmask that is true where the lane position is less-than \p
+ /// EVLParam
+ ///
+ /// \p Builder
+ /// Used for instruction creation.
+ /// \p EVLParam
+ /// The explicit vector length parameter to test against the lane
+ /// positions.
+ /// \p ElemCount
+ /// Static (potentially scalable) number of vector elements.
+ Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
+ ElementCount ElemCount);
+
+ Value *foldEVLIntoMask(VPIntrinsic &VPI);
+
+ /// "Remove" the %evl parameter of \p PI by setting it to the static vector
+ /// length of the operation.
+ void discardEVLParameter(VPIntrinsic &PI);
+
+ /// Lower this VP binary operator to an unpredicated binary operator.
+ Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
+ VPIntrinsic &PI);
+
+ /// Lower this VP fp call to an unpredicated fp call.
+ Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
+ unsigned UnpredicatedIntrinsicID);
+
+ /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
+ Value *expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &PI);
+
+ /// Lower this VP memory operation to a non-VP intrinsic.
+ Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI);
+
+ /// Lower this VP comparison to a call to an unpredicated comparison.
+ Value *expandPredicationInComparison(IRBuilder<> &Builder,
+ VPCmpIntrinsic &PI);
+
+ /// Query TTI and expand the vector predication in \p PI accordingly.
+ Value *expandPredication(VPIntrinsic &PI);
+
+ /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
+ /// overrides TTI with the cl::opts listed at the top of this file.
+ VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
+ bool UsingTTIOverrides;
+
+public:
+ CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
+ : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
+
+ bool expandVectorPredication();
+};
+
+//// CachingVPExpander {
+
+Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
+ unsigned NumElems) {
+ // TODO add caching
+ SmallVector<Constant *, 16> ConstElems;
+
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx)
+ ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
+
+ return ConstantVector::get(ConstElems);
+}
+
+Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
+ Value *EVLParam,
+ ElementCount ElemCount) {
+ // TODO add caching
+ // Scalable vector %evl conversion.
+ if (ElemCount.isScalable()) {
+ auto *M = Builder.GetInsertBlock()->getModule();
+ Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
+ Function *ActiveMaskFunc = Intrinsic::getDeclaration(
+ M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
+ // `get_active_lane_mask` performs an implicit less-than comparison.
+ Value *ConstZero = Builder.getInt32(0);
+ return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
+ }
+
+ // Fixed vector %evl conversion.
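+ // For example, with %evl = 3 on a 4-element vector this compares
+ // <0, 1, 2, 3> u< <3, 3, 3, 3>, producing the mask <1, 1, 1, 0>.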
+ Type *LaneTy = EVLParam->getType();
+ unsigned NumElems = ElemCount.getFixedValue();
+ Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
+ Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
+ return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
+}
+
+Value *
+CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
+ VPIntrinsic &VPI) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
+ assert(Instruction::isBinaryOp(OC));
+
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Value *Mask = VPI.getMaskParam();
+
+ // Blend in safe operands.
+ if (Mask && !isAllTrueMask(Mask)) {
+ switch (OC) {
+ default:
+ // Can safely ignore the predicate.
+ break;
+
+ // Division operators need a safe divisor on masked-off lanes (1).
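+ // For example, a masked-off lane of a vp.sdiv may hold a zero divisor;
+ // once the predicate is dropped that lane would divide by zero, so a
+ // divisor of 1 is selected into all disabled lanes first.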
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ // 2nd operand must not be zero.
+ Value *SafeDivisor = getSafeDivisor(VPI.getType());
+ Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
+ }
+ }
+
+ Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());
+
+ replaceOperation(*NewBinOp, VPI);
+ return NewBinOp;
+}
+
+Value *CachingVPExpander::expandPredicationToFPCall(
+ IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ switch (UnpredicatedIntrinsicID) {
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt: {
+ Value *Op0 = VPI.getOperand(0);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fmuladd: {
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Value *Op2 = VPI.getOperand(2);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp =
+ Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ }
+
+ return nullptr;
+}
+
+static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
+ Type *EltTy) {
+ bool Negative = false;
+ unsigned EltBits = EltTy->getScalarSizeInBits();
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Expecting a VP reduction intrinsic");
+ case Intrinsic::vp_reduce_add:
+ case Intrinsic::vp_reduce_or:
+ case Intrinsic::vp_reduce_xor:
+ case Intrinsic::vp_reduce_umax:
+ return Constant::getNullValue(EltTy);
+ case Intrinsic::vp_reduce_mul:
+ return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
+ case Intrinsic::vp_reduce_and:
+ case Intrinsic::vp_reduce_umin:
+ return ConstantInt::getAllOnesValue(EltTy);
+ case Intrinsic::vp_reduce_smin:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMaxValue(EltBits));
+ case Intrinsic::vp_reduce_smax:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMinValue(EltBits));
+ case Intrinsic::vp_reduce_fmax:
+ Negative = true;
+ [[fallthrough]];
+ case Intrinsic::vp_reduce_fmin: {
+ FastMathFlags Flags = VPI.getFastMathFlags();
+ const fltSemantics &Semantics = EltTy->getFltSemantics();
+ return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+ : !Flags.noInfs()
+ ? ConstantFP::getInfinity(EltTy, Negative)
+ : ConstantFP::get(EltTy,
+ APFloat::getLargest(Semantics, Negative));
+ }
+ case Intrinsic::vp_reduce_fadd:
+ return ConstantFP::getNegativeZero(EltTy);
+ case Intrinsic::vp_reduce_fmul:
+ return ConstantFP::get(EltTy, 1.0);
+ }
+}
+
+Value *
+CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &VPI) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ Value *Mask = VPI.getMaskParam();
+ Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());
+
+ // Insert neutral element in masked-out positions
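+ // (e.g. 0 for vp.reduce.add, 1 for vp.reduce.mul, -0.0 for vp.reduce.fadd)
+ // so that disabled lanes do not affect the reduction result.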
+ if (Mask && !isAllTrueMask(Mask)) {
+ auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
+ auto *NeutralVector = Builder.CreateVectorSplat(
+ cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
+ RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
+ }
+
+ Value *Reduction;
+ Value *Start = VPI.getOperand(VPI.getStartParamPos());
+
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Impossible reduction kind");
+ case Intrinsic::vp_reduce_add:
+ Reduction = Builder.CreateAddReduce(RedOp);
+ Reduction = Builder.CreateAdd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_mul:
+ Reduction = Builder.CreateMulReduce(RedOp);
+ Reduction = Builder.CreateMul(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_and:
+ Reduction = Builder.CreateAndReduce(RedOp);
+ Reduction = Builder.CreateAnd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_or:
+ Reduction = Builder.CreateOrReduce(RedOp);
+ Reduction = Builder.CreateOr(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_xor:
+ Reduction = Builder.CreateXorReduce(RedOp);
+ Reduction = Builder.CreateXor(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmax:
+ Reduction = Builder.CreateFPMaxReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmin:
+ Reduction = Builder.CreateFPMinReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fadd:
+ Reduction = Builder.CreateFAddReduce(Start, RedOp);
+ break;
+ case Intrinsic::vp_reduce_fmul:
+ Reduction = Builder.CreateFMulReduce(Start, RedOp);
+ break;
+ }
+
+ replaceOperation(*Reduction, VPI);
+ return Reduction;
+}
+
+Value *
+CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI) {
+ assert(VPI.canIgnoreVectorLengthParam());
+
+ const auto &DL = F.getParent()->getDataLayout();
+
+ Value *MaskParam = VPI.getMaskParam();
+ Value *PtrParam = VPI.getMemoryPointerParam();
+ Value *DataParam = VPI.getMemoryDataParam();
+ bool IsUnmasked = isAllTrueMask(MaskParam);
+
+ MaybeAlign AlignOpt = VPI.getPointerAlignment();
+
+ Value *NewMemoryInst = nullptr;
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Not a VP memory intrinsic");
+ case Intrinsic::vp_store:
+ if (IsUnmasked) {
+ StoreInst *NewStore =
+ Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
+ if (AlignOpt.has_value())
+ NewStore->setAlignment(*AlignOpt);
+ NewMemoryInst = NewStore;
+ } else
+ NewMemoryInst = Builder.CreateMaskedStore(
+ DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);
+
+ break;
+ case Intrinsic::vp_load:
+ if (IsUnmasked) {
+ LoadInst *NewLoad =
+ Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
+ if (AlignOpt.has_value())
+ NewLoad->setAlignment(*AlignOpt);
+ NewMemoryInst = NewLoad;
+ } else
+ NewMemoryInst = Builder.CreateMaskedLoad(
+ VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);
+
+ break;
+ case Intrinsic::vp_scatter: {
+ auto *ElementType =
+ cast<VectorType>(DataParam->getType())->getElementType();
+ NewMemoryInst = Builder.CreateMaskedScatter(
+ DataParam, PtrParam,
+ AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
+ break;
+ }
+ case Intrinsic::vp_gather: {
+ auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
+ NewMemoryInst = Builder.CreateMaskedGather(
+ VPI.getType(), PtrParam,
+ AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
+ VPI.getName());
+ break;
+ }
+ }
+
+ assert(NewMemoryInst);
+ replaceOperation(*NewMemoryInst, VPI);
+ return NewMemoryInst;
+}
+
+Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
+ VPCmpIntrinsic &VPI) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
+ *VPI.getFunctionalOpcode() == Instruction::FCmp);
+
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ auto Pred = VPI.getPredicate();
+
+ auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);
+
+ replaceOperation(*NewCmp, VPI);
+ return NewCmp;
+}
+
+void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
+ LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
+
+ if (VPI.canIgnoreVectorLengthParam())
+ return;
+
+ Value *EVLParam = VPI.getVectorLengthParam();
+ if (!EVLParam)
+ return;
+
+ ElementCount StaticElemCount = VPI.getStaticVectorLength();
+ Value *MaxEVL = nullptr;
+ Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
+ if (StaticElemCount.isScalable()) {
+ // TODO add caching
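+ // For a scalable type such as <vscale x 4 x i32>, the maximum EVL is
+ // 4 * vscale, so it has to be computed at runtime via llvm.vscale.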
+ auto *M = VPI.getModule();
+ Function *VScaleFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
+ IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
+ Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
+ Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
+ MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
+ /*NUW*/ true, /*NSW*/ false);
+ } else {
+ MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
+ }
+ VPI.setVectorLengthParam(MaxEVL);
+}
+
+Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
+ LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
+
+ IRBuilder<> Builder(&VPI);
+
+ // The %evl parameter is ineffective, so there is nothing to do here.
+ if (VPI.canIgnoreVectorLengthParam())
+ return &VPI;
+
+ // Only VP intrinsics can have an %evl parameter.
+ Value *OldMaskParam = VPI.getMaskParam();
+ Value *OldEVLParam = VPI.getVectorLengthParam();
+ assert(OldMaskParam && "no mask param to fold the vl param into");
+ assert(OldEVLParam && "no EVL param to fold away");
+
+ LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
+ LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');
+
+ // Convert the %evl predication into vector mask predication.
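+ // A lane is enabled in the new mask only if it is both below %evl and
+ // enabled in the original %mask.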
+ ElementCount ElemCount = VPI.getStaticVectorLength();
+ Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
+ Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
+ VPI.setMaskParam(NewMaskParam);
+
+ // Drop the %evl parameter.
+ discardEVLParameter(VPI);
+ assert(VPI.canIgnoreVectorLengthParam() &&
+ "transformation did not render the evl param ineffective!");
+
+ // Reassess the modified instruction.
+ return &VPI;
+}
+
+Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
+ LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
+
+ IRBuilder<> Builder(&VPI);
+
+ // Try lowering to an LLVM instruction first.
+ auto OC = VPI.getFunctionalOpcode();
+
+ if (OC && Instruction::isBinaryOp(*OC))
+ return expandPredicationInBinaryOperator(Builder, VPI);
+
+ if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
+ return expandPredicationInReduction(Builder, *VPRI);
+
+ if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
+ return expandPredicationInComparison(Builder, *VPCmp);
+
+ switch (VPI.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::vp_fneg: {
+ Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
+ replaceOperation(*NewNegOp, VPI);
+ return NewNegOp;
+ }
+ case Intrinsic::vp_fabs:
+ return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
+ case Intrinsic::vp_sqrt:
+ return expandPredicationToFPCall(Builder, VPI, Intrinsic::sqrt);
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store:
+ case Intrinsic::vp_gather:
+ case Intrinsic::vp_scatter:
+ return expandPredicationInMemoryIntrinsic(Builder, VPI);
+ }
+
+ if (auto CID = VPI.getConstrainedIntrinsicID())
+ if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
+ return Call;
+
+ return &VPI;
+}
+
+//// } CachingVPExpander
+
+struct TransformJob {
+ VPIntrinsic *PI;
+ TargetTransformInfo::VPLegalization Strategy;
+ TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
+ : PI(PI), Strategy(InitStrat) {}
+
+ bool isDone() const { return Strategy.shouldDoNothing(); }
+};
+
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
+ // Operations with speculatable lanes do not strictly need predication.
+ if (maySpeculateLanes(VPI)) {
+ // Converting a speculatable VP intrinsic means dropping %mask and %evl.
+ // No need to expand %evl into the %mask only to ignore that code.
+ if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
+ LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
+ return;
+ }
+
+ // We have to preserve the predicating effect of %evl for this
+ // non-speculatable VP intrinsic.
+ // 1) Never discard %evl.
+ // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
+ // %evl gets folded into %mask.
+ if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
+ (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
+ LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
+ }
+}
+
+VPLegalization
+CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
+ auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
+ if (LLVM_LIKELY(!UsingTTIOverrides)) {
+ // No overrides - we are in production.
+ return VPStrat;
+ }
+
+ // Overrides set - we are in testing, the following does not need to be
+ // efficient.
+ VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
+ VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
+ return VPStrat;
+}
+
+/// Expand llvm.vp.* intrinsics as requested by \p TTI.
+bool CachingVPExpander::expandVectorPredication() {
+ SmallVector<TransformJob, 16> Worklist;
+
+ // Collect all VPIntrinsics that need expansion and determine their expansion
+ // strategy.
+ for (auto &I : instructions(F)) {
+ auto *VPI = dyn_cast<VPIntrinsic>(&I);
+ if (!VPI)
+ continue;
+ auto VPStrat = getVPLegalizationStrategy(*VPI);
+ sanitizeStrategy(*VPI, VPStrat);
+ if (!VPStrat.shouldDoNothing())
+ Worklist.emplace_back(VPI, VPStrat);
+ }
+ if (Worklist.empty())
+ return false;
+
+ // Transform all VPIntrinsics on the worklist.
+ LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
+ << " instructions ::::\n");
+ for (TransformJob Job : Worklist) {
+ // Transform the EVL parameter.
+ switch (Job.Strategy.EVLParamStrategy) {
+ case VPLegalization::Legal:
+ break;
+ case VPLegalization::Discard:
+ discardEVLParameter(*Job.PI);
+ break;
+ case VPLegalization::Convert:
+ if (foldEVLIntoMask(*Job.PI))
+ ++NumFoldedVL;
+ break;
+ }
+ Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
+
+ // Replace with a non-predicated operation.
+ switch (Job.Strategy.OpStrategy) {
+ case VPLegalization::Legal:
+ break;
+ case VPLegalization::Discard:
+ llvm_unreachable("Invalid strategy for operators.");
+ case VPLegalization::Convert:
+ expandPredication(*Job.PI);
+ ++NumLoweredVPOps;
+ break;
+ }
+ Job.Strategy.OpStrategy = VPLegalization::Legal;
+
+ assert(Job.isDone() && "incomplete transformation");
+ }
+
+ return true;
+}
+class ExpandVectorPredication : public FunctionPass {
+public:
+ static char ID;
+ ExpandVectorPredication() : FunctionPass(ID) {
+ initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ CachingVPExpander VPExpander(F, *TTI);
+ return VPExpander.expandVectorPredication();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+} // namespace
+
+char ExpandVectorPredication::ID;
+INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
+ "Expand vector predication intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
+ "Expand vector predication intrinsics", false, false)
+
+FunctionPass *llvm::createExpandVectorPredicationPass() {
+ return new ExpandVectorPredication();
+}
+
+PreservedAnalyses
+ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
+ const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ CachingVPExpander VPExpander(F, TTI);
+ if (!VPExpander.expandVectorPredication())
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
new file mode 100644
index 000000000000..68304dd41db0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -0,0 +1,50 @@
+//===-- FEntryInserter.cpp - Patchable prologues for LLVM --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file edits function bodies to insert fentry calls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+namespace {
+struct FEntryInserter : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ FEntryInserter() : MachineFunctionPass(ID) {
+ initializeFEntryInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+}
+
+bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
+ const std::string FEntryName = std::string(
+ MF.getFunction().getFnAttribute("fentry-call").getValueAsString());
+ if (FEntryName != "true")
+ return false;
+
+ auto &FirstMBB = *MF.begin();
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::FENTRY_CALL));
+ return true;
+}
+
+char FEntryInserter::ID = 0;
+char &llvm::FEntryInserterID = FEntryInserter::ID;
+INITIALIZE_PASS(FEntryInserter, "fentry-insert", "Insert fentry calls", false,
+ false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
new file mode 100644
index 000000000000..3f8fe2402d65
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
@@ -0,0 +1,114 @@
+//===- FaultMaps.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FaultMaps.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "faultmaps"
+
+static const int FaultMapVersion = 1;
+const char *FaultMaps::WFMP = "Fault Maps: ";
+
+FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {}
+
+void FaultMaps::recordFaultingOp(FaultKind FaultTy,
+ const MCSymbol *FaultingLabel,
+ const MCSymbol *HandlerLabel) {
+ MCContext &OutContext = AP.OutStreamer->getContext();
+
+ const MCExpr *FaultingOffset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(FaultingLabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ const MCExpr *HandlerOffset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(HandlerLabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ FunctionInfos[AP.CurrentFnSym].emplace_back(FaultTy, FaultingOffset,
+ HandlerOffset);
+}
+
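+// Emit the fault map section. As laid out below, it starts with a header
+// (uint8 version, uint8 and uint16 reserved fields, uint32 function count),
+// followed by one FunctionInfo record per function with recorded faults.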
+void FaultMaps::serializeToFaultMapSection() {
+ if (FunctionInfos.empty())
+ return;
+
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCStreamer &OS = *AP.OutStreamer;
+
+ // Create the section.
+ MCSection *FaultMapSection =
+ OutContext.getObjectFileInfo()->getFaultMapSection();
+ OS.switchSection(FaultMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
+
+ LLVM_DEBUG(dbgs() << "********** Fault Map Output **********\n");
+
+ // Header
+ OS.emitIntValue(FaultMapVersion, 1); // Version.
+ OS.emitIntValue(0, 1); // Reserved.
+ OS.emitInt16(0); // Reserved.
+
+ LLVM_DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");
+ OS.emitInt32(FunctionInfos.size());
+
+ LLVM_DEBUG(dbgs() << WFMP << "functions:\n");
+
+ for (const auto &FFI : FunctionInfos)
+ emitFunctionInfo(FFI.first, FFI.second);
+}
+
+void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
+ const FunctionFaultInfos &FFI) {
+ MCStreamer &OS = *AP.OutStreamer;
+
+ LLVM_DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n");
+ OS.emitSymbolValue(FnLabel, 8);
+
+ LLVM_DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n");
+ OS.emitInt32(FFI.size());
+
+ OS.emitInt32(0); // Reserved
+
+ for (const auto &Fault : FFI) {
+ LLVM_DEBUG(dbgs() << WFMP << " fault type: "
+ << faultTypeToString(Fault.Kind) << "\n");
+ OS.emitInt32(Fault.Kind);
+
+ LLVM_DEBUG(dbgs() << WFMP << " faulting PC offset: "
+ << *Fault.FaultingOffsetExpr << "\n");
+ OS.emitValue(Fault.FaultingOffsetExpr, 4);
+
+ LLVM_DEBUG(dbgs() << WFMP << " fault handler PC offset: "
+ << *Fault.HandlerOffsetExpr << "\n");
+ OS.emitValue(Fault.HandlerOffsetExpr, 4);
+ }
+}
+
+const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
+ switch (FT) {
+ default:
+ llvm_unreachable("unhandled fault type!");
+ case FaultMaps::FaultingLoad:
+ return "FaultingLoad";
+ case FaultMaps::FaultingLoadStore:
+ return "FaultingLoadStore";
+ case FaultMaps::FaultingStore:
+ return "FaultingStore";
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
new file mode 100644
index 000000000000..329c9587e321
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -0,0 +1,75 @@
+//===-- llvm/CodeGen/FinalizeISel.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// This pass expands Pseudo-instructions produced by ISel, fixes register
+/// reservations and may do machine frame information adjustments.
+/// The pseudo instructions are used to allow the expansion to contain control
+/// flow, such as a conditional move implemented with a conditional branch and a
+/// phi, or an atomic operation implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "finalize-isel"
+
+namespace {
+ class FinalizeISel : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ FinalizeISel() : MachineFunctionPass(ID) {}
+
+ private:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char FinalizeISel::ID = 0;
+char &llvm::FinalizeISelID = FinalizeISel::ID;
+INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
+ "Finalize ISel and expand pseudo-instructions", false, false)
+
+bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr &MI = *MBBI++;
+
+ // If MI is a pseudo, expand it.
+ if (MI.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB->getIterator();
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ TLI->finalizeLowering(MF);
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
new file mode 100644
index 000000000000..75504ef32250
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -0,0 +1,628 @@
+//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// A statepoint instruction's deopt parameters contain values that are
+/// meaningful to the runtime and must still be readable at the moment the
+/// call returns. In other words, we need to encode the fact that these
+/// values are read "late" by the runtime. If we could express this notion to
+/// the register allocator, it would produce the right form for us.
+/// The need for a fixup (i.e. this pass) comes from the fact that we cannot
+/// describe such a late read to the register allocator, which may therefore
+/// place a value in a register that is clobbered by the call.
+/// This pass forces such registers to be spilled and rewrites the
+/// corresponding statepoint operands to refer to the added spill slots.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "fixup-statepoint-caller-saved"
+STATISTIC(NumSpilledRegisters, "Number of spilled registers");
+STATISTIC(NumSpillSlotsAllocated, "Number of spill slots allocated");
+STATISTIC(NumSpillSlotsExtended, "Number of spill slots extended");
+
+static cl::opt<bool> FixupSCSExtendSlotSize(
+ "fixup-scs-extend-slot-size", cl::Hidden, cl::init(false),
+ cl::desc("Allow spill in spill slot of greater size than register size"),
+ cl::Hidden);
+
+static cl::opt<bool> PassGCPtrInCSR(
+ "fixup-allow-gcptr-in-csr", cl::Hidden, cl::init(false),
+ cl::desc("Allow passing GC Pointer arguments in callee saved registers"));
+
+static cl::opt<bool> EnableCopyProp(
+ "fixup-scs-enable-copy-propagation", cl::Hidden, cl::init(true),
+ cl::desc("Enable simple copy propagation during register reloading"));
+
+// This is purely a debugging option.
+// It may be handy for investigating statepoint spilling issues.
+static cl::opt<unsigned> MaxStatepointsWithRegs(
+ "fixup-max-csr-statepoints", cl::Hidden,
+ cl::desc("Max number of statepoints allowed to pass GC Ptrs in registers"));
+
+namespace {
+
+class FixupStatepointCallerSaved : public MachineFunctionPass {
+public:
+ static char ID;
+
+ FixupStatepointCallerSaved() : MachineFunctionPass(ID) {
+ initializeFixupStatepointCallerSavedPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "Fixup Statepoint Caller Saved";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace.
+
+char FixupStatepointCallerSaved::ID = 0;
+char &llvm::FixupStatepointCallerSavedID = FixupStatepointCallerSaved::ID;
+
+INITIALIZE_PASS_BEGIN(FixupStatepointCallerSaved, DEBUG_TYPE,
+ "Fixup Statepoint Caller Saved", false, false)
+INITIALIZE_PASS_END(FixupStatepointCallerSaved, DEBUG_TYPE,
+ "Fixup Statepoint Caller Saved", false, false)
+
+// Utility function to get size of the register.
+static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ return TRI.getSpillSize(*RC);
+}
+
+// Try to eliminate redundant copy to register which we're going to
+// spill, i.e. try to change:
+// X = COPY Y
+// SPILL X
+// to
+// SPILL Y
+// If there are no uses of X between copy and STATEPOINT, that COPY
+// may be eliminated.
+// Reg - register we're about to spill
+// RI - On entry points to statepoint.
+// On successful copy propagation set to new spill point.
+// IsKill - set to true if COPY is Kill (there are no uses of Y)
+// Returns either found source copy register or original one.
+static Register performCopyPropagation(Register Reg,
+ MachineBasicBlock::iterator &RI,
+ bool &IsKill, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ // First check if statepoint itself uses Reg in non-meta operands.
+ int Idx = RI->findRegisterUseOperandIdx(Reg, false, &TRI);
+ if (Idx >= 0 && (unsigned)Idx < StatepointOpers(&*RI).getNumDeoptArgsIdx()) {
+ IsKill = false;
+ return Reg;
+ }
+
+ if (!EnableCopyProp)
+ return Reg;
+
+ MachineBasicBlock *MBB = RI->getParent();
+ MachineBasicBlock::reverse_iterator E = MBB->rend();
+ MachineInstr *Def = nullptr, *Use = nullptr;
+ for (auto It = ++(RI.getReverse()); It != E; ++It) {
+ if (It->readsRegister(Reg, &TRI) && !Use)
+ Use = &*It;
+ if (It->modifiesRegister(Reg, &TRI)) {
+ Def = &*It;
+ break;
+ }
+ }
+
+ if (!Def)
+ return Reg;
+
+ auto DestSrc = TII.isCopyInstr(*Def);
+ if (!DestSrc || DestSrc->Destination->getReg() != Reg)
+ return Reg;
+
+ Register SrcReg = DestSrc->Source->getReg();
+
+ if (getRegisterSize(TRI, Reg) != getRegisterSize(TRI, SrcReg))
+ return Reg;
+
+ LLVM_DEBUG(dbgs() << "spillRegisters: perform copy propagation "
+ << printReg(Reg, &TRI) << " -> " << printReg(SrcReg, &TRI)
+ << "\n");
+
+ // Insert spill immediately after Def
+ RI = ++MachineBasicBlock::iterator(Def);
+ IsKill = DestSrc->Source->isKill();
+
+ if (!Use) {
+ // There are no uses of original register between COPY and STATEPOINT.
+ // There can't be any after STATEPOINT, so we can eliminate Def.
+ LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
+ Def->eraseFromParent();
+ } else if (IsKill) {
+ // COPY will remain in place, spill will be inserted *after* it, so it is
+ // not a kill of source anymore.
+ const_cast<MachineOperand *>(DestSrc->Source)->setIsKill(false);
+ }
+
+ return SrcReg;
+}
+
+namespace {
+// Pair {Register, FrameIndex}
+using RegSlotPair = std::pair<Register, int>;
+
+// Keeps track of what reloads were inserted in MBB.
+class RegReloadCache {
+ using ReloadSet = SmallSet<RegSlotPair, 8>;
+ DenseMap<const MachineBasicBlock *, ReloadSet> Reloads;
+
+public:
+ RegReloadCache() = default;
+
+ // Record reload of Reg from FI in block MBB
+ void recordReload(Register Reg, int FI, const MachineBasicBlock *MBB) {
+ RegSlotPair RSP(Reg, FI);
+ auto Res = Reloads[MBB].insert(RSP);
+ (void)Res;
+ assert(Res.second && "reload already exists");
+ }
+
+ // Does basic block MBB contain a reload of Reg from FI?
+ bool hasReload(Register Reg, int FI, const MachineBasicBlock *MBB) {
+ RegSlotPair RSP(Reg, FI);
+ return Reloads.count(MBB) && Reloads[MBB].count(RSP);
+ }
+};
+
+// Cache the frame indexes used while rewriting a statepoint so they can be
+// re-used when processing the next statepoint instruction.
+// There are two strategies: one preserves the size of each spill slot, the
+// other extends spill slots to reduce their number, resulting in a smaller
+// total frame size. With the latter, reloads implicitly any-extend.
+class FrameIndexesCache {
+private:
+ struct FrameIndexesPerSize {
+ // List of used frame indexes during processing previous statepoints.
+ SmallVector<int, 8> Slots;
+ // Index of the first frame index that has not been used yet.
+ unsigned Index = 0;
+ };
+ MachineFrameInfo &MFI;
+ const TargetRegisterInfo &TRI;
+ // Map a size to the list of frame indexes of that size. In
+ // FixupSCSExtendSlotSize mode the key 0 is used to keep all frame indexes.
+ // If the required spill slot is larger than the cached one, the slot size
+ // is increased.
+ DenseMap<unsigned, FrameIndexesPerSize> Cache;
+
+ // Keeps track of slots reserved for the shared landing pad processing.
+ // Initialized from GlobalIndices for the current EHPad.
+ SmallSet<int, 8> ReservedSlots;
+
+ // A landing pad can be the destination of several statepoints. Every
+ // register defined by such statepoints must be spilled to the same stack
+ // slot. This map keeps that information.
+ DenseMap<const MachineBasicBlock *, SmallVector<RegSlotPair, 8>>
+ GlobalIndices;
+
+ FrameIndexesPerSize &getCacheBucket(unsigned Size) {
+ // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
+ // for all sizes.
+ return Cache[FixupSCSExtendSlotSize ? 0 : Size];
+ }
+
+public:
+ FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
+ : MFI(MFI), TRI(TRI) {}
+ // Reset the current state of used frame indexes. After invocation of
+ // this function all frame indexes are available for allocation with
+ // the exception of slots reserved for landing pad processing (if any).
+ void reset(const MachineBasicBlock *EHPad) {
+ for (auto &It : Cache)
+ It.second.Index = 0;
+
+ ReservedSlots.clear();
+ if (EHPad && GlobalIndices.count(EHPad))
+ for (auto &RSP : GlobalIndices[EHPad])
+ ReservedSlots.insert(RSP.second);
+ }
+
+ // Get frame index to spill the register.
+ int getFrameIndex(Register Reg, MachineBasicBlock *EHPad) {
+ // Check if slot for Reg is already reserved at EHPad.
+ auto It = GlobalIndices.find(EHPad);
+ if (It != GlobalIndices.end()) {
+ auto &Vec = It->second;
+ auto Idx = llvm::find_if(
+ Vec, [Reg](RegSlotPair &RSP) { return Reg == RSP.first; });
+ if (Idx != Vec.end()) {
+ int FI = Idx->second;
+ LLVM_DEBUG(dbgs() << "Found global FI " << FI << " for register "
+ << printReg(Reg, &TRI) << " at "
+ << printMBBReference(*EHPad) << "\n");
+ assert(ReservedSlots.count(FI) && "using unreserved slot");
+ return FI;
+ }
+ }
+
+ unsigned Size = getRegisterSize(TRI, Reg);
+ FrameIndexesPerSize &Line = getCacheBucket(Size);
+ while (Line.Index < Line.Slots.size()) {
+ int FI = Line.Slots[Line.Index++];
+ if (ReservedSlots.count(FI))
+ continue;
+ // If all sizes are kept together we probably need to extend the
+ // spill slot size.
+ if (MFI.getObjectSize(FI) < Size) {
+ MFI.setObjectSize(FI, Size);
+ MFI.setObjectAlignment(FI, Align(Size));
+ NumSpillSlotsExtended++;
+ }
+ return FI;
+ }
+ int FI = MFI.CreateSpillStackObject(Size, Align(Size));
+ NumSpillSlotsAllocated++;
+ Line.Slots.push_back(FI);
+ ++Line.Index;
+
+ // Remember assignment {Reg, FI} for EHPad
+ if (EHPad) {
+ GlobalIndices[EHPad].push_back(std::make_pair(Reg, FI));
+ LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling reg "
+ << printReg(Reg, &TRI) << " at landing pad "
+ << printMBBReference(*EHPad) << "\n");
+ }
+
+ return FI;
+ }
+
+ // Sort all registers to spill in descending order. In
+ // FixupSCSExtendSlotSize mode this minimizes the total frame size.
+ // In non-FixupSCSExtendSlotSize mode this step can be skipped.
+ void sortRegisters(SmallVectorImpl<Register> &Regs) {
+ if (!FixupSCSExtendSlotSize)
+ return;
+ llvm::sort(Regs, [&](Register &A, Register &B) {
+ return getRegisterSize(TRI, A) > getRegisterSize(TRI, B);
+ });
+ }
+};
+
+// Describes the state of the statepoint instruction currently being processed.
+class StatepointState {
+private:
+ // statepoint instruction.
+ MachineInstr &MI;
+ MachineFunction &MF;
+ // If non-null then statepoint is invoke, and this points to the landing pad.
+ MachineBasicBlock *EHPad;
+ const TargetRegisterInfo &TRI;
+ const TargetInstrInfo &TII;
+ MachineFrameInfo &MFI;
+ // Mask with callee saved registers.
+ const uint32_t *Mask;
+ // Cache of frame indexes used while processing previous statepoints.
+ FrameIndexesCache &CacheFI;
+ bool AllowGCPtrInCSR;
+ // Operands with physical registers requiring spilling.
+ SmallVector<unsigned, 8> OpsToSpill;
+ // Set of registers to spill.
+ SmallVector<Register, 8> RegsToSpill;
+ // Set of registers to reload after statepoint.
+ SmallVector<Register, 8> RegsToReload;
+ // Map Register to Frame Slot index.
+ DenseMap<Register, int> RegToSlotIdx;
+
+public:
+ StatepointState(MachineInstr &MI, const uint32_t *Mask,
+ FrameIndexesCache &CacheFI, bool AllowGCPtrInCSR)
+ : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
+ TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
+ Mask(Mask), CacheFI(CacheFI), AllowGCPtrInCSR(AllowGCPtrInCSR) {
+
+ // Find statepoint's landing pad, if any.
+ EHPad = nullptr;
+ MachineBasicBlock *MBB = MI.getParent();
+ // An invoke statepoint must be the last statepoint in its block.
+ bool Last = std::none_of(++MI.getIterator(), MBB->end().getInstrIterator(),
+ [](MachineInstr &I) {
+ return I.getOpcode() == TargetOpcode::STATEPOINT;
+ });
+
+ if (!Last)
+ return;
+
+ auto IsEHPad = [](MachineBasicBlock *B) { return B->isEHPad(); };
+
+ assert(llvm::count_if(MBB->successors(), IsEHPad) < 2 && "multiple EHPads");
+
+ auto It = llvm::find_if(MBB->successors(), IsEHPad);
+ if (It != MBB->succ_end())
+ EHPad = *It;
+ }
+
+ MachineBasicBlock *getEHPad() const { return EHPad; }
+
+ // Return true if register is callee saved.
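+ // The mask is a call-preserved regmask: one bit per physical register,
+ // packed into 32-bit words, where a set bit means the register is preserved
+ // (callee saved) across the call.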
+ bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
+
+ // Iterates over statepoint meta args to find caller saved registers.
+ // Also caches the size of the found registers.
+ // Returns true if any caller saved registers were found.
+ bool findRegistersToSpill() {
+ SmallSet<Register, 8> GCRegs;
+ // All GC pointer operands assigned to registers produce new value.
+ // Since they're tied to their defs, it is enough to collect def registers.
+ for (const auto &Def : MI.defs())
+ GCRegs.insert(Def.getReg());
+
+ SmallSet<Register, 8> VisitedRegs;
+ for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
+ EndIdx = MI.getNumOperands();
+ Idx < EndIdx; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ // Leave `undef` operands as is, StackMaps will rewrite them
+ // into a constant.
+ if (!MO.isReg() || MO.isImplicit() || MO.isUndef())
+ continue;
+ Register Reg = MO.getReg();
+ assert(Reg.isPhysical() && "Only physical regs are expected");
+
+ if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !GCRegs.contains(Reg)))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index "
+ << Idx << "\n");
+
+ if (VisitedRegs.insert(Reg).second)
+ RegsToSpill.push_back(Reg);
+ OpsToSpill.push_back(Idx);
+ }
+ CacheFI.sortRegisters(RegsToSpill);
+ return !RegsToSpill.empty();
+ }
+
+ // Spill all caller saved registers right before the statepoint instruction.
+ // Remember the frame index where each register is spilled.
+ void spillRegisters() {
+ for (Register Reg : RegsToSpill) {
+ int FI = CacheFI.getFrameIndex(Reg, EHPad);
+
+ NumSpilledRegisters++;
+ RegToSlotIdx[Reg] = FI;
+
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, &TRI) << " to FI " << FI
+ << "\n");
+
+ // Perform trivial copy propagation
+ bool IsKill = true;
+ MachineBasicBlock::iterator InsertBefore(MI);
+ Reg = performCopyPropagation(Reg, InsertBefore, IsKill, TII, TRI);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+
+ LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore);
+ TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI,
+ RC, &TRI, Register());
+ }
+ }
+
+ void insertReloadBefore(unsigned Reg, MachineBasicBlock::iterator It,
+ MachineBasicBlock *MBB) {
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ int FI = RegToSlotIdx[Reg];
+ if (It != MBB->end()) {
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register());
+ return;
+ }
+
+ // To insert reload at the end of MBB, insert it before last instruction
+ // and then swap them.
+ assert(!MBB->empty() && "Empty block");
+ --It;
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register());
+ MachineInstr *Reload = It->getPrevNode();
+ int Dummy = 0;
+ (void)Dummy;
+ assert(TII.isLoadFromStackSlot(*Reload, Dummy) == Reg);
+ assert(Dummy == FI);
+ MBB->remove(Reload);
+ MBB->insertAfter(It, Reload);
+ }
+
+ // Insert reloads of (relocated) registers spilled in statepoint.
+ void insertReloads(MachineInstr *NewStatepoint, RegReloadCache &RC) {
+ MachineBasicBlock *MBB = NewStatepoint->getParent();
+ auto InsertPoint = std::next(NewStatepoint->getIterator());
+
+ for (auto Reg : RegsToReload) {
+ insertReloadBefore(Reg, InsertPoint, MBB);
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(Reg, &TRI) << " from FI "
+ << RegToSlotIdx[Reg] << " after statepoint\n");
+
+ if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
+ RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
+ auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
+ insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
+ LLVM_DEBUG(dbgs() << "...also reload at EHPad "
+ << printMBBReference(*EHPad) << "\n");
+ }
+ }
+ }
+
+ // Re-write the statepoint machine instruction to replace caller saved
+ // operands with indirect memory locations (frame indexes).
+ MachineInstr *rewriteStatepoint() {
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+ unsigned NumOps = MI.getNumOperands();
+
+ // New indices for the remaining defs.
+ SmallVector<unsigned, 8> NewIndices;
+ unsigned NumDefs = MI.getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ MachineOperand &DefMO = MI.getOperand(I);
+ assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand");
+ Register Reg = DefMO.getReg();
+ assert(DefMO.isTied() && "Def is expected to be tied");
+ // We skipped undef uses and did not spill them, so we should not
+ // proceed with defs here.
+ if (MI.getOperand(MI.findTiedOperandIdx(I)).isUndef()) {
+ if (AllowGCPtrInCSR) {
+ NewIndices.push_back(NewMI->getNumOperands());
+ MIB.addReg(Reg, RegState::Define);
+ }
+ continue;
+ }
+ if (!AllowGCPtrInCSR) {
+ assert(is_contained(RegsToSpill, Reg));
+ RegsToReload.push_back(Reg);
+ } else {
+ if (isCalleeSaved(Reg)) {
+ NewIndices.push_back(NewMI->getNumOperands());
+ MIB.addReg(Reg, RegState::Define);
+ } else {
+ NewIndices.push_back(NumOps);
+ RegsToReload.push_back(Reg);
+ }
+ }
+ }
+
+ // Add End marker.
+ OpsToSpill.push_back(MI.getNumOperands());
+ unsigned CurOpIdx = 0;
+
+ for (unsigned I = NumDefs; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (I == OpsToSpill[CurOpIdx]) {
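+ // Replace the register operand with an indirect memory reference:
+ // <IndirectMemRefOp, byte size, frame index, offset 0>.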
+ int FI = RegToSlotIdx[MO.getReg()];
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(getRegisterSize(TRI, MO.getReg()));
+ assert(MO.isReg() && "Should be register");
+ assert(MO.getReg().isPhysical() && "Should be physical register");
+ MIB.addFrameIndex(FI);
+ MIB.addImm(0);
+ ++CurOpIdx;
+ } else {
+ MIB.add(MO);
+ unsigned OldDef;
+ if (AllowGCPtrInCSR && MI.isRegTiedToDefOperand(I, &OldDef)) {
+ assert(OldDef < NumDefs);
+ assert(NewIndices[OldDef] < NumOps);
+ MIB->tieOperands(NewIndices[OldDef], MIB->getNumOperands() - 1);
+ }
+ }
+ }
+ assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed");
+ // Add mem operands.
+ NewMI->setMemRefs(MF, MI.memoperands());
+ for (auto It : RegToSlotIdx) {
+ Register R = It.first;
+ int FrameIndex = It.second;
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
+ if (is_contained(RegsToReload, R))
+ Flags |= MachineMemOperand::MOStore;
+ auto *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, getRegisterSize(TRI, R),
+ MFI.getObjectAlign(FrameIndex));
+ NewMI->addMemOperand(MF, MMO);
+ }
+
+ // Insert new statepoint and erase old one.
+ MI.getParent()->insert(MI, NewMI);
+
+ LLVM_DEBUG(dbgs() << "rewritten statepoint to : " << *NewMI << "\n");
+ MI.eraseFromParent();
+ return NewMI;
+ }
+};
+
+class StatepointProcessor {
+private:
+ MachineFunction &MF;
+ const TargetRegisterInfo &TRI;
+ FrameIndexesCache CacheFI;
+ RegReloadCache ReloadCache;
+
+public:
+ StatepointProcessor(MachineFunction &MF)
+ : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
+ CacheFI(MF.getFrameInfo(), TRI) {}
+
+ bool process(MachineInstr &MI, bool AllowGCPtrInCSR) {
+ StatepointOpers SO(&MI);
+ uint64_t Flags = SO.getFlags();
+ // Do nothing for LiveIn; it supports all registers.
+ if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
+ return false;
+ LLVM_DEBUG(dbgs() << "\nMBB " << MI.getParent()->getNumber() << " "
+ << MI.getParent()->getName() << " : process statepoint "
+ << MI);
+ CallingConv::ID CC = SO.getCallingConv();
+ const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
+ StatepointState SS(MI, Mask, CacheFI, AllowGCPtrInCSR);
+ CacheFI.reset(SS.getEHPad());
+
+ if (!SS.findRegistersToSpill())
+ return false;
+
+ SS.spillRegisters();
+ auto *NewStatepoint = SS.rewriteStatepoint();
+ SS.insertReloads(NewStatepoint, ReloadCache);
+ return true;
+ }
+};
+} // namespace
+
+bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const Function &F = MF.getFunction();
+ if (!F.hasGC())
+ return false;
+
+ SmallVector<MachineInstr *, 16> Statepoints;
+ for (MachineBasicBlock &BB : MF)
+ for (MachineInstr &I : BB)
+ if (I.getOpcode() == TargetOpcode::STATEPOINT)
+ Statepoints.push_back(&I);
+
+ if (Statepoints.empty())
+ return false;
+
+ bool Changed = false;
+ StatepointProcessor SPP(MF);
+ unsigned NumStatepoints = 0;
+ bool AllowGCPtrInCSR = PassGCPtrInCSR;
+ for (MachineInstr *I : Statepoints) {
+ ++NumStatepoints;
+ if (MaxStatepointsWithRegs.getNumOccurrences() &&
+ NumStatepoints >= MaxStatepointsWithRegs)
+ AllowGCPtrInCSR = false;
+ Changed |= SPP.process(*I, AllowGCPtrInCSR);
+ }
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp
new file mode 100644
index 000000000000..f1222a88b054
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -0,0 +1,62 @@
+//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations which result in
+// funclets being contiguous.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "funclet-layout"
+
+namespace {
+class FuncletLayout : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ FuncletLayout() : MachineFunctionPass(ID) {
+ initializeFuncletLayoutPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+}
+
+char FuncletLayout::ID = 0;
+char &llvm::FuncletLayoutID = FuncletLayout::ID;
+INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE,
+ "Contiguously Lay Out Funclets", false, false)
+
+bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
+ // Even though this gets information from getEHScopeMembership(), this pass is
+ // only necessary for funclet-based EH personalities, in which these EH scopes
+ // are outlined at the end.
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership =
+ getEHScopeMembership(F);
+ if (FuncletMembership.empty())
+ return false;
+
+ F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto FuncletX = FuncletMembership.find(&X);
+ auto FuncletY = FuncletMembership.find(&Y);
+ assert(FuncletX != FuncletMembership.end());
+ assert(FuncletY != FuncletMembership.end());
+ return FuncletX->second < FuncletY->second;
+ });
+
+ // Conservatively assume we changed something.
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 000000000000..4d27143c5298
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,150 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+class Printer : public FunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+
+public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+ StringRef getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() = default;
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getGCStrategy(F.getGC());
+ Functions.push_back(std::make_unique<GCFunctionInfo>(F, *S));
+ GCFunctionInfo *GFI = Functions.back().get();
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ Functions.clear();
+ FInfoMap.clear();
+ GCStrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+StringRef Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end();
+ RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
+ ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": " << "post-call"
+ << ", live = {";
+
+ ListSeparator LS(",");
+ for (const GCRoot &R : make_range(FD->live_begin(PI), FD->live_end(PI)))
+ OS << LS << " " << R.Num;
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+bool Printer::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
+
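+// Return the cached GCStrategy registered under Name, constructing it via
+// llvm::getGCStrategy() and caching it on first use.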
+GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
+ // TODO: Arguably, just doing a linear search would be faster for small N
+ auto NMI = GCStrategyMap.find(Name);
+ if (NMI != GCStrategyMap.end())
+ return NMI->getValue();
+
+ std::unique_ptr<GCStrategy> S = llvm::getGCStrategy(Name);
+ S->Name = std::string(Name);
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 000000000000..500dba9aea37
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,21 @@
+//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+
+using namespace llvm;
+
+LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry)
+
+GCMetadataPrinter::GCMetadataPrinter() = default;
+
+GCMetadataPrinter::~GCMetadataPrinter() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
new file mode 100644
index 000000000000..c0ce37091933
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -0,0 +1,328 @@
+//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering for the gc.root mechanism.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+
+using namespace llvm;
+
+namespace {
+
+/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+/// directed by the GCStrategy. It also performs automatic root initialization
+/// and custom intrinsic lowering.
+class LowerIntrinsics : public FunctionPass {
+ bool DoLowering(Function &F, GCStrategy &S);
+
+public:
+ static char ID;
+
+ LowerIntrinsics();
+ StringRef getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+};
+
+/// GCMachineCodeAnalysis - This is a target-independent pass over the machine
+/// function representation to identify safe points for the garbage collector
+/// in the machine code. It inserts labels at safe points and populates a
+/// GCMetadata record for each function.
+class GCMachineCodeAnalysis : public MachineFunctionPass {
+ GCFunctionInfo *FI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator CI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const DebugLoc &DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ GCMachineCodeAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
+
+char LowerIntrinsics::ID = 0;
+char &llvm::GCLoweringID = LowerIntrinsics::ID;
+
+LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+StringRef LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Function &F : M)
+ if (!F.isDeclaration() && F.hasGC())
+ MI->getFunctionInfo(F); // Instantiate the GC strategy.
+
+ return false;
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+static bool CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+  // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) ||
+ isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (Intrinsic::ID IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP))
+ ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst *, 16> InitedRoots;
+ for (; !CouldBecomeSafePoint(&*IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst *Root : Roots)
+ if (!InitedRoots.count(Root)) {
+ new StoreInst(
+ ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
+ Root, Root->getNextNode());
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ return DoLowering(F, S);
+}
+
+/// Lower barriers out of existence (if the associated GCStrategy hasn't
+/// already done so...), and insert initializing stores to roots as a defensive
+/// measure. Given we're going to report all roots live at all safepoints, we
+/// need to be able to ensure each root has been initialized by the point the
+/// first safepoint is reached. This really should have been done by the
+/// frontend, but the old API made this non-obvious, so we do a potentially
+/// redundant store just in case.
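+///
+/// For illustration only: a write barrier call llvm.gcwrite(%val, %obj, %slot)
+/// is replaced below with a plain store of %val to %slot, a read barrier
+/// llvm.gcread(%obj, %slot) becomes a plain load from %slot, and any root
+/// declared via llvm.gcroot that is not already stored to before the first
+/// potential safe point gets a null store inserted right after its alloca.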
+bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
+ SmallVector<AllocaInst *, 32> Roots;
+
+ bool MadeChange = false;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I);
+ if (!CI)
+ continue;
+
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::gcwrite: {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcread: {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcroot: {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ break;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots);
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
+
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
+
+GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
+
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), since that's what will be on
+ // the stack when the call is suspended and we need to inspect the stack.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(Label, CI->getDebugLoc());
+}
+
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.isCall()) {
+ // Do not treat tail or sibling call sites as safe points. This is
+ // legal since any arguments passed to the callee which live in the
+        // remnants of the caller's frame will be owned and updated by the
+ // callee if required.
+ if (MI.isTerminator())
+ continue;
+ VisitCallPoint(&MI);
+ }
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ Register FrameReg; // FIXME: surely GCRoot ought to store the
+ // register that the offset is from?
+ auto FrameOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
+ assert(!FrameOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ RI->StackOffset = FrameOffset.getFixed();
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction().hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction());
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Find the size of the stack frame. There may be no correct static frame
+  // size; we use UINT64_MAX to represent this.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const bool DynamicFrameSize =
+ MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF);
+ FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize());
+
+ // Find all safe points.
+ if (FI->getStrategy().needsSafePoints())
+ FindSafePoints(MF);
+
+ // Find the concrete stack offsets for all roots (stack slots)
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
new file mode 100644
index 000000000000..e047996f9aa8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -0,0 +1,452 @@
+//===- CSEInfo.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GISelCSEInfo analysis, which keeps track of
+// CSE-able generic machine instructions so that MIR builders can reuse them
+// instead of emitting duplicates.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Error.h"
+
+#define DEBUG_TYPE "cseinfo"
+
+using namespace llvm;
+char llvm::GISelCSEAnalysisWrapperPass::ID = 0;
+GISelCSEAnalysisWrapperPass::GISelCSEAnalysisWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+
+/// -------- UniqueMachineInstr -------------//
+
+void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
+ GISelInstProfileBuilder(ID, MI->getMF()->getRegInfo()).addNodeID(MI);
+}
+/// -----------------------------------------
+
+/// --------- CSEConfigFull ---------- ///
+bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_SELECT:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC:
+ case TargetOpcode::G_SEXT_INREG:
+ return true;
+ }
+ return false;
+}
+
+bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT ||
+ Opc == TargetOpcode::G_IMPLICIT_DEF;
+}
+
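+// At CodeGenOpt::None only constants (and G_IMPLICIT_DEF) are CSE'd; at higher
+// optimization levels the full opcode list above is used.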
+std::unique_ptr<CSEConfigBase>
+llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+ std::unique_ptr<CSEConfigBase> Config;
+ if (Level == CodeGenOpt::None)
+ Config = std::make_unique<CSEConfigConstantOnly>();
+ else
+ Config = std::make_unique<CSEConfigFull>();
+ return Config;
+}
+
+/// -----------------------------------------
+
+/// -------- GISelCSEInfo -------------//
+void GISelCSEInfo::setMF(MachineFunction &MF) {
+ this->MF = &MF;
+ this->MRI = &MF.getRegInfo();
+}
+
+GISelCSEInfo::~GISelCSEInfo() = default;
+
+bool GISelCSEInfo::isUniqueMachineInstValid(
+ const UniqueMachineInstr &UMI) const {
+ // Should we check here and assert that the instruction has been fully
+ // constructed?
+ // FIXME: Any other checks required to be done here? Remove this method if
+ // none.
+ return true;
+}
+
+void GISelCSEInfo::invalidateUniqueMachineInstr(UniqueMachineInstr *UMI) {
+ bool Removed = CSEMap.RemoveNode(UMI);
+ (void)Removed;
+ assert(Removed && "Invalidation called on invalid UMI");
+ // FIXME: Should UMI be deallocated/destroyed?
+}
+
+UniqueMachineInstr *GISelCSEInfo::getNodeIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ auto *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Node) {
+ if (!isUniqueMachineInstValid(*Node)) {
+ invalidateUniqueMachineInstr(Node);
+ return nullptr;
+ }
+
+ if (Node->MI->getParent() != MBB)
+ return nullptr;
+ }
+ return Node;
+}
+
+void GISelCSEInfo::insertNode(UniqueMachineInstr *UMI, void *InsertPos) {
+ handleRecordedInsts();
+ assert(UMI);
+ UniqueMachineInstr *MaybeNewNode = UMI;
+ if (InsertPos)
+ CSEMap.InsertNode(UMI, InsertPos);
+ else
+ MaybeNewNode = CSEMap.GetOrInsertNode(UMI);
+ if (MaybeNewNode != UMI) {
+ // A similar node exists in the folding set. Let's ignore this one.
+ return;
+ }
+ assert(InstrMapping.count(UMI->MI) == 0 &&
+ "This instruction should not be in the map");
+ InstrMapping[UMI->MI] = MaybeNewNode;
+}
+
+UniqueMachineInstr *GISelCSEInfo::getUniqueInstrForMI(const MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Trying to CSE an unsupported Node");
+ auto *Node = new (UniqueInstrAllocator) UniqueMachineInstr(MI);
+ return Node;
+}
+
+void GISelCSEInfo::insertInstr(MachineInstr *MI, void *InsertPos) {
+ assert(MI);
+ // If it exists in temporary insts, remove it.
+ TemporaryInsts.remove(MI);
+ auto *Node = getUniqueInstrForMI(MI);
+ insertNode(Node, InsertPos);
+}
+
+MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ handleRecordedInsts();
+ if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
+ LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI;);
+ return const_cast<MachineInstr *>(Inst->MI);
+ }
+ return nullptr;
+}
+
+void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
+#ifndef NDEBUG
+ if (OpcodeHitTable.count(Opc))
+ OpcodeHitTable[Opc] += 1;
+ else
+ OpcodeHitTable[Opc] = 1;
+#endif
+ // Else do nothing.
+}
+
+void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
+ if (shouldCSE(MI->getOpcode())) {
+ TemporaryInsts.insert(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Recording new MI " << *MI);
+ }
+}
+
+void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
+ auto *UMI = InstrMapping.lookup(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Handling recorded MI " << *MI);
+ if (UMI) {
+ // Invalidate this MI.
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ /// Now insert the new instruction.
+ if (UMI) {
+ /// We'll reuse the same UniqueMachineInstr to avoid the new
+ /// allocation.
+ *UMI = UniqueMachineInstr(MI);
+ insertNode(UMI, nullptr);
+ } else {
+ /// This is a new instruction. Allocate a new UniqueMachineInstr and
+ /// Insert.
+ insertInstr(MI);
+ }
+}
+
+void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
+ if (auto *UMI = InstrMapping.lookup(MI)) {
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ TemporaryInsts.remove(MI);
+}
+
+void GISelCSEInfo::handleRecordedInsts() {
+ if (HandlingRecordedInstrs)
+ return;
+ HandlingRecordedInstrs = true;
+ while (!TemporaryInsts.empty()) {
+ auto *MI = TemporaryInsts.pop_back_val();
+ handleRecordedInst(MI);
+ }
+ HandlingRecordedInstrs = false;
+}
+
+bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
+ assert(CSEOpt.get() && "CSEConfig not set");
+ return CSEOpt->shouldCSEOpc(Opc);
+}
+
+void GISelCSEInfo::erasingInstr(MachineInstr &MI) { handleRemoveInst(&MI); }
+void GISelCSEInfo::createdInstr(MachineInstr &MI) { recordNewInstruction(&MI); }
+void GISelCSEInfo::changingInstr(MachineInstr &MI) {
+ // For now, perform erase, followed by insert.
+ erasingInstr(MI);
+ createdInstr(MI);
+}
+void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
+
+void GISelCSEInfo::analyze(MachineFunction &MF) {
+ setMF(MF);
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ for (MachineInstr &MI : MBB) {
+ if (!shouldCSE(MI.getOpcode()))
+ continue;
+ LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI);
+ insertInstr(&MI);
+ }
+ }
+}
+
+void GISelCSEInfo::releaseMemory() {
+ print();
+ CSEMap.clear();
+ InstrMapping.clear();
+ UniqueInstrAllocator.Reset();
+ TemporaryInsts.clear();
+ CSEOpt.reset();
+ MRI = nullptr;
+ MF = nullptr;
+#ifndef NDEBUG
+ OpcodeHitTable.clear();
+#endif
+}
+
+#ifndef NDEBUG
+static const char *stringify(const MachineInstr *MI, std::string &S) {
+ raw_string_ostream OS(S);
+ OS << *MI;
+ return OS.str().c_str();
+}
+#endif
+
+Error GISelCSEInfo::verify() {
+#ifndef NDEBUG
+ std::string S1, S2;
+ handleRecordedInsts();
+ // For each instruction in map from MI -> UMI,
+ // Profile(MI) and make sure UMI is found for that profile.
+ for (auto &It : InstrMapping) {
+ FoldingSetNodeID TmpID;
+ GISelInstProfileBuilder(TmpID, *MRI).addNodeID(It.first);
+ void *InsertPos;
+ UniqueMachineInstr *FoundNode =
+ CSEMap.FindNodeOrInsertPos(TmpID, InsertPos);
+ if (FoundNode != It.second)
+ return createStringError(std::errc::not_supported,
+ "CSEMap mismatch, InstrMapping has MIs without "
+ "corresponding Nodes in CSEMap:\n%s",
+ stringify(It.second->MI, S1));
+ }
+
+ // For every node in the CSEMap, make sure that the InstrMapping
+ // points to it.
+ for (const UniqueMachineInstr &UMI : CSEMap) {
+ if (!InstrMapping.count(UMI.MI))
+ return createStringError(std::errc::not_supported,
+ "Node in CSE without InstrMapping:\n%s",
+ stringify(UMI.MI, S1));
+
+ if (InstrMapping[UMI.MI] != &UMI)
+ return createStringError(std::make_error_code(std::errc::not_supported),
+ "Mismatch in CSE mapping:\n%s\n%s",
+ stringify(InstrMapping[UMI.MI]->MI, S1),
+ stringify(UMI.MI, S2));
+ }
+#endif
+ return Error::success();
+}
+
+void GISelCSEInfo::print() {
+  LLVM_DEBUG(for (auto &It : OpcodeHitTable) {
+ dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second
+ << "\n";
+ };);
+}
+/// -----------------------------------------
+// ---- Profiling methods for FoldingSetNode --- //
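+// An instruction's profile consists of its parent basic block, its opcode,
+// each of its operands (registers along with their LLT and register
+// class/bank), and its MI flags; instructions with identical profiles in the
+// same block are candidates for CSE.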
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const {
+ addNodeIDMBB(MI->getParent());
+ addNodeIDOpcode(MI->getOpcode());
+ for (const auto &Op : MI->operands())
+ addNodeIDMachineOperand(Op);
+ addNodeIDFlag(MI->getFlags());
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const {
+ ID.AddInteger(Opc);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const LLT Ty) const {
+ uint64_t Val = Ty.getUniqueRAWLLTData();
+ ID.AddInteger(Val);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const TargetRegisterClass *RC) const {
+ ID.AddPointer(RC);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const RegisterBank *RB) const {
+ ID.AddPointer(RB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const {
+ ID.AddInteger(Imm);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegNum(Register Reg) const {
+ ID.AddInteger(Reg);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const Register Reg) const {
+ addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false));
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDMBB(const MachineBasicBlock *MBB) const {
+ ID.AddPointer(MBB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
+ if (Flag)
+ ID.AddInteger(Flag);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+
+ if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
+ if (const auto *RB = dyn_cast_if_present<const RegisterBank *>(RCOrRB))
+ addNodeIDRegType(RB);
+ else if (const auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RCOrRB))
+ addNodeIDRegType(RC);
+ }
+ return *this;
+}
+
+const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
+ const MachineOperand &MO) const {
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (!MO.isDef())
+ addNodeIDRegNum(Reg);
+
+ // Profile the register properties.
+ addNodeIDReg(Reg);
+ assert(!MO.isImplicit() && "Unhandled case");
+ } else if (MO.isImm())
+ ID.AddInteger(MO.getImm());
+ else if (MO.isCImm())
+ ID.AddPointer(MO.getCImm());
+ else if (MO.isFPImm())
+ ID.AddPointer(MO.getFPImm());
+ else if (MO.isPredicate())
+ ID.AddInteger(MO.getPredicate());
+ else
+ llvm_unreachable("Unhandled operand type");
+ // Handle other types
+ return *this;
+}
+
+GISelCSEInfo &
+GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt,
+ bool Recompute) {
+ if (!AlreadyComputed || Recompute) {
+ Info.releaseMemory();
+ Info.setCSEConfig(std::move(CSEOpt));
+ Info.analyze(*MF);
+ AlreadyComputed = true;
+ }
+ return Info;
+}
+void GISelCSEAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelCSEAnalysisWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ releaseMemory();
+ Wrapper.setMF(MF);
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
new file mode 100644
index 000000000000..64e2d517e3b9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -0,0 +1,354 @@
+//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the CSEMIRBuilder class which CSEs as it builds
+/// instructions.
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+using namespace llvm;
+
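+// Returns true if A dominates B within their shared basic block, i.e. A is
+// reached before B when scanning the block from the top (B == end() is
+// dominated by everything).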
+bool CSEMIRBuilder::dominates(MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) const {
+ auto MBBEnd = getMBB().end();
+ if (B == MBBEnd)
+ return true;
+ assert(A->getParent() == B->getParent() &&
+ "Iterators should be in same block");
+ const MachineBasicBlock *BBA = A->getParent();
+ MachineBasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+ return &*I == A;
+}
+
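+// Look up an instruction with the given profile in the current basic block.
+// On a hit, either advance the insertion point past the existing instruction
+// or splice the instruction up to the insertion point, so its def is available
+// to subsequent uses of this builder.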
+MachineInstrBuilder
+CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
+ void *&NodeInsertPos) {
+ GISelCSEInfo *CSEInfo = getCSEInfo();
+ assert(CSEInfo && "Can't get here without setting CSEInfo");
+ MachineBasicBlock *CurMBB = &getMBB();
+ MachineInstr *MI =
+ CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
+ if (MI) {
+ CSEInfo->countOpcodeHit(MI->getOpcode());
+ auto CurrPos = getInsertPt();
+ auto MII = MachineBasicBlock::iterator(MI);
+ if (MII == CurrPos) {
+ // Move the insert point ahead of the instruction so any future uses of
+ // this builder will have the def ready.
+ setInsertPt(*CurMBB, std::next(MII));
+ } else if (!dominates(MI, CurrPos)) {
+ CurMBB->splice(CurrPos, CurMBB, MI);
+ }
+ return MachineInstrBuilder(getMF(), MI);
+ }
+ return MachineInstrBuilder();
+}
+
+bool CSEMIRBuilder::canPerformCSEForOpc(unsigned Opc) const {
+ const GISelCSEInfo *CSEInfo = getCSEInfo();
+ if (!CSEInfo || !CSEInfo->shouldCSE(Opc))
+ return false;
+ return true;
+}
+
+void CSEMIRBuilder::profileDstOp(const DstOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getDstOpKind()) {
+ case DstOp::DstType::Ty_RC:
+ B.addNodeIDRegType(Op.getRegClass());
+ break;
+ case DstOp::DstType::Ty_Reg: {
+ // Regs can have LLT&(RB|RC). If those exist, profile them as well.
+ B.addNodeIDReg(Op.getReg());
+ break;
+ }
+ default:
+ B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getSrcOpKind()) {
+ case SrcOp::SrcType::Ty_Imm:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+ break;
+ case SrcOp::SrcType::Ty_Predicate:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
+ break;
+ default:
+ B.addNodeIDRegType(Op.getReg());
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B,
+ unsigned Opc) const {
+ // First add the MBB (Local CSE).
+ B.addNodeIDMBB(&getMBB());
+ // Then add the opcode.
+ B.addNodeIDOpcode(Opc);
+}
+
+void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ std::optional<unsigned> Flags,
+ GISelInstProfileBuilder &B) const {
+
+ profileMBBOpcode(B, Opc);
+ // Then add the DstOps.
+ profileDstOps(DstOps, B);
+ // Then add the SrcOps.
+ profileSrcOps(SrcOps, B);
+ // Add Flags if passed in.
+ if (Flags)
+ B.addNodeIDFlag(*Flags);
+}
+
+MachineInstrBuilder CSEMIRBuilder::memoizeMI(MachineInstrBuilder MIB,
+ void *NodeInsertPos) {
+ assert(canPerformCSEForOpc(MIB->getOpcode()) &&
+ "Attempting to CSE illegal op");
+ MachineInstr *MIBInstr = MIB;
+ getCSEInfo()->insertInstr(MIBInstr, NodeInsertPos);
+ return MIB;
+}
+
+bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
+ if (DstOps.size() == 1)
+ return true; // always possible to emit copy to just 1 vreg.
+
+ return llvm::all_of(DstOps, [](const DstOp &Op) {
+ DstOp::DstType DT = Op.getDstOpKind();
+ return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
+ });
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
+ MachineInstrBuilder &MIB) {
+ assert(checkCopyToDefsPossible(DstOps) &&
+         "Impossible to return a single MIB with copies to multiple defs");
+ if (DstOps.size() == 1) {
+ const DstOp &Op = DstOps[0];
+ if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
+ return buildCopy(Op.getReg(), MIB.getReg(0));
+ }
+
+ // If we didn't generate a copy then we're re-using an existing node directly
+ // instead of emitting any code. Merge the debug location we wanted to emit
+  // into the instruction we're CSE'ing with. Debug locations aren't part of the
+ // profile so we don't need to recompute it.
+ if (getDebugLoc()) {
+ GISelChangeObserver *Observer = getState().Observer;
+ if (Observer)
+ Observer->changingInstr(*MIB);
+ MIB->setDebugLoc(
+ DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc()));
+ if (Observer)
+ Observer->changedInstr(*MIB);
+ }
+
+ return MIB;
+}
+
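+// buildInstr first tries to constant fold the requested operation, then looks
+// for an equivalent instruction already recorded by GISelCSEInfo, and only
+// builds (and memoizes) a fresh instruction when both fail. Illustrative
+// sketch, names hypothetical:
+//   auto A = CSEB.buildInstr(TargetOpcode::G_ADD, {S32Ty}, {X, Y});
+//   auto B = CSEB.buildInstr(TargetOpcode::G_ADD, {S32Ty}, {X, Y});
+// issued back to back in the same block return the same G_ADD rather than a
+// duplicate instruction.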
+MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ std::optional<unsigned> Flag) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ LLT SrcTy = SrcOps[0].getLLTTy(*getMRI());
+
+ if (Opc == TargetOpcode::G_PTR_ADD &&
+ getDataLayout().isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+ break;
+
+ if (SrcTy.isVector()) {
+ // Try to constant fold vector constants.
+ SmallVector<APInt> VecCst = ConstantFoldVectorBinop(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI());
+ if (!VecCst.empty())
+ return buildBuildVectorConstant(DstOps[0], VecCst);
+ break;
+ }
+
+ if (std::optional<APInt> Cst = ConstantFoldBinOp(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
+ return buildConstant(DstOps[0], *Cst);
+ break;
+ }
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FCOPYSIGN: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (std::optional<APFloat> Cst = ConstantFoldFPBinOp(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
+ return buildFConstant(DstOps[0], *Cst);
+ break;
+ }
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI()))
+ return buildConstant(Dst, *MaybeCst);
+ break;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 1 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (std::optional<APFloat> Cst = ConstantFoldIntToFloat(
+ Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI()))
+ return buildFConstant(DstOps[0], *Cst);
+ break;
+ }
+ case TargetOpcode::G_CTLZ: {
+ assert(SrcOps.size() == 1 && "Expected one source");
+ assert(DstOps.size() == 1 && "Expected one dest");
+ auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
+ if (!MaybeCsts)
+ break;
+ if (MaybeCsts->size() == 1)
+ return buildConstant(DstOps[0], (*MaybeCsts)[0]);
+ // This was a vector constant. Build a G_BUILD_VECTOR for them.
+ SmallVector<Register> ConstantRegs;
+ LLT VecTy = DstOps[0].getLLTTy(*getMRI());
+ for (unsigned Cst : *MaybeCsts)
+ ConstantRegs.emplace_back(
+ buildConstant(VecTy.getScalarType(), Cst).getReg(0));
+ return buildBuildVector(DstOps[0], ConstantRegs);
+ }
+ }
+ bool CanCopy = checkCopyToDefsPossible(DstOps);
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+  // If we can CSE this instruction, but doing so involves generating copies to
+  // multiple regs, give up. This frequently happens to UNMERGEs.
+ if (!CanCopy) {
+ auto MIB = MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ // CSEInfo would have tracked this instruction. Remove it from the temporary
+ // insts.
+ getCSEInfo()->handleRemoveInst(&*MIB);
+ return MIB;
+ }
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileEverything(Opc, DstOps, SrcOps, Flag, ProfBuilder);
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired(DstOps, MIB);
+ }
+ // This instruction does not exist in the CSEInfo. Build it and CSE it.
+ MachineInstrBuilder NewMIB =
+ MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildConstant(Res, Val);
+
+ // For vectors, CSE the element only for now.
+ LLT Ty = Res.getLLTTy(*getMRI());
+ if (Ty.isVector())
+ return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
+
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateCImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildFConstant(Res, Val);
+
+ // For vectors, CSE the element only for now.
+ LLT Ty = Res.getLLTTy(*getMRI());
+ if (Ty.isVector())
+ return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val));
+
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateFPImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildFConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
new file mode 100644
index 000000000000..28c33e2038e4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -0,0 +1,1241 @@
+//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements some simple delegations needed for call lowering.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "call-lowering"
+
+using namespace llvm;
+
+void CallLowering::anchor() {}
+
+/// Helper function which updates \p Flags when \p AttrFn returns true.
+static void
+addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
+ const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+ if (AttrFn(Attribute::SExt))
+ Flags.setSExt();
+ if (AttrFn(Attribute::ZExt))
+ Flags.setZExt();
+ if (AttrFn(Attribute::InReg))
+ Flags.setInReg();
+ if (AttrFn(Attribute::StructRet))
+ Flags.setSRet();
+ if (AttrFn(Attribute::Nest))
+ Flags.setNest();
+ if (AttrFn(Attribute::ByVal))
+ Flags.setByVal();
+ if (AttrFn(Attribute::Preallocated))
+ Flags.setPreallocated();
+ if (AttrFn(Attribute::InAlloca))
+ Flags.setInAlloca();
+ if (AttrFn(Attribute::Returned))
+ Flags.setReturned();
+ if (AttrFn(Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (AttrFn(Attribute::SwiftAsync))
+ Flags.setSwiftAsync();
+ if (AttrFn(Attribute::SwiftError))
+ Flags.setSwiftError();
+}
+
+ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
+ unsigned ArgIdx) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
+ return Call.paramHasAttr(ArgIdx, Attr);
+ });
+ return Flags;
+}
+
+ISD::ArgFlagsTy
+CallLowering::getAttributesForReturn(const CallBase &Call) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
+ return Call.hasRetAttr(Attr);
+ });
+ return Flags;
+}
+
+void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
+ const AttributeList &Attrs,
+ unsigned OpIdx) const {
+ addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
+ return Attrs.hasAttributeAtIndex(OpIdx, Attr);
+ });
+}
+
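+// Generic driver for lowering a call site: gather argument and return info
+// from the CallBase, decide whether a tail call is possible, then hand the
+// populated CallLoweringInfo to the target-specific lowerCall() overload.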
+bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
+ ArrayRef<Register> ResRegs,
+ ArrayRef<ArrayRef<Register>> ArgRegs,
+ Register SwiftErrorVReg,
+ std::function<unsigned()> GetCalleeReg) const {
+ CallLoweringInfo Info;
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool CanBeTailCalled = CB.isTailCall() &&
+ isInTailCallPosition(CB, MF.getTarget()) &&
+ (MF.getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
+
+ CallingConv::ID CallConv = CB.getCallingConv();
+ Type *RetTy = CB.getType();
+ bool IsVarArg = CB.getFunctionType()->isVarArg();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
+ Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
+
+ if (!Info.CanLowerReturn) {
+ // Callee requires sret demotion.
+ insertSRetOutgoingArgument(MIRBuilder, CB, Info);
+
+ // The sret demotion isn't compatible with tail-calls, since the sret
+ // argument points into the caller's stack frame.
+ CanBeTailCalled = false;
+ }
+
+ // First step is to marshall all the function's parameters into the correct
+ // physregs and memory locations. Gather the sequence of argument types that
+ // we'll pass to the assigner function.
+ unsigned i = 0;
+ unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
+ for (const auto &Arg : CB.args()) {
+ ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
+ i < NumFixedArgs};
+ setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
+ CanBeTailCalled = false;
+
+ Info.OrigArgs.push_back(OrigArg);
+ ++i;
+ }
+
+ // Try looking through a bitcast from one function type to another.
+ // Commonly happens with calls to objc_msgSend().
+ const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
+ if (const Function *F = dyn_cast<Function>(CalleeV))
+ Info.Callee = MachineOperand::CreateGA(F, 0);
+ else
+ Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+
+ Register ReturnHintAlignReg;
+ Align ReturnHintAlign;
+
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};
+
+ if (!Info.OrigRet.Ty->isVoidTy()) {
+ setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
+
+ if (MaybeAlign Alignment = CB.getRetAlign()) {
+ if (*Alignment > Align(1)) {
+ ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
+ Info.OrigRet.Regs[0] = ReturnHintAlignReg;
+ ReturnHintAlign = *Alignment;
+ }
+ }
+ }
+
+ auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi);
+ if (Bundle && CB.isIndirectCall()) {
+ Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
+ assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
+ }
+
+ Info.CB = &CB;
+ Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
+ Info.CallConv = CallConv;
+ Info.SwiftErrorVReg = SwiftErrorVReg;
+ Info.IsMustTailCall = CB.isMustTailCall();
+ Info.IsTailCall = CanBeTailCalled;
+ Info.IsVarArg = IsVarArg;
+ if (!lowerCall(MIRBuilder, Info))
+ return false;
+
+ if (ReturnHintAlignReg && !Info.IsTailCall) {
+ MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
+ ReturnHintAlign);
+ }
+
+ return true;
+}
+
+template <typename FuncInfoTy>
+void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const FuncInfoTy &FuncInfo) const {
+ auto &Flags = Arg.Flags[0];
+ const AttributeList &Attrs = FuncInfo.getAttributes();
+ addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
+
+ PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType());
+ if (PtrTy) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace());
+ }
+
+ Align MemAlign = DL.getABITypeAlign(Arg.Ty);
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
+ assert(OpIdx >= AttributeList::FirstArgIndex);
+ unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;
+
+ Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
+ assert(ElementTy && "Must have byval, inalloca or preallocated type");
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+    // For ByVal, the alignment should be passed from the frontend. The backend
+    // will guess it if this info is not there, but there are cases it cannot
+    // get right.
+ if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
+ MemAlign = *ParamAlign;
+ else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
+ MemAlign = *ParamAlign;
+ else
+ MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
+ } else if (OpIdx >= AttributeList::FirstArgIndex) {
+ if (auto ParamAlign =
+ FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
+ MemAlign = *ParamAlign;
+ }
+ Flags.setMemAlign(MemAlign);
+ Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
+
+ // Don't try to use the returned attribute if the argument is marked as
+ // swiftself, since it won't be passed in x0.
+ if (Flags.isSwiftSelf())
+ Flags.setReturned(false);
+}
+
+template void
+CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const Function &FuncInfo) const;
+
+template void
+CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const CallBase &FuncInfo) const;
+
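+// Split OrigArg into one ArgInfo per legal value type computed by
+// ComputeValueVTs, copying the original flags onto every piece and marking
+// pieces that must be placed in consecutive registers.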
+void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<uint64_t> *Offsets) const {
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0);
+
+ if (SplitVTs.size() == 0)
+ return;
+
+ if (SplitVTs.size() == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.OrigArgIndex, OrigArg.Flags[0],
+ OrigArg.IsFixed, OrigArg.OrigValue);
+ return;
+ }
+
+ // Create one ArgInfo for each virtual register in the original ArgInfo.
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+ OrigArg.Ty, CallConv, false, DL);
+ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
+ OrigArg.Flags[0], OrigArg.IsFixed);
+ if (NeedsRegBlock)
+ SplitArgs.back().Flags[0].setInConsecutiveRegs();
+ }
+
+ SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+}
+
+/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
+static MachineInstrBuilder
+mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+ ArrayRef<Register> SrcRegs) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT LLTy = MRI.getType(DstRegs[0]);
+ LLT PartLLT = MRI.getType(SrcRegs[0]);
+
+ // Deal with v3s16 split into v2s16
+ LLT LCMTy = getCoverTy(LLTy, PartLLT);
+ if (LCMTy == LLTy) {
+ // Common case where no padding is needed.
+ assert(DstRegs.size() == 1);
+ return B.buildConcatVectors(DstRegs[0], SrcRegs);
+ }
+
+ // We need to create an unmerge to the result registers, which may require
+ // widening the original value.
+ Register UnmergeSrcReg;
+ if (LCMTy != PartLLT) {
+ assert(DstRegs.size() == 1);
+ return B.buildDeleteTrailingVectorElements(
+ DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs));
+ } else {
+ // We don't need to widen anything if we're extracting a scalar which was
+ // promoted to a vector e.g. s8 -> v4s8 -> s8
+ assert(SrcRegs.size() == 1);
+ UnmergeSrcReg = SrcRegs[0];
+ }
+
+ int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
+
+ SmallVector<Register, 8> PadDstRegs(NumDst);
+ std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
+
+ // Create the excess dead defs for the unmerge.
+ for (int I = DstRegs.size(); I != NumDst; ++I)
+ PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+
+ if (PadDstRegs.size() == 1)
+ return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
+ return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
+}
+
+/// Create a sequence of instructions to combine pieces split into register
+/// typed values to the original IR value. \p OrigRegs contains the destination
+/// value registers of type \p LLTy, and \p Regs contains the legalized pieces
+/// with type \p PartLLT. This is used for incoming values (physregs to vregs).
+static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
+ ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT,
+ const ISD::ArgFlagsTy Flags) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+
+ if (PartLLT == LLTy) {
+ // We should have avoided introducing a new virtual register, and just
+ // directly assigned here.
+ assert(OrigRegs[0] == Regs[0]);
+ return;
+ }
+
+ if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 &&
+ Regs.size() == 1) {
+ B.buildBitcast(OrigRegs[0], Regs[0]);
+ return;
+ }
+
+ // A vector PartLLT needs extending to LLTy's element size.
+ // E.g. <2 x s64> = G_SEXT <2 x s32>.
+ if (PartLLT.isVector() == LLTy.isVector() &&
+ PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
+ (!PartLLT.isVector() ||
+ PartLLT.getNumElements() == LLTy.getNumElements()) &&
+ OrigRegs.size() == 1 && Regs.size() == 1) {
+ Register SrcReg = Regs[0];
+
+ LLT LocTy = MRI.getType(SrcReg);
+
+ if (Flags.isSExt()) {
+ SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
+ .getReg(0);
+ } else if (Flags.isZExt()) {
+ SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
+ .getReg(0);
+ }
+
+ // Sometimes pointers are passed zero extended.
+ LLT OrigTy = MRI.getType(OrigRegs[0]);
+ if (OrigTy.isPointer()) {
+ LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits());
+ B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg));
+ return;
+ }
+
+ B.buildTrunc(OrigRegs[0], SrcReg);
+ return;
+ }
+
+ if (!LLTy.isVector() && !PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
+ LLT OrigTy = MRI.getType(OrigRegs[0]);
+
+ unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size();
+ if (SrcSize == OrigTy.getSizeInBits())
+ B.buildMergeValues(OrigRegs[0], Regs);
+ else {
+ auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs);
+ B.buildTrunc(OrigRegs[0], Widened);
+ }
+
+ return;
+ }
+
+ if (PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
+ SmallVector<Register> CastRegs(Regs.begin(), Regs.end());
+
+ // If PartLLT is a mismatched vector in both number of elements and element
+ // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
+ // have the same elt type, i.e. v4s32.
+ if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
+ PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
+ Regs.size() == 1) {
+ LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
+ .changeElementCount(PartLLT.getElementCount() * 2);
+ CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0);
+ PartLLT = NewTy;
+ }
+
+ if (LLTy.getScalarType() == PartLLT.getElementType()) {
+ mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
+ } else {
+ unsigned I = 0;
+ LLT GCDTy = getGCDType(LLTy, PartLLT);
+
+ // We are both splitting a vector, and bitcasting its element types. Cast
+ // the source pieces into the appropriate number of pieces with the result
+ // element type.
+ for (Register SrcReg : CastRegs)
+ CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0);
+ mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
+ }
+
+ return;
+ }
+
+ assert(LLTy.isVector() && !PartLLT.isVector());
+
+ LLT DstEltTy = LLTy.getElementType();
+
+ // Pointer information was discarded. We'll need to coerce some register types
+ // to avoid violating type constraints.
+ LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();
+
+ assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());
+
+ if (DstEltTy == PartLLT) {
+ // Vector was trivially scalarized.
+
+ if (RealDstEltTy.isPointer()) {
+ for (Register Reg : Regs)
+ MRI.setType(Reg, RealDstEltTy);
+ }
+
+ B.buildBuildVector(OrigRegs[0], Regs);
+ } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+ // Deal with vector with 64-bit elements decomposed to 32-bit
+ // registers. Need to create intermediate 64-bit elements.
+ SmallVector<Register, 8> EltMerges;
+ int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+ assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+ for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+ auto Merge =
+ B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt));
+ // Fix the type in case this is really a vector of pointers.
+ MRI.setType(Merge.getReg(0), RealDstEltTy);
+ EltMerges.push_back(Merge.getReg(0));
+ Regs = Regs.drop_front(PartsPerElt);
+ }
+
+ B.buildBuildVector(OrigRegs[0], EltMerges);
+ } else {
+ // Vector was split, and elements promoted to a wider type.
+ // FIXME: Should handle floating point promotions.
+ LLT BVType = LLT::fixed_vector(LLTy.getNumElements(), PartLLT);
+ auto BV = B.buildBuildVector(BVType, Regs);
+ B.buildTrunc(OrigRegs[0], BV);
+ }
+}
+
+/// Create a sequence of instructions to expand the value in \p SrcReg (of type
+/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should
+/// contain the type of scalar value extension if necessary.
+///
+/// This is used for outgoing values (vregs to physregs)
+static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+ Register SrcReg, LLT SrcTy, LLT PartTy,
+ unsigned ExtendOp = TargetOpcode::G_ANYEXT) {
+ // We could just insert a regular copy, but this is unreachable at the moment.
+ assert(SrcTy != PartTy && "identical part types shouldn't reach here");
+
+ const unsigned PartSize = PartTy.getSizeInBits();
+
+ if (PartTy.isVector() == SrcTy.isVector() &&
+ PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
+ assert(DstRegs.size() == 1);
+ B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
+ return;
+ }
+
+ if (SrcTy.isVector() && !PartTy.isVector() &&
+ PartSize > SrcTy.getElementType().getSizeInBits()) {
+ // Vector was scalarized, and the elements extended.
+ auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
+ for (int i = 0, e = DstRegs.size(); i != e; ++i)
+ B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+ return;
+ }
+
+ if (SrcTy.isVector() && PartTy.isVector() &&
+ PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() &&
+ SrcTy.getNumElements() < PartTy.getNumElements()) {
+ // A coercion like: v2f32 -> v4f32.
+ Register DstReg = DstRegs.front();
+ B.buildPadVectorWithUndefElements(DstReg, SrcReg);
+ return;
+ }
+
+ LLT GCDTy = getGCDType(SrcTy, PartTy);
+ if (GCDTy == PartTy) {
+ // If this is already evenly divisible, we can create a simple unmerge.
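+ // E.g. (sketch) an s64 source split into two s32 parts:
+ //   %d0:_(s32), %d1:_(s32) = G_UNMERGE_VALUES %src:_(s64)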
+ B.buildUnmerge(DstRegs, SrcReg);
+ return;
+ }
+
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT DstTy = MRI.getType(DstRegs[0]);
+ LLT LCMTy = getCoverTy(SrcTy, PartTy);
+
+ if (PartTy.isVector() && LCMTy == PartTy) {
+ assert(DstRegs.size() == 1);
+ B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
+ return;
+ }
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ unsigned CoveringSize = LCMTy.getSizeInBits();
+
+ Register UnmergeSrc = SrcReg;
+
+ if (!LCMTy.isVector() && CoveringSize != SrcSize) {
+ // For scalars, it's common to be able to use a simple extension.
+ if (SrcTy.isScalar() && DstTy.isScalar()) {
+ CoveringSize = alignTo(SrcSize, DstSize);
+ LLT CoverTy = LLT::scalar(CoveringSize);
+ UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
+ } else {
+ // Widen to the common type.
+ // FIXME: This should respect the extend type
+ Register Undef = B.buildUndef(SrcTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(1, SrcReg);
+ for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
+ MergeParts.push_back(Undef);
+ UnmergeSrc = B.buildMergeLikeInstr(LCMTy, MergeParts).getReg(0);
+ }
+ }
+
+ if (LCMTy.isVector() && CoveringSize != SrcSize)
+ UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);
+
+ B.buildUnmerge(DstRegs, UnmergeSrc);
+}
+
+bool CallLowering::determineAndHandleAssignments(
+ ValueHandler &Handler, ValueAssigner &Assigner,
+ SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv, bool IsVarArg,
+ ArrayRef<Register> ThisReturnRegs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ SmallVector<CCValAssign, 16> ArgLocs;
+
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
+ if (!determineAssignments(Assigner, Args, CCInfo))
+ return false;
+
+ return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
+ ThisReturnRegs);
+}
+
+static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
+ if (Flags.isSExt())
+ return TargetOpcode::G_SEXT;
+ if (Flags.isZExt())
+ return TargetOpcode::G_ZEXT;
+ return TargetOpcode::G_ANYEXT;
+}
+
+bool CallLowering::determineAssignments(ValueAssigner &Assigner,
+ SmallVectorImpl<ArgInfo> &Args,
+ CCState &CCInfo) const {
+ LLVMContext &Ctx = CCInfo.getContext();
+ const CallingConv::ID CallConv = CCInfo.getCallingConv();
+
+ unsigned NumArgs = Args.size();
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ EVT CurVT = EVT::getEVT(Args[i].Ty);
+
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT);
+
+ // If we need to split the type over multiple regs, check it's a scenario
+ // we currently support.
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT);
+
+ if (NumParts == 1) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
+ return false;
+ continue;
+ }
+
+ // For incoming arguments (physregs to vregs), we could have values in
+ // physregs (or memlocs) which we want to extract and copy to vregs.
+ // During this, we might have to deal with the LLT being split across
+ // multiple regs, so we have to record this information for later.
+ //
+ // If we have outgoing args, then we have the opposite case. We have a
+ // vreg with an LLT which we want to assign to a physical location, and
+ // we might have to record that the value has to be split later.
+
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
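+ // In that case the value is split into two s64 parts. Part 0 is tagged with
+ // setSplit(), every later part has its original alignment reset to 1, and
+ // the final part is additionally tagged with setSplitEnd().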
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].Flags.clear();
+
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align(1));
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+
+ Args[i].Flags.push_back(Flags);
+ if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool CallLowering::handleAssignments(ValueHandler &Handler,
+ SmallVectorImpl<ArgInfo> &Args,
+ CCState &CCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ MachineIRBuilder &MIRBuilder,
+ ArrayRef<Register> ThisReturnRegs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ const unsigned NumArgs = Args.size();
+
+ // Stores thunks for outgoing register assignments. This is used so we delay
+ // generating register copies until mem loc assignments are done. We do this
+ // so that if the target is using the delayed stack protector feature, we can
+ // find the split point of the block accurately. E.g. if we have:
+ // G_STORE %val, %memloc
+ // $x0 = COPY %foo
+ // $x1 = COPY %bar
+ // CALL func
+ // ... then the split point for the block will correctly be at, and including,
+ // the copy to $x0. If instead the G_STORE instruction immediately precedes
+ // the CALL, then we'd prematurely choose the CALL as the split point, thus
+ // generating a split block with a CALL that uses undefined physregs.
+ SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;
+
+ for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
+ assert(j < ArgLocs.size() && "Skipped too many arg locs");
+ CCValAssign &VA = ArgLocs[j];
+ assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
+
+ if (VA.needsCustom()) {
+ std::function<void()> Thunk;
+ unsigned NumArgRegs = Handler.assignCustomValue(
+ Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
+ if (Thunk)
+ DelayedOutgoingRegAssignments.emplace_back(Thunk);
+ if (!NumArgRegs)
+ return false;
+ j += NumArgRegs;
+ continue;
+ }
+
+ const MVT ValVT = VA.getValVT();
+ const MVT LocVT = VA.getLocVT();
+
+ const LLT LocTy(LocVT);
+ const LLT ValTy(ValVT);
+ const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
+ const EVT OrigVT = EVT::getEVT(Args[i].Ty);
+ const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+
+ // Expected to be multiple regs for a single incoming arg.
+ // There should be Regs.size() ArgLocs per argument.
+ // This should be the same as getNumRegistersForCallingConv
+ const unsigned NumParts = Args[i].Flags.size();
+
+ // Now split the registers into the assigned types.
+ Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end());
+
+ if (NumParts != 1 || NewLLT != OrigTy) {
+ // If we can't directly assign the register, we need one or more
+ // intermediate values.
+ Args[i].Regs.resize(NumParts);
+
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part)
+ Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ }
+
+ assert((j + (NumParts - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+
+ // Coerce into outgoing value types before register assignment.
+ if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+ assert(Args[i].OrigRegs.size() == 1);
+ buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
+ ValTy, extendOpFromFlags(Args[i].Flags[0]));
+ }
+
+ bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
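+ // On big-endian targets the part order is reversed relative to the ArgLoc
+ // order, so part 0 of the value is paired with the last of its ArgLocs.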
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register ArgReg = Args[i].Regs[Part];
+ // There should be Regs.size() ArgLocs per argument.
+ unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
+ CCValAssign &VA = ArgLocs[j + Idx];
+ const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+
+ if (VA.isMemLoc() && !Flags.isByVal()) {
+ // Individual pieces may have been spilled to the stack and others
+ // passed in registers.
+
+ // TODO: The memory size may be larger than the value we need to
+ // store. We may need to adjust the offset for big endian targets.
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+
+ Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
+ continue;
+ }
+
+ if (VA.isMemLoc() && Flags.isByVal()) {
+ assert(Args[i].Regs.size() == 1 &&
+ "didn't expect split byval pointer");
+
+ if (Handler.isIncomingArgumentHandler()) {
+ // We just need to copy the frame index value to the pointer.
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(
+ Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags);
+ MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
+ } else {
+ // For outgoing byval arguments, insert the implicit copy byval
+ // implies, such that writes in the callee do not modify the caller's
+ // value.
+ uint64_t MemSize = Flags.getByValSize();
+ int64_t Offset = VA.getLocMemOffset();
+
+ MachinePointerInfo DstMPO;
+ Register StackAddr =
+ Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);
+
+ MachinePointerInfo SrcMPO(Args[i].OrigValue);
+ if (!Args[i].OrigValue) {
+ // We still need to accurately track the stack address space if we
+ // don't know the underlying value.
+ const LLT PtrTy = MRI.getType(StackAddr);
+ SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
+ }
+
+ Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
+ inferAlignFromPtrInfo(MF, DstMPO));
+
+ Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
+ inferAlignFromPtrInfo(MF, SrcMPO));
+
+ Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
+ DstMPO, DstAlign, SrcMPO, SrcAlign,
+ MemSize, VA);
+ }
+ continue;
+ }
+
+ assert(!VA.needsCustom() && "custom loc should have been handled already");
+
+ if (i == 0 && !ThisReturnRegs.empty() &&
+ Handler.isIncomingArgumentHandler() &&
+ isTypeIsValidForThisReturn(ValVT)) {
+ Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
+ continue;
+ }
+
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ });
+ }
+ }
+
+ // Now that all pieces have been assigned, re-pack the register typed values
+ // into the original value typed registers.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ // Merge the split registers into the expected larger result vregs of
+ // the original call.
+ buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
+ LocTy, Args[i].Flags[0]);
+ }
+
+ j += NumParts - 1;
+ }
+ for (auto &Fn : DelayedOutgoingRegAssignments)
+ Fn();
+
+ return true;
+}
+
+void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg,
+ int FI) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
+
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MRI.getType(VRegs[I]),
+ commonAlignment(BaseAlign, Offsets[I]));
+ MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs,
+ Register DemoteReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL);
+
+ MachinePointerInfo PtrInfo(AS);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MRI.getType(VRegs[I]),
+ commonAlignment(BaseAlign, Offsets[I]));
+ MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetIncomingArgument(
+ const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
+ MachineRegisterInfo &MRI, const DataLayout &DL) const {
+ unsigned AS = DL.getAllocaAddrSpace();
+ DemoteReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AS, DL.getPointerSizeInBits(AS)));
+
+ Type *PtrTy = PointerType::get(F.getReturnType(), AS);
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);
+
+ // NOTE: Assume that a pointer won't get split into more than one VT.
+ assert(ValueVTs.size() == 1);
+
+ ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()),
+ ArgInfo::NoArgIndex);
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
+ DemoteArg.Flags[0].setSRet();
+ SplitArgs.insert(SplitArgs.begin(), DemoteArg);
+}
+
+void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
+ const CallBase &CB,
+ CallLoweringInfo &Info) const {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ Type *RetTy = CB.getType();
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+
+ int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
+ DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);
+
+ Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
+ ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS),
+ ArgInfo::NoArgIndex);
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
+ DemoteArg.Flags[0].setSRet();
+
+ Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
+ Info.DemoteStackIndex = FI;
+ Info.DemoteRegister = DemoteReg;
+}
+
+bool CallLowering::checkReturn(CCState &CCInfo,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ CCAssignFn *Fn) const {
+ for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
+ MVT VT = MVT::getVT(Outs[I].Ty);
+ if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
+ return false;
+ }
+ return true;
+}
+
+void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
+ AttributeList Attrs,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ const DataLayout &DL) const {
+ LLVMContext &Context = RetTy->getContext();
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
+ addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);
+
+ for (EVT VT : SplitVTs) {
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
+ MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
+ Type *PartTy = EVT(RegVT).getTypeForEVT(Context);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Outs.emplace_back(PartTy, Flags);
+ }
+ }
+}
+
+bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ Type *ReturnType = F.getReturnType();
+ CallingConv::ID CallConv = F.getCallingConv();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
+ MF.getDataLayout());
+ return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
+}
+
+bool CallLowering::parametersInCSRMatch(
+ const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &OutLocs,
+ const SmallVectorImpl<ArgInfo> &OutArgs) const {
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ const auto &ArgLoc = OutLocs[i];
+ // If it's not a register, it's fine.
+ if (!ArgLoc.isRegLoc())
+ continue;
+
+ MCRegister PhysReg = ArgLoc.getLocReg();
+
+ // Only look at callee-saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs()
+ << "... Call has an argument passed in a callee-saved register.\n");
+
+ // Check if it was copied from.
+ const ArgInfo &OutInfo = OutArgs[i];
+
+ if (OutInfo.Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs() << "... Cannot handle arguments in multiple registers.\n");
+ return false;
+ }
+
+ // Check if we copy the register, walking through copies from virtual
+ // registers. Note that getDefIgnoringCopies does not ignore copies from
+ // physical registers.
+ MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
+ if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Parameter was not copied into a VReg, cannot tail call.\n");
+ return false;
+ }
+
+ // Got a copy. Verify that it's the same as the register we want.
+ Register CopyRHS = RegDef->getOperand(1).getReg();
+ if (CopyRHS != PhysReg) {
+ LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
+ "VReg, cannot tail call.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
+ MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ ValueAssigner &CalleeAssigner,
+ ValueAssigner &CallerAssigner) const {
+ const Function &F = MF.getFunction();
+ CallingConv::ID CalleeCC = Info.CallConv;
+ CallingConv::ID CallerCC = F.getCallingConv();
+
+ if (CallerCC == CalleeCC)
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs1;
+ CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext());
+ if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1))
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs2;
+ CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext());
+ if (!determineAssignments(CallerAssigner, InArgs, CCInfo2))
+ return false;
+
+ // We need the argument locations to match up exactly. If there's more in
+ // one than the other, then we are done.
+ if (ArgLocs1.size() != ArgLocs2.size())
+ return false;
+
+ // Make sure that each location is passed in exactly the same way.
+ for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) {
+ const CCValAssign &Loc1 = ArgLocs1[i];
+ const CCValAssign &Loc2 = ArgLocs2[i];
+
+ // We need both of them to be the same. So if one is a register and one
+ // isn't, we're done.
+ if (Loc1.isRegLoc() != Loc2.isRegLoc())
+ return false;
+
+ if (Loc1.isRegLoc()) {
+ // If they don't have the same register location, we're done.
+ if (Loc1.getLocReg() != Loc2.getLocReg())
+ return false;
+
+ // They matched, so we can move to the next ArgLoc.
+ continue;
+ }
+
+ // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match.
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
+ return false;
+ }
+
+ return true;
+}
+
+LLT CallLowering::ValueHandler::getStackValueStoreType(
+ const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const {
+ const MVT ValVT = VA.getValVT();
+ if (ValVT != MVT::iPTR) {
+ LLT ValTy(ValVT);
+
+ // We lost the pointeriness going through CCValAssign, so try to restore it
+ // based on the flags.
+ if (Flags.isPointer()) {
+ LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(),
+ ValTy.getScalarSizeInBits());
+ if (ValVT.isVector())
+ return LLT::vector(ValTy.getElementCount(), PtrTy);
+ return PtrTy;
+ }
+
+ return ValTy;
+ }
+
+ unsigned AddrSpace = Flags.getPointerAddrSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace));
+}
+
+void CallLowering::ValueHandler::copyArgumentMemory(
+ const ArgInfo &Arg, Register DstPtr, Register SrcPtr,
+ const MachinePointerInfo &DstPtrInfo, Align DstAlign,
+ const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize,
+ CCValAssign &VA) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *SrcMMO = MF.getMachineMemOperand(
+ SrcPtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize,
+ SrcAlign);
+
+ MachineMemOperand *DstMMO = MF.getMachineMemOperand(
+ DstPtrInfo,
+ MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable,
+ MemSize, DstAlign);
+
+ const LLT PtrTy = MRI.getType(DstPtr);
+ const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize);
+ MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO);
+}
+
+Register CallLowering::ValueHandler::extendRegister(Register ValReg,
+ CCValAssign &VA,
+ unsigned MaxSizeBits) {
+ LLT LocTy{VA.getLocVT()};
+ LLT ValTy{VA.getValVT()};
+
+ if (LocTy.getSizeInBits() == ValTy.getSizeInBits())
+ return ValReg;
+
+ if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) {
+ if (MaxSizeBits <= ValTy.getSizeInBits())
+ return ValReg;
+ LocTy = LLT::scalar(MaxSizeBits);
+ }
+
+ const LLT ValRegTy = MRI.getType(ValReg);
+ if (ValRegTy.isPointer()) {
+ // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so
+ // we have to cast to do the extension.
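+ // E.g. (sketch) for a 32-bit p0 value:
+ //   %int:_(s32) = G_PTRTOINT %ptr:_(p0)
+ // and %int is then extended to the wider location type by the switch below.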
+ LLT IntPtrTy = LLT::scalar(ValRegTy.getSizeInBits());
+ ValReg = MIRBuilder.buildPtrToInt(IntPtrTy, ValReg).getReg(0);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: break;
+ case CCValAssign::Full:
+ case CCValAssign::BCvt:
+ // FIXME: bitconverting between vector types may or may not be a
+ // nop in big-endian situations.
+ return ValReg;
+ case CCValAssign::AExt: {
+ auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
+ return MIB.getReg(0);
+ }
+ case CCValAssign::SExt: {
+ Register NewReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildSExt(NewReg, ValReg);
+ return NewReg;
+ }
+ case CCValAssign::ZExt: {
+ Register NewReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildZExt(NewReg, ValReg);
+ return NewReg;
+ }
+ }
+ llvm_unreachable("unable to extend register");
+}
+
+void CallLowering::ValueAssigner::anchor() {}
+
+Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
+ Register SrcReg,
+ LLT NarrowTy) {
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::ZExt: {
+ return MIRBuilder
+ .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
+ NarrowTy.getScalarSizeInBits())
+ .getReg(0);
+ }
+ case CCValAssign::LocInfo::SExt: {
+ return MIRBuilder
+ .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
+ NarrowTy.getScalarSizeInBits())
+ .getReg(0);
+ break;
+ }
+ default:
+ return SrcReg;
+ }
+}
+
+/// Check if we can use a basic COPY instruction between the two types.
+///
+/// We're currently building on top of the infrastructure using MVT, which loses
+/// pointer information in the CCValAssign. We accept copies from physical
+/// registers that have been reported as integers if the destination is an
+/// equivalently sized pointer LLT.
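+///
+/// E.g. an incoming physical register reported as s64 can be copied directly
+/// into a 64-bit p0 virtual register, and vice versa.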
+static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
+ if (SrcTy == DstTy)
+ return true;
+
+ if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ return false;
+
+ SrcTy = SrcTy.getScalarType();
+ DstTy = DstTy.getScalarType();
+
+ return (SrcTy.isPointer() && DstTy.isScalar()) ||
+ (DstTy.isPointer() && SrcTy.isScalar());
+}
+
+void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
+ Register PhysReg,
+ CCValAssign VA) {
+ const MVT LocVT = VA.getLocVT();
+ const LLT LocTy(LocVT);
+ const LLT RegTy = MRI.getType(ValVReg);
+
+ if (isCopyCompatibleType(RegTy, LocTy)) {
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ return;
+ }
+
+ auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
+ auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy);
+ MIRBuilder.buildTrunc(ValVReg, Hint);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
new file mode 100644
index 000000000000..748fa273d499
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -0,0 +1,166 @@
+//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code to combine machine functions at the generic
+// level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+namespace llvm {
+cl::OptionCategory GICombinerOptionCategory(
+ "GlobalISel Combiner",
+ "Control the rules which are enabled. These options all take a comma "
+ "separated list of rules to disable and may be specified by number "
+ "or number range (e.g. 1-10)."
+#ifndef NDEBUG
+ " They may also be specified by name."
+#endif
+);
+} // end namespace llvm
+
+namespace {
+/// This class acts as the glue that joins the CombinerHelper to the overall
+/// Combine algorithm. The CombinerHelper is intended to report the
+/// modifications it makes to the MIR to the GISelChangeObserver and the
+/// observer subclass will act on these events. In this case, instruction
+/// erasure will cancel any future visits to the erased instruction and
+/// instruction creation will schedule that instruction for a future visit.
+/// Other Combiner implementations may require more complex behaviour from
+/// their GISelChangeObserver subclass.
+class WorkListMaintainer : public GISelChangeObserver {
+ using WorkListTy = GISelWorkList<512>;
+ WorkListTy &WorkList;
+ /// Instructions that have been created and that we want to report once they
+ /// have their operands. This is only maintained if debug output is requested.
+#ifndef NDEBUG
+ SetVector<const MachineInstr *> CreatedInstrs;
+#endif
+
+public:
+ WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
+ virtual ~WorkListMaintainer() = default;
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
+ WorkList.remove(&MI);
+ }
+ void createdInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
+ WorkList.insert(&MI);
+ LLVM_DEBUG(CreatedInstrs.insert(&MI));
+ }
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+ void changedInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+
+ void reportFullyCreatedInstrs() {
+ LLVM_DEBUG(for (const auto *MI
+ : CreatedInstrs) {
+ dbgs() << "Created: ";
+ MI->print(dbgs());
+ });
+ LLVM_DEBUG(CreatedInstrs.clear());
+ }
+};
+}
+
+Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
+ : CInfo(Info), TPC(TPC) {
+ (void)this->TPC; // FIXME: Remove when used.
+}
+
+bool Combiner::combineMachineInstrs(MachineFunction &MF,
+ GISelCSEInfo *CSEInfo) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ // FIXME: Should this be here or in individual combiner passes?
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ Builder = CSEInfo ? std::make_unique<CSEMIRBuilder>()
+ : std::make_unique<MachineIRBuilder>();
+ MRI = &MF.getRegInfo();
+ Builder->setMF(MF);
+ if (CSEInfo)
+ Builder->setCSEInfo(CSEInfo);
+
+ LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
+
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
+ bool MFChanged = false;
+ bool Changed;
+ MachineIRBuilder &B = *Builder;
+
+ do {
+ // Collect all instructions. Visit the basic blocks in post order and insert
+ // instructions into the list bottom up, so that popping from the back of the
+ // work list walks them in reverse post order (top down).
+ Changed = false;
+ GISelWorkList<512> WorkList;
+ WorkListMaintainer Observer(WorkList);
+ GISelObserverWrapper WrapperObserver(&Observer);
+ if (CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ for (MachineInstr &CurMI :
+ llvm::make_early_inc_range(llvm::reverse(*MBB))) {
+ // Erase dead insts before even adding to the list.
+ if (isTriviallyDead(CurMI, *MRI)) {
+ LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
+ llvm::salvageDebugInfo(*MRI, CurMI);
+ CurMI.eraseFromParent();
+ continue;
+ }
+ WorkList.deferred_insert(&CurMI);
+ }
+ }
+ WorkList.finalize();
+ // Main Loop. Process the instructions here.
+ while (!WorkList.empty()) {
+ MachineInstr *CurrInst = WorkList.pop_back_val();
+ LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
+ Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
+ Observer.reportFullyCreatedInstrs();
+ }
+ MFChanged |= Changed;
+ } while (Changed);
+
+#ifndef NDEBUG
+ if (CSEInfo) {
+ if (auto E = CSEInfo->verify()) {
+ errs() << E << '\n';
+ assert(false && "CSEInfo is not consistent. Likely missing calls to "
+ "observer on mutations.");
+ }
+ }
+#endif
+ return MFChanged;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
new file mode 100644
index 000000000000..cc7fb3ee1109
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -0,0 +1,6029 @@
+//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cmath>
+#include <optional>
+#include <tuple>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+// Option to allow testing of the combiner while no targets know about indexed
+// addressing.
+static cl::opt<bool>
+ ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
+ cl::desc("Force all indexed operations to be "
+ "legal for the GlobalISel combiner"));
+
+CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
+ MachineIRBuilder &B, bool IsPreLegalize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
+ MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
+ RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
+ TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
+ (void)this->KB;
+}
+
+const TargetLowering &CombinerHelper::getTargetLowering() const {
+ return *Builder.getMF().getSubtarget().getTargetLowering();
+}
+
+/// \returns The little endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 0
+static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return I;
+}
+
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
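+///
+/// E.g. for V = 8 in a 32-bit type, ctlz(8) = 28, so LogBase2 = 31 - 28 = 3.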
+static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(V);
+ auto Ctlz = MIB.buildCTLZ(Ty, V);
+ auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+ return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
+}
+
+/// \returns The big endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 3
+static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return ByteWidth - I - 1;
+}
+
+/// Given a map from byte offsets in memory to indices in a load/store,
+/// determine if that map corresponds to a little or big endian byte pattern.
+///
+/// \param MemOffset2Idx maps memory offsets to address offsets.
+/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
+///
+/// \returns true if the map corresponds to a big endian byte pattern, false if
+/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
+///
+/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
+/// are as follows:
+///
+/// AddrOffset Little endian Big endian
+/// 0 0 3
+/// 1 1 2
+/// 2 2 1
+/// 3 3 0
+static std::optional<bool>
+isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ int64_t LowestIdx) {
+ // Need at least two byte positions to decide on endianness.
+ unsigned Width = MemOffset2Idx.size();
+ if (Width < 2)
+ return std::nullopt;
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
+ auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
+ if (MemOffsetAndIdx == MemOffset2Idx.end())
+ return std::nullopt;
+ const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
+ assert(Idx >= 0 && "Expected non-negative byte offset?");
+ LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
+ BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
+ if (!BigEndian && !LittleEndian)
+ return std::nullopt;
+ }
+
+ assert((BigEndian != LittleEndian) &&
+ "Pattern cannot be both big and little endian!");
+ return BigEndian;
+}
+
+bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
+
+bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
+ assert(LI && "Must have LegalizerInfo to query isLegal!");
+ return LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
+bool CombinerHelper::isLegalOrBeforeLegalizer(
+ const LegalityQuery &Query) const {
+ return isPreLegalize() || isLegal(Query);
+}
+
+bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
+ if (!Ty.isVector())
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
+ // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
+ if (isPreLegalize())
+ return true;
+ LLT EltTy = Ty.getElementType();
+ return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
+ isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
+}
+
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
+ Register ToReg) const {
+ Observer.changingAllUsesOfReg(MRI, FromReg);
+
+ if (MRI.constrainRegAttrs(ToReg, FromReg))
+ MRI.replaceRegWith(FromReg, ToReg);
+ else
+ Builder.buildCopy(ToReg, FromReg);
+
+ Observer.finishedChangingAllUsesOfReg();
+}
+
+void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
+ MachineOperand &FromRegOp,
+ Register ToReg) const {
+ assert(FromRegOp.getParent() && "Expected an operand in an MI");
+ Observer.changingInstr(*FromRegOp.getParent());
+
+ FromRegOp.setReg(ToReg);
+
+ Observer.changedInstr(*FromRegOp.getParent());
+}
+
+void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
+ unsigned ToOpcode) const {
+ Observer.changingInstr(FromMI);
+
+ FromMI.setDesc(Builder.getTII().get(ToOpcode));
+
+ Observer.changedInstr(FromMI);
+}
+
+const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
+ return RBI->getRegBank(Reg, MRI, *TRI);
+}
+
+void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
+ if (RegBank)
+ MRI.setRegBank(Reg, *RegBank);
+}
+
+bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
+ if (matchCombineCopy(MI)) {
+ applyCombineCopy(MI);
+ return true;
+ }
+ return false;
+}
+bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
+ if (MI.getOpcode() != TargetOpcode::COPY)
+ return false;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ return canReplaceReg(DstReg, SrcReg, MRI);
+}
+void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+}
+
+bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
+ bool IsUndef = false;
+ SmallVector<Register, 4> Ops;
+ if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
+ applyCombineConcatVectors(MI, IsUndef, Ops);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+ "Invalid instruction");
+ IsUndef = true;
+ MachineInstr *Undef = nullptr;
+
+ // Walk over all the operands of concat vectors and check if they are
+ // build_vector themselves or undef.
+ // Then collect their operands in Ops.
+ for (const MachineOperand &MO : MI.uses()) {
+ Register Reg = MO.getReg();
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ assert(Def && "Operand not defined");
+ switch (Def->getOpcode()) {
+ case TargetOpcode::G_BUILD_VECTOR:
+ IsUndef = false;
+ // Remember the operands of the build_vector to fold
+ // them into the yet-to-build flattened concat vectors.
+ for (const MachineOperand &BuildVecMO : Def->uses())
+ Ops.push_back(BuildVecMO.getReg());
+ break;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ LLT OpType = MRI.getType(Reg);
+ // Keep one undef value for all the undef operands.
+ if (!Undef) {
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Undef = Builder.buildUndef(OpType.getScalarType());
+ }
+ assert(MRI.getType(Undef->getOperand(0).getReg()) ==
+ OpType.getScalarType() &&
+ "All undefs should have the same type");
+ // Break the undef vector in as many scalar elements as needed
+ // for the flattening.
+ for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
+ EltIdx != EltEnd; ++EltIdx)
+ Ops.push_back(Undef->getOperand(0).getReg());
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+void CombinerHelper::applyCombineConcatVectors(
+ MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+ // We determined that the concat_vectors can be flattened.
+ // Generate the flattened build_vector.
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+ // Note: IsUndef is sort of redundant. We could have determined it by
+ // checking that all Ops are undef. Alternatively, we could have
+ // generated a build_vector of undefs and rely on another combine to
+ // clean that up. For now, given we already gather this information
+ // in tryCombineConcatVectors, just save compile time and issue the
+ // right thing.
+ if (IsUndef)
+ Builder.buildUndef(NewDstReg);
+ else
+ Builder.buildBuildVector(NewDstReg, Ops);
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
+
+bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
+ SmallVector<Register, 4> Ops;
+ if (matchCombineShuffleVector(MI, Ops)) {
+ applyCombineShuffleVector(MI, Ops);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT SrcType = MRI.getType(Src1);
+ // As bizarre as it may look, shuffle vector can actually produce
+ // scalar! This is because at the IR level a <1 x ty> shuffle
+ // vector is perfectly valid.
+ unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
+ unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
+
+ // If the resulting vector is smaller than the size of the source
+ // vectors being concatenated, we won't be able to replace the
+ // shuffle vector into a concat_vectors.
+ //
+ // Note: We may still be able to produce a concat_vectors fed by
+ // extract_vector_elt and so on. It is less clear that would
+ // be better though, so don't bother for now.
+ //
+ // If the destination is a scalar, the size of the sources doesn't
+ // matter. We will lower the shuffle to a plain copy. This will
+ // work only if the source and destination have the same size. But
+ // that's covered by the next condition.
+ //
+ // TODO: If the sizes of the source and destination don't match
+ // we could still emit an extract vector element in that case.
+ if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
+ return false;
+
+ // Check that the shuffle mask can be broken evenly between the
+ // different sources.
+ if (DstNumElts % SrcNumElts != 0)
+ return false;
+
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
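+ // E.g. (sketch) with <2 x s32> sources, a mask of <0, 1, 2, 3> concatenates
+ // Src1 and Src2, and ConcatSrcs below ends up as {0, 1}.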
+ unsigned NumConcat = DstNumElts / SrcNumElts;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ int Idx = Mask[i];
+ // Undef value.
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcType sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
+ return false;
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together.
+ // Collect the different operands for that.
+ Register UndefReg;
+ Register Src2 = MI.getOperand(2).getReg();
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0) {
+ if (!UndefReg) {
+ Builder.setInsertPt(*MI.getParent(), MI);
+ UndefReg = Builder.buildUndef(SrcType).getReg(0);
+ }
+ Ops.push_back(UndefReg);
+ } else if (Src == 0)
+ Ops.push_back(Src1);
+ else
+ Ops.push_back(Src2);
+ }
+ return true;
+}
+
+void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
+ const ArrayRef<Register> Ops) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+ if (Ops.size() == 1)
+ Builder.buildCopy(NewDstReg, Ops[0]);
+ else
+ Builder.buildMergeLikeInstr(NewDstReg, Ops);
+
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
+
+namespace {
+
+/// Select a preference between two uses. CurrentUse is the current preference
+/// while *ForCandidate is attributes of the candidate under consideration.
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+ PreferredTuple &CurrentUse,
+ const LLT TyForCandidate,
+ unsigned OpcodeForCandidate,
+ MachineInstr *MIForCandidate) {
+ if (!CurrentUse.Ty.isValid()) {
+ if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
+ CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ return CurrentUse;
+ }
+
+ // We permit the extend to hoist through basic blocks but this is only
+ // sensible if the target has extending loads. If you end up lowering back
+ // into a load and extend during the legalizer then the end result is
+ // hoisting the extend up to the load.
+
+ // Prefer defined extensions to undefined extensions as these are more
+ // likely to reduce the number of instructions.
+ if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
+ CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
+ OpcodeForCandidate != TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+
+ // Prefer sign extensions to zero extensions as sign-extensions tend to be
+ // more expensive. Don't do this if the load is already a zero-extend load
+ // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+ // later.
+ if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
+ if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
+ OpcodeForCandidate == TargetOpcode::G_ZEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
+ OpcodeForCandidate == TargetOpcode::G_SEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+
+ // This is potentially target specific. We've chosen the largest type
+ // because G_TRUNC is usually free. One potential catch with this is that
+ // some targets have fewer large registers than small ones, and this choice
+ // potentially increases the live-range for the
+ // larger value.
+ if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+ return CurrentUse;
+}
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+ MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+ std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
+ MachineOperand &UseMO)>
+ Inserter) {
+ MachineInstr &UseMI = *UseMO.getParent();
+
+ MachineBasicBlock *InsertBB = UseMI.getParent();
+
+ // If the use is a PHI then we want the predecessor block instead.
+ if (UseMI.isPHI()) {
+ MachineOperand *PredBB = std::next(&UseMO);
+ InsertBB = PredBB->getMBB();
+ }
+
+ // If the block is the same block as the def then we want to insert just after
+ // the def instead of at the start of the block.
+ if (InsertBB == DefMI.getParent()) {
+ MachineBasicBlock::iterator InsertPt = &DefMI;
+ Inserter(InsertBB, std::next(InsertPt), UseMO);
+ return;
+ }
+
+ // Otherwise we want the start of the BB
+ Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
+}
+} // end anonymous namespace
+
+bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
+ PreferredTuple Preferred;
+ if (matchCombineExtendingLoads(MI, Preferred)) {
+ applyCombineExtendingLoads(MI, Preferred);
+ return true;
+ }
+ return false;
+}
+
+static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
+ unsigned CandidateLoadOpc;
+ switch (ExtOpc) {
+ case TargetOpcode::G_ANYEXT:
+ CandidateLoadOpc = TargetOpcode::G_LOAD;
+ break;
+ case TargetOpcode::G_SEXT:
+ CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
+ break;
+ case TargetOpcode::G_ZEXT:
+ CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
+ break;
+ default:
+ llvm_unreachable("Unexpected extend opc");
+ }
+ return CandidateLoadOpc;
+}
+
+bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
+ PreferredTuple &Preferred) {
+ // We match the loads and follow the uses to the extend instead of matching
+ // the extends and following the def to the load. This is because the load
+ // must remain in the same position for correctness (unless we also add code
+ // to find a safe place to sink it) whereas the extend is freely movable.
+ // It also prevents us from duplicating the load for the volatile case or just
+ // for performance.
+ GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
+ if (!LoadMI)
+ return false;
+
+ Register LoadReg = LoadMI->getDstReg();
+
+ LLT LoadValueTy = MRI.getType(LoadReg);
+ if (!LoadValueTy.isScalar())
+ return false;
+
+ // Most architectures are going to legalize loads narrower than s8 into at
+ // least a 1-byte load, and the MMOs can only describe memory accesses in
+ // multiples of bytes.
+ // If we try to perform extload combining on those, we can end up with
+ // %a(s8) = extload %ptr (load 1 byte from %ptr)
+ // ... which is an illegal extload instruction.
+ if (LoadValueTy.getSizeInBits() < 8)
+ return false;
+
+ // Non-power-of-2 types will very likely be legalized into multiple loads.
+ // Don't bother trying to match them into extending loads.
+ if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
+ return false;
+
+ // Find the preferred type aside from the any-extends (unless it's the only
+ // one) and non-extending ops. We'll emit an extending load to that type and
+ // emit a variant of (extend (trunc X)) for the others according to the
+ // relative type sizes. At the same time, pick an extend to use based on the
+ // extend involved in the chosen type.
+ unsigned PreferredOpcode =
+ isa<GLoad>(&MI)
+ ? TargetOpcode::G_ANYEXT
+ : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ Preferred = {LLT(), PreferredOpcode, nullptr};
+ for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
+ if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
+ (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
+ const auto &MMO = LoadMI->getMMO();
+ // For atomics, only form anyextending loads.
+ if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
+ continue;
+ // Check for legality.
+ if (!isPreLegalize()) {
+ LegalityQuery::MemDesc MMDesc(MMO);
+ unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
+ LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
+ if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
+ .Action != LegalizeActions::Legal)
+ continue;
+ }
+ Preferred = ChoosePreferredUse(MI, Preferred,
+ MRI.getType(UseMI.getOperand(0).getReg()),
+ UseMI.getOpcode(), &UseMI);
+ }
+ }
+
+ // There were no extends
+ if (!Preferred.MI)
+ return false;
+ // It should be impossible to choose an extend without selecting a different
+ // type since by definition the result of an extend is larger.
+ assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
+
+ LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+ return true;
+}
+
+void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
+ PreferredTuple &Preferred) {
+ // Rewrite the load to the chosen extending load.
+ Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+
+ // Inserter to insert a truncate back to the original type at a given point
+ // with some basic CSE to limit truncate duplication to one per BB.
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore,
+ MachineOperand &UseMO) {
+ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+ if (PreviouslyEmitted) {
+ Observer.changingInstr(*UseMO.getParent());
+ UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
+ Observer.changedInstr(*UseMO.getParent());
+ return;
+ }
+
+ Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+ Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertIntoBB] = NewMI;
+ replaceRegOpWith(MRI, UseMO, NewDstReg);
+ };
+
+ Observer.changingInstr(MI);
+ unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
+ MI.setDesc(Builder.getTII().get(LoadOpc));
+
+ // Rewrite all the uses to fix up the types.
+ auto &LoadValue = MI.getOperand(0);
+ SmallVector<MachineOperand *, 4> Uses;
+ for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
+ Uses.push_back(&UseMO);
+
+ for (auto *UseMO : Uses) {
+ MachineInstr *UseMI = UseMO->getParent();
+
+ // If the extend is compatible with the preferred extend then we should fix
+ // up the type and extend so that it uses the preferred use.
+ if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
+ UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
+ Register UseDstReg = UseMI->getOperand(0).getReg();
+ MachineOperand &UseSrcMO = UseMI->getOperand(1);
+ const LLT UseDstTy = MRI.getType(UseDstReg);
+ if (UseDstReg != ChosenDstReg) {
+ if (Preferred.Ty == UseDstTy) {
+ // If the use has the same type as the preferred use, then merge
+ // the vregs and erase the extend. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ANYEXT %1(s8)
+ // ... = ... %3(s32)
+ // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // ... = ... %2(s32)
+ replaceRegWith(MRI, UseDstReg, ChosenDstReg);
+ Observer.erasingInstr(*UseMO->getParent());
+ UseMO->getParent()->eraseFromParent();
+ } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
+ // If the preferred size is smaller, then keep the extend but extend
+ // from the result of the extending load. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s64) = G_ANYEXT %1(s8)
+ // ... = ... %3(s64)
+ // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // %3:_(s64) = G_ANYEXT %2:_(s32)
+ // ... = ... %3(s64)
+ replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
+ } else {
+ // If the preferred size is large, then insert a truncate. For
+ // example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s64) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ZEXT %1(s8)
+ // ... = ... %3(s32)
+ // rewrites to:
+ // %2:_(s64) = G_SEXTLOAD ...
+ // %4:_(s8) = G_TRUNC %2:_(s64)
+ // %3:_(s32) = G_ZEXT %4:_(s8)
+ // ... = ... %3(s32)
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
+ InsertTruncAt);
+ }
+ continue;
+ }
+ // The use is (one of) the uses of the preferred use we chose earlier.
+ // We're going to update the load to def this value later so just erase
+ // the old extend.
+ Observer.erasingInstr(*UseMO->getParent());
+ UseMO->getParent()->eraseFromParent();
+ continue;
+ }
+
+ // The use isn't an extend. Truncate back to the type we originally loaded.
+ // This is free on many targets.
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
+ }
+
+ MI.getOperand(0).setReg(ChosenDstReg);
+ Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // If we have the following code:
+ // %mask = G_CONSTANT 255
+ // %ld = G_LOAD %ptr, (load s16)
+ // %and = G_AND %ld, %mask
+ //
+ // Try to fold it into
+ // %ld = G_ZEXTLOAD %ptr, (load s8)
+
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI.getType(Dst).isVector())
+ return false;
+
+ auto MaybeMask =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeMask)
+ return false;
+
+ APInt MaskVal = MaybeMask->Value;
+
+ if (!MaskVal.isMask())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ // Don't use getOpcodeDef() here since intermediate instructions may have
+ // multiple users.
+ GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
+ return false;
+
+ Register LoadReg = LoadMI->getDstReg();
+ LLT RegTy = MRI.getType(LoadReg);
+ Register PtrReg = LoadMI->getPointerReg();
+ unsigned RegSize = RegTy.getSizeInBits();
+ uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+ unsigned MaskSizeBits = MaskVal.countr_one();
+
+ // The mask may not be larger than the in-memory type, as it might cover
+ // sign-extended bits.
+ if (MaskSizeBits > LoadSizeBits)
+ return false;
+
+ // If the mask covers the whole destination register, there's nothing to
+ // extend
+ if (MaskSizeBits >= RegSize)
+ return false;
+
+ // Most targets cannot deal with loads of size < 8 and need to re-legalize to
+ // at least byte loads. Avoid creating such loads here
+ if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
+ return false;
+
+ const MachineMemOperand &MMO = LoadMI->getMMO();
+ LegalityQuery::MemDesc MemDesc(MMO);
+
+ // Don't modify the memory access size if this is atomic/volatile, but we can
+ // still adjust the opcode to indicate the high bit behavior.
+ if (LoadMI->isSimple())
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+ return false;
+
+ // TODO: Could check if it's legal with the reduced or original memory size.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*LoadMI);
+ auto &MF = B.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
+ B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ LoadMI->eraseFromParent();
+ };
+ return true;
+}
+
+bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
+ "shouldn't consider debug uses");
+ assert(DefMI.getParent() == UseMI.getParent());
+ if (&DefMI == &UseMI)
+ return true;
+ const MachineBasicBlock &MBB = *DefMI.getParent();
+ auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
+ return &MI == &DefMI || &MI == &UseMI;
+ });
+ if (DefOrUse == MBB.end())
+ llvm_unreachable("Block must contain both DefMI and UseMI!");
+ return &*DefOrUse == &DefMI;
+}
+
+bool CombinerHelper::dominates(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
+ "shouldn't consider debug uses");
+ if (MDT)
+ return MDT->dominates(&DefMI, &UseMI);
+ else if (DefMI.getParent() != UseMI.getParent())
+ return false;
+
+ return isPredecessor(DefMI, UseMI);
+}
+
+bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
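+ // Match a G_SEXT_INREG whose source is (possibly a truncate of) a
+ // G_SEXTLOAD that already sign-extended from the same bit width, e.g.:
+ // %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
+ // %ext:_(s32) = G_SEXT_INREG %ld, 8
+ // In that case the G_SEXT_INREG is redundant and can be replaced by a copy
+ // of its source.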
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register LoadUser = SrcReg;
+
+ if (MRI.getType(SrcReg).isVector())
+ return false;
+
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
+ LoadUser = TruncSrc;
+
+ uint64_t SizeInBits = MI.getOperand(2).getImm();
+ // If the source is a G_SEXTLOAD from the same bit width, then we don't
+ // need any extend at all, just a truncate.
+ if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
+ // If truncating more than the original extended value, abort.
+ auto LoadSizeBits = LoadMI->getMemSizeInBits();
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
+ return false;
+ if (LoadSizeBits == SizeInBits)
+ return true;
+ }
+ return false;
+}
+
+void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchSextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT RegTy = MRI.getType(DstReg);
+
+ // Only supports scalars for now.
+ if (RegTy.isVector())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
+ if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
+ return false;
+
+ uint64_t MemBits = LoadDef->getMemSizeInBits();
+
+ // If the sign extend extends from a narrower width than the load's width,
+ // then we can narrow the load width when we combine to a G_SEXTLOAD.
+ // Avoid widening the load at all.
+ unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
+
+ // Don't generate G_SEXTLOADs with a < 1 byte width.
+ if (NewSizeBits < 8)
+ return false;
+ // Don't bother creating a non-power-2 sextload, it will likely be broken up
+ // anyway for most targets.
+ if (!isPowerOf2_32(NewSizeBits))
+ return false;
+
+ const MachineMemOperand &MMO = LoadDef->getMMO();
+ LegalityQuery::MemDesc MMDesc(MMO);
+
+ // Don't modify the memory access size if this is atomic/volatile, but we can
+ // still adjust the opcode to indicate the high bit behavior.
+ if (LoadDef->isSimple())
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
+ return false;
+
+ // TODO: Could check if it's legal with the reduced or original memory size.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
+ {MRI.getType(LoadDef->getDstReg()),
+ MRI.getType(LoadDef->getPointerReg())},
+ {MMDesc}}))
+ return false;
+
+ MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
+ return true;
+}
+
+void CombinerHelper::applySextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register LoadReg;
+ unsigned ScalarSizeBits;
+ std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
+ GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
+
+ // If we have the following:
+ // %ld = G_LOAD %ptr, (load 2)
+ // %ext = G_SEXT_INREG %ld, 8
+ // ==>
+ // %ld = G_SEXTLOAD %ptr (load 1)
+
+ auto &MMO = LoadDef->getMMO();
+ Builder.setInstrAndDebugLoc(*LoadDef);
+ auto &MF = Builder.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
+ Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
+ LoadDef->getPointerReg(), *NewMMO);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+#ifndef NDEBUG
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+#endif
+
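+ // A post-index candidate is a G_PTR_ADD of this operation's base pointer
+ // whose result could serve as the written-back address, e.g.:
+ // ... = G_LOAD %base
+ // %addr:_(p0) = G_PTR_ADD %base, %offset
+ // provided the indexed form is legal for the target and the memory
+ // operation dominates all uses of %addr.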
+ Base = MI.getOperand(1).getReg();
+ MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
+ if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
+ // FIXME: The following use traversal needs a bail-out for pathological cases.
+ for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+ if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
+ continue;
+
+ Offset = Use.getOperand(2).getReg();
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
+ << Use);
+ continue;
+ }
+
+ // Make sure the offset calculation is before the potentially indexed op.
+ // FIXME: we really care about dependency here. The offset calculation might
+ // be movable.
+ MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
+ if (!OffsetDef || !dominates(*OffsetDef, MI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
+ << Use);
+ continue;
+ }
+
+ // FIXME: check whether all uses of Base are load/store with foldable
+ // addressing modes. If so, using the normal addr-modes is better than
+ // forming an indexed one.
+
+ bool MemOpDominatesAddrUses = true;
+ for (auto &PtrAddUse :
+ MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
+ if (!dominates(MI, PtrAddUse)) {
+ MemOpDominatesAddrUses = false;
+ break;
+ }
+ }
+
+ if (!MemOpDominatesAddrUses) {
+ LLVM_DEBUG(
+ dbgs() << " Ignoring candidate as memop does not dominate uses: "
+ << Use);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << " Found match: " << Use);
+ Addr = Use.getOperand(0).getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+#ifndef NDEBUG
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+#endif
+
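+ // A pre-index candidate is a load/store whose address is itself a G_PTR_ADD
+ // of a base and an offset, e.g.:
+ // %addr:_(p0) = G_PTR_ADD %base, %offset
+ // ... = G_LOAD %addr
+ // provided the indexed form is legal for the target and the memory
+ // operation dominates all other uses of %addr.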
+ Addr = MI.getOperand(1).getReg();
+ MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
+ if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
+ return false;
+
+ Base = AddrDef->getOperand(1).getReg();
+ Offset = AddrDef->getOperand(2).getReg();
+
+ LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
+
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
+ return false;
+ }
+
+ MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
+ if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
+ return false;
+ }
+
+ if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ // Would require a copy.
+ if (Base == MI.getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
+ return false;
+ }
+
+ // We're expecting one use of Addr in MI, but it could also be the
+ // value stored, which isn't actually dominated by the instruction.
+ if (MI.getOperand(0).getReg() == Addr) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
+ return false;
+ }
+ }
+
+ // FIXME: check whether all uses of the base pointer are constant PtrAdds.
+ // That might allow us to end base's liveness here by adjusting the constant.
+
+ for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
+ if (!dominates(MI, UseMI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+ IndexedLoadStoreMatchInfo MatchInfo;
+ if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
+ applyCombineIndexedLoadStore(MI, MatchInfo);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
+ Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+ return false;
+
+ // For now, no targets actually support these opcodes, so don't waste time
+ // running these unless we're forced to for testing.
+ if (!ForceLegalIndexing)
+ return false;
+
+ MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset);
+ if (!MatchInfo.IsPre &&
+ !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset))
+ return false;
+
+ return true;
+}
+
+void CombinerHelper::applyCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+ MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
+ MachineIRBuilder MIRBuilder(MI);
+ unsigned Opcode = MI.getOpcode();
+ bool IsStore = Opcode == TargetOpcode::G_STORE;
+ unsigned NewOpcode;
+ switch (Opcode) {
+ case TargetOpcode::G_LOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_LOAD;
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
+ break;
+ case TargetOpcode::G_STORE:
+ NewOpcode = TargetOpcode::G_INDEXED_STORE;
+ break;
+ default:
+ llvm_unreachable("Unknown load/store opcode");
+ }
+
+ auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ if (IsStore) {
+ MIB.addDef(MatchInfo.Addr);
+ MIB.addUse(MI.getOperand(0).getReg());
+ } else {
+ MIB.addDef(MI.getOperand(0).getReg());
+ MIB.addDef(MatchInfo.Addr);
+ }
+
+ MIB.addUse(MatchInfo.Base);
+ MIB.addUse(MatchInfo.Offset);
+ MIB.addImm(MatchInfo.IsPre);
+ MI.eraseFromParent();
+ AddrDef.eraseFromParent();
+
+ LLVM_DEBUG(dbgs() << " Combined to indexed operation");
+}
+
+bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
+ MachineInstr *&OtherMI) {
+ unsigned Opcode = MI.getOpcode();
+ bool IsDiv, IsSigned;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV: {
+ IsDiv = true;
+ IsSigned = Opcode == TargetOpcode::G_SDIV;
+ break;
+ }
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
+ IsDiv = false;
+ IsSigned = Opcode == TargetOpcode::G_SREM;
+ break;
+ }
+ }
+
+ Register Src1 = MI.getOperand(1).getReg();
+ unsigned DivOpcode, RemOpcode, DivremOpcode;
+ if (IsSigned) {
+ DivOpcode = TargetOpcode::G_SDIV;
+ RemOpcode = TargetOpcode::G_SREM;
+ DivremOpcode = TargetOpcode::G_SDIVREM;
+ } else {
+ DivOpcode = TargetOpcode::G_UDIV;
+ RemOpcode = TargetOpcode::G_UREM;
+ DivremOpcode = TargetOpcode::G_UDIVREM;
+ }
+
+ if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
+ return false;
+
+ // Combine:
+ // %div:_ = G_[SU]DIV %src1:_, %src2:_
+ // %rem:_ = G_[SU]REM %src1:_, %src2:_
+ // into:
+ // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+ // Combine:
+ // %rem:_ = G_[SU]REM %src1:_, %src2:_
+ // %div:_ = G_[SU]DIV %src1:_, %src2:_
+ // into:
+ // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+ for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
+ if (MI.getParent() == UseMI.getParent() &&
+ ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
+ (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
+ matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
+ matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
+ OtherMI = &UseMI;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
+ MachineInstr *&OtherMI) {
+ unsigned Opcode = MI.getOpcode();
+ assert(OtherMI && "OtherMI shouldn't be empty.");
+
+ Register DestDivReg, DestRemReg;
+ if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
+ DestDivReg = MI.getOperand(0).getReg();
+ DestRemReg = OtherMI->getOperand(0).getReg();
+ } else {
+ DestDivReg = OtherMI->getOperand(0).getReg();
+ DestRemReg = MI.getOperand(0).getReg();
+ }
+
+ bool IsSigned =
+ Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
+
+ // Check which instruction is first in the block so we don't break def-use
+ // deps by "moving" the instruction incorrectly. Also keep track of which
+ // instruction is first so we pick its operands, avoiding use-before-def
+ // bugs.
+ MachineInstr *FirstInst;
+ if (dominates(MI, *OtherMI)) {
+ Builder.setInstrAndDebugLoc(MI);
+ FirstInst = &MI;
+ } else {
+ Builder.setInstrAndDebugLoc(*OtherMI);
+ FirstInst = OtherMI;
+ }
+
+ Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
+ : TargetOpcode::G_UDIVREM,
+ {DestDivReg, DestRemReg},
+ {FirstInst->getOperand(1), FirstInst->getOperand(2)});
+ MI.eraseFromParent();
+ OtherMI->eraseFromParent();
+}
+
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
+ MachineInstr *&BrCond) {
+ assert(MI.getOpcode() == TargetOpcode::G_BR);
+
+ // Try to match the following:
+ // bb1:
+ // G_BRCOND %c1, %bb2
+ // G_BR %bb3
+ // bb2:
+ // ...
+ // bb3:
+
+ // The above pattern does not have a fall-through to the successor bb2, so a
+ // branch is always taken no matter which path is followed. Here we try to
+ // find and replace that pattern with a conditional branch to bb3 and a
+ // fall-through to bb2. This is generally better for branch predictors.
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator BrIt(MI);
+ if (BrIt == MBB->begin())
+ return false;
+ assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
+
+ BrCond = &*std::prev(BrIt);
+ if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
+ return false;
+
+ // Check that the next block is the conditional branch target. Also make sure
+ // that it isn't the same as the G_BR's target (otherwise, this will loop.)
+ MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+ return BrCondTarget != MI.getOperand(0).getMBB() &&
+ MBB->isLayoutSuccessor(BrCondTarget);
+}
+
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
+ MachineInstr *&BrCond) {
+ MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
+ Builder.setInstrAndDebugLoc(*BrCond);
+ LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+ // FIXME: Does int/fp matter for this? If so, we might need to restrict
+ // this to i1 only since we might not know for sure what kind of
+ // compare generated the condition value.
+ auto True = Builder.buildConstant(
+ Ty, getICmpTrueVal(getTargetLowering(), false, false));
+ auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
+
+ auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+ Observer.changingInstr(MI);
+ MI.getOperand(0).setMBB(FallthroughBB);
+ Observer.changedInstr(MI);
+
+ // Change the conditional branch to use the inverted condition and
+ // new target block.
+ Observer.changingInstr(*BrCond);
+ BrCond->getOperand(0).setReg(Xor.getReg(0));
+ BrCond->getOperand(1).setMBB(BrTarget);
+ Observer.changedInstr(*BrCond);
+}
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemcpyInline(MI) ==
+ LegalizerHelper::LegalizeResult::Legalized;
+}
+
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+ LegalizerHelper::LegalizeResult::Legalized;
+}
+
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const APFloat &Val) {
+ APFloat Result(Val);
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_FNEG: {
+ Result.changeSign();
+ return Result;
+ }
+ case TargetOpcode::G_FABS: {
+ Result.clearSign();
+ return Result;
+ }
+ case TargetOpcode::G_FPTRUNC: {
+ bool Unused;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return Result;
+ }
+ case TargetOpcode::G_FSQRT: {
+ bool Unused;
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(sqrt(Result.convertToDouble()));
+ break;
+ }
+ case TargetOpcode::G_FLOG2: {
+ bool Unused;
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(log2(Result.convertToDouble()));
+ break;
+ }
+ }
+ // Convert the result back to the semantics of the original value; otherwise
+ // `buildFConstant` will assert on a size mismatch. Only `G_FSQRT` and
+ // `G_FLOG2` reach here.
+ bool Unused;
+ Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+ return Result;
+}
+
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ const ConstantFP *Cst) {
+ Builder.setInstrAndDebugLoc(MI);
+ APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+ const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+ Builder.buildFConstant(MI.getOperand(0), *NewCst);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
+ PtrAddChain &MatchInfo) {
+ // We're trying to match the following pattern:
+ // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
+ // %root = G_PTR_ADD %t1, G_CONSTANT imm2
+ // -->
+ // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
+
+ if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ Register Add2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Add2Def = MRI.getVRegDef(Add2);
+ if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ Register Base = Add2Def->getOperand(1).getReg();
+ Register Imm2 = Add2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Check if the new combined immediate would form an illegal addressing
+ // mode. Do not combine if it was legal before but would become illegal.
+ // To do so, we need to find a load/store user of the pointer to get
+ // the access type.
+ Type *AccessTy = nullptr;
+ auto &MF = *MI.getMF();
+ for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
+ if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
+ AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
+ MF.getFunction().getContext());
+ break;
+ }
+ }
+ TargetLoweringBase::AddrMode AMNew;
+ APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ AMNew.BaseOffs = CombinedImm.getSExtValue();
+ if (AccessTy) {
+ AMNew.HasBaseReg = true;
+ TargetLoweringBase::AddrMode AMOld;
+ AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.HasBaseReg = true;
+ unsigned AS = MRI.getType(Add2).getAddressSpace();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
+ !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
+ return false;
+ }
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm = AMNew.BaseOffs;
+ MatchInfo.Base = Base;
+ MatchInfo.Bank = getRegBank(Imm2);
+ return true;
+}
+
+void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
+ PtrAddChain &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
+ MachineIRBuilder MIB(MI);
+ LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
+ auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Base);
+ MI.getOperand(2).setReg(NewOffset.getReg(0));
+ Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
+ // %t1 = SHIFT %base, G_CONSTANT imm1
+ // %root = SHIFT %t1, G_CONSTANT imm2
+ // -->
+ // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
+
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Register Shl2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
+ if (Shl2Def->getOpcode() != Opcode)
+ return false;
+
+ Register Base = Shl2Def->getOperand(1).getReg();
+ Register Imm2 = Shl2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm =
+ (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Reg = Base;
+
+ // There is no simple replacement for a saturating unsigned left shift that
+ // exceeds the scalar size.
+ if (Opcode == TargetOpcode::G_USHLSAT &&
+ MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
+ return false;
+
+ return true;
+}
+
+void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Builder.setInstrAndDebugLoc(MI);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
+ auto Imm = MatchInfo.Imm;
+
+ if (Imm >= ScalarSizeInBits) {
+ // Any logical shift that exceeds scalar size will produce zero.
+ if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
+ Builder.buildConstant(MI.getOperand(0), 0);
+ MI.eraseFromParent();
+ return;
+ }
+ // Arithmetic shift and saturating signed left shift have no effect beyond
+ // scalar size.
+ Imm = ScalarSizeInBits - 1;
+ }
+
+ LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
+ Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Reg);
+ MI.getOperand(2).setReg(NewImm);
+ Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
+ // with any of G_AND/G_OR/G_XOR logic instructions.
+ // %t1 = SHIFT %X, G_CONSTANT C0
+ // %t2 = LOGIC %t1, %Y
+ // %root = SHIFT %t2, G_CONSTANT C1
+ // -->
+ // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
+ // %t4 = SHIFT %Y, G_CONSTANT C1
+ // %root = LOGIC %t3, %t4
+ unsigned ShiftOpcode = MI.getOpcode();
+ assert((ShiftOpcode == TargetOpcode::G_SHL ||
+ ShiftOpcode == TargetOpcode::G_ASHR ||
+ ShiftOpcode == TargetOpcode::G_LSHR ||
+ ShiftOpcode == TargetOpcode::G_USHLSAT ||
+ ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ // Match a one-use bitwise logic op.
+ Register LogicDest = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LogicDest))
+ return false;
+
+ MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
+ unsigned LogicOpcode = LogicMI->getOpcode();
+ if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
+ LogicOpcode != TargetOpcode::G_XOR)
+ return false;
+
+ // Find a matching one-use shift by constant.
+ const Register C1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
+
+ auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
+ // Shift should match the previous one and should have only one use.
+ if (MI->getOpcode() != ShiftOpcode ||
+ !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ // Must be a constant.
+ auto MaybeImmVal =
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
+ MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
+ Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
+ MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
+ uint64_t C0Val;
+
+ if (matchFirstShift(LogicMIOp1, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg2;
+ MatchInfo.Shift2 = LogicMIOp1;
+ } else if (matchFirstShift(LogicMIOp2, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg1;
+ MatchInfo.Shift2 = LogicMIOp2;
+ } else
+ return false;
+
+ MatchInfo.ValSum = C0Val + C1Val;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
+ return false;
+
+ MatchInfo.Logic = LogicMI;
+ return true;
+}
+
+void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
+ Opcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
+ LLT DestType = MRI.getType(MI.getOperand(0).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
+
+ Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
+ Register Shift1 =
+ Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+
+ // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
+ // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
+ // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
+ // the end would actually remove the reused shift1 and cause a crash later.
+ // So erase it earlier to avoid the crash.
+ MatchInfo.Shift2->eraseFromParent();
+
+ Register Shift2Const = MI.getOperand(2).getReg();
+ Register Shift2 = Builder
+ .buildInstr(Opcode, {DestType},
+ {MatchInfo.LogicNonShiftReg, Shift2Const})
+ .getReg(0);
+
+ Register Dest = MI.getOperand(0).getReg();
+ Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
+
+ // This was one use so it's safe to remove it.
+ MatchInfo.Logic->eraseFromParent();
+
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+ // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ auto &Shl = cast<GenericMachineInstr>(MI);
+ Register DstReg = Shl.getReg(0);
+ Register SrcReg = Shl.getReg(1);
+ Register ShiftReg = Shl.getReg(2);
+ Register X, C1;
+
+ if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+ return false;
+
+ if (!mi_match(SrcReg, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+ m_GOr(m_Reg(X), m_Reg(C1))))))
+ return false;
+
+ APInt C1Val, C2Val;
+ if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+ !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+ return false;
+
+ auto *SrcDef = MRI.getVRegDef(SrcReg);
+ assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+ SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
+ LLT SrcTy = MRI.getType(SrcReg);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+ auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+ B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+ };
+ return true;
+}
+
+bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
+ unsigned &ShiftVal) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
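+ // Match a multiplication by a power-of-two constant so it can be rewritten
+ // as a left shift, e.g.:
+ // %c:_(s32) = G_CONSTANT i32 8
+ // %r:_(s32) = G_MUL %x, %c
+ // -->
+ // %s:_(s32) = G_CONSTANT i32 3
+ // %r:_(s32) = G_SHL %x, %s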
+ auto MaybeImmVal =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.exactLogBase2();
+ return (static_cast<int32_t>(ShiftVal) != -1);
+}
+
+void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
+ unsigned &ShiftVal) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ MachineIRBuilder MIB(MI);
+ LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
+ auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
+ Observer.changingInstr(MI);
+ MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
+ MI.getOperand(2).setReg(ShiftCst.getReg(0));
+ Observer.changedInstr(MI);
+}
+
+// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
+bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
+ RegisterImmPair &MatchData) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+
+ Register LHS = MI.getOperand(1).getReg();
+
+ Register ExtSrc;
+ if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
+ return false;
+
+ Register RHS = MI.getOperand(2).getReg();
+ MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+ auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
+ if (!MaybeShiftAmtVal)
+ return false;
+
+ if (LI) {
+ LLT SrcTy = MRI.getType(ExtSrc);
+
+ // We only really care about the legality with the shifted value. We can
+ // pick any type for the constant shift amount, so ask the target what to
+ // use. Otherwise we would have to guess and hope it is reported as legal.
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
+ return false;
+ }
+
+ int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
+ MatchData.Reg = ExtSrc;
+ MatchData.Imm = ShiftAmt;
+
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+ unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+ return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
+}
+
+void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData) {
+ Register ExtSrcReg = MatchData.Reg;
+ int64_t ShiftAmtVal = MatchData.Imm;
+
+ LLT ExtSrcTy = MRI.getType(ExtSrcReg);
+ Builder.setInstrAndDebugLoc(MI);
+ auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
+ auto NarrowShift =
+ Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
+ Builder.buildZExt(MI.getOperand(0), NarrowShift);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
+ Register &MatchInfo) {
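+ // Fold away a merge whose sources are exactly the results of a single
+ // unmerge, in the original order:
+ // %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x:_(s64)
+ // %m:_(s64) = G_MERGE_VALUES %a:_(s32), %b:_(s32)
+ // Here %m can simply be replaced by %x.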
+ GMerge &Merge = cast<GMerge>(MI);
+ SmallVector<Register, 16> MergedValues;
+ for (unsigned I = 0; I < Merge.getNumSources(); ++I)
+ MergedValues.emplace_back(Merge.getSourceReg(I));
+
+ auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
+ if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
+ return false;
+
+ for (unsigned I = 0; I < MergedValues.size(); ++I)
+ if (MergedValues[I] != Unmerge->getReg(I))
+ return false;
+
+ MatchInfo = Unmerge->getSourceReg();
+ return true;
+}
+
+static Register peekThroughBitcast(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
+ ;
+
+ return Reg;
+}
+
+bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
+
+ auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
+ if (!SrcInstr)
+ return false;
+
+ // Check the source type of the merge.
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
+ LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
+ bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
+ if (SrcMergeTy != Dst0Ty && !SameSize)
+ return false;
+ // They are the same now (modulo a bitcast).
+ // We can collect all the src registers.
+ for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
+ Operands.push_back(SrcInstr->getSourceReg(Idx));
+ return true;
+}
+
+void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Operands.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+
+ LLT SrcTy = MRI.getType(Operands[0]);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ bool CanReuseInputDirectly = DstTy == SrcTy;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Register SrcReg = Operands[Idx];
+
+ // This combine may run after RegBankSelect, so we need to be aware of
+ // register banks.
+ const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+ SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+ MRI.setRegClassOrRegBank(SrcReg, DstCB);
+ }
+
+ if (CanReuseInputDirectly)
+ replaceRegWith(MRI, DstReg, SrcReg);
+ else
+ Builder.buildCast(DstReg, SrcReg);
+ }
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
+ SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
+ return false;
+ // Break down the big constant into smaller ones.
+ const MachineOperand &CstVal = SrcInstr->getOperand(1);
+ APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
+ ? CstVal.getCImm()->getValue()
+ : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned ShiftAmt = Dst0Ty.getSizeInBits();
+ // Unmerge a constant.
+ for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
+ Csts.emplace_back(Val.trunc(ShiftAmt));
+ Val = Val.lshr(ShiftAmt);
+ }
+
+ return true;
+}
+
+void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Csts.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Builder.buildConstant(DstReg, Csts[Idx]);
+ }
+
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineUnmergeUndef(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MatchInfo = [&MI](MachineIRBuilder &B) {
+ unsigned NumElems = MI.getNumOperands() - 1;
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ B.buildUndef(DstReg);
+ }
+ };
+ return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
+}
+
+bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ // Check that all the lanes are dead except the first one.
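+ // If so, the whole unmerge is equivalent to truncating the source down to
+ // the type of the first definition (see the apply below).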
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
+ return false;
+ }
+ return true;
+}
+
+void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ Builder.setInstrAndDebugLoc(MI);
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ // Truncating a vector is going to truncate every single lane,
+ // whereas we want the full low bits.
+ // Do the operation on a scalar instead.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ SrcReg =
+ Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ if (Dst0Ty.isVector()) {
+ auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
+ Builder.buildCast(Dst0Reg, MIB);
+ } else
+ Builder.buildTrunc(Dst0Reg, SrcReg);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ // G_ZEXT on a vector applies to each lane, so it will
+ // affect all destinations. Therefore we won't be able
+ // to simplify the unmerge to just the first definition.
+ if (Dst0Ty.isVector())
+ return false;
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return false;
+
+ Register ZExtSrcReg;
+ if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
+ return false;
+
+ // Finally, we can replace the first definition with
+ // a zext of the source if the definition is big enough to hold
+ // all of ZExtSrc's bits.
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+ return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
+}
+
+void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+
+ MachineInstr *ZExtInstr =
+ MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
+ assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
+ "Expecting a G_ZEXT");
+
+ Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
+ Builder.buildZExt(Dst0Reg, ZExtSrcReg);
+ } else {
+ assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
+ "ZExt src doesn't fit in destination");
+ replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
+ }
+
+ Register ZeroReg;
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!ZeroReg)
+ ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
+ replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
+ }
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
+ unsigned TargetShiftSize,
+ unsigned &ShiftVal) {
+ assert((MI.getOpcode() == TargetOpcode::G_SHL ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector()) // TODO:
+ return false;
+
+ // Don't narrow further than the requested size.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size <= TargetShiftSize)
+ return false;
+
+ auto MaybeImmVal =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
+ return ShiftVal >= Size / 2 && ShiftVal < Size;
+}
+
+void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
+ const unsigned &ShiftVal) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
+ unsigned Size = Ty.getSizeInBits();
+ unsigned HalfSize = Size / 2;
+ assert(ShiftVal >= HalfSize);
+
+ LLT HalfTy = LLT::scalar(HalfSize);
+
+ Builder.setInstr(MI);
+ auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
+ unsigned NarrowShiftAmt = ShiftVal - HalfSize;
+
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ Register Narrowed = Unmerge.getReg(1);
+
+ // dst = G_LSHR s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
+
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildLShr(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
+ } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ Register Narrowed = Unmerge.getReg(0);
+ // dst = G_SHL s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildShl(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
+ } else {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ auto Hi = Builder.buildAShr(
+ HalfTy, Unmerge.getReg(1),
+ Builder.buildConstant(HalfTy, HalfSize - 1));
+
+ if (ShiftVal == HalfSize) {
+ // (G_ASHR i64:x, 32) ->
+ // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
+ Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
+ } else if (ShiftVal == Size - 1) {
+ // Don't need a second shift.
+ // (G_ASHR i64:x, 63) ->
+ // %narrowed = (G_ASHR hi_32(x), 31)
+ // G_MERGE_VALUES %narrowed, %narrowed
+ Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
+ } else {
+ auto Lo = Builder.buildAShr(
+ HalfTy, Unmerge.getReg(1),
+ Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
+
+ // (G_ASHR i64:x, C) ->, for C >= 32
+ // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
+ Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
+ }
+ }
+
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
+ unsigned TargetShiftAmount) {
+ unsigned ShiftAmt;
+ if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
+ applyCombineShiftToUnmerge(MI, ShiftAmt);
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
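+ // Fold inttoptr(ptrtoint x) -> x when the result pointer type matches the
+ // type of x, e.g.:
+ // %i:_(s64) = G_PTRTOINT %x:_(p0)
+ // %p:_(p0) = G_INTTOPTR %i:_(s64)
+ // Here %p is just a copy of %x.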
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI,
+ m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
+}
+
+void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildCopy(DstReg, Reg);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
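+ // Fold ptrtoint(inttoptr x) -> x, zero-extending or truncating x if the
+ // integer widths differ.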
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildZExtOrTrunc(DstReg, Reg);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD);
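+ // Match (add (ptrtoint %ptr), %int) so that it can be rewritten as a
+ // ptrtoint of a G_PTR_ADD, e.g.:
+ // %i:_(s64) = G_PTRTOINT %ptr:_(p0)
+ // %a:_(s64) = G_ADD %i, %int
+ // -->
+ // %q:_(p0) = G_PTR_ADD %ptr, %int
+ // %a:_(s64) = G_PTRTOINT %q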
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT IntTy = MRI.getType(LHS);
+
+ // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
+ // instruction.
+ PtrReg.second = false;
+ for (Register SrcReg : {LHS, RHS}) {
+ if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
+ // Don't handle cases where the integer is implicitly converted to the
+ // pointer width.
+ LLT PtrTy = MRI.getType(PtrReg.first);
+ if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
+ return true;
+ }
+
+ PtrReg.second = true;
+ }
+
+ return false;
+}
+
+void CombinerHelper::applyCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ const bool DoCommute = PtrReg.second;
+ if (DoCommute)
+ std::swap(LHS, RHS);
+ LHS = PtrReg.first;
+
+ LLT PtrTy = MRI.getType(LHS);
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
+ Builder.buildPtrToInt(Dst, PtrAdd);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
+ APInt &NewCst) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register LHS = PtrAdd.getBaseReg();
+ Register RHS = PtrAdd.getOffsetReg();
+ MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
+
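+ // Fold a G_PTR_ADD whose base is a constant G_INTTOPTR and whose offset is
+ // a constant into a single constant address, e.g.:
+ // %p:_(p0) = G_INTTOPTR %c1
+ // %q:_(p0) = G_PTR_ADD %p, %c2
+ // can be replaced by a constant with value (zext(c1) + sext(c2)).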
+ if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
+ APInt Cst;
+ if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
+ auto DstTy = MRI.getType(PtrAdd.getReg(0));
+ // G_INTTOPTR uses zero-extension
+ NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
+ NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
+ APInt &NewCst) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register Dst = PtrAdd.getReg(0);
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildConstant(Dst, NewCst);
+ PtrAdd.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
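+ // Fold anyext(trunc x) -> x when the result type matches the type of x:
+ // %t:_(s32) = G_TRUNC %x:_(s64)
+ // %e:_(s64) = G_ANYEXT %t:_(s32)
+ // Here %e has the same value as %x.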
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ return mi_match(SrcReg, MRI,
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+}
+
+bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
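+ // Fold zext(trunc x) -> x when the result type matches the type of x and
+ // the known leading zero bits of x already cover every bit the truncate
+ // would discard.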
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (mi_match(SrcReg, MRI,
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
+ unsigned DstSize = DstTy.getScalarSizeInBits();
+ unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
+ return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
+ unsigned Opc = MI.getOpcode();
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (Opc == SrcOpc ||
+ (Opc == TargetOpcode::G_ANYEXT &&
+ (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
+ (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
+ MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+void CombinerHelper::applyCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+
+ Register Reg = std::get<0>(MatchInfo);
+ unsigned SrcExtOp = std::get<1>(MatchInfo);
+
+ // Combine exts with the same opcode.
+ if (MI.getOpcode() == SrcExtOp) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Reg);
+ Observer.changedInstr(MI);
+ return;
+ }
+
+ // Combine:
+ // - anyext([sz]ext x) to [sz]ext x
+ // - sext(zext x) to zext x
+ if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ (MI.getOpcode() == TargetOpcode::G_SEXT &&
+ SrcExtOp == TargetOpcode::G_ZEXT)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
+ MI.eraseFromParent();
+ }
+}
+
+void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
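+ // Replace (mul x, -1) with (sub 0, x).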
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
+ MI.getFlags());
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
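+ // Fold fabs(fneg x) -> fabs(x); the negation is irrelevant under fabs.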
+ Register Src = MI.getOperand(1).getReg();
+ Register NegSrc;
+
+ if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NegSrc);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
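+ // Match trunc([asz]ext x). Depending on the relative sizes, the pair can be
+ // replaced by x itself, a narrower extend of x, or a single truncate of x
+ // (see applyCombineTruncOfExt below).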
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
+ SrcOpc == TargetOpcode::G_ZEXT) {
+ MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+void CombinerHelper::applyCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MatchInfo.first;
+ unsigned SrcExtOp = MatchInfo.second;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
+ if (SrcTy == DstTy) {
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+ return;
+ }
+ Builder.setInstrAndDebugLoc(MI);
+ if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+ Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+ else
+ Builder.buildTrunc(DstReg, SrcReg);
+ MI.eraseFromParent();
+}
+
+static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
+ const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
+ const unsigned TruncSize = TruncTy.getScalarSizeInBits();
+
+ // ShiftTy > 32 > TruncTy -> 32
+ if (ShiftSize > 32 && TruncSize < 32)
+ return ShiftTy.changeElementSize(32);
+
+ // TODO: We could also reduce to 16 bits, but that's more target-dependent.
+ // Some targets like it, some don't, some only like it under certain
+ // conditions/processor versions, etc.
+ // A TargetLowering hook might be needed for this.
+
+ // Don't combine
+ return ShiftTy;
+}
+
+bool CombinerHelper::matchCombineTruncOfShift(
+ MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ if (!MRI.hasOneNonDBGUse(SrcReg))
+ return false;
+
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
+
+ MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
+ const auto &TL = getTargetLowering();
+
+ LLT NewShiftTy;
+ switch (SrcMI->getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_SHL: {
+ NewShiftTy = DstTy;
+
+ // Make sure new shift amount is legal.
+ KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
+ if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
+ return false;
+ break;
+ }
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR: {
+ // For right shifts, we conservatively do not do the transform if the TRUNC
+ // has any STORE users. The reason is that if we change the type of the
+ // shift, we may break the truncstore combine.
+ //
+ // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
+ for (auto &User : MRI.use_instructions(DstReg))
+ if (User.getOpcode() == TargetOpcode::G_STORE)
+ return false;
+
+ NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
+ if (NewShiftTy == SrcTy)
+ return false;
+
+ // Make sure we won't lose information by truncating the high bits.
+ KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
+ if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
+ DstTy.getScalarSizeInBits()))
+ return false;
+ break;
+ }
+ }
+
+ if (!isLegalOrBeforeLegalizer(
+ {SrcMI->getOpcode(),
+ {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
+ return false;
+
+ MatchInfo = std::make_pair(SrcMI, NewShiftTy);
+ return true;
+}
+
+void CombinerHelper::applyCombineTruncOfShift(
+ MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+
+ MachineInstr *ShiftMI = MatchInfo.first;
+ LLT NewShiftTy = MatchInfo.second;
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ Register ShiftAmt = ShiftMI->getOperand(2).getReg();
+ Register ShiftSrc = ShiftMI->getOperand(1).getReg();
+ ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
+
+ Register NewShift =
+ Builder
+ .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
+ .getReg(0);
+
+ if (NewShiftTy == DstTy)
+ replaceRegWith(MRI, Dst, NewShift);
+ else
+ Builder.buildTrunc(Dst, NewShift);
+
+ eraseInst(MI);
+}
+
+bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
+ return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+ });
+}
+
+bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
+ return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
+ return !MO.isReg() ||
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+ });
+}
+
+bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ return all_of(Mask, [](int Elt) { return Elt < 0; });
+}
+
+bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_STORE);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
+ MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
+ "Expected an insert/extract element op");
+ LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned IdxIdx =
+ MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+ auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
+ if (!Idx)
+ return false;
+ return Idx->getZExtValue() >= VecTy.getNumElements();
+}
+
+bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
+ GSelect &SelMI = cast<GSelect>(MI);
+ auto Cst =
+ isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
+ if (!Cst)
+ return false;
+ OpIdx = Cst->isZero() ? 3 : 2;
+ return true;
+}
+
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
+
+bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
+ const MachineOperand &MOP2) {
+ if (!MOP1.isReg() || !MOP2.isReg())
+ return false;
+ auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
+ if (!InstAndDef1)
+ return false;
+ auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
+ if (!InstAndDef2)
+ return false;
+ MachineInstr *I1 = InstAndDef1->MI;
+ MachineInstr *I2 = InstAndDef2->MI;
+
+ // Handle a case like this:
+ //
+ // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
+ //
+ // Even though %0 and %1 are produced by the same instruction they are not
+ // the same values.
+ if (I1 == I2)
+ return MOP1.getReg() == MOP2.getReg();
+
+ // If we have an instruction which loads or stores, we can't guarantee that
+ // it is identical.
+ //
+ // For example, we may have
+ //
+ // %x1 = G_LOAD %addr (load N from @somewhere)
+ // ...
+ // call @foo
+ // ...
+ // %x2 = G_LOAD %addr (load N from @somewhere)
+ // ...
+ // %or = G_OR %x1, %x2
+ //
+ // It's possible that @foo will modify whatever lives at the address we're
+ // loading from. To be safe, let's just assume that all loads and stores
+ // are different (unless we have something which is guaranteed to not
+ // change.)
+ if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
+ return false;
+
+ // If both instructions are loads or stores, they are equal only if both
+ // are dereferenceable invariant loads with the same number of bits.
+ if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
+ GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
+ GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
+ if (!LS1 || !LS2)
+ return false;
+
+ if (!I2->isDereferenceableInvariantLoad() ||
+ (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
+ return false;
+ }
+
+ // Check for physical registers on the instructions first to avoid cases
+ // like this:
+ //
+ // %a = COPY $physreg
+ // ...
+ // SOMETHING implicit-def $physreg
+ // ...
+ // %b = COPY $physreg
+ //
+ // These copies are not equivalent.
+ if (any_of(I1->uses(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isPhysical();
+ })) {
+ // Check if we have a case like this:
+ //
+ // %a = COPY $physreg
+ // %b = COPY %a
+ //
+ // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
+ // From that, we know that they must have the same value, since they must
+ // have come from the same COPY.
+ return I1->isIdenticalTo(*I2);
+ }
+
+ // We don't have any physical registers, so we don't necessarily need the
+ // same vreg defs.
+ //
+ // On the off-chance that there's some target instruction feeding into the
+ // instruction, let's use produceSameValue instead of isIdenticalTo.
+ if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
+ // Handle instructions with multiple defs that produce the same values. The
+ // values are the same for operands with the same index.
+ // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // Here I1 and I2 are different instructions that produce the same values:
+ // %1 and %6 are the same value, while %1 and %7 are not.
+ return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
+ I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+ }
+ return false;
+}
+
+bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
+ if (!MOP.isReg())
+ return false;
+ auto *MI = MRI.getVRegDef(MOP.getReg());
+ auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
+ return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
+ MaybeCst->getSExtValue() == C;
+}
+
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+ unsigned OpIdx) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ Register Replacement = MI.getOperand(OpIdx).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+}
+
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+ Register Replacement) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+}
+
+bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ // Match (cond ? x : x)
+ return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
+ return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
+ return matchConstantOp(MI.getOperand(OpIdx), 0) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+}
+
+bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
+}
+
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildFConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildUndef(MI.getOperand(0));
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchSimplifyAddToSub(
+ MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register &NewLHS = std::get<0>(MatchInfo);
+ Register &NewRHS = std::get<1>(MatchInfo);
+
+ // Helper lambda to check for opportunities for
+ // ((0-A) + B) -> B - A
+ // (A + (0-B)) -> A - B
+ auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
+ if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
+ return false;
+ NewLHS = MaybeNewLHS;
+ return true;
+ };
+
+ return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
+bool CombinerHelper::matchCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
+ "Invalid opcode");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
+ unsigned NumElts = DstTy.getNumElements();
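+ // Illustrative example for a <2 x s32> destination: a chain
+ //   %v1 = G_INSERT_VECTOR_ELT %undef, %a, 0
+ //   %v2 = G_INSERT_VECTOR_ELT %v1, %b, 1
+ // can be rebuilt as %v2 = G_BUILD_VECTOR %a, %b once every lane is either
+ // written or known to be undef.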
+ // If this MI is part of a sequence of insert_vec_elts, then
+ // don't do the combine in the middle of the sequence.
+ if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+ TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ MachineInstr *CurrInst = &MI;
+ MachineInstr *TmpInst;
+ int64_t IntImm;
+ Register TmpReg;
+ MatchInfo.resize(NumElts);
+ while (mi_match(
+ CurrInst->getOperand(0).getReg(), MRI,
+ m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
+ if (IntImm >= NumElts || IntImm < 0)
+ return false;
+ if (!MatchInfo[IntImm])
+ MatchInfo[IntImm] = TmpReg;
+ CurrInst = TmpInst;
+ }
+ // Variable index.
+ if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
+ if (!MatchInfo[I - 1].isValid())
+ MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
+ }
+ return true;
+ }
+ // If we didn't end in a G_IMPLICIT_DEF, bail out.
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+void CombinerHelper::applyCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register UndefReg;
+ auto GetUndef = [&]() {
+ if (UndefReg)
+ return UndefReg;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
+ return UndefReg;
+ };
+ for (unsigned I = 0; I < MatchInfo.size(); ++I) {
+ if (!MatchInfo[I])
+ MatchInfo[I] = GetUndef();
+ }
+ Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::applySimplifyAddToSub(
+ MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register SubLHS, SubRHS;
+ std::tie(SubLHS, SubRHS) = MatchInfo;
+ Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
+ //
+ // Creates the new hand and logic instructions (but does not insert them).
+ //
+ // On success, MatchInfo is populated with the new instructions. These are
+ // inserted in applyHoistLogicOpWithSameOpcodeHands.
+ unsigned LogicOpcode = MI.getOpcode();
+ assert(LogicOpcode == TargetOpcode::G_AND ||
+ LogicOpcode == TargetOpcode::G_OR ||
+ LogicOpcode == TargetOpcode::G_XOR);
+ MachineIRBuilder MIB(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ // Don't recompute anything.
+ if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
+ return false;
+
+ // Make sure we have (hand x, ...), (hand y, ...)
+ MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
+ MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
+ if (!LeftHandInst || !RightHandInst)
+ return false;
+ unsigned HandOpcode = LeftHandInst->getOpcode();
+ if (HandOpcode != RightHandInst->getOpcode())
+ return false;
+ if (!LeftHandInst->getOperand(1).isReg() ||
+ !RightHandInst->getOperand(1).isReg())
+ return false;
+
+ // Make sure the types match up, and if we're doing this post-legalization,
+ // we end up with legal types.
+ Register X = LeftHandInst->getOperand(1).getReg();
+ Register Y = RightHandInst->getOperand(1).getReg();
+ LLT XTy = MRI.getType(X);
+ LLT YTy = MRI.getType(Y);
+ if (!XTy.isValid() || XTy != YTy)
+ return false;
+
+ // Optional extra source register.
+ Register ExtraHandOpSrcReg;
+ switch (HandOpcode) {
+ default:
+ return false;
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT: {
+ // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+ break;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
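+ // e.g. (and (lshr %x, %z), (lshr %y, %z)) -> (lshr (and %x, %y), %z)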
+ MachineOperand &ZOp = LeftHandInst->getOperand(2);
+ if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
+ return false;
+ ExtraHandOpSrcReg = ZOp.getReg();
+ break;
+ }
+ }
+
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
+ // Record the steps to build the new instructions.
+ //
+ // Steps to build (logic x, y)
+ auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
+ OperandBuildSteps LogicBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
+ InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
+
+ // Steps to build hand (logic x, y), ...z
+ OperandBuildSteps HandBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
+ if (ExtraHandOpSrcReg.isValid())
+ HandBuildSteps.push_back(
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
+ InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
+
+ MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
+ return true;
+}
+
+void CombinerHelper::applyBuildInstructionSteps(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ assert(MatchInfo.InstrsToBuild.size() &&
+ "Expected at least one instr to build?");
+ Builder.setInstr(MI);
+ for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
+ assert(InstrToBuild.Opcode && "Expected a valid opcode?");
+ assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
+ MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
+ for (auto &OperandFn : InstrToBuild.OperandFns)
+ OperandFn(Instr);
+ }
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchAshrShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
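+ // Fold (ashr (shl %x, C), C) -> (sext_inreg %x, Width - C). For instance,
+ // with illustrative widths, (ashr (shl %x:s32, 24), 24) becomes
+ // G_SEXT_INREG %x, 8.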
+ int64_t ShlCst, AshrCst;
+ Register Src;
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
+ m_ICstOrSplat(AshrCst))))
+ return false;
+ if (ShlCst != AshrCst)
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
+ return false;
+ MatchInfo = std::make_tuple(Src, ShlCst);
+ return true;
+}
+
+void CombinerHelper::applyAshShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ Register Src;
+ int64_t ShiftAmt;
+ std::tie(Src, ShiftAmt) = MatchInfo;
+ unsigned Size = MRI.getType(Src).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
+ MI.eraseFromParent();
+}
+
+/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
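+ /// For example, with illustrative constants: and(and(x, 0x0ff0), 0x00ff)
+ /// becomes and(x, 0x00f0), while and(and(x, 0xff00), 0x00ff) becomes 0.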
+bool CombinerHelper::matchOverlappingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ Register R;
+ int64_t C1;
+ int64_t C2;
+ if (!mi_match(
+ Dst, MRI,
+ m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (C1 & C2) {
+ B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
+ return;
+ }
+ auto Zero = B.buildConstant(Ty, 0);
+ replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
+ Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_AND %x, %y
+ //
+ // Eliminate the G_AND when it is known that x & y == x or x & y == y.
+ //
+ // Patterns like this can appear as a result of legalization. E.g.
+ //
+ // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+ // %one:_(s32) = G_CONSTANT i32 1
+ // %and:_(s32) = G_AND %cmp, %one
+ //
+ // In this case, G_ICMP only produces a single bit, so x & 1 == x.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ if (!KB)
+ return false;
+
+ Register AndDst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x & Mask == x.
+ // x & 1 == x, always
+ // x & 0 == x, only if x is also 0
+ // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
+ //
+ // Check if we can replace AndDst with the LHS of the G_AND
+ if (canReplaceReg(AndDst, LHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace AndDst with the RHS of the G_AND
+ if (canReplaceReg(AndDst, RHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_OR %x, %y
+ //
+ // Eliminate the G_OR when it is known that x | y == x or x | y == y.
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ if (!KB)
+ return false;
+
+ Register OrDst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x | Mask == x.
+ // x | 0 == x, always
+ // x | 1 == x, only if x is also 1
+ // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
+ //
+ // Check if we can replace OrDst with the LHS of the G_OR
+ if (canReplaceReg(OrDst, LHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace OrDst with the RHS of the G_OR
+ if (canReplaceReg(OrDst, RHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+ // If the input is already sign extended, just drop the extension.
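+ // e.g. for an s32 source, G_SEXT_INREG %src, 8 is redundant whenever %src is
+ // known to have at least 32 - 8 + 1 = 25 sign bits.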
+ Register Src = MI.getOperand(1).getReg();
+ unsigned ExtBits = MI.getOperand(2).getImm();
+ unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+ return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
+static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
+ int64_t Cst, bool IsVector, bool IsFP) {
+ // For i1, Cst will always be -1 regardless of boolean contents.
+ return (ScalarSizeBits == 1 && Cst == -1) ||
+ isConstTrueVal(TLI, Cst, IsVector, IsFP);
+}
+
+bool CombinerHelper::matchNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
+ Register XorSrc;
+ Register CstReg;
+ // We match xor(src, true) here.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+ return false;
+
+ if (!MRI.hasOneNonDBGUse(XorSrc))
+ return false;
+
+ // Check that XorSrc is the root of a tree of comparisons combined with ANDs
+ // and ORs. The suffix of RegsToNegate starting from index I is used as a
+ // work list of tree nodes to visit.
+ RegsToNegate.push_back(XorSrc);
+ // Remember whether the comparisons are all integer or all floating point.
+ bool IsInt = false;
+ bool IsFP = false;
+ for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
+ Register Reg = RegsToNegate[I];
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ switch (Def->getOpcode()) {
+ default:
+ // Don't match if the tree contains anything other than ANDs, ORs and
+ // comparisons.
+ return false;
+ case TargetOpcode::G_ICMP:
+ if (IsFP)
+ return false;
+ IsInt = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_FCMP:
+ if (IsInt)
+ return false;
+ IsFP = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ // Implement De Morgan's laws:
+ // ~(x & y) -> ~x | ~y
+ // ~(x | y) -> ~x & ~y
+ // When we apply the combine we will change the opcode and recursively
+ // negate the operands.
+ RegsToNegate.push_back(Def->getOperand(1).getReg());
+ RegsToNegate.push_back(Def->getOperand(2).getReg());
+ break;
+ }
+ }
+
+ // Now we know whether the comparisons are integer or floating point, check
+ // the constant in the xor.
+ int64_t Cst;
+ if (Ty.isVector()) {
+ MachineInstr *CstDef = MRI.getVRegDef(CstReg);
+ auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
+ if (!MaybeCst)
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
+ return false;
+ } else {
+ if (!mi_match(CstReg, MRI, m_ICst(Cst)))
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
+ return false;
+ }
+
+ return true;
+}
+
+void CombinerHelper::applyNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ for (Register Reg : RegsToNegate) {
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ Observer.changingInstr(*Def);
+ // For each comparison, invert the opcode. For each AND and OR, change the
+ // opcode.
+ switch (Def->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ MachineOperand &PredOp = Def->getOperand(1);
+ CmpInst::Predicate NewP = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)PredOp.getPredicate());
+ PredOp.setPredicate(NewP);
+ break;
+ }
+ case TargetOpcode::G_AND:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
+ break;
+ case TargetOpcode::G_OR:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ break;
+ }
+ Observer.changedInstr(*Def);
+ }
+
+ replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Match (xor (and x, y), y) (or any of its commuted cases)
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ Register &X = MatchInfo.first;
+ Register &Y = MatchInfo.second;
+ Register AndReg = MI.getOperand(1).getReg();
+ Register SharedReg = MI.getOperand(2).getReg();
+
+ // Find a G_AND on either side of the G_XOR.
+ // Look for one of
+ //
+ // (xor (and x, y), SharedReg)
+ // (xor SharedReg, (and x, y))
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+ std::swap(AndReg, SharedReg);
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+ return false;
+ }
+
+ // Only do this if we'll eliminate the G_AND.
+ if (!MRI.hasOneNonDBGUse(AndReg))
+ return false;
+
+ // We can combine if SharedReg is the same as either the LHS or RHS of the
+ // G_AND.
+ if (Y != SharedReg)
+ std::swap(X, Y);
+ return Y == SharedReg;
+}
+
+void CombinerHelper::applyXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Fold (xor (and x, y), y) -> (and (not x), y)
+ Builder.setInstrAndDebugLoc(MI);
+ Register X, Y;
+ std::tie(X, Y) = MatchInfo;
+ auto Not = Builder.buildNot(MRI.getType(X), X);
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register DstReg = PtrAdd.getReg(0);
+ LLT Ty = MRI.getType(DstReg);
+ const DataLayout &DL = Builder.getMF().getDataLayout();
+
+ if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
+ return false;
+
+ if (Ty.isPointer()) {
+ auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
+ return ConstVal && *ConstVal == 0;
+ }
+
+ assert(Ty.isVector() && "Expecting a vector type");
+ const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
+ return isBuildVectorAllZeros(*VecMI, MRI);
+}
+
+void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Builder.setInstrAndDebugLoc(PtrAdd);
+ Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
+ PtrAdd.eraseFromParent();
+}
+
+/// The second source operand is known to be a power of 2.
+void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Pow2Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ Builder.setInstrAndDebugLoc(MI);
+
+ // Fold (urem x, pow2) -> (and x, pow2-1)
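+ // e.g. if %p is known to be a power of two, (urem %x, %p) -> (and %x, %p - 1);
+ // for %p == 8 that is (and %x, 7).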
+ auto NegOne = Builder.buildConstant(Ty, -1);
+ auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
+ Builder.buildAnd(DstReg, Src0, Add);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
+ unsigned &SelectOpNo) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ Register OtherOperandReg = RHS;
+ SelectOpNo = 1;
+ MachineInstr *Select = MRI.getVRegDef(LHS);
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+ !MRI.hasOneNonDBGUse(LHS)) {
+ OtherOperandReg = LHS;
+ SelectOpNo = 2;
+ Select = MRI.getVRegDef(RHS);
+ if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+ !MRI.hasOneNonDBGUse(RHS))
+ return false;
+ }
+
+ MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
+ MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
+
+ if (!isConstantOrConstantVector(*SelectLHS, MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false))
+ return false;
+ if (!isConstantOrConstantVector(*SelectRHS, MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false))
+ return false;
+
+ unsigned BinOpcode = MI.getOpcode();
+
+ // We now know one of the operands is a select of constants. Now verify that
+ // the other binary operator operand is either a constant, or we can handle a
+ // variable.
+ bool CanFoldNonConst =
+ (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
+ (isNullOrNullSplat(*SelectLHS, MRI) ||
+ isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
+ (isNullOrNullSplat(*SelectRHS, MRI) ||
+ isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
+ if (CanFoldNonConst)
+ return true;
+
+ return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false);
+}
+
+/// \p SelectOperand is the operand in binary operator \p MI that is the select
+/// to fold.
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+ const unsigned &SelectOperand) {
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
+
+ Register SelectCond = Select->getOperand(1).getReg();
+ Register SelectTrue = Select->getOperand(2).getReg();
+ Register SelectFalse = Select->getOperand(3).getReg();
+
+ LLT Ty = MRI.getType(Dst);
+ unsigned BinOpcode = MI.getOpcode();
+
+ Register FoldTrue, FoldFalse;
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+ if (SelectOperand == 1) {
+ // TODO: SelectionDAG verifies this actually constant folds before
+ // committing to the combine.
+
+ FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
+ FoldFalse =
+ Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
+ } else {
+ FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
+ FoldFalse =
+ Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
+ }
+
+ Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
+ MI.eraseFromParent();
+}
+
+std::optional<SmallVector<Register, 8>>
+CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
+ assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
+ // We want to detect if Root is part of a tree which represents a bunch
+ // of loads being merged into a larger load. We'll try to recognize patterns
+ // like, for example:
+ //
+ // Reg Reg
+ // \ /
+ // OR_1 Reg
+ // \ /
+ // OR_2
+ // \ Reg
+ // .. /
+ // Root
+ //
+ // Reg Reg Reg Reg
+ // \ / \ /
+ // OR_1 OR_2
+ // \ /
+ // \ /
+ // ...
+ // Root
+ //
+ // Each "Reg" may have been produced by a load + some arithmetic. This
+ // function will save each of them.
+ SmallVector<Register, 8> RegsToVisit;
+ SmallVector<const MachineInstr *, 7> Ors = {Root};
+
+ // In the "worst" case, we're dealing with a load for each byte. So, there
+ // are at most #bytes - 1 ORs.
+ const unsigned MaxIter =
+ MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
+ for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
+ if (Ors.empty())
+ break;
+ const MachineInstr *Curr = Ors.pop_back_val();
+ Register OrLHS = Curr->getOperand(1).getReg();
+ Register OrRHS = Curr->getOperand(2).getReg();
+
+ // In the combine, we want to eliminate the entire tree.
+ if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
+ return std::nullopt;
+
+ // If it's a G_OR, save it and continue to walk. If it's not, then it's
+ // something that may be a load + arithmetic.
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrLHS);
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrRHS);
+ }
+
+ // We're going to try and merge each register into a wider power-of-2 type,
+ // so we ought to have an even number of registers.
+ if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
+ return std::nullopt;
+ return RegsToVisit;
+}
+
+/// Helper function for findLoadOffsetsForLoadOrCombine.
+///
+/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
+/// and then moving that value into a specific byte offset.
+///
+/// e.g. x[i] << 24
+///
+/// \returns The load instruction and the byte offset it is moved into.
+static std::optional<std::pair<GZExtLoad *, int64_t>>
+matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
+ const MachineRegisterInfo &MRI) {
+ assert(MRI.hasOneNonDBGUse(Reg) &&
+ "Expected Reg to only have one non-debug use?");
+ Register MaybeLoad;
+ int64_t Shift;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
+ Shift = 0;
+ MaybeLoad = Reg;
+ }
+
+ if (Shift % MemSizeInBits != 0)
+ return std::nullopt;
+
+ // TODO: Handle other types of loads.
+ auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
+ if (!Load)
+ return std::nullopt;
+
+ if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
+ return std::nullopt;
+
+ return std::make_pair(Load, Shift / MemSizeInBits);
+}
+
+std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+CombinerHelper::findLoadOffsetsForLoadOrCombine(
+ SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
+
+ // Each load found for the pattern. There should be one for each RegsToVisit.
+ SmallSetVector<const MachineInstr *, 8> Loads;
+
+ // The lowest index used in any load. (The lowest "i" for each x[i].)
+ int64_t LowestIdx = INT64_MAX;
+
+ // The load which uses the lowest index.
+ GZExtLoad *LowestIdxLoad = nullptr;
+
+ // Keeps track of the load indices we see. We shouldn't see any indices twice.
+ SmallSet<int64_t, 8> SeenIdx;
+
+ // Ensure each load is in the same MBB.
+ // TODO: Support multiple MachineBasicBlocks.
+ MachineBasicBlock *MBB = nullptr;
+ const MachineMemOperand *MMO = nullptr;
+
+ // Earliest instruction-order load in the pattern.
+ GZExtLoad *EarliestLoad = nullptr;
+
+ // Latest instruction-order load in the pattern.
+ GZExtLoad *LatestLoad = nullptr;
+
+ // Base pointer which every load should share.
+ Register BasePtr;
+
+ // We want to find a load for each register. Each load should have some
+ // appropriate bit twiddling arithmetic. During this loop, we will also keep
+ // track of the load which uses the lowest index. Later, we will check if we
+ // can use its pointer in the final, combined load.
+ for (auto Reg : RegsToVisit) {
+ // Find the load, and find the position it will end up at within the
+ // (possibly shifted) combined value.
+ auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
+ if (!LoadAndPos)
+ return std::nullopt;
+ GZExtLoad *Load;
+ int64_t DstPos;
+ std::tie(Load, DstPos) = *LoadAndPos;
+
+ // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
+ // it is difficult to check for stores/calls/etc between loads.
+ MachineBasicBlock *LoadMBB = Load->getParent();
+ if (!MBB)
+ MBB = LoadMBB;
+ if (LoadMBB != MBB)
+ return std::nullopt;
+
+ // Make sure that the MachineMemOperands of every seen load are compatible.
+ auto &LoadMMO = Load->getMMO();
+ if (!MMO)
+ MMO = &LoadMMO;
+ if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
+ return std::nullopt;
+
+ // Find out what the base pointer and index for the load is.
+ Register LoadPtr;
+ int64_t Idx;
+ if (!mi_match(Load->getOperand(1).getReg(), MRI,
+ m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
+ LoadPtr = Load->getOperand(1).getReg();
+ Idx = 0;
+ }
+
+ // Don't combine things like a[i], a[i] -> a bigger load.
+ if (!SeenIdx.insert(Idx).second)
+ return std::nullopt;
+
+ // Every load must share the same base pointer; don't combine things like:
+ //
+ // a[i], b[i + 1] -> a bigger load.
+ if (!BasePtr.isValid())
+ BasePtr = LoadPtr;
+ if (BasePtr != LoadPtr)
+ return std::nullopt;
+
+ if (Idx < LowestIdx) {
+ LowestIdx = Idx;
+ LowestIdxLoad = Load;
+ }
+
+ // Keep track of the byte offset that this load ends up at. If we have seen
+ // the byte offset, then stop here. We do not want to combine:
+ //
+ // a[i] << 16, a[i + k] << 16 -> a bigger load.
+ if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
+ return std::nullopt;
+ Loads.insert(Load);
+
+ // Keep track of the position of the earliest/latest loads in the pattern.
+ // We will check that there are no load fold barriers between them later
+ // on.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ if (!EarliestLoad || dominates(*Load, *EarliestLoad))
+ EarliestLoad = Load;
+ if (!LatestLoad || dominates(*LatestLoad, *Load))
+ LatestLoad = Load;
+ }
+
+ // We found a load for each register. Let's check if each load satisfies the
+ // pattern.
+ assert(Loads.size() == RegsToVisit.size() &&
+ "Expected to find a load for each register?");
+ assert(EarliestLoad != LatestLoad && EarliestLoad &&
+ LatestLoad && "Expected at least two loads?");
+
+ // Check if there are any stores, calls, etc. between any of the loads. If
+ // there are, then we can't safely perform the combine.
+ //
+ // MaxIter is chosen based off the (worst case) number of iterations it
+ // typically takes to succeed in the LLVM test suite plus some padding.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ const unsigned MaxIter = 20;
+ unsigned Iter = 0;
+ for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
+ LatestLoad->getIterator())) {
+ if (Loads.count(&MI))
+ continue;
+ if (MI.isLoadFoldBarrier())
+ return std::nullopt;
+ if (Iter++ == MaxIter)
+ return std::nullopt;
+ }
+
+ return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
+}
+
+bool CombinerHelper::matchLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ MachineFunction &MF = *MI.getMF();
+ // Assuming a little-endian target, transform:
+ // s8 *a = ...
+ // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+ // =>
+ // s32 val = *((i32)a)
+ //
+ // s8 *a = ...
+ // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+ // =>
+ // s32 val = BSWAP(*((s32)a))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty.isVector())
+ return false;
+
+ // We need to combine at least two loads into this type. Since the smallest
+ // possible load is into a byte, we need at least a 16-bit wide type.
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits();
+ if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
+ return false;
+
+ // Match a collection of non-OR instructions in the pattern.
+ auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
+ if (!RegsToVisit)
+ return false;
+
+ // We have a collection of non-OR instructions. Figure out how wide each of
+ // the small loads should be based off of the number of potential loads we
+ // found.
+ const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
+ if (NarrowMemSizeInBits % 8 != 0)
+ return false;
+
+ // Check if each register feeding into each OR is a load from the same
+ // base pointer + some arithmetic.
+ //
+ // e.g. a[0], a[1] << 8, a[2] << 16, etc.
+ //
+ // Also verify that each of these ends up putting a[i] into the same memory
+ // offset as a load into a wide type would.
+ SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
+ GZExtLoad *LowestIdxLoad, *LatestLoad;
+ int64_t LowestIdx;
+ auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
+ MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
+ if (!MaybeLoadInfo)
+ return false;
+ std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
+
+ // We have a bunch of loads being OR'd together. Using the addresses + offsets
+ // we found before, check if this corresponds to a big or little endian byte
+ // pattern. If it does, then we can represent it using a load + possibly a
+ // BSWAP.
+ bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
+ std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
+ if (!IsBigEndian)
+ return false;
+ bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
+ if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
+ return false;
+
+ // Make sure that the load from the lowest index produces offset 0 in the
+ // final value.
+ //
+ // This ensures that we won't combine something like this:
+ //
+ // load x[i] -> byte 2
+ // load x[i+1] -> byte 0 ---> wide_load x[i]
+ // load x[i+2] -> byte 1
+ const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
+ const unsigned ZeroByteOffset =
+ *IsBigEndian
+ ? bigEndianByteAt(NumLoadsInTy, 0)
+ : littleEndianByteAt(NumLoadsInTy, 0);
+ auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
+ if (ZeroOffsetIdx == MemOffset2Idx.end() ||
+ ZeroOffsetIdx->second != LowestIdx)
+ return false;
+
+ // We will reuse the pointer from the load which ends up at byte offset 0. It
+ // may not use index 0.
+ Register Ptr = LowestIdxLoad->getPointerReg();
+ const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
+ LegalityQuery::MemDesc MMDesc(MMO);
+ MMDesc.MemoryTy = Ty;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
+ return false;
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
+
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ unsigned Fast = 0;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
+ !Fast)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.setInstrAndDebugLoc(*LatestLoad);
+ Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
+ MIB.buildLoad(LoadDst, Ptr, *NewMMO);
+ if (NeedsBSwap)
+ MIB.buildBSwap(Dst, LoadDst);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
+ MachineInstr *&ExtMI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // TODO: Extending a vector may be expensive; don't do this until heuristics
+ // are better.
+ if (MRI.getType(DstReg).isVector())
+ return false;
+
+ // Try to match a phi, whose only use is an extend.
+ if (!MRI.hasOneNonDBGUse(DstReg))
+ return false;
+ ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
+ switch (ExtMI->getOpcode()) {
+ case TargetOpcode::G_ANYEXT:
+ return true; // G_ANYEXT is usually free.
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ break;
+ default:
+ return false;
+ }
+
+ // If the target is likely to fold this extend away, don't propagate.
+ if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
+ return false;
+
+ // We don't want to propagate the extends unless there's a good chance that
+ // they'll be optimized in some way.
+ // Collect the unique incoming values.
+ SmallPtrSet<MachineInstr *, 4> InSrcs;
+ for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+ auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_CONSTANT:
+ InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
+ // Don't try to propagate if there are too many places to create new
+ // extends; chances are it'll increase code size.
+ if (InSrcs.size() > 2)
+ return false;
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
+ MachineInstr *&ExtMI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI);
+ Register DstReg = ExtMI->getOperand(0).getReg();
+ LLT ExtTy = MRI.getType(DstReg);
+
+ // Propagate the extension into each incoming register's defining block.
+ // Use a SetVector here because PHIs can have duplicate edges, and we want
+ // deterministic iteration order.
+ SmallSetVector<MachineInstr *, 8> SrcMIs;
+ SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
+ for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
+ auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
+ if (!SrcMIs.insert(SrcMI))
+ continue;
+
+ // Build an extend after each src inst.
+ auto *MBB = SrcMI->getParent();
+ MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
+ if (InsertPt != MBB->end() && InsertPt->isPHI())
+ InsertPt = MBB->getFirstNonPHI();
+
+ Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
+ Builder.setDebugLoc(MI.getDebugLoc());
+ auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
+ SrcMI->getOperand(0).getReg());
+ OldToNewSrcMap[SrcMI] = NewExt;
+ }
+
+ // Create a new phi with the extended inputs.
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
+ NewPhi.addDef(DstReg);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ if (!MO.isReg()) {
+ NewPhi.addMBB(MO.getMBB());
+ continue;
+ }
+ auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
+ NewPhi.addUse(NewSrc->getOperand(0).getReg());
+ }
+ Builder.insertInstr(NewPhi);
+ ExtMI->eraseFromParent();
+}
+
+bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
+ Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+ // If we have a constant index, look for a G_BUILD_VECTOR source
+ // and find the source register that the index maps to.
+ Register SrcVec = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcVec);
+
+ auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
+ return false;
+
+ unsigned VecIdx = Cst->Value.getZExtValue();
+
+ // Check if we have a build_vector or build_vector_trunc with an optional
+ // trunc in front.
+ MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
+ if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
+ SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
+ }
+
+ if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+ SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
+ return false;
+
+ EVT Ty(getMVTForLLT(SrcTy));
+ if (!MRI.hasOneNonDBGUse(SrcVec) &&
+ !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+ return false;
+
+ Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
+ return true;
+}
+
+void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
+ Register &Reg) {
+ // Check the type of the register, since it may have come from a
+ // G_BUILD_VECTOR_TRUNC.
+ LLT ScalarTy = MRI.getType(Reg);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ if (ScalarTy != DstTy) {
+ assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
+ Builder.buildTrunc(DstReg, Reg);
+ MI.eraseFromParent();
+ return;
+ }
+ replaceSingleDefInstWithReg(MI, Reg);
+}
+
+bool CombinerHelper::matchExtractAllEltsFromBuildVector(
+ MachineInstr &MI,
+ SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ // This combine tries to find build_vector's which have every source element
+ // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
+ // masked load scalarization are run late in the pipeline. There's already
+ // a combine for a similar pattern starting from the extract, but that
+ // doesn't attempt to do it if there are multiple uses of the build_vector,
+ // which in this case is true. Starting the combine from the build_vector
+ // feels more natural than trying to find sibling nodes of extracts.
+ // E.g.
+ // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
+ // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
+ // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
+ // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
+ // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
+ // ==>
+ // replace ext{1,2,3,4} with %s{1,2,3,4}
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ unsigned NumElts = DstTy.getNumElements();
+
+ SmallBitVector ExtractedElts(NumElts);
+ for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
+ if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
+ return false;
+ auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
+ if (!Cst)
+ return false;
+ unsigned Idx = Cst->getZExtValue();
+ if (Idx >= NumElts)
+ return false; // Out of range.
+ ExtractedElts.set(Idx);
+ SrcDstPairs.emplace_back(
+ std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
+ }
+ // Match if every element was extracted.
+ return ExtractedElts.all();
+}
+
+void CombinerHelper::applyExtractAllEltsFromBuildVector(
+ MachineInstr &MI,
+ SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ for (auto &Pair : SrcDstPairs) {
+ auto *ExtMI = Pair.second;
+ replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
+ ExtMI->eraseFromParent();
+ }
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::applyBuildFn(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::applyBuildFnNoErase(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+}
+
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
+ unsigned FshOpc = 0;
+
+ // Match (or (shl ...), (lshr ...)).
+ if (!mi_match(Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
+ return false;
+
+ // Given constants C0 and C1 such that C0 + C1 is bit-width:
+ // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
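+ // e.g. on s32, (or (shl x, 8), (lshr y, 24)) -> (fshr x, y, 24).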
+ int64_t CstShlAmt, CstLShrAmt;
+ if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
+ mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
+ CstShlAmt + CstLShrAmt == BitWidth) {
+ FshOpc = TargetOpcode::G_FSHR;
+ Amt = LShrAmt;
+
+ } else if (mi_match(LShrAmt, MRI,
+ m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
+ ShlAmt == Amt) {
+ // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
+ FshOpc = TargetOpcode::G_FSHL;
+
+ } else if (mi_match(ShlAmt, MRI,
+ m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
+ LShrAmt == Amt) {
+ // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
+ FshOpc = TargetOpcode::G_FSHR;
+
+ } else {
+ return false;
+ }
+
+ LLT AmtTy = MRI.getType(Amt);
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
+ };
+ return true;
+}
+
+/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
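+ /// i.e. (fshl x, x, amt) -> (rotl x, amt) and (fshr x, x, amt) -> (rotr x, amt).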
+bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ if (X != Y)
+ return false;
+ unsigned RotateOpc =
+ Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
+ return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
+}
+
+void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
+ bool IsFSHL = Opc == TargetOpcode::G_FSHL;
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
+ : TargetOpcode::G_ROTR));
+ MI.removeOperand(2);
+ Observer.changedInstr(MI);
+}
+
+// Fold (rot x, c) -> (rot x, c % BitSize)
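+ // e.g. for a 32-bit rotate, (rotl %x, 40) becomes (rotl %x, (urem 40, 32)),
+ // which can then be constant-folded to a rotate by 8.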
+bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+ MI.getOpcode() == TargetOpcode::G_ROTR);
+ unsigned Bitsize =
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+ Register AmtReg = MI.getOperand(2).getReg();
+ bool OutOfRange = false;
+ auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ OutOfRange |= CI->getValue().uge(Bitsize);
+ return true;
+ };
+ return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
+}
+
+void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+ MI.getOpcode() == TargetOpcode::G_ROTR);
+ unsigned Bitsize =
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Register Amt = MI.getOperand(2).getReg();
+ LLT AmtTy = MRI.getType(Amt);
+ auto Bits = Builder.buildConstant(AmtTy, Bitsize);
+ Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Amt);
+ Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
+ int64_t &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
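+ // If known bits fully decide the comparison, fold the G_ICMP to the target's
+ // "true" value or to 0; e.g. an eq compare is known false when some bit is
+ // known to differ between the two operands.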
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
+ auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
+ std::optional<bool> KnownVal;
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected G_ICMP predicate?");
+ case CmpInst::ICMP_EQ:
+ KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_NE:
+ KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SGE:
+ KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SGT:
+ KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SLE:
+ KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SLT:
+ KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_UGE:
+ KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_UGT:
+ KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_ULE:
+ KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_ULT:
+ KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
+ break;
+ }
+ if (!KnownVal)
+ return false;
+ MatchInfo =
+ *KnownVal
+ ? getICmpTrueVal(getTargetLowering(),
+ /*IsVector = */
+ MRI.getType(MI.getOperand(0).getReg()).isVector(),
+ /* IsFP = */ false)
+ : 0;
+ return true;
+}
+
+bool CombinerHelper::matchICmpToLHSKnownBits(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // Given:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP ne %x, 0
+ //
+ // Or:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP eq %x, 1
+ //
+ // We can replace %cmp with %x assuming true is 1 on the target.
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (!CmpInst::isEquality(Pred))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+ /* IsFP = */ false) != 1)
+ return false;
+ int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+ return false;
+ Register LHS = MI.getOperand(2).getReg();
+ auto KnownLHS = KB->getKnownBits(LHS);
+ if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+ return false;
+ // Make sure replacing Dst with the LHS is a legal operation.
+ LLT LHSTy = MRI.getType(LHS);
+ unsigned LHSSize = LHSTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Op = TargetOpcode::COPY;
+ if (DstSize != LHSSize)
+ Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+ if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+ return true;
+}
+
+// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
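+ // e.g. (and (or x, 0xff00), 0x00ff) -> (and x, 0x00ff), since the OR cannot
+ // set any bit that the AND keeps.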
+bool CombinerHelper::matchAndOrDisjointMask(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // Ignore vector types to simplify matching the two constants.
+ // TODO: do this for vectors and scalars via a demanded bits analysis.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector())
+ return false;
+
+ Register Src;
+ Register AndMaskReg;
+ int64_t AndMaskBits;
+ int64_t OrMaskBits;
+ if (!mi_match(MI, MRI,
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
+ m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
+ return false;
+
+  // The G_OR is redundant only if OrMask cannot turn on any bits that
+  // survive AndMask, i.e. the two masks must be disjoint.
+ if (AndMaskBits & OrMaskBits)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ // Canonicalize the result to have the constant on the RHS.
+ if (MI.getOperand(1).getReg() == AndMaskReg)
+ MI.getOperand(2).setReg(AndMaskReg);
+ MI.getOperand(1).setReg(Src);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
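+
+// Worked example (constants chosen for illustration): with c1 = 0xF0 and
+// c2 = 0x0F the masks are disjoint (0xF0 & 0x0F == 0), so
+//
+//   %or  = G_OR  %x, 0xF0
+//   %and = G_AND %or, 0x0F
+//
+// is rewritten in place to %and = G_AND %x, 0x0F.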
+
+/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
+bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(Src);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
+ return false;
+ int64_t Width = MI.getOperand(2).getImm();
+ Register ShiftSrc;
+ int64_t ShiftImm;
+ if (!mi_match(
+ Src, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
+ m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
+ return false;
+ if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
+ auto Cst2 = B.buildConstant(ExtractTy, Width);
+ B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
+ };
+ return true;
+}
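+
+// Worked example (32-bit scalar assumed, and G_SBFX assumed legal for the
+// type): ShiftImm = 4 and Width = 8 satisfy ShiftImm + Width <= 32, so
+//
+//   %shr  = G_ASHR %x, 4
+//   %sext = G_SEXT_INREG %shr, 8
+//
+// becomes %sext = G_SBFX %x, 4 (LSB), 8 (width), i.e. a sign-extended
+// extract of bits [4, 12).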
+
+/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
+bool CombinerHelper::matchBitfieldExtractFromAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
+ TargetOpcode::G_UBFX, Ty, ExtractTy))
+ return false;
+
+ int64_t AndImm, LSBImm;
+ Register ShiftSrc;
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
+ m_ICst(AndImm))))
+ return false;
+
+ // The mask is a mask of the low bits iff imm & (imm+1) == 0.
+ auto MaybeMask = static_cast<uint64_t>(AndImm);
+ if (MaybeMask & (MaybeMask + 1))
+ return false;
+
+ // LSB must fit within the register.
+ if (static_cast<uint64_t>(LSBImm) >= Size)
+ return false;
+
+ uint64_t Width = APInt(Size, AndImm).countr_one();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
+ };
+ return true;
+}
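+
+// Worked example (32-bit scalar assumed, and constant G_UBFX assumed legal
+// for the type): AndImm = 0xFF is a mask of the low bits
+// (0xFF & (0xFF + 1) == 0) and LSBImm = 4 < 32, so
+//
+//   %shr = G_LSHR %x, 4
+//   %and = G_AND  %shr, 0xFF
+//
+// becomes %and = G_UBFX %x, 4 (LSB), 8 (width).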
+
+bool CombinerHelper::matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+
+ const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SBFX
+ : TargetOpcode::G_UBFX;
+
+ // Check if the type we would use for the extract is legal
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+ return false;
+
+ Register ShlSrc;
+ int64_t ShrAmt;
+ int64_t ShlAmt;
+ const unsigned Size = Ty.getScalarSizeInBits();
+
+ // Try to match shr (shl x, c1), c2
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ // Make sure that the shift sizes can fit a bitfield extract
+ if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+ return false;
+
+ // Skip this combine if the G_SEXT_INREG combine could handle it
+ if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+ return false;
+
+ // Calculate start position and width of the extract
+ const int64_t Pos = ShrAmt - ShlAmt;
+ const int64_t Width = Size - ShrAmt;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+ };
+ return true;
+}
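+
+// Worked example (32-bit scalar assumed, extract opcode assumed legal): with
+// ShlAmt = 8 and ShrAmt = 20,
+//
+//   %shl = G_SHL  %x, 8
+//   %shr = G_LSHR %shl, 20
+//
+// extracts bits [12, 24) of %x, so it becomes
+// %shr = G_UBFX %x, 12 (Pos = 20 - 8), 12 (Width = 32 - 20).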
+
+bool CombinerHelper::matchBitfieldExtractFromShrAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
+ TargetOpcode::G_UBFX, Ty, ExtractTy))
+ return false;
+
+ // Try to match shr (and x, c1), c2
+ Register AndSrc;
+ int64_t ShrAmt;
+ int64_t SMask;
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (ShrAmt < 0 || ShrAmt >= Size)
+ return false;
+
+ // If the shift subsumes the mask, emit the 0 directly.
+ if (0 == (SMask >> ShrAmt)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, 0);
+ };
+ return true;
+ }
+
+ // Check that ubfx can do the extraction, with no holes in the mask.
+ uint64_t UMask = SMask;
+ UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
+ UMask &= maskTrailingOnes<uint64_t>(Size);
+ if (!isMask_64(UMask))
+ return false;
+
+ // Calculate start position and width of the extract.
+ const int64_t Pos = ShrAmt;
+ const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
+
+ // It's preferable to keep the shift, rather than form G_SBFX.
+ // TODO: remove the G_AND via demanded bits analysis.
+ if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
+ };
+ return true;
+}
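+
+// Worked example (32-bit scalar assumed, constant G_UBFX assumed legal): with
+// SMask = 0xFF0 and ShrAmt = 4, UMask becomes 0xFFF (a contiguous mask), so
+//
+//   %and = G_AND  %x, 0xFF0
+//   %shr = G_LSHR %and, 4
+//
+// becomes %shr = G_UBFX %x, 4 (Pos), 8 (Width = countr_one(0xFFF) - 4).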
+
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+ MachineInstr &PtrAdd) {
+ assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+
+ Register Src1Reg = PtrAdd.getOperand(1).getReg();
+ MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+ if (!Src1Def)
+ return false;
+
+ Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+ if (MRI.hasOneNonDBGUse(Src1Reg))
+ return false;
+
+ auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ if (!C1)
+ return false;
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ const APInt &C1APIntVal = *C1;
+ const APInt &C2APIntVal = *C2;
+ const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
+
+ for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+ // This combine may end up running before ptrtoint/inttoptr combines
+ // manage to eliminate redundant conversions, so try to look through them.
+ MachineInstr *ConvUseMI = &UseMI;
+ unsigned ConvUseOpc = ConvUseMI->getOpcode();
+ while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
+ ConvUseOpc == TargetOpcode::G_PTRTOINT) {
+ Register DefReg = ConvUseMI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(DefReg))
+ break;
+ ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+ ConvUseOpc = ConvUseMI->getOpcode();
+ }
+ auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
+ ConvUseOpc == TargetOpcode::G_STORE;
+ if (!LoadStore)
+ continue;
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ unsigned AS =
+ MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+ Type *AccessTy =
+ getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+ PtrAdd.getMF()->getFunction().getContext());
+ const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+ if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+ AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+ AccessTy, AS))
+ return true;
+ }
+
+ return false;
+}
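+
+// Numeric illustration (the offset range is a hypothetical target property,
+// used only for exposition): if immediate offsets in [0, 4095] are legal,
+// then with C1 = 4000 and C2 = 200 the existing offset 200 can fold into the
+// access, but the combined offset 4200 could not, so this helper returns
+// true and the reassociation is skipped.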
+
+bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+  // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
+ // if and only if (G_PTR_ADD X, C) has one use.
+ Register LHSBase;
+ std::optional<ValueAndVReg> LHSCstOff;
+ if (!mi_match(MI.getBaseReg(), MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
+ return false;
+
+ auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+    // When we change LHSPtrAdd's offset register we might cause it to use a
+    // register before its def. Sink LHSPtrAdd so it sits just before the
+    // outer PTR_ADD to ensure this doesn't happen.
+ LHSPtrAdd->moveBefore(&MI);
+ Register RHSReg = MI.getOffsetReg();
+    // Reusing the constant's VReg directly could cause a type mismatch if it
+    // comes from an extend/trunc, so build a fresh constant of the offset's
+    // type.
+ auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ Observer.changingInstr(*LHSPtrAdd);
+ LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ Observer.changedInstr(*LHSPtrAdd);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
+ if (!LHSPtrAdd)
+ return false;
+
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register LHSSrc1 = LHSPtrAdd->getBaseReg();
+ Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
+ auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ // We're trying to match a few pointer computation patterns here for
+ // re-association opportunities.
+ // 1) Isolating a constant operand to be on the RHS, e.g.:
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ //
+ // 2) Folding two constants in each sub-tree as long as such folding
+ // doesn't break a legal addressing mode.
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ //
+ // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
+  //   G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
+  //   iff (G_PTR_ADD X, C) has one use.
+ MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
+ MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
+
+ // Try to match example 2.
+ if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
+
+ // Try to match example 3.
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
+
+ // Try to match example 1.
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
+ return true;
+
+ return false;
+}
+
+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+ Register OpLHS, Register OpRHS,
+ BuildFnTy &MatchInfo) {
+ LLT OpRHSTy = MRI.getType(OpRHS);
+ MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+ if (OpLHSDef->getOpcode() != Opc)
+ return false;
+
+ MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+ Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+ Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+ // If the inner op is (X op C), pull the constant out so it can be folded with
+ // other constants in the expression tree. Folding is not guaranteed so we
+ // might have (C1 op C2). In that case do not pull a constant out because it
+ // won't help and can lead to infinite loops.
+ if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+ !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+ if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+ // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+ };
+ return true;
+ }
+ if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // We don't check if the reassociation will break a legal addressing mode
+ // here since pointer arithmetic is handled by G_PTR_ADD.
+ unsigned Opc = MI.getOpcode();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+ return true;
+ if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+ return true;
+ return false;
+}
+
+bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
+ if (!MaybeCst)
+ return false;
+ MatchInfo = *MaybeCst;
+ return true;
+}
+
+bool CombinerHelper::matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Look for a binop feeding into an AND with a mask:
+ //
+ // %add = G_ADD %lhs, %rhs
+ // %and = G_AND %add, 000...11111111
+ //
+ // Check if it's possible to perform the binop at a narrower width and zext
+ // back to the original width like so:
+ //
+ // %narrow_lhs = G_TRUNC %lhs
+ // %narrow_rhs = G_TRUNC %rhs
+ // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
+ // %new_add = G_ZEXT %narrow_add
+ // %and = G_AND %new_add, 000...11111111
+ //
+ // This can allow later combines to eliminate the G_AND if it turns out
+ // that the mask is irrelevant.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ Register AndLHS = MI.getOperand(1).getReg();
+ Register AndRHS = MI.getOperand(2).getReg();
+ LLT WideTy = MRI.getType(Dst);
+
+ // If the potential binop has more than one use, then it's possible that one
+ // of those uses will need its full width.
+ if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
+ return false;
+
+ // Check if the LHS feeding the AND is impacted by the high bits that we're
+ // masking out.
+ //
+ // e.g. for 64-bit x, y:
+ //
+ // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
+ MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
+ if (!LHSInst)
+ return false;
+ unsigned LHSOpc = LHSInst->getOpcode();
+ switch (LHSOpc) {
+ default:
+ return false;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ break;
+ }
+
+ // Find the mask on the RHS.
+ auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
+ if (!Cst)
+ return false;
+ auto Mask = Cst->Value;
+ if (!Mask.isMask())
+ return false;
+
+ // No point in combining if there's nothing to truncate.
+ unsigned NarrowWidth = Mask.countr_one();
+ if (NarrowWidth == WideTy.getSizeInBits())
+ return false;
+ LLT NarrowTy = LLT::scalar(NarrowWidth);
+
+ // Check if adding the zext + truncates could be harmful.
+ auto &MF = *MI.getMF();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
+ !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
+ return false;
+ Register BinOpLHS = LHSInst->getOperand(1).getReg();
+ Register BinOpRHS = LHSInst->getOperand(2).getReg();
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
+ auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
+ auto NarrowBinOp =
+ Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
+ auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Ext.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
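+  // (G_*MULO x, 2) -> (G_*ADDO x, x)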
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+ : TargetOpcode::G_SADDO;
+ MI.setDesc(Builder.getTII().get(NewOpc));
+ MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*MULO x, 0) -> 0 + no carry out
+ assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
+ MI.getOpcode() == TargetOpcode::G_SMULO);
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Carry = MI.getOperand(1).getReg();
+ if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
+ !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, 0);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*ADDO x, 0) -> x + no carry out
+ assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
+ MI.getOpcode() == TargetOpcode::G_SADDO);
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ Register Carry = MI.getOperand(1).getReg();
+ if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, LHS);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
+ // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
+ assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
+ MI.getOpcode() == TargetOpcode::G_SADDE ||
+ MI.getOpcode() == TargetOpcode::G_USUBE ||
+ MI.getOpcode() == TargetOpcode::G_SSUBE);
+ if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ MatchInfo = [&](MachineIRBuilder &B) {
+ unsigned NewOpcode;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UADDE:
+ NewOpcode = TargetOpcode::G_UADDO;
+ break;
+ case TargetOpcode::G_SADDE:
+ NewOpcode = TargetOpcode::G_SADDO;
+ break;
+ case TargetOpcode::G_USUBE:
+ NewOpcode = TargetOpcode::G_USUBO;
+ break;
+ case TargetOpcode::G_SSUBE:
+ NewOpcode = TargetOpcode::G_SSUBO;
+ break;
+ }
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(NewOpcode));
+ MI.removeOperand(4);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SUB);
+ Register Dst = MI.getOperand(0).getReg();
+ // (x + y) - z -> x (if y == z)
+ // (x + y) - z -> y (if x == z)
+ Register X, Y, Z;
+ if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
+ Register ReplaceReg;
+ int64_t CstX, CstY;
+ if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
+ mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
+ ReplaceReg = X;
+ else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
+ mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
+ ReplaceReg = Y;
+ if (ReplaceReg) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
+ return true;
+ }
+ }
+
+ // x - (y + z) -> 0 - y (if x == z)
+ // x - (y + z) -> 0 - z (if x == y)
+ if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
+ Register ReplaceReg;
+ int64_t CstX;
+ if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
+ mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
+ ReplaceReg = Y;
+ else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
+ mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
+ ReplaceReg = Z;
+ if (ReplaceReg) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Zero = B.buildConstant(MRI.getType(Dst), 0);
+ B.buildSub(Dst, Zero, ReplaceReg);
+ };
+ return true;
+ }
+ }
+ return false;
+}
+
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseNPQ = false;
+ SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](const Constant *C) {
+ auto *CI = cast<ConstantInt>(C);
+ const APInt &Divisor = CI->getValue();
+
+ bool SelNPQ = false;
+ APInt Magic(Divisor.getBitWidth(), 0);
+ unsigned PreShift = 0, PostShift = 0;
+
+    // The magic-number algorithm doesn't work for division by 1, so we need
+    // to emit a select at the end.
+    // TODO: Use undef values for a divisor of 1.
+ if (!Divisor.isOne()) {
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor);
+
+ Magic = std::move(magics.Magic);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
+ PreShift = magics.PreShift;
+ PostShift = magics.PostShift;
+ SelNPQ = magics.IsAdd;
+ }
+
+ PreShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ NPQFactors.push_back(
+ MIB.buildConstant(ScalarTy,
+ SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits))
+ .getReg(0));
+ PostShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register PreShift, PostShift, MagicFactor, NPQFactor;
+ auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+ if (RHSDef) {
+ PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+ MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+ NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+ PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+ } else {
+ assert(MRI.getType(RHS).isScalar() &&
+ "Non-build_vector operation should have been a scalar");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ Register Q = LHS;
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+ if (UseNPQ) {
+ Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (Ty.isVector())
+ NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+ else
+ NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+ Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+ }
+
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ auto One = MIB.buildConstant(Ty, 1);
+ auto IsOne = MIB.buildICmp(
+ CmpInst::Predicate::ICMP_EQ,
+ Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+ return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
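+
+// Worked example (32-bit scalar; the magic constant shown is the well-known
+// value UnsignedDivisionByConstantInfo produces for a divisor of 5, given
+// here for illustration):
+//
+//   %q = G_UDIV %x:_(s32), 5
+//
+// is expanded to
+//
+//   %m   = G_UMULH %x, 0xCCCCCCCD   ; PreShift = 0, no NPQ path
+//   %res = G_LSHR  %m, 2            ; PostShift = 2
+//
+// plus a G_LSHR by PreShift = 0 and a trailing select for the divisor == 1
+// case, both of which are expected to fold away for this constant divisor.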
+
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ }
+
+ auto CheckEltValue = [&](const Constant *C) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+ return !CI->isZero();
+ return false;
+ };
+ return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildUDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // If the sdiv has an 'exact' flag we can use a simpler lowering.
+ if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
+ return matchUnaryPredicate(
+ MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
+ }
+
+ // Don't support the general case for now.
+ return false;
+}
+
+void CombinerHelper::applySDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildSDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ auto &SDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = SDiv.getReg(0);
+ Register LHS = SDiv.getReg(1);
+ Register RHS = SDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseSRA = false;
+ SmallVector<Register, 16> Shifts, Factors;
+
+ auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
+ bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
+
+ auto BuildSDIVPattern = [&](const Constant *C) {
+ // Don't recompute inverses for each splat element.
+ if (IsSplat && !Factors.empty()) {
+ Shifts.push_back(Shifts[0]);
+ Factors.push_back(Factors[0]);
+ return true;
+ }
+
+ auto *CI = cast<ConstantInt>(C);
+ APInt Divisor = CI->getValue();
+ unsigned Shift = Divisor.countr_zero();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+
+ // Calculate the multiplicative inverse modulo BW.
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = Divisor.getBitWidth();
+ APInt Factor = Divisor.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
+ Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register Shift, Factor;
+ if (Ty.isVector()) {
+ Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
+ Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ Register Res = LHS;
+
+ if (UseSRA)
+ Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
+
+ return MIB.buildMul(Ty, Res, Factor);
+}
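+
+// Worked example for the exact-division path (32-bit scalar; the factor is
+// the standard multiplicative inverse of 3 modulo 2^32, shown for
+// illustration):
+//
+//   %q = G_SDIV exact %x:_(s32), 6
+//
+// is expanded to
+//
+//   %s = G_ASHR exact %x, 1     ; strip the power-of-two factor
+//   %q = G_MUL %s, 0xAAAAAAAB   ; 3 * 0xAAAAAAAB == 1 (mod 2^32)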
+
+bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UMULH);
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ auto MatchPow2ExceptOne = [&](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
+ return false;
+ };
+ if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
+ return false;
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
+}
+
+void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ unsigned NumEltBits = Ty.getScalarSizeInBits();
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto LogBase2 = buildLogBase2(RHS, Builder);
+ auto ShiftAmt =
+ Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
+ auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
+ Builder.buildLShr(Dst, LHS, Trunc);
+ MI.eraseFromParent();
+}
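+
+// Worked example (32-bit scalar assumed): G_UMULH %x, 8 computes
+// (zext(%x) * 8) >> 32, which is just %x >> 29, so it is rewritten roughly as
+//
+//   %log2  = 3                 ; log2 of the power-of-two operand
+//   %shamt = G_SUB 32, %log2   ; then zext/trunc to the shift-amount type
+//   %res   = G_LSHR %x, %shamt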
+
+bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
+ Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ LLT Type = MRI.getType(Dst);
+
+ // fold (fadd x, fneg(y)) -> (fsub x, y)
+ // fold (fadd fneg(y), x) -> (fsub x, y)
+  // G_FADD is commutative, so both cases are matched by m_GFAdd.
+ if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
+ Opc = TargetOpcode::G_FSUB;
+ }
+  // fold (fsub x, fneg(y)) -> (fadd x, y)
+ else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
+ Opc = TargetOpcode::G_FADD;
+ }
+ // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
+ mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
+ mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
+ // no opcode change
+ } else
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(Opc));
+ MI.getOperand(1).setReg(X);
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ Register LHS = MI.getOperand(1).getReg();
+ MatchInfo = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ const auto LHSCst = Ty.isVector()
+ ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
+ : getFConstantVRegValWithLookThrough(LHS, MRI);
+ if (!LHSCst)
+ return false;
+
+ // -0.0 is always allowed
+ if (LHSCst->Value.isNegZero())
+ return true;
+
+ // +0.0 is only allowed if nsz is set.
+ if (LHSCst->Value.isPosZero())
+ return MI.getFlag(MachineInstr::FmNsz);
+
+ return false;
+}
+
+void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Builder.buildFNeg(
+ Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
+ eraseInst(MI);
+}
+
+/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
+/// due to global flags or MachineInstr flags.
+static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
+ if (MI.getOpcode() != TargetOpcode::G_FMUL)
+ return false;
+ return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
+}
+
+static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
+ const MachineRegisterInfo &MRI) {
+ return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end()) >
+ std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end());
+}
+
+bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
+ bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate) {
+
+ auto *MF = MI.getMF();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetOptions &Options = MF->getTarget().Options;
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ if (CanReassociate &&
+ !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ return false;
+
+ // Floating-point multiply-add with intermediate rounding.
+ HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return false;
+
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD;
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
+ return false;
+
+ Aggressive = TLI.enableAggressiveFMAFusion(DstType);
+ return true;
+}
+
+bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS.MI->getOperand(1).getReg(),
+ LHS.MI->getOperand(2).getReg(), RHS.Reg});
+ };
+ return true;
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {RHS.MI->getOperand(1).getReg(),
+ RHS.MI->getOperand(2).getReg(), LHS.Reg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ MachineInstr *FpExtSrc;
+ if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
+ // Note: Commutes FADD operands.
+ if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
+ return false;
+
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ MachineInstr *FMA = nullptr;
+ Register Z;
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
+ FMA = LHS.MI;
+ Z = RHS.Reg;
+ }
+ // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
+ else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
+ Z = LHS.Reg;
+ FMA = RHS.MI;
+ }
+
+ if (FMA) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
+ Register X = FMA->getOperand(1).getReg();
+ Register Y = FMA->getOperand(2).getReg();
+ Register U = FMulMI->getOperand(1).getReg();
+ Register V = FMulMI->getOperand(2).getReg();
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
+ B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ if (!Aggressive)
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
+ auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
+ Register Y, MachineIRBuilder &B) {
+ Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
+ Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
+ Register InnerFMA =
+ B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
+ .getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+
+ MachineInstr *FMulMI, *FMAMI;
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS.MI->getOperand(3).getReg(), MRI,
+ m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHS.Reg,
+ LHS.MI->getOperand(1).getReg(),
+ LHS.MI->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
+ };
+
+ return true;
+ }
+ }
+
+  // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS.MI->getOperand(3).getReg(), MRI,
+ m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHS.Reg,
+ RHS.MI->getOperand(1).getReg(),
+ RHS.MI->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+  // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
+ DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+  bool FirstMulHasFewerUses = true;
+ if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
+ hasMoreUses(*LHS.MI, *RHS.MI, MRI))
+ FirstMulHasFewerUses = false;
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
+ if (FirstMulHasFewerUses &&
+ (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS.MI->getOperand(1).getReg(),
+ LHS.MI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+ // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
+ else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegY =
+ B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegX =
+ B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegX, FMulMI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+
+  // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
+ if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtX =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX, FpExtY, NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
+ if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
+ Register FpExtZ =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, FpExtZ, LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
+ MachineIRBuilder &B) {
+ Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
+ Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
+ };
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fneg (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ // fold (fsub (fneg (fpext (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
+ buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHSReg, B);
+ B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg, B);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ GSelect &Sel = cast<GSelect>(MI);
+ Register DstReg = Sel.getReg(0);
+ Register Cond = Sel.getCondReg();
+ Register TrueReg = Sel.getTrueReg();
+ Register FalseReg = Sel.getFalseReg();
+
+ auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
+ auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
+
+ const LLT CondTy = MRI.getType(Cond);
+ const LLT OpTy = MRI.getType(TrueReg);
+ if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
+ return false;
+
+ // We have a boolean select.
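+ // For example (a sketch with s1 operands): select %c, %t, 0 becomes
+ // G_AND %c, %t, and select %c, 1, %f becomes G_OR %c, %f; the last two
+ // cases below first build a not of the condition.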
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
+ if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildOr(DstReg, Cond, FalseReg);
+ };
+ return true;
+ }
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
+ if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildAnd(DstReg, Cond, TrueReg);
+ };
+ return true;
+ }
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (MaybeCstFalse && MaybeCstFalse->isOne()) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
+ };
+ return true;
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (MaybeCstTrue && MaybeCstTrue->isZero()) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
+ };
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
+ unsigned &IdxToPropagate) {
+ bool PropagateNaN;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ PropagateNaN = false;
+ break;
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ PropagateNaN = true;
+ break;
+ }
+
+ auto MatchNaN = [&](unsigned Idx) {
+ Register MaybeNaNReg = MI.getOperand(Idx).getReg();
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
+ if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
+ return false;
+ IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
+ return true;
+ };
+
+ return MatchNaN(1) || MatchNaN(2);
+}
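+
+// As a worked example for the combine above: given
+//   %r:_(s32) = G_FMINNUM %x, %nan
+// where %nan is a constant NaN, IdxToPropagate names %x, since fminnum/fmaxnum
+// return the non-NaN operand; for G_FMINIMUM/G_FMAXIMUM it names the NaN
+// operand instead, because those opcodes propagate NaNs.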
+
+bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ // Helper lambda to check for opportunities for
+ // A + (B - A) -> B
+ // (B - A) + A -> B
+ auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
+ Register Reg;
+ return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
+ Reg == MaybeSameReg;
+ };
+ return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
+bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // This combine folds the following patterns:
+ //
+ // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
+ // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
+ // into
+ // x
+ // if
+ // k == sizeof(VecEltTy)/2
+ // type(x) == type(dst)
+ //
+ // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
+ // into
+ // x
+ // if
+ // type(x) == type(dst)
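+ //
+ // For instance (a sketch assuming x:_(<2 x s16>) and a <2 x s16> dst):
+ //   %c:_(s32) = G_BITCAST %x
+ //   %h:_(s32) = G_LSHR %c, 16
+ //   %v:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %c, %h
+ // Here the shift amount equals the destination element size (16), so %v is
+ // just %x and can be replaced by it.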
+
+ LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT DstEltTy = DstVecTy.getElementType();
+
+ Register Lo, Hi;
+
+ if (mi_match(
+ MI, MRI,
+ m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
+ MatchInfo = Lo;
+ return MRI.getType(MatchInfo) == DstVecTy;
+ }
+
+ std::optional<ValueAndVReg> ShiftAmount;
+ const auto LoPattern = m_GBitcast(m_Reg(Lo));
+ const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
+ if (mi_match(
+ MI, MRI,
+ m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
+ m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
+ if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
+ MatchInfo = Lo;
+ return MRI.getType(MatchInfo) == DstVecTy;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y))) with just x
+ // if type(x) == type(G_TRUNC)
+ if (!mi_match(MI.getOperand(1).getReg(), MRI,
+ m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
+ return false;
+
+ return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
+ // y if K == size of vector element type
+ std::optional<ValueAndVReg> ShiftAmt;
+ if (!mi_match(MI.getOperand(1).getReg(), MRI,
+ m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
+ m_GCst(ShiftAmt))))
+ return false;
+
+ LLT MatchTy = MRI.getType(MatchInfo);
+ return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
+ MatchTy == MRI.getType(MI.getOperand(0).getReg());
+}
+
+unsigned CombinerHelper::getFPMinMaxOpcForSelect(
+ CmpInst::Predicate Pred, LLT DstTy,
+ SelectPatternNaNBehaviour VsNaNRetVal) const {
+ assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
+ "Expected a NaN behaviour?");
+ // Choose an opcode based on legality, or on the behaviour when one of the
+ // LHS/RHS may be NaN.
+ switch (Pred) {
+ default:
+ return 0;
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE:
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
+ return TargetOpcode::G_FMAXNUM;
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
+ return TargetOpcode::G_FMAXIMUM;
+ if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
+ return TargetOpcode::G_FMAXNUM;
+ if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
+ return TargetOpcode::G_FMAXIMUM;
+ return 0;
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_OLE:
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
+ return TargetOpcode::G_FMINNUM;
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
+ return TargetOpcode::G_FMINIMUM;
+ if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
+ return TargetOpcode::G_FMINNUM;
+ if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
+ return 0;
+ return TargetOpcode::G_FMINIMUM;
+ }
+}
+
+CombinerHelper::SelectPatternNaNBehaviour
+CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
+ bool IsOrderedComparison) const {
+ bool LHSSafe = isKnownNeverNaN(LHS, MRI);
+ bool RHSSafe = isKnownNeverNaN(RHS, MRI);
+ // Completely unsafe.
+ if (!LHSSafe && !RHSSafe)
+ return SelectPatternNaNBehaviour::NOT_APPLICABLE;
+ if (LHSSafe && RHSSafe)
+ return SelectPatternNaNBehaviour::RETURNS_ANY;
+ // An ordered comparison will return false when given a NaN, so it
+ // returns the RHS.
+ if (IsOrderedComparison)
+ return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
+ : SelectPatternNaNBehaviour::RETURNS_OTHER;
+ // An unordered comparison will return true when given a NaN, so it
+ // returns the LHS.
+ return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
+ : SelectPatternNaNBehaviour::RETURNS_NAN;
+}
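+
+// A sketch of how this is used by matchFPSelectToMinMax below, i.e. for
+// select (fcmp pred %a, %b), %a, %b: if only %b may be NaN and the predicate
+// is ordered, a NaN %b makes the compare false and the select yields %b, the
+// possible NaN, hence RETURNS_NAN; with an unordered predicate the compare is
+// true and the never-NaN %a is returned, hence RETURNS_OTHER.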
+
+bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
+ Register TrueVal, Register FalseVal,
+ BuildFnTy &MatchInfo) {
+ // Match: select (fcmp cond x, y) x, y
+ // select (fcmp cond x, y) y, x
+ // And turn it into fminnum/fmaxnum or fminimum/fmaximum based on the condition.
+ LLT DstTy = MRI.getType(Dst);
+ // Bail out early on pointers, since we'll never want to fold to a min/max.
+ if (DstTy.isPointer())
+ return false;
+ // Match a floating point compare with a less-than/greater-than predicate.
+ // TODO: Allow multiple users of the compare if they are all selects.
+ CmpInst::Predicate Pred;
+ Register CmpLHS, CmpRHS;
+ if (!mi_match(Cond, MRI,
+ m_OneNonDBGUse(
+ m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
+ CmpInst::isEquality(Pred))
+ return false;
+ SelectPatternNaNBehaviour ResWithKnownNaNInfo =
+ computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
+ if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
+ return false;
+ if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
+ ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
+ else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
+ ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
+ }
+ if (TrueVal != CmpLHS || FalseVal != CmpRHS)
+ return false;
+ // Decide what type of max/min this should be based off of the predicate.
+ unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
+ if (!Opc || !isLegal({Opc, {DstTy}}))
+ return false;
+ // Comparisons between signed zero and zero may have different results...
+ // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
+ if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
+ // We don't know if a comparison between two 0s will give us a consistent
+ // result. Be conservative and only proceed if at least one side is
+ // non-zero.
+ auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
+ if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
+ KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
+ if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
+ return false;
+ }
+ }
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
+ };
+ return true;
+}
+
+bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // TODO: Handle integer cases.
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ // Condition may be fed by a truncated compare.
+ Register Cond = MI.getOperand(1).getReg();
+ Register MaybeTrunc;
+ if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
+ Cond = MaybeTrunc;
+ Register Dst = MI.getOperand(0).getReg();
+ Register TrueVal = MI.getOperand(2).getReg();
+ Register FalseVal = MI.getOperand(3).getReg();
+ return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
+}
+
+bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // (X + Y) == X --> Y == 0
+ // (X + Y) != X --> Y != 0
+ // (X - Y) == X --> Y == 0
+ // (X - Y) != X --> Y != 0
+ // (X ^ Y) == X --> Y == 0
+ // (X ^ Y) != X --> Y != 0
+ Register Dst = MI.getOperand(0).getReg();
+ CmpInst::Predicate Pred;
+ Register X, Y, OpLHS, OpRHS;
+ bool MatchedSub = mi_match(
+ Dst, MRI,
+ m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
+ if (MatchedSub && X != OpLHS)
+ return false;
+ if (!MatchedSub) {
+ if (!mi_match(Dst, MRI,
+ m_c_GICmp(m_Pred(Pred), m_Reg(X),
+ m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
+ m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
+ return false;
+ Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
+ }
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Zero = B.buildConstant(MRI.getType(Y), 0);
+ B.buildICmp(Pred, Dst, Y, Zero);
+ };
+ return CmpInst::isEquality(Pred) && Y.isValid();
+}
+
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
+bool CombinerHelper::tryCombine(MachineInstr &MI) {
+ if (tryCombineCopy(MI))
+ return true;
+ if (tryCombineExtendingLoads(MI))
+ return true;
+ if (tryCombineIndexedLoadStore(MI))
+ return true;
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
new file mode 100644
index 000000000000..d747cbf5aadc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -0,0 +1,68 @@
+//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutor.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the GIMatchTableExecutor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "gi-match-table-executor"
+
+using namespace llvm;
+
+GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
+ : Renderers(MaxRenderers) {}
+
+GIMatchTableExecutor::GIMatchTableExecutor() = default;
+
+bool GIMatchTableExecutor::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+ if (MO.isReg() && MO.getReg())
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ return VRegVal->Value.getSExtValue() == Value;
+ return false;
+}
+
+bool GIMatchTableExecutor::isBaseWithConstantOffset(
+ const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+ if (!Root.isReg())
+ return false;
+
+ MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+ if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ return true;
+}
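+
+// Illustrative shape of what the check above accepts (names are made up):
+//   %off:_(s64) = G_CONSTANT i64 16
+//   %addr:_(p0) = G_PTR_ADD %base, %off
+// A root operand whose register is defined by such a G_PTR_ADD counts as a
+// base with a constant offset; any other producer does not.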
+
+bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
+ MachineInstr &IntoMI) const {
+ // Immediate neighbours are already folded.
+ if (MI.getParent() == IntoMI.getParent() &&
+ std::next(MI.getIterator()) == IntoMI.getIterator())
+ return true;
+
+ // Convergent instructions cannot be moved in the CFG.
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+ return false;
+
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
new file mode 100644
index 000000000000..59f4d60a41d8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -0,0 +1,48 @@
+//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code to combine machine functions at the generic
+// level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void GISelChangeObserver::changingAllUsesOfReg(
+ const MachineRegisterInfo &MRI, Register Reg) {
+ for (auto &ChangingMI : MRI.use_instructions(Reg)) {
+ changingInstr(ChangingMI);
+ ChangingAllUsesOfReg.insert(&ChangingMI);
+ }
+}
+
+void GISelChangeObserver::finishedChangingAllUsesOfReg() {
+ for (auto *ChangedMI : ChangingAllUsesOfReg)
+ changedInstr(*ChangedMI);
+ ChangingAllUsesOfReg.clear();
+}
+
+RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
+ MachineFunction::Delegate *Del)
+ : MF(MF), Delegate(Del) {
+ // Register this as the delegate for handling insertions and deletions of
+ // instructions.
+ MF.setDelegate(Del);
+}
+
+RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); }
+
+RAIIMFObserverInstaller::RAIIMFObserverInstaller(MachineFunction &MF,
+ GISelChangeObserver &Observer)
+ : MF(MF) {
+ MF.setObserver(&Observer);
+}
+
+RAIIMFObserverInstaller::~RAIIMFObserverInstaller() { MF.setObserver(nullptr); }
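+
+// A usage sketch (Observer here stands for any GISelChangeObserver
+// implementation, and MF for the function being rewritten):
+//   {
+//     RAIIMFObserverInstaller Install(MF, Observer);
+//     // ... create or erase instructions in MF; Observer is notified ...
+//   } // leaving the scope resets the function's observer to nullptr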
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
new file mode 100644
index 000000000000..363ffbfa90b5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -0,0 +1,771 @@
+//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for querying information about KnownBits during GISel
+/// passes.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "gisel-known-bits"
+
+using namespace llvm;
+
+char llvm::GISelKnownBitsAnalysis::ID = 0;
+
+INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
+ "Analysis for ComputingKnownBits", false, true)
+
+GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
+ : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
+ DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}
+
+Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
+ const MachineInstr *MI = MRI.getVRegDef(R);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::COPY:
+ return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
+ case TargetOpcode::G_ASSERT_ALIGN: {
+ // TODO: Min with source
+ return Align(MI->getOperand(2).getImm());
+ }
+ case TargetOpcode::G_FRAME_INDEX: {
+ int FrameIdx = MI->getOperand(1).getIndex();
+ return MF.getFrameInfo().getObjectAlign(FrameIdx);
+ }
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ default:
+ return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
+ }
+}
+
+KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
+ assert(MI.getNumExplicitDefs() == 1 &&
+ "expected single return generic instruction");
+ return getKnownBits(MI.getOperand(0).getReg());
+}
+
+KnownBits GISelKnownBits::getKnownBits(Register R) {
+ const LLT Ty = MRI.getType(R);
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
+ return getKnownBits(R, DemandedElts);
+}
+
+KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
+ unsigned Depth) {
+ // For now, we only maintain the cache during one request.
+ assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
+
+ KnownBits Known;
+ computeKnownBitsImpl(R, Known, DemandedElts);
+ ComputeKnownBitsCache.clear();
+ return Known;
+}
+
+bool GISelKnownBits::signBitIsZero(Register R) {
+ LLT Ty = MRI.getType(R);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
+}
+
+APInt GISelKnownBits::getKnownZeroes(Register R) {
+ return getKnownBits(R).Zero;
+}
+
+APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
+
+LLVM_ATTRIBUTE_UNUSED static void
+dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
+ dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
+ << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
+ << toString(Known.Zero | Known.One, 16, false) << "\n"
+ << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
+ << "\n"
+ << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
+ << "\n";
+}
+
+/// Compute known bits for the intersection of \p Src0 and \p Src1
+void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ return;
+
+ KnownBits Known2;
+ computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
+
+ // Only known if known in both the LHS and RHS.
+ Known = Known.intersectWith(Known2);
+}
+
+// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
+// created using Width. Use this function when the inputs are KnownBits
+// objects. TODO: Move this to KnownBits.h if it is usable in more cases.
+static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
+ const KnownBits &OffsetKnown,
+ const KnownBits &WidthKnown) {
+ KnownBits Mask(BitWidth);
+ Mask.Zero = APInt::getBitsSetFrom(
+ BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
+ Mask.One = APInt::getLowBitsSet(
+ BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
+ return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
+}
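+
+// A worked example for the helper above (illustrative): with BitWidth = 32,
+// Offset known to be 8 and Width known to be 4, Mask gets bits [0,4) known
+// one and bits [4,32) known zero, so the result models (Src >> 8) & 0xF.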
+
+void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ MachineInstr &MI = *MRI.getVRegDef(R);
+ unsigned Opcode = MI.getOpcode();
+ LLT DstTy = MRI.getType(R);
+
+ // Handle the case where this is called on a register that does not have a
+ // type constraint (i.e. it has a register class constraint instead). This is
+ // unlikely to occur except by looking through copies but it is possible for
+ // the initial register being queried to be in this state.
+ if (!DstTy.isValid()) {
+ Known = KnownBits();
+ return;
+ }
+
+ unsigned BitWidth = DstTy.getScalarSizeInBits();
+ auto CacheEntry = ComputeKnownBitsCache.find(R);
+ if (CacheEntry != ComputeKnownBitsCache.end()) {
+ Known = CacheEntry->second;
+ LLVM_DEBUG(dbgs() << "Cache hit at ");
+ LLVM_DEBUG(dumpResult(MI, Known, Depth));
+ assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
+ return;
+ }
+ Known = KnownBits(BitWidth); // Don't know anything
+
+ // Depth may get bigger than max depth if it gets passed to a different
+ // GISelKnownBits object.
+ // This may happen when, say, a generic part uses a GISelKnownBits object
+ // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
+ // which creates a new GISelKnownBits object with a different and smaller
+ // depth. If we just check for equality, we would never exit if the depth
+ // that is passed down to the target specific GISelKnownBits object is
+ // already bigger than its max depth.
+ if (Depth >= getMaxDepth())
+ return;
+
+ if (!DemandedElts)
+ return; // No demanded elts, better to assume we don't know anything.
+
+ KnownBits Known2;
+
+ switch (Opcode) {
+ default:
+ TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
+ Depth);
+ break;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // Collect the known bits that are shared by every demanded vector element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Known bits are the values that are shared by every demanded element.
+ Known = Known.intersectWith(Known2);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::COPY:
+ case TargetOpcode::G_PHI:
+ case TargetOpcode::PHI: {
+ Known.One = APInt::getAllOnes(BitWidth);
+ Known.Zero = APInt::getAllOnes(BitWidth);
+ // Destination registers should not have subregisters at this
+ // point of the pipeline, otherwise the main live-range will be
+ // defined more than once, which is against SSA.
+ assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
+ // Record in the cache that we know nothing for MI.
+ // This will get updated later and in the meantime, if we reach that
+ // phi again, because of a loop, we will cut the search thanks to this
+ // cache entry.
+ // We could actually build up more information on the phi by not cutting
+ // the search, but that additional information is more a side effect
+ // than an intended choice.
+ // Therefore, for now, save on compile time until we find a proper way to
+ // derive known bits for PHIs within loops.
+ ComputeKnownBitsCache[R] = KnownBits(BitWidth);
+ // A PHI's operands are a mix of registers and basic blocks, interleaved.
+ // We only care about the register ones.
+ for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+ const MachineOperand &Src = MI.getOperand(Idx);
+ Register SrcReg = Src.getReg();
+ // Look through trivial copies and phis, but don't look through trivial
+ // copies or phis of the form `%1:(s32) = OP %0:gpr32`, since known-bits
+ // analysis is currently unable to determine the bit width of a
+ // register class.
+ //
+ // We can't use NoSubRegister by name as it's defined by each target but
+ // it's always defined to be 0 by tablegen.
+ if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
+ MRI.getType(SrcReg).isValid()) {
+ // For COPYs we don't do anything, don't increase the depth.
+ computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+ Depth + (Opcode != TargetOpcode::COPY));
+ Known = Known.intersectWith(Known2);
+ // If we reach a point where we don't know anything
+ // just stop looking through the operands.
+ if (Known.isUnknown())
+ break;
+ } else {
+ // We know nothing.
+ Known = KnownBits(BitWidth);
+ break;
+ }
+ }
+ break;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ auto CstVal = getIConstantVRegVal(R, MRI);
+ if (!CstVal)
+ break;
+ Known = KnownBits::makeConstant(*CstVal);
+ break;
+ }
+ case TargetOpcode::G_FRAME_INDEX: {
+ int FrameIdx = MI.getOperand(1).getIndex();
+ TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
+ break;
+ }
+ case TargetOpcode::G_SUB: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, Known,
+ Known2);
+ break;
+ }
+ case TargetOpcode::G_XOR: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ Known ^= Known2;
+ break;
+ }
+ case TargetOpcode::G_PTR_ADD: {
+ if (DstTy.isVector())
+ break;
+ // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
+ break;
+ [[fallthrough]];
+ }
+ case TargetOpcode::G_ADD: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ Known =
+ KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, Known, Known2);
+ break;
+ }
+ case TargetOpcode::G_AND: {
+ // Bits that are zero in either the LHS or the RHS are zero in the result.
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ Known &= Known2;
+ break;
+ }
+ case TargetOpcode::G_OR: {
+ // Bits that are one in either the LHS or the RHS are one in the result.
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ Known |= Known2;
+ break;
+ }
+ case TargetOpcode::G_MUL: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::mul(Known, Known2);
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+ Known, DemandedElts, Depth + 1);
+ break;
+ }
+ case TargetOpcode::G_SMIN: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::smin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_SMAX: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::smax(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMIN: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMAX: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ if (DstTy.isVector())
+ break;
+ if (TL.getBooleanContents(DstTy.isVector(),
+ Opcode == TargetOpcode::G_FCMP) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case TargetOpcode::G_SEXT: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case TargetOpcode::G_ASSERT_SEXT:
+ case TargetOpcode::G_SEXT_INREG: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.sextInReg(MI.getOperand(2).getImm());
+ break;
+ }
+ case TargetOpcode::G_ANYEXT: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.anyext(BitWidth);
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (const MDNode *Ranges = MMO->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
+
+ break;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ if (DstTy.isVector())
+ break;
+ // Everything above the retrieved bits is zero
+ Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_ASHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::ashr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::lshr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_SHL: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::shl(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_PTRTOINT:
+ if (DstTy.isVector())
+ break;
+ // Fall through and handle them the same as zext/trunc.
+ [[fallthrough]];
+ case TargetOpcode::G_ASSERT_ZEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_TRUNC: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned SrcBitWidth;
+
+ // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
+ if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
+ SrcBitWidth = MI.getOperand(2).getImm();
+ else {
+ SrcBitWidth = SrcTy.isPointer()
+ ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
+ : SrcTy.getSizeInBits();
+ }
+ assert(SrcBitWidth && "SrcBitWidth can't be zero");
+ Known = Known.zextOrTrunc(SrcBitWidth);
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known = Known.zextOrTrunc(BitWidth);
+ if (BitWidth > SrcBitWidth)
+ Known.Zero.setBitsFrom(SrcBitWidth);
+ break;
+ }
+ case TargetOpcode::G_ASSERT_ALIGN: {
+ int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());
+
+ // TODO: Should use maximum with source
+ // If a node is guaranteed to be aligned, set the corresponding low bits as
+ // known zero and clear them from the known-one set.
+ Known.Zero.setLowBits(LogOfAlign);
+ Known.One.clearLowBits(LogOfAlign);
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+
+ for (unsigned I = 0; I != NumOps - 1; ++I) {
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
+ DemandedElts, Depth + 1);
+ Known.insertBits(SrcOpKnown, I * OpSize);
+ }
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ if (DstTy.isVector())
+ break;
+ unsigned NumOps = MI.getNumOperands();
+ Register SrcReg = MI.getOperand(NumOps - 1).getReg();
+ if (MRI.getType(SrcReg).isVector())
+ return; // TODO: Handle vectors.
+
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
+
+ // Figure out the result operand index
+ unsigned DstIdx = 0;
+ for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
+ ++DstIdx)
+ ;
+
+ Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
+ break;
+ }
+ case TargetOpcode::G_BSWAP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known = Known.byteSwap();
+ break;
+ }
+ case TargetOpcode::G_BITREVERSE: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known = Known.reverseBits();
+ break;
+ }
+ case TargetOpcode::G_CTPOP: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // We can bound the space the count needs. Also, bits known to be zero can't
+ // contribute to the population.
+ unsigned BitsPossiblySet = Known2.countMaxPopulation();
+ unsigned LowBits = llvm::bit_width(BitsPossiblySet);
+ Known.Zero.setBitsFrom(LowBits);
+ // TODO: we could bound Known.One using the lower bound on the number of
+ // bits known to be set in Known2, i.e. Known2.countMinPopulation().
+ break;
+ }
+ case TargetOpcode::G_UBFX: {
+ KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
+ Depth + 1);
+ Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
+ break;
+ }
+ case TargetOpcode::G_SBFX: {
+ KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
+ Depth + 1);
+ Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
+ // Sign extend the extracted value using shift left and arithmetic shift
+ // right.
+ KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
+ KnownBits ShiftKnown = KnownBits::computeForAddSub(
+ /*Add*/ false, /*NSW*/ false, ExtKnown, WidthKnown);
+ Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
+ break;
+ }
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO: {
+ if (MI.getOperand(1).getReg() == R) {
+ // If we know the result of a compare has the top bits zero, use this
+ // info.
+ if (TL.getBooleanContents(DstTy.isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ }
+ break;
+ }
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ LLVM_DEBUG(dumpResult(MI, Known, Depth));
+
+ // Update the cache.
+ ComputeKnownBitsCache[R] = Known;
+}
+
+/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
+unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
+ if (Src1SignBits == 1)
+ return 1;
+ return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
+}
+
+unsigned GISelKnownBits::computeNumSignBits(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ MachineInstr &MI = *MRI.getVRegDef(R);
+ unsigned Opcode = MI.getOpcode();
+
+ if (Opcode == TargetOpcode::G_CONSTANT)
+ return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
+
+ if (Depth == getMaxDepth())
+ return 1;
+
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
+ LLT DstTy = MRI.getType(R);
+ const unsigned TyBits = DstTy.getScalarSizeInBits();
+
+ // Handle the case where this is called on a register that does not have a
+ // type constraint. This is unlikely to occur except by looking through copies
+ // but it is possible for the initial register being queried to be in this
+ // state.
+ if (!DstTy.isValid())
+ return 1;
+
+ unsigned FirstAnswer = 1;
+ switch (Opcode) {
+ case TargetOpcode::COPY: {
+ MachineOperand &Src = MI.getOperand(1);
+ if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
+ MRI.getType(Src.getReg()).isValid()) {
+ // Don't increment Depth for this one since we didn't do any work.
+ return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
+ }
+
+ return 1;
+ }
+ case TargetOpcode::G_SEXT: {
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+ unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
+ return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
+ }
+ case TargetOpcode::G_ASSERT_SEXT:
+ case TargetOpcode::G_SEXT_INREG: {
+ // Max of the input and what this extends.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned SrcBits = MI.getOperand(2).getImm();
+ unsigned InRegBits = TyBits - SrcBits + 1;
+ return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
+ }
+ case TargetOpcode::G_SEXTLOAD: {
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '17' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits() + 1;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '16' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits();
+ }
+ case TargetOpcode::G_TRUNC: {
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned DstTyBits = DstTy.getScalarSizeInBits();
+ unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+ unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - DstTyBits))
+ return NumSrcSignBits - (NumSrcBits - DstTyBits);
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ return computeNumSignBitsMin(MI.getOperand(2).getReg(),
+ MI.getOperand(3).getReg(), DemandedElts,
+ Depth + 1);
+ }
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // If the compare returns 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integers.
+ if (MI.getOperand(1).getReg() == R) {
+ if (TL.getBooleanContents(DstTy.isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return TyBits;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ bool IsFP = Opcode == TargetOpcode::G_FCMP;
+ if (TyBits == 1)
+ break;
+ auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
+ if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
+ return TyBits; // All bits are sign bits.
+ if (BC == TargetLowering::ZeroOrOneBooleanContent)
+ return TyBits - 1; // Every always-zero bit is a sign bit.
+ break;
+ }
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ default: {
+ unsigned NumBits =
+ TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
+ break;
+ }
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ KnownBits Known = getKnownBits(R, DemandedElts, Depth);
+ APInt Mask;
+ if (Known.isNonNegative()) { // sign bit is 0
+ Mask = Known.Zero;
+ } else if (Known.isNegative()) { // sign bit is 1;
+ Mask = Known.One;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLO to determine
+ // the number of identical bits in the top of the input value.
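+ // For example (sketch): for a 32-bit value whose top 12 bits are known to
+ // be zero, Mask.countl_one() is 12, so at least 12 sign bits are reported.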
+ Mask <<= Mask.getBitWidth() - TyBits;
+ return std::max(FirstAnswer, Mask.countl_one());
+}
+
+unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
+ LLT Ty = MRI.getType(R);
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
+ return computeNumSignBits(R, DemandedElts, Depth);
+}
+
+void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
new file mode 100644
index 000000000000..efcc40641ea8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -0,0 +1,24 @@
+//===-- llvm/CodeGen/GlobalISel/GlobalISel.cpp --- GlobalISel ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+// This file implements the common initialization routines for the
+// GlobalISel library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+void llvm::initializeGlobalISel(PassRegistry &Registry) {
+ initializeIRTranslatorPass(Registry);
+ initializeLegalizerPass(Registry);
+ initializeLoadStoreOptPass(Registry);
+ initializeLocalizerPass(Registry);
+ initializeRegBankSelectPass(Registry);
+ initializeInstructionSelectPass(Registry);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
new file mode 100644
index 000000000000..9a67a8d05a4d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -0,0 +1,3698 @@
+//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the IRTranslator class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/MemoryOpRemark.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "irtranslator"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ EnableCSEInIRTranslator("enable-cse-in-irtranslator",
+ cl::desc("Should enable CSE in irtranslator"),
+ cl::Optional, cl::init(false));
+char IRTranslator::ID = 0;
+
+INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
+ false, false)
+
+static void reportTranslationError(MachineFunction &MF,
+ const TargetPassConfig &TPC,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(Twine(R.getMsg()));
+ else
+ ORE.emit(R);
+}
+
+IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+ : MachineFunctionPass(ID), OptLevel(optlevel) {}
+
+#ifndef NDEBUG
+namespace {
+/// Verify that every instruction created has the same DILocation as the
+/// instruction being translated.
+class DILocationVerifier : public GISelChangeObserver {
+ const Instruction *CurrInst = nullptr;
+
+public:
+ DILocationVerifier() = default;
+ ~DILocationVerifier() = default;
+
+ const Instruction *getCurrentInst() const { return CurrInst; }
+ void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
+
+ void erasingInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+
+ void createdInstr(MachineInstr &MI) override {
+ assert(getCurrentInst() && "Inserted instruction without a current MI");
+
+ // Only print the check message if we're actually checking it.
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
+ << " was copied to " << MI);
+#endif
+ // We allow insts in the entry block to have no debug loc because
+ // they could have originated from constants, and we don't want a jumpy
+ // debug experience.
+ assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
+ (MI.getParent()->isEntryBlock() && !MI.getDebugLoc())) &&
+ "Line info was not transferred to all instructions");
+ }
+};
+} // namespace
+#endif // ifndef NDEBUG
+
+
+void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<StackProtector>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ if (OptLevel != CodeGenOpt::None) {
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ }
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+IRTranslator::ValueToVRegInfo::VRegListT &
+IRTranslator::allocateVRegs(const Value &Val) {
+ auto VRegsIt = VMap.findVRegs(Val);
+ if (VRegsIt != VMap.vregs_end())
+ return *VRegsIt->second;
+ auto *Regs = VMap.getVRegs(Val);
+ auto *Offsets = VMap.getOffsets(Val);
+ SmallVector<LLT, 4> SplitTys;
+ computeValueLLTs(*DL, *Val.getType(), SplitTys,
+ Offsets->empty() ? Offsets : nullptr);
+ for (unsigned i = 0; i < SplitTys.size(); ++i)
+ Regs->push_back(0);
+ return *Regs;
+}
+
+ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
+ auto VRegsIt = VMap.findVRegs(Val);
+ if (VRegsIt != VMap.vregs_end())
+ return *VRegsIt->second;
+
+ if (Val.getType()->isVoidTy())
+ return *VMap.getVRegs(Val);
+
+ // Create entry for this type.
+ auto *VRegs = VMap.getVRegs(Val);
+ auto *Offsets = VMap.getOffsets(Val);
+
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
+
+ SmallVector<LLT, 4> SplitTys;
+ computeValueLLTs(*DL, *Val.getType(), SplitTys,
+ Offsets->empty() ? Offsets : nullptr);
+
+ if (!isa<Constant>(Val)) {
+ for (auto Ty : SplitTys)
+ VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
+ return *VRegs;
+ }
+
+ if (Val.getType()->isAggregateType()) {
+ // UndefValue, ConstantAggregateZero
+ auto &C = cast<Constant>(Val);
+ unsigned Idx = 0;
+ while (auto Elt = C.getAggregateElement(Idx++)) {
+ auto EltRegs = getOrCreateVRegs(*Elt);
+ llvm::copy(EltRegs, std::back_inserter(*VRegs));
+ }
+ } else {
+ assert(SplitTys.size() == 1 && "unexpectedly split LLT");
+ VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
+ bool Success = translate(cast<Constant>(Val), VRegs->front());
+ if (!Success) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction().getSubprogram(),
+ &MF->getFunction().getEntryBlock());
+ R << "unable to translate constant: " << ore::NV("Type", Val.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return *VRegs;
+ }
+ }
+
+ return *VRegs;
+}
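+
+// As a sketch of the splitting above: an IR value of type {i64, i32} ends up
+// with two virtual registers, of LLTs s64 and s32, and computeValueLLTs
+// records the offset of each piece alongside them.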
+
+int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
+ auto MapEntry = FrameIndices.find(&AI);
+ if (MapEntry != FrameIndices.end())
+ return MapEntry->second;
+
+ uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
+ uint64_t Size =
+ ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
+
+ // Always allocate at least one byte.
+ Size = std::max<uint64_t>(Size, 1u);
+
+ int &FI = FrameIndices[&AI];
+ FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
+ return FI;
+}
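+
+// For example (a sketch): `%a = alloca i32, i32 4` has ElementSize 4 and an
+// array size of 4, so a 16-byte stack object is created; a zero-sized alloca
+// is still given the minimum one byte.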
+
+Align IRTranslator::getMemOpAlign(const Instruction &I) {
+ if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
+ return SI->getAlign();
+ if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
+ return LI->getAlign();
+ if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I))
+ return AI->getAlign();
+ if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I))
+ return AI->getAlign();
+
+ OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
+ R << "unable to translate memop: " << ore::NV("Opcode", &I);
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return Align(1);
+}
+
+MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
+ MachineBasicBlock *&MBB = BBToMBB[&BB];
+ assert(MBB && "BasicBlock was not encountered before");
+ return *MBB;
+}
+
+void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
+ assert(NewPred && "new predecessor must be a real MachineBasicBlock");
+ MachinePreds[Edge].push_back(NewPred);
+}
+
+bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // Get or create a virtual register for each value.
+ // Unless the value is a Constant => loadimm cst?
+ // or inline constant each time?
+ // Creation of a virtual register needs to have a size.
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
+ uint32_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
+ return true;
+}
+
+bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ uint32_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
+ return true;
+}
+
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
+}
+
+bool IRTranslator::translateCompare(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ auto *CI = dyn_cast<CmpInst>(&U);
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
+ CmpInst::Predicate Pred =
+ CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
+ cast<ConstantExpr>(U).getPredicate());
+ if (CmpInst::isIntPredicate(Pred))
+ MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
+ else if (Pred == CmpInst::FCMP_FALSE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
+ else if (Pred == CmpInst::FCMP_TRUE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
+ else {
+ uint32_t Flags = 0;
+ if (CI)
+ Flags = MachineInstr::copyFlagsFromInstruction(*CI);
+ MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
+ }
+
+ return true;
+}
+
+bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
+ const ReturnInst &RI = cast<ReturnInst>(U);
+ const Value *Ret = RI.getReturnValue();
+ if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+ Ret = nullptr;
+
+ ArrayRef<Register> VRegs;
+ if (Ret)
+ VRegs = getOrCreateVRegs(*Ret);
+
+ Register SwiftErrorVReg = 0;
+ if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
+ SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
+ &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
+ }
+
+  // The target may move the insertion point, but that does not matter
+  // because a return is the last instruction of the block anyway.
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+}
+
+void IRTranslator::emitBranchForMergedCondition(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ CmpInst::Predicate Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ }
+
+ SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+ BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+ CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+ return;
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+ SwitchCG::CaseBlock CB(
+ Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+void IRTranslator::findMergedConditions(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond) {
+ using namespace PatternMatch;
+ assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+ "Expected Opc to be AND/OR");
+  // Skip over a 'not' (it is not part of the tree) and remember to invert the
+  // op and operands at the next level.
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ isValInBlock(NotCond, CurBB->getBasicBlock())) {
+ findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
+ if (BOp) {
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
+ // Note that all nodes in the tree should have same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
+ emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+ InvertCond);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineBasicBlock *TmpBB =
+ MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+    // Another choice is to assume TrueProb for BB1 equals TrueProb for TmpBB,
+    // but the math is more complicated.
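+    //
+    // Illustrative example (hypothetical numbers): with A = 0.6 and B = 0.4,
+    // BB1 gets probabilities 0.3 and 0.7, TmpBB gets 3/7 and 4/7, and indeed
+    // 0.3 + 0.7 * 3/7 = 0.6 recovers TrueProb for the original BB.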
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
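+    //
+    // Illustrative example (hypothetical numbers): with A = 0.6 and B = 0.4,
+    // BB1 gets probabilities 0.8 and 0.2, TmpBB gets 0.75 and 0.25, and indeed
+    // 0.2 + 0.8 * 0.25 = 0.4 recovers FalseProb for the original BB.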
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ }
+}
+
+bool IRTranslator::shouldEmitAsBranches(
+ const std::vector<SwitchCG::CaseBlock> &Cases) {
+ // For multiple cases, it's better to emit as branches.
+ if (Cases.size() != 2)
+ return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+ Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+ Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
+ const BranchInst &BrInst = cast<BranchInst>(U);
+ auto &CurMBB = MIRBuilder.getMBB();
+ auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+ if (BrInst.isUnconditional()) {
+    // If the unconditional target is the layout successor, fall through.
+ if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
+ MIRBuilder.buildBr(*Succ0MBB);
+
+ // Link successors.
+ for (const BasicBlock *Succ : successors(&BrInst))
+ CurMBB.addSuccessor(&getMBB(*Succ));
+ return true;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = BrInst.getCondition();
+ MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
+
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ using namespace PatternMatch;
+ const Instruction *CondI = dyn_cast<Instruction>(CondVal);
+ if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
+ !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+ getEdgeProbability(&CurMBB, Succ0MBB),
+ getEdgeProbability(&CurMBB, Succ1MBB),
+ /*InvertCond=*/false);
+ assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (shouldEmitAsBranches(SL->SwitchCases)) {
+ // Emit the branch for this block.
+ emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
+ return true;
+ }
+
+      // Okay, we decided not to do this, remove any inserted MBBs and clear
+      // SwitchCases.
+ for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+ MF->erase(SL->SwitchCases[I].ThisBB);
+
+ SL->SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+ ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+ CurBuilder->getDebugLoc());
+
+ // Use emitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ emitSwitchCase(CB, &CurMBB, *CurBuilder);
+ return true;
+}
+
+void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI) {
+ Src->addSuccessorWithoutProb(Dst);
+ return;
+ }
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+}
+
+BranchProbability
+IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ if (!FuncInfo.BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
+}
+
+bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ // Extract cases from the switch.
+ const SwitchInst &SI = cast<SwitchInst>(U);
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (const auto &I : SI.cases()) {
+ MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
+ assert(Succ && "Could not find successor mbb in mapping");
+ const ConstantInt *CaseVal = I.getCaseValue();
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+ }
+
+ MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
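+  // For instance (illustrative), cases 0, 1 and 2 that all branch to the same
+  // block become a single range cluster [0, 2], while a case 5 with a
+  // different destination stays in its own cluster.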
+ sortAndRangeify(Clusters);
+
+ MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
+
+ // If there is only the default destination, jump there directly.
+ if (Clusters.empty()) {
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != SwitchMBB->getNextNode())
+ MIB.buildBr(*DefaultMBB);
+ return true;
+ }
+
+ SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+ SL->findBitTestClusters(Clusters, &SI);
+
+ LLVM_DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+ // FIXME: At the moment we don't do any splitting optimizations here like
+ // SelectionDAG does, so this worklist only has one entry.
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.pop_back_val();
+ if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
+ return false;
+ }
+ return true;
+}
+
+void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
+ MachineBasicBlock *MBB) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ MachineIRBuilder MIB(*MBB->getParent());
+ MIB.setMBB(*MBB);
+ MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
+ auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
+ MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
+}
+
+bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
+ MachineBasicBlock *HeaderBB) {
+ MachineIRBuilder MIB(*HeaderBB->getParent());
+ MIB.setMBB(*HeaderBB);
+ MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+ const Value &SValue = *JTH.SValue;
+ // Subtract the lowest switch case value from the value being switched on.
+ const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
+ Register SwitchOpReg = getOrCreateVReg(SValue);
+ auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
+ auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
+
+  // This value may be smaller or larger than the target's pointer type, and
+  // therefore may require extension or truncation.
+ Type *PtrIRTy = SValue.getType()->getPointerTo();
+ const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
+ Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
+
+ JT.Reg = Sub.getReg(0);
+
+ if (JTH.FallthroughUnreachable) {
+ if (JT.MBB != HeaderBB->getNextNode())
+ MIB.buildBr(*JT.MBB);
+ return true;
+ }
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
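+  // E.g. (illustrative), for cases covering 10..15 the header computes
+  // Sub = SwitchVal - 10 and branches to the default block when Sub >u 5;
+  // otherwise it falls through to the block that indexes the jump table
+  // with Sub.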
+ auto Cst = getOrCreateVReg(
+ *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
+ Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
+ auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
+
+ auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != HeaderBB->getNextNode())
+ BrCond = MIB.buildBr(*JT.MBB);
+ return true;
+}
+
+void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
+ MachineBasicBlock *SwitchBB,
+ MachineIRBuilder &MIB) {
+ Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
+ Register Cond;
+ DebugLoc OldDbgLoc = MIB.getDebugLoc();
+ MIB.setDebugLoc(CB.DbgLoc);
+ MIB.setMBB(*CB.ThisBB);
+
+ if (CB.PredInfo.NoCmp) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+ CB.ThisBB);
+ CB.ThisBB->normalizeSuccProbs();
+ if (CB.TrueBB != CB.ThisBB->getNextNode())
+ MIB.buildBr(*CB.TrueBB);
+ MIB.setDebugLoc(OldDbgLoc);
+ return;
+ }
+
+ const LLT i1Ty = LLT::scalar(1);
+ // Build the compare.
+ if (!CB.CmpMHS) {
+ const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+ // For conditional branch lowering, we might try to do something silly like
+    // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+ // just re-use the existing condition vreg.
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && CI->isOne() &&
+ CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ Cond = CondLHS;
+ } else {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
+ Cond =
+ MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ else
+ Cond =
+ MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ }
+ } else {
+ assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
+ "Can only handle SLE ranges");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ Cond =
+ MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+ } else {
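+      // Lower the signed range check Low <= Val <= High as a single unsigned
+      // comparison: (Val - Low) u<= (High - Low). For example (illustrative),
+      // for the range [5, 9] this becomes (Val - 5) u<= 4; values below Low
+      // wrap around to large unsigned numbers and fail the check.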
+ const LLT CmpTy = MRI->getType(CmpOpReg);
+ auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
+ auto Diff = MIB.buildConstant(CmpTy, High - Low);
+ Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+ CB.ThisBB);
+
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
+ CB.ThisBB->normalizeSuccProbs();
+
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+ CB.ThisBB);
+
+ MIB.buildBrCond(Cond, *CB.TrueBB);
+ MIB.buildBr(*CB.FalseBB);
+ MIB.setDebugLoc(OldDbgLoc);
+}
+
+bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB,
+ MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+ BranchProbability DefaultProb = W.DefaultProb;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+
+ // Since the jump table block is separate from the switch block, we need
+ // to keep track of it as a machine predecessor to the default block,
+ // otherwise we lose the phi edges.
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+ CurMBB);
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+ JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ } else {
+      // Also record edges from the jump table block to its successors.
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
+ JumpMBB);
+ }
+ }
+
+ if (FallthroughUnreachable)
+ JTH->FallthroughUnreachable = true;
+
+ if (!JTH->FallthroughUnreachable)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
+
+  // The jump table header will be inserted into our current block; it will do
+  // the range check and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
+ return false;
+ JTH->Emitted = true;
+ }
+ return true;
+}
+
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+ Value *Cond,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable,
+ BranchProbability UnhandledProbs,
+ MachineBasicBlock *CurMBB,
+ MachineIRBuilder &MIB,
+ MachineBasicBlock *SwitchMBB) {
+ using namespace SwitchCG;
+ const Value *RHS, *LHS, *MHS;
+ CmpInst::Predicate Pred;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ Pred = CmpInst::ICMP_EQ;
+ LHS = Cond;
+ RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ Pred = CmpInst::ICMP_SLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // If Fallthrough is unreachable, fold away the comparison.
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
+ CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
+
+ emitSwitchCase(CB, SwitchMBB, MIB);
+ return true;
+}
+
+void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ // Subtract the minimum value.
+ Register SwitchOpReg = getOrCreateVReg(*B.SValue);
+
+ LLT SwitchOpTy = MRI->getType(SwitchOpReg);
+ Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
+ auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
+
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
+ LLT MaskTy = SwitchOpTy;
+ if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
+ !llvm::has_single_bit<uint32_t>(MaskTy.getSizeInBits()))
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ else {
+ // Ensure that the type will fit the mask value.
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+        // Switch table case ranges are encoded into a series of masks.
+        // Just use the pointer type; it's guaranteed to fit.
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ break;
+ }
+ }
+ }
+ Register SubReg = RangeSub.getReg(0);
+ if (SwitchOpTy != MaskTy)
+ SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
+
+ B.RegVT = getMVTForLLT(MaskTy);
+ B.Reg = SubReg;
+
+ MachineBasicBlock *MBB = B.Cases[0].ThisBB;
+
+ if (!B.FallthroughUnreachable)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+
+ SwitchBB->normalizeSuccProbs();
+
+ if (!B.FallthroughUnreachable) {
+ // Conditional branch to the default block.
+ auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
+ auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
+ RangeSub, RangeCst);
+ MIB.buildBrCond(RangeCmp, *B.Default);
+ }
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != SwitchBB->getNextNode())
+ MIB.buildBr(*MBB);
+}
+
+void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
+ MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext,
+ Register Reg, SwitchCG::BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ LLT SwitchTy = getLLTForMVT(BB.RegVT);
+ Register Cmp;
+ unsigned PopCount = llvm::popcount(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
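+    // For example (illustrative), a mask of 0b1000 has three trailing zeros,
+    // so the test reduces to checking Reg == 3.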
+ auto MaskTrailingZeros =
+ MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask));
+ Cmp =
+ MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
+ .getReg(0);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ auto MaskTrailingOnes =
+ MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask));
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
+ .getReg(0);
+ } else {
+ // Make desired shift.
+ auto CstOne = MIB.buildConstant(SwitchTy, 1);
+ auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
+
+ // Emit bit tests and jumps.
+ auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
+ auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
+ auto CstZero = MIB.buildConstant(SwitchTy, 0);
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
+ .getReg(0);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+  // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one,
+  // as they are relative probabilities (and thus work more like weights), so
+  // we need to normalize them to make them sum to one.
+ SwitchBB->normalizeSuccProbs();
+
+ // Record the fact that the IR edge from the header to the bit test target
+  // will go through our new block. Needed for PHIs to have nodes added.
+ addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
+ SwitchBB);
+
+ MIB.buildBrCond(Cmp, *B.TargetBB);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != SwitchBB->getNextNode())
+ MIB.buildBr(*NextMBB);
+}
+
+bool IRTranslator::lowerBitTestWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+  // If the cases in the bit test don't form a contiguous range, we evenly
+  // distribute the probability of the edge to Fallthrough between the two
+  // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ emitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ return true;
+}
+
+bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI(W.MBB);
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = &*BBI;
+
+ if (EnableOpts) {
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker as clusters are guaranteed to never overlap.
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Prob != b.Prob
+ ? a.Prob > b.Prob
+ : a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of probabilities.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
+ --I;
+ if (I->Prob > W.LastCluster->Prob)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ }
+ UnhandledProbs -= I->Prob;
+
+ switch (I->Kind) {
+ case CC_BitTests: {
+ if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ DefaultProb, UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
+ return false;
+ }
+ break;
+ }
+
+ case CC_JumpTable: {
+ if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower jump table");
+ return false;
+ }
+ break;
+ }
+ case CC_Range: {
+ if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
+ FallthroughUnreachable, UnhandledProbs,
+ CurMBB, MIB, SwitchMBB)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower switch range");
+ return false;
+ }
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
+
+ return true;
+}
+
+bool IRTranslator::translateIndirectBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
+
+ const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
+ MIRBuilder.buildBrIndirect(Tgt);
+
+ // Link successors.
+ SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ for (const BasicBlock *Succ : successors(&BrInst)) {
+ // It's legal for indirectbr instructions to have duplicate blocks in the
+ // destination list. We don't allow this in MIR. Skip anything that's
+ // already a successor.
+ if (!AddedSuccessors.insert(Succ).second)
+ continue;
+ CurBB.addSuccessor(&getMBB(*Succ));
+ }
+
+ return true;
+}
+
+static bool isSwiftError(const Value *V) {
+ if (auto Arg = dyn_cast<Argument>(V))
+ return Arg->hasSwiftErrorAttr();
+ if (auto AI = dyn_cast<AllocaInst>(V))
+ return AI->isSwiftError();
+ return false;
+}
+
+bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
+ const LoadInst &LI = cast<LoadInst>(U);
+
+ unsigned StoreSize = DL->getTypeStoreSize(LI.getType());
+ if (StoreSize == 0)
+ return true;
+
+ ArrayRef<Register> Regs = getOrCreateVRegs(LI);
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
+ Register Base = getOrCreateVReg(*LI.getPointerOperand());
+ AAMDNodes AAInfo = LI.getAAMetadata();
+
+ const Value *Ptr = LI.getPointerOperand();
+ Type *OffsetIRTy = DL->getIndexType(Ptr->getType());
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+ if (CLI->supportSwiftError() && isSwiftError(Ptr)) {
+ assert(Regs.size() == 1 && "swifterror should be single pointer");
+ Register VReg =
+ SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr);
+ MIRBuilder.buildCopy(Regs[0], VReg);
+ return true;
+ }
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ MachineMemOperand::Flags Flags =
+ TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
+ if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
+ if (AA->pointsToConstantMemory(
+ MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
+ Flags |= MachineMemOperand::MOInvariant;
+ }
+ }
+
+ const MDNode *Ranges =
+ Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
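+  // Aggregate loads are split into one load per leaf value. The offsets from
+  // the value map are in bits, so e.g. (illustrative) a load of {i32, i32}
+  // produces two loads, at byte offsets 0 and 4 (bit offsets 0 and 32).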
+ for (unsigned i = 0; i < Regs.size(); ++i) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
+
+ MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
+ Align BaseAlign = getMemOpAlign(LI);
+ auto MMO = MF->getMachineMemOperand(
+ Ptr, Flags, MRI->getType(Regs[i]),
+ commonAlignment(BaseAlign, Offsets[i] / 8), AAInfo, Ranges,
+ LI.getSyncScopeID(), LI.getOrdering());
+ MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
+ }
+
+ return true;
+}
+
+bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
+ const StoreInst &SI = cast<StoreInst>(U);
+ if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
+ return true;
+
+ ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
+ Register Base = getOrCreateVReg(*SI.getPointerOperand());
+
+ Type *OffsetIRTy = DL->getIndexType(SI.getPointerOperandType());
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+ if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
+ assert(Vals.size() == 1 && "swifterror should be single pointer");
+
+ Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
+ SI.getPointerOperand());
+ MIRBuilder.buildCopy(VReg, Vals[0]);
+ return true;
+ }
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
+
+ for (unsigned i = 0; i < Vals.size(); ++i) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
+
+ MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
+ Align BaseAlign = getMemOpAlign(SI);
+ auto MMO = MF->getMachineMemOperand(
+ Ptr, Flags, MRI->getType(Vals[i]),
+ commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,
+ SI.getSyncScopeID(), SI.getOrdering());
+ MIRBuilder.buildStore(Vals[i], Addr, *MMO);
+ }
+ return true;
+}
+
+static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
+ const Value *Src = U.getOperand(0);
+ Type *Int32Ty = Type::getInt32Ty(U.getContext());
+
+ // getIndexedOffsetInType is designed for GEPs, so the first index is the
+ // usual array element rather than looking into the actual aggregate.
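+  // For example (illustrative), extractvalue of field 1 from a {i32, i64}
+  // aggregate yields indices [0, 1]; with a typical data layout where i64 is
+  // 8-byte aligned, the byte offset is 8 and the returned bit offset is 64.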
+ SmallVector<Value *, 1> Indices;
+ Indices.push_back(ConstantInt::get(Int32Ty, 0));
+
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
+ for (auto Idx : EVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+ for (auto Idx : IVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else {
+ for (unsigned i = 1; i < U.getNumOperands(); ++i)
+ Indices.push_back(U.getOperand(i));
+ }
+
+ return 8 * static_cast<uint64_t>(
+ DL.getIndexedOffsetInType(Src->getType(), Indices));
+}
+
+bool IRTranslator::translateExtractValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ uint64_t Offset = getOffsetFromIndices(U, *DL);
+ ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
+ unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
+ auto &DstRegs = allocateVRegs(U);
+
+ for (unsigned i = 0; i < DstRegs.size(); ++i)
+ DstRegs[i] = SrcRegs[Idx++];
+
+ return true;
+}
+
+bool IRTranslator::translateInsertValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ uint64_t Offset = getOffsetFromIndices(U, *DL);
+ auto &DstRegs = allocateVRegs(U);
+ ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
+ ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+ auto *InsertedIt = InsertedRegs.begin();
+
+ for (unsigned i = 0; i < DstRegs.size(); ++i) {
+ if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
+ DstRegs[i] = *InsertedIt++;
+ else
+ DstRegs[i] = SrcRegs[i];
+ }
+
+ return true;
+}
+
+bool IRTranslator::translateSelect(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ Register Tst = getOrCreateVReg(*U.getOperand(0));
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
+ ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+ ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+
+ uint32_t Flags = 0;
+ if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
+ Flags = MachineInstr::copyFlagsFromInstruction(*SI);
+
+ for (unsigned i = 0; i < ResRegs.size(); ++i) {
+ MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
+ }
+
+ return true;
+}
+
+bool IRTranslator::translateCopy(const User &U, const Value &V,
+ MachineIRBuilder &MIRBuilder) {
+ Register Src = getOrCreateVReg(V);
+ auto &Regs = *VMap.getVRegs(U);
+ if (Regs.empty()) {
+ Regs.push_back(Src);
+ VMap.getOffsets(U)->push_back(0);
+ } else {
+ // If we already assigned a vreg for this instruction, we can't change that.
+ // Emit a copy to satisfy the users we already emitted.
+ MIRBuilder.buildCopy(Regs[0], Src);
+ }
+ return true;
+}
+
+bool IRTranslator::translateBitCast(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If we're bitcasting to the source type, we can reuse the source vreg.
+ if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
+ getLLTForType(*U.getType(), *DL)) {
+ // If the source is a ConstantInt then it was probably created by
+ // ConstantHoisting and we should leave it alone.
+ if (isa<ConstantInt>(U.getOperand(0)))
+ return translateCast(TargetOpcode::G_CONSTANT_FOLD_BARRIER, U,
+ MIRBuilder);
+ return translateCopy(U, *U.getOperand(0), MIRBuilder);
+ }
+
+ return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
+}
+
+bool IRTranslator::translateCast(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ Register Op = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op});
+ return true;
+}
+
+bool IRTranslator::translateGetElementPtr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ Value &Op0 = *U.getOperand(0);
+ Register BaseReg = getOrCreateVReg(Op0);
+ Type *PtrIRTy = Op0.getType();
+ LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+  // Normalize a vector GEP: all scalar operands should be converted to
+  // splat vectors.
+ unsigned VectorWidth = 0;
+
+ // True if we should use a splat vector; using VectorWidth alone is not
+ // sufficient.
+ bool WantSplatVector = false;
+ if (auto *VT = dyn_cast<VectorType>(U.getType())) {
+ VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
+ // We don't produce 1 x N vectors; those are treated as scalars.
+ WantSplatVector = VectorWidth > 1;
+ }
+
+ // We might need to splat the base pointer into a vector if the offsets
+ // are vectors.
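+  // E.g. (illustrative), a GEP of a scalar pointer with a <4 x i64> index
+  // produces a <4 x ptr> result, so the base is first splat to a vector of
+  // pointers before the per-lane address arithmetic.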
+ if (WantSplatVector && !PtrTy.isVector()) {
+ BaseReg =
+ MIRBuilder
+ .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg)
+ .getReg(0);
+ PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
+ PtrTy = getLLTForType(*PtrIRTy, *DL);
+ OffsetIRTy = DL->getIndexType(PtrIRTy);
+ OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ }
+
+ int64_t Offset = 0;
+ for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
+ continue;
+ } else {
+ uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ Offset += ElementSize * CI->getSExtValue();
+ continue;
+ }
+
+ if (Offset != 0) {
+ auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
+ BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
+ .getReg(0);
+ Offset = 0;
+ }
+
+ Register IdxReg = getOrCreateVReg(*Idx);
+ LLT IdxTy = MRI->getType(IdxReg);
+ if (IdxTy != OffsetTy) {
+ if (!IdxTy.isVector() && WantSplatVector) {
+ IdxReg = MIRBuilder.buildSplatVector(
+ OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
+ }
+
+ IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
+ }
+
+ // N = N + Idx * ElementSize;
+ // Avoid doing it for ElementSize of 1.
+ Register GepOffsetReg;
+ if (ElementSize != 1) {
+ auto ElementSizeMIB = MIRBuilder.buildConstant(
+ getLLTForType(*OffsetIRTy, *DL), ElementSize);
+ GepOffsetReg =
+ MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
+ } else
+ GepOffsetReg = IdxReg;
+
+ BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
+ }
+ }
+
+ if (Offset != 0) {
+ auto OffsetMIB =
+ MIRBuilder.buildConstant(OffsetTy, Offset);
+ MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
+ return true;
+ }
+
+ MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
+ return true;
+}
+
+bool IRTranslator::translateMemFunc(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder,
+ unsigned Opcode) {
+ const Value *SrcPtr = CI.getArgOperand(1);
+ // If the source is undef, then just emit a nop.
+ if (isa<UndefValue>(SrcPtr))
+ return true;
+
+ SmallVector<Register, 3> SrcRegs;
+
+ unsigned MinPtrSize = UINT_MAX;
+ for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
+ Register SrcReg = getOrCreateVReg(**AI);
+ LLT SrcTy = MRI->getType(SrcReg);
+ if (SrcTy.isPointer())
+ MinPtrSize = std::min<unsigned>(SrcTy.getSizeInBits(), MinPtrSize);
+ SrcRegs.push_back(SrcReg);
+ }
+
+ LLT SizeTy = LLT::scalar(MinPtrSize);
+
+ // The size operand should be the minimum of the pointer sizes.
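+  // E.g. (illustrative), for a copy between a 64-bit and a 32-bit address
+  // space, the length operand is zero-extended or truncated to s32 here.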
+ Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
+ if (MRI->getType(SizeOpReg) != SizeTy)
+ SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
+
+ auto ICall = MIRBuilder.buildInstr(Opcode);
+ for (Register SrcReg : SrcRegs)
+ ICall.addUse(SrcReg);
+
+ Align DstAlign;
+ Align SrcAlign;
+ unsigned IsVol =
+ cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();
+
+ ConstantInt *CopySize = nullptr;
+
+ if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
+ DstAlign = MCI->getDestAlign().valueOrOne();
+ SrcAlign = MCI->getSourceAlign().valueOrOne();
+ CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2));
+ } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
+ DstAlign = MCI->getDestAlign().valueOrOne();
+ SrcAlign = MCI->getSourceAlign().valueOrOne();
+ CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2));
+ } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
+ DstAlign = MMI->getDestAlign().valueOrOne();
+ SrcAlign = MMI->getSourceAlign().valueOrOne();
+ CopySize = dyn_cast<ConstantInt>(MMI->getArgOperand(2));
+ } else {
+ auto *MSI = cast<MemSetInst>(&CI);
+ DstAlign = MSI->getDestAlign().valueOrOne();
+ }
+
+ if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
+ // We need to propagate the tail call flag from the IR inst as an argument.
+ // Otherwise, we have to pessimize and assume later that we cannot tail call
+ // any memory intrinsics.
+ ICall.addImm(CI.isTailCall() ? 1 : 0);
+ }
+
+ // Create mem operands to store the alignment and volatile info.
+ MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad;
+ MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore;
+ if (IsVol) {
+ LoadFlags |= MachineMemOperand::MOVolatile;
+ StoreFlags |= MachineMemOperand::MOVolatile;
+ }
+
+ AAMDNodes AAInfo = CI.getAAMetadata();
+ if (AA && CopySize &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SrcPtr, LocationSize::precise(CopySize->getZExtValue()), AAInfo))) {
+ LoadFlags |= MachineMemOperand::MOInvariant;
+
+ // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
+ // but the previous usage implied it did. Probably should check
+ // isDereferenceableAndAlignedPointer.
+ LoadFlags |= MachineMemOperand::MODereferenceable;
+ }
+
+ ICall.addMemOperand(
+ MF->getMachineMemOperand(MachinePointerInfo(CI.getArgOperand(0)),
+ StoreFlags, 1, DstAlign, AAInfo));
+ if (Opcode != TargetOpcode::G_MEMSET)
+ ICall.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(SrcPtr), LoadFlags, 1, SrcAlign, AAInfo));
+
+ return true;
+}
+
+void IRTranslator::getStackGuard(Register DstReg,
+ MachineIRBuilder &MIRBuilder) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
+ auto MIB =
+ MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
+ if (!Global)
+ return;
+
+ unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
+ LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace));
+
+ MachinePointerInfo MPInfo(Global);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
+ MachineMemOperand *MemRef = MF->getMachineMemOperand(
+ MPInfo, Flags, PtrTy, DL->getPointerABIAlignment(AddrSpace));
+ MIB.setMemRefs({MemRef});
+}
+
+bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
+ MachineIRBuilder &MIRBuilder) {
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
+ MIRBuilder.buildInstr(
+ Op, {ResRegs[0], ResRegs[1]},
+ {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});
+
+ return true;
+}
+
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ Register Dst = getOrCreateVReg(CI);
+ Register Src0 = getOrCreateVReg(*CI.getOperand(0));
+ Register Src1 = getOrCreateVReg(*CI.getOperand(1));
+ uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
+ MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+ return true;
+}
+
+unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::bswap:
+ return TargetOpcode::G_BSWAP;
+ case Intrinsic::bitreverse:
+ return TargetOpcode::G_BITREVERSE;
+ case Intrinsic::fshl:
+ return TargetOpcode::G_FSHL;
+ case Intrinsic::fshr:
+ return TargetOpcode::G_FSHR;
+ case Intrinsic::ceil:
+ return TargetOpcode::G_FCEIL;
+ case Intrinsic::cos:
+ return TargetOpcode::G_FCOS;
+ case Intrinsic::ctpop:
+ return TargetOpcode::G_CTPOP;
+ case Intrinsic::exp:
+ return TargetOpcode::G_FEXP;
+ case Intrinsic::exp2:
+ return TargetOpcode::G_FEXP2;
+ case Intrinsic::fabs:
+ return TargetOpcode::G_FABS;
+ case Intrinsic::copysign:
+ return TargetOpcode::G_FCOPYSIGN;
+ case Intrinsic::minnum:
+ return TargetOpcode::G_FMINNUM;
+ case Intrinsic::maxnum:
+ return TargetOpcode::G_FMAXNUM;
+ case Intrinsic::minimum:
+ return TargetOpcode::G_FMINIMUM;
+ case Intrinsic::maximum:
+ return TargetOpcode::G_FMAXIMUM;
+ case Intrinsic::canonicalize:
+ return TargetOpcode::G_FCANONICALIZE;
+ case Intrinsic::floor:
+ return TargetOpcode::G_FFLOOR;
+ case Intrinsic::fma:
+ return TargetOpcode::G_FMA;
+ case Intrinsic::log:
+ return TargetOpcode::G_FLOG;
+ case Intrinsic::log2:
+ return TargetOpcode::G_FLOG2;
+ case Intrinsic::log10:
+ return TargetOpcode::G_FLOG10;
+ case Intrinsic::ldexp:
+ return TargetOpcode::G_FLDEXP;
+ case Intrinsic::nearbyint:
+ return TargetOpcode::G_FNEARBYINT;
+ case Intrinsic::pow:
+ return TargetOpcode::G_FPOW;
+ case Intrinsic::powi:
+ return TargetOpcode::G_FPOWI;
+ case Intrinsic::rint:
+ return TargetOpcode::G_FRINT;
+ case Intrinsic::round:
+ return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::roundeven:
+ return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
+ case Intrinsic::sin:
+ return TargetOpcode::G_FSIN;
+ case Intrinsic::sqrt:
+ return TargetOpcode::G_FSQRT;
+ case Intrinsic::trunc:
+ return TargetOpcode::G_INTRINSIC_TRUNC;
+ case Intrinsic::readcyclecounter:
+ return TargetOpcode::G_READCYCLECOUNTER;
+ case Intrinsic::ptrmask:
+ return TargetOpcode::G_PTRMASK;
+ case Intrinsic::lrint:
+ return TargetOpcode::G_INTRINSIC_LRINT;
+ // FADD/FMUL require checking the FMF, so are handled elsewhere.
+ case Intrinsic::vector_reduce_fmin:
+ return TargetOpcode::G_VECREDUCE_FMIN;
+ case Intrinsic::vector_reduce_fmax:
+ return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_add:
+ return TargetOpcode::G_VECREDUCE_ADD;
+ case Intrinsic::vector_reduce_mul:
+ return TargetOpcode::G_VECREDUCE_MUL;
+ case Intrinsic::vector_reduce_and:
+ return TargetOpcode::G_VECREDUCE_AND;
+ case Intrinsic::vector_reduce_or:
+ return TargetOpcode::G_VECREDUCE_OR;
+ case Intrinsic::vector_reduce_xor:
+ return TargetOpcode::G_VECREDUCE_XOR;
+ case Intrinsic::vector_reduce_smax:
+ return TargetOpcode::G_VECREDUCE_SMAX;
+ case Intrinsic::vector_reduce_smin:
+ return TargetOpcode::G_VECREDUCE_SMIN;
+ case Intrinsic::vector_reduce_umax:
+ return TargetOpcode::G_VECREDUCE_UMAX;
+ case Intrinsic::vector_reduce_umin:
+ return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::lround:
+ return TargetOpcode::G_LROUND;
+ case Intrinsic::llround:
+ return TargetOpcode::G_LLROUND;
+ }
+ return Intrinsic::not_intrinsic;
+}
+
+bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
+ Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder) {
+
+ unsigned Op = getSimpleIntrinsicOpcode(ID);
+
+ // Is this a simple intrinsic?
+ if (Op == Intrinsic::not_intrinsic)
+ return false;
+
+ // Yes. Let's translate it.
+ SmallVector<llvm::SrcOp, 4> VRegs;
+ for (const auto &Arg : CI.args())
+ VRegs.push_back(getOrCreateVReg(*Arg));
+
+ MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+}
+
+// TODO: Include ConstrainedOps.def when all strict instructions are defined.
+static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_constrained_fadd:
+ return TargetOpcode::G_STRICT_FADD;
+ case Intrinsic::experimental_constrained_fsub:
+ return TargetOpcode::G_STRICT_FSUB;
+ case Intrinsic::experimental_constrained_fmul:
+ return TargetOpcode::G_STRICT_FMUL;
+ case Intrinsic::experimental_constrained_fdiv:
+ return TargetOpcode::G_STRICT_FDIV;
+ case Intrinsic::experimental_constrained_frem:
+ return TargetOpcode::G_STRICT_FREM;
+ case Intrinsic::experimental_constrained_fma:
+ return TargetOpcode::G_STRICT_FMA;
+ case Intrinsic::experimental_constrained_sqrt:
+ return TargetOpcode::G_STRICT_FSQRT;
+ case Intrinsic::experimental_constrained_ldexp:
+ return TargetOpcode::G_STRICT_FLDEXP;
+ default:
+ return 0;
+ }
+}
+
+bool IRTranslator::translateConstrainedFPIntrinsic(
+ const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
+
+ unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
+ if (!Opcode)
+ return false;
+
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+ if (EB == fp::ExceptionBehavior::ebIgnore)
+ Flags |= MachineInstr::NoFPExcept;
+
+ SmallVector<llvm::SrcOp, 4> VRegs;
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
+ if (!FPI.isUnaryOp())
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
+ if (FPI.isTernaryOp())
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
+
+ MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
+ return true;
+}
+
+std::optional<MCRegister> IRTranslator::getArgPhysReg(Argument &Arg) {
+ auto VRegs = getOrCreateVRegs(Arg);
+ if (VRegs.size() != 1)
+ return std::nullopt;
+
+ // Arguments are lowered as a copy of a livein physical register.
+ auto *VRegDef = MF->getRegInfo().getVRegDef(VRegs[0]);
+ if (!VRegDef || !VRegDef->isCopy())
+ return std::nullopt;
+ return VRegDef->getOperand(1).getReg().asMCReg();
+}
+
+bool IRTranslator::translateIfEntryValueArgument(const DbgValueInst &DebugInst,
+ MachineIRBuilder &MIRBuilder) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getValue());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg) {
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << DebugInst << "\n");
+ return true;
+ }
+
+ MIRBuilder.buildDirectDbgValue(*PhysReg, DebugInst.getVariable(),
+ DebugInst.getExpression());
+ return true;
+}
+
+bool IRTranslator::translateIfEntryValueArgument(
+ const DbgDeclareInst &DebugInst) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getAddress());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg)
+ return false;
+
+ MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
+ DebugInst.getDebugLoc());
+ return true;
+}
+
+bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder) {
+ if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
+ if (ORE->enabled()) {
+ if (MemoryOpRemark::canHandle(MI, *LibInfo)) {
+ MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo);
+ R.visit(MI);
+ }
+ }
+ }
+
+  // If this is a simple intrinsic (that is, we just need to add a def of a
+  // vreg and uses for each arg operand), then translate it.
+ if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
+ return true;
+
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ // No stack colouring in O0, discard region information.
+ if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+ return true;
+
+ unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
+ : TargetOpcode::LIFETIME_END;
+
+ // Get the underlying objects for the location passed on the lifetime
+ // marker.
+ SmallVector<const Value *, 4> Allocas;
+ getUnderlyingObjects(CI.getArgOperand(1), Allocas);
+
+ // Iterate over each underlying object, creating lifetime markers for each
+ // static alloca. Quit if we find a non-static alloca.
+ for (const Value *V : Allocas) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI)
+ continue;
+
+ if (!AI->isStaticAlloca())
+ return true;
+
+ MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
+ }
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
+ assert(DI.getVariable() && "Missing variable");
+
+ const Value *Address = DI.getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return true;
+ }
+
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ auto AI = dyn_cast<AllocaInst>(Address);
+ if (AI && AI->isStaticAlloca()) {
+ // Static allocas are tracked at the MF level, no need for DBG_VALUE
+ // instructions (in fact, they get ignored if they *do* exist).
+ MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
+ getOrCreateFrameIndex(*AI), DI.getDebugLoc());
+ return true;
+ }
+
+ if (translateIfEntryValueArgument(DI))
+ return true;
+
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
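+ // For illustration: a dbg.declare of a non-static address such as
+ // 'call void @llvm.dbg.declare(metadata ptr %x.addr, metadata !var,
+ // metadata !DIExpression())' ends up as an indirect DBG_VALUE of the vreg
+ // holding %x.addr.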
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ return true;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
+ assert(DI.getLabel() && "Missing label");
+
+ assert(DI.getLabel()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ MIRBuilder.buildDbgLabel(DI.getLabel());
+ return true;
+ }
+ case Intrinsic::vaend:
+ // No target I know of cares about va_end. Certainly no in-tree target
+ // does. Simplest intrinsic ever!
+ return true;
+ case Intrinsic::vastart: {
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Ptr = CI.getArgOperand(0);
+ unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+
+ // FIXME: Get alignment
+ MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
+ .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
+ MachineMemOperand::MOStore,
+ ListSize, Align(1)));
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst &DI = cast<DbgValueInst>(CI);
+ const Value *V = DI.getValue();
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ if (!V || DI.hasArgList()) {
+ // DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
+ // terminate any prior location.
+ MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
+ return true;
+ }
+ if (const auto *CI = dyn_cast<Constant>(V)) {
+ MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
+ return true;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(V);
+ AI && AI->isStaticAlloca() && DI.getExpression()->startsWithDeref()) {
+ // If the value is an alloca and the expression starts with a
+ // dereference, track a stack slot instead of a register, as registers
+ // may be clobbered.
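+ // Roughly: dbg.value(%a, !var, !DIExpression(DW_OP_deref, ...)) where %a is
+ // a static alloca becomes a frame-index DBG_VALUE with the leading
+ // DW_OP_deref dropped from the expression.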
+ auto ExprOperands = DI.getExpression()->getElements();
+ auto *ExprDerefRemoved =
+ DIExpression::get(AI->getContext(), ExprOperands.drop_front());
+ MIRBuilder.buildFIDbgValue(getOrCreateFrameIndex(*AI), DI.getVariable(),
+ ExprDerefRemoved);
+ return true;
+ }
+ if (translateIfEntryValueArgument(DI, MIRBuilder))
+ return true;
+ for (Register Reg : getOrCreateVRegs(*V)) {
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
+ }
+ return true;
+ }
+ case Intrinsic::uadd_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
+ case Intrinsic::sadd_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
+ case Intrinsic::usub_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
+ case Intrinsic::ssub_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
+ case Intrinsic::umul_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
+ case Intrinsic::smul_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::uadd_sat:
+ return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
+ case Intrinsic::sadd_sat:
+ return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
+ case Intrinsic::usub_sat:
+ return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
+ case Intrinsic::ssub_sat:
+ return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
+ case Intrinsic::ushl_sat:
+ return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
+ case Intrinsic::sshl_sat:
+ return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
+ case Intrinsic::umin:
+ return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
+ case Intrinsic::umax:
+ return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
+ case Intrinsic::smin:
+ return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
+ case Intrinsic::smax:
+ return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
+ case Intrinsic::abs:
+ // TODO: Preserve "int min is poison" arg in GMIR?
+ return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
+ case Intrinsic::smul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+ case Intrinsic::umul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+ case Intrinsic::smul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::umul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+ case Intrinsic::udiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+ case Intrinsic::udiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
+ case Intrinsic::fmuladd: {
+ const TargetMachine &TM = MF->getTarget();
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ Register Dst = getOrCreateVReg(CI);
+ Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+ Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+ Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(*MF,
+ TLI.getValueType(*DL, CI.getType()))) {
+ // TODO: Revisit this to see if we should move this part of the
+ // lowering to the combiner.
+ MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
+ MachineInstr::copyFlagsFromInstruction(CI));
+ } else {
+ LLT Ty = getLLTForType(*CI.getType(), *DL);
+ auto FMul = MIRBuilder.buildFMul(
+ Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildFAdd(Dst, FMul, Op2,
+ MachineInstr::copyFlagsFromInstruction(CI));
+ }
+ return true;
+ }
+ case Intrinsic::convert_from_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPExt(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ case Intrinsic::convert_to_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ case Intrinsic::frexp: {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+ MIRBuilder.buildFFrexp(VRegs[0], VRegs[1],
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
+ case Intrinsic::memcpy_inline:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
+ case Intrinsic::memcpy:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
+ case Intrinsic::memmove:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
+ case Intrinsic::memset:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
+ case Intrinsic::eh_typeid_for: {
+ GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
+ Register Reg = getOrCreateVReg(CI);
+ unsigned TypeID = MF->getTypeIDFor(GV);
+ MIRBuilder.buildConstant(Reg, TypeID);
+ return true;
+ }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
+ case Intrinsic::stackguard:
+ getStackGuard(getOrCreateVReg(CI), MIRBuilder);
+ return true;
+ case Intrinsic::stackprotector: {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
+ Register GuardVal;
+ if (TLI.useLoadStackGuardNode()) {
+ GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ getStackGuard(GuardVal, MIRBuilder);
+ } else
+ GuardVal = getOrCreateVReg(*CI.getArgOperand(0)); // The guard's value.
+
+ AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
+ int FI = getOrCreateFrameIndex(*Slot);
+ MF->getFrameInfo().setStackProtectorIndex(FI);
+
+ MIRBuilder.buildStore(
+ GuardVal, getOrCreateVReg(*Slot),
+ *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile,
+ PtrTy, Align(8)));
+ return true;
+ }
+ case Intrinsic::stacksave: {
+ // Return the current stack pointer as the result of the intrinsic.
+ Register Reg = getOrCreateVReg(CI);
+ Register StackPtr = MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore();
+
+ // If the target doesn't specify a stack pointer, then fall back.
+ if (!StackPtr)
+ return false;
+
+ MIRBuilder.buildCopy(Reg, StackPtr);
+ return true;
+ }
+ case Intrinsic::stackrestore: {
+ // Restore the stack pointer from the value provided to the intrinsic.
+ Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
+ Register StackPtr = MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore();
+
+ // If the target doesn't specify a stack pointer, then fall back.
+ if (!StackPtr)
+ return false;
+
+ MIRBuilder.buildCopy(StackPtr, Reg);
+ return true;
+ }
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz: {
+ ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
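+ // The second operand is the i1 'is zero poison' flag: a false flag selects
+ // the plain G_CTTZ/G_CTLZ form, a true flag selects the *_ZERO_UNDEF
+ // variant.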
+ bool isTrailing = ID == Intrinsic::cttz;
+ unsigned Opcode = isTrailing
+ ? Cst->isZero() ? TargetOpcode::G_CTTZ
+ : TargetOpcode::G_CTTZ_ZERO_UNDEF
+ : Cst->isZero() ? TargetOpcode::G_CTLZ
+ : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+ MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*CI.getArgOperand(0))});
+ return true;
+ }
+ case Intrinsic::invariant_start: {
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
+ Register Undef = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildUndef(Undef);
+ return true;
+ }
+ case Intrinsic::invariant_end:
+ return true;
+ case Intrinsic::expect:
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group: {
+ // Drop the intrinsic, but forward the value.
+ MIRBuilder.buildCopy(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::var_annotation:
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, assumptions, and artificial side-effects.
+ return true;
+ case Intrinsic::read_volatile_register:
+ case Intrinsic::read_register: {
+ Value *Arg = CI.getArgOperand(0);
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
+ return true;
+ }
+ case Intrinsic::write_register: {
+ Value *Arg = CI.getArgOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ return true;
+ }
+ case Intrinsic::localescape: {
+ MachineBasicBlock &EntryMBB = MF->front();
+ StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {
+ Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+
+ int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
+ MCSymbol *FrameAllocSym =
+ MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
+ Idx);
+
+ // This should be inserted at the start of the entry block.
+ auto LocalEscape =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+
+ EntryMBB.insert(EntryMBB.begin(), LocalEscape);
+ }
+
+ return true;
+ }
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ // Need to check for the reassoc flag to decide whether we want a
+ // sequential reduction opcode or not.
+ Register Dst = getOrCreateVReg(CI);
+ Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
+ Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
+ unsigned Opc = 0;
+ if (!CI.hasAllowReassoc()) {
+ // The sequential ordering case.
+ Opc = ID == Intrinsic::vector_reduce_fadd
+ ? TargetOpcode::G_VECREDUCE_SEQ_FADD
+ : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+ MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
+ // We split the operation into a separate G_FADD/G_FMUL + the reduce,
+ // since the associativity doesn't matter.
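+ // e.g. with reassoc, a fadd reduction is emitted roughly as:
+ // %r = G_VECREDUCE_FADD %vec
+ // %dst = G_FADD %scalar, %r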
+ unsigned ScalarOpc;
+ if (ID == Intrinsic::vector_reduce_fadd) {
+ Opc = TargetOpcode::G_VECREDUCE_FADD;
+ ScalarOpc = TargetOpcode::G_FADD;
+ } else {
+ Opc = TargetOpcode::G_VECREDUCE_FMUL;
+ ScalarOpc = TargetOpcode::G_FMUL;
+ }
+ LLT DstTy = MRI->getType(Dst);
+ auto Rdx = MIRBuilder.buildInstr(
+ Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
+ MachineInstr::copyFlagsFromInstruction(CI));
+
+ return true;
+ }
+ case Intrinsic::trap:
+ case Intrinsic::debugtrap:
+ case Intrinsic::ubsantrap: {
+ StringRef TrapFuncName =
+ CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
+ if (TrapFuncName.empty())
+ break; // Use the default handling.
+ CallLowering::CallLoweringInfo Info;
+ if (ID == Intrinsic::ubsantrap) {
+ Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
+ CI.getArgOperand(0)->getType(), 0});
+ }
+ Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
+ Info.CB = &CI;
+ Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
+ return CLI->lowerCall(MIRBuilder, Info);
+ }
+ case Intrinsic::fptrunc_round: {
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+ // Convert the metadata argument to a constant integer
+ Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
+ std::optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ // Add the Rounding mode as an integer
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
+ {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
+ .addImm((int)*RoundMode);
+
+ return true;
+ }
+ case Intrinsic::is_fpclass: {
+ Value *FpValue = CI.getOperand(0);
+ ConstantInt *TestMaskValue = cast<ConstantInt>(CI.getOperand(1));
+
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_IS_FPCLASS, {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*FpValue)})
+ .addImm(TestMaskValue->getZExtValue());
+
+ return true;
+ }
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+ return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
+ MIRBuilder);
+
+ }
+ return false;
+}
+
+bool IRTranslator::translateInlineAsm(const CallBase &CB,
+ MachineIRBuilder &MIRBuilder) {
+
+ const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
+
+ if (!ALI) {
+ LLVM_DEBUG(
+ dbgs() << "Inline asm lowering is not supported for this target yet\n");
+ return false;
+ }
+
+ return ALI->lowerInlineAsm(
+ MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
+}
+
+bool IRTranslator::translateCallBase(const CallBase &CB,
+ MachineIRBuilder &MIRBuilder) {
+ ArrayRef<Register> Res = getOrCreateVRegs(CB);
+
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftInVReg = 0;
+ Register SwiftErrorVReg = 0;
+ for (const auto &Arg : CB.args()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &CB, &MIRBuilder.getMBB(), Arg));
+ Args.emplace_back(ArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(&CB)) {
+ if (ORE->enabled()) {
+ if (MemoryOpRemark::canHandle(CI, *LibInfo)) {
+ MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo);
+ R.visit(CI);
+ }
+ }
+ }
+
+ // We don't set HasCalls on MFI here yet because call lowering may decide to
+ // optimize into tail calls. Instead, we defer that to selection where a final
+ // scan is done to check if any instructions are calls.
+ bool Success =
+ CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
+ [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
+
+ // Check if we just inserted a tail call.
+ if (Success) {
+ assert(!HasTailCall && "Can't tail call return twice from block?");
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
+ }
+
+ return Success;
+}
+
+bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
+ const CallInst &CI = cast<CallInst>(U);
+ auto TII = MF->getTarget().getIntrinsicInfo();
+ const Function *F = CI.getCalledFunction();
+
+ // FIXME: support Windows dllimport function calls.
+ if (F && (F->hasDLLImportStorageClass() ||
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ F->hasExternalWeakLinkage())))
+ return false;
+
+ // FIXME: support control flow guard targets.
+ if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
+ return false;
+
+ // FIXME: support statepoints and related.
+ if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
+ return false;
+
+ if (CI.isInlineAsm())
+ return translateInlineAsm(CI, MIRBuilder);
+
+ diagnoseDontCall(CI);
+
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ if (F && F->isIntrinsic()) {
+ ID = F->getIntrinsicID();
+ if (TII && ID == Intrinsic::not_intrinsic)
+ ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
+ }
+
+ if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
+ return translateCallBase(CI, MIRBuilder);
+
+ assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
+
+ if (translateKnownIntrinsic(CI, ID, MIRBuilder))
+ return true;
+
+ ArrayRef<Register> ResultRegs;
+ if (!CI.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CI);
+
+ // Ignore the callsite attributes. Backend code is most likely not expecting
+ // an intrinsic to sometimes have side effects and sometimes not.
+ MachineInstrBuilder MIB =
+ MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+ if (isa<FPMathOperator>(CI))
+ MIB->copyIRFlags(CI);
+
+ for (const auto &Arg : enumerate(CI.args())) {
+ // If this is required to be an immediate, don't materialize it in a
+ // register.
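+ // For instance, operands marked ImmArg (such as the rw/locality arguments
+ // of llvm.prefetch) must be encoded as immediate operands rather than being
+ // put in vregs.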
+ if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
+ // imm arguments are more convenient than cimm (and realistically
+ // probably sufficient), so use them.
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ MIB.addImm(CI->getSExtValue());
+ } else {
+ MIB.addFPImm(cast<ConstantFP>(Arg.value()));
+ }
+ } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MD = MDVal->getMetadata();
+ auto *MDN = dyn_cast<MDNode>(MD);
+ if (!MDN) {
+ if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD))
+ MDN = MDNode::get(MF->getFunction().getContext(), ConstMD);
+ else // This was probably an MDString.
+ return false;
+ }
+ MIB.addMetadata(MDN);
+ } else {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
+ if (VRegs.size() > 1)
+ return false;
+ MIB.addUse(VRegs[0]);
+ }
+ }
+
+ // Add a MachineMemOperand if it is a target mem intrinsic.
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ TargetLowering::IntrinsicInfo Info;
+ // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
+ if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+ Align Alignment = Info.align.value_or(
+ DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
+ LLT MemTy = Info.memVT.isSimple()
+ ? getLLTForMVT(Info.memVT.getSimpleVT())
+ : LLT::scalar(Info.memVT.getStoreSizeInBits());
+
+ // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic
+ // didn't yield anything useful.
+ MachinePointerInfo MPI;
+ if (Info.ptrVal)
+ MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
+ else if (Info.fallbackAddressSpace)
+ MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ MIB.addMemOperand(
+ MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+ }
+
+ return true;
+}
+
+bool IRTranslator::findUnwindDestinations(
+ const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality = classifyEHPersonality(
+ EHPadBB->getParent()->getFunction().getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+ if (IsWasmCXX) {
+ // Ignore this for now.
+ return false;
+ }
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ break;
+ }
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ }
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+ return true;
+}
+
+bool IRTranslator::translateInvoke(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const InvokeInst &I = cast<InvokeInst>(U);
+ MCContext &Context = MF->getContext();
+
+ const BasicBlock *ReturnBB = I.getSuccessor(0);
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
+
+ const Function *Fn = I.getCalledFunction();
+
+ // FIXME: support invoking patchpoint and statepoint intrinsics.
+ if (Fn && Fn->isIntrinsic())
+ return false;
+
+ // FIXME: support whatever these are.
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ return false;
+
+ // FIXME: support control flow guard targets.
+ if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
+ return false;
+
+ // FIXME: support Windows exception handling.
+ if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
+ return false;
+
+ bool LowerInlineAsm = I.isInlineAsm();
+ bool NeedEHLabel = true;
+
+ // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
+ // the region covered by the try.
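+ // Roughly: EH_LABEL <begin>, the lowered call, EH_LABEL <end>; the label
+ // pair is registered via MF->addInvoke() below.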
+ MCSymbol *BeginSymbol = nullptr;
+ if (NeedEHLabel) {
+ MIRBuilder.buildInstr(TargetOpcode::G_INVOKE_REGION_START);
+ BeginSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ }
+
+ if (LowerInlineAsm) {
+ if (!translateInlineAsm(I, MIRBuilder))
+ return false;
+ } else if (!translateCallBase(I, MIRBuilder))
+ return false;
+
+ MCSymbol *EndSymbol = nullptr;
+ if (NeedEHLabel) {
+ EndSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ }
+
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+
+ if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
+ return false;
+
+ MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
+ &ReturnMBB = getMBB(*ReturnBB);
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, &ReturnMBB);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
+ if (NeedEHLabel) {
+ assert(BeginSymbol && "Expected a begin symbol!");
+ assert(EndSymbol && "Expected an end symbol!");
+ MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ }
+
+ MIRBuilder.buildBr(ReturnMBB);
+ return true;
+}
+
+bool IRTranslator::translateCallBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: Implement this.
+ return false;
+}
+
+bool IRTranslator::translateLandingPad(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const LandingPadInst &LP = cast<LandingPadInst>(U);
+
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+
+ MBB.setIsEHPad();
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother.
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return true;
+
+ // If the landingpad's return type is token type, we don't create values for
+ // its exception pointer and selector. The extraction of exception pointer or
+ // selector value from token type landingpads is not currently supported.
+ if (LP.getType()->isTokenTy())
+ return true;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
+ .addSym(MF->addLandingPad(&MBB));
+
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
+ LLT Ty = getLLTForType(*LP.getType(), *DL);
+ Register Undef = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Undef);
+
+ SmallVector<LLT, 2> Tys;
+ for (Type *Ty : cast<StructType>(LP.getType())->elements())
+ Tys.push_back(getLLTForType(*Ty, *DL));
+ assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
+
+ // Mark exception register as live in.
+ Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ if (!ExceptionReg)
+ return false;
+
+ MBB.addLiveIn(ExceptionReg);
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
+ MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
+
+ Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ if (!SelectorReg)
+ return false;
+
+ MBB.addLiveIn(SelectorReg);
+ Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+ MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+ MIRBuilder.buildCast(ResRegs[1], PtrVReg);
+
+ return true;
+}
+
+bool IRTranslator::translateAlloca(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ auto &AI = cast<AllocaInst>(U);
+
+ if (AI.isSwiftError())
+ return true;
+
+ if (AI.isStaticAlloca()) {
+ Register Res = getOrCreateVReg(AI);
+ int FI = getOrCreateFrameIndex(AI);
+ MIRBuilder.buildFrameIndex(Res, FI);
+ return true;
+ }
+
+ // FIXME: support stack probing for Windows.
+ if (MF->getTarget().getTargetTriple().isOSWindows())
+ return false;
+
+ // Now we're in the harder dynamic case.
+ Register NumElts = getOrCreateVReg(*AI.getArraySize());
+ Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
+ LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
+ if (MRI->getType(NumElts) != IntPtrTy) {
+ Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
+ NumElts = ExtElts;
+ }
+
+ Type *Ty = AI.getAllocatedType();
+
+ Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+ Register TySize =
+ getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
+ MIRBuilder.buildMul(AllocSize, NumElts, TySize);
+
+ // Round the size of the allocation up to the stack alignment size
+ // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
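+ // e.g. with a 16-byte stack alignment:
+ // AlignedSize = (AllocSize + 15) & ~15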
+ Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
+ auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
+ auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+ MachineInstr::NoUWrap);
+ auto AlignCst =
+ MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
+ auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
+
+ Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
+ if (Alignment <= StackAlign)
+ Alignment = Align(1);
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
+
+ MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
+ assert(MF->getFrameInfo().hasVarSizedObjects());
+ return true;
+}
+
+bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
+ // FIXME: We may need more info about the type. Because of how LLT works,
+ // we're completely discarding the i64/double distinction here (amongst
+ // others). Fortunately the ABIs I know of where that matters don't use va_arg
+ // anyway but that's not guaranteed.
+ MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
+ {getOrCreateVReg(*U.getOperand(0)),
+ DL->getABITypeAlign(U.getType()).value()});
+ return true;
+}
+
+bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
+ if (!MF->getTarget().Options.TrapUnreachable)
+ return true;
+
+ auto &UI = cast<UnreachableInst>(U);
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (MF->getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *UI.getParent();
+ if (&UI != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(UI));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return true;
+ }
+ }
+ }
+
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ return true;
+}
+
+bool IRTranslator::translateInsertElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar directly, since <1 x Ty>
+ // is not a legal vector type in LLT.
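+ // e.g. 'insertelement <1 x i32> %v, i32 %x, i32 0' is translated as a plain
+ // copy of the scalar %x.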
+ if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
+ return translateCopy(U, *U.getOperand(1), MIRBuilder);
+
+ Register Res = getOrCreateVReg(U);
+ Register Val = getOrCreateVReg(*U.getOperand(0));
+ Register Elt = getOrCreateVReg(*U.getOperand(1));
+ Register Idx = getOrCreateVReg(*U.getOperand(2));
+ MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
+ return true;
+}
+
+bool IRTranslator::translateExtractElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar directly, since <1 x Ty>
+ // is not a legal vector type in LLT.
+ if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
+ return translateCopy(U, *U.getOperand(0), MIRBuilder);
+
+ Register Res = getOrCreateVReg(U);
+ Register Val = getOrCreateVReg(*U.getOperand(0));
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
+ Register Idx;
+ if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
+ if (CI->getBitWidth() != PreferredVecIdxWidth) {
+ APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
+ auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
+ Idx = getOrCreateVReg(*NewIdxCI);
+ }
+ }
+ if (!Idx)
+ Idx = getOrCreateVReg(*U.getOperand(1));
+ if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
+ const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
+ }
+ MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
+ return true;
+}
+
+bool IRTranslator::translateShuffleVector(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ ArrayRef<int> Mask;
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
+ Mask = SVI->getShuffleMask();
+ else
+ Mask = cast<ConstantExpr>(U).getShuffleMask();
+ ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
+ {getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1))})
+ .addShuffleMask(MaskAlloc);
+ return true;
+}
+
+bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
+ const PHINode &PI = cast<PHINode>(U);
+
+ SmallVector<MachineInstr *, 4> Insts;
+ for (auto Reg : getOrCreateVRegs(PI)) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
+ Insts.push_back(MIB.getInstr());
+ }
+
+ PendingPHIs.emplace_back(&PI, std::move(Insts));
+ return true;
+}
+
+bool IRTranslator::translateAtomicCmpXchg(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
+
+ auto Res = getOrCreateVRegs(I);
+ Register OldValRes = Res[0];
+ Register SuccessRes = Res[1];
+ Register Addr = getOrCreateVReg(*I.getPointerOperand());
+ Register Cmp = getOrCreateVReg(*I.getCompareOperand());
+ Register NewVal = getOrCreateVReg(*I.getNewValOperand());
+
+ MIRBuilder.buildAtomicCmpXchgWithSuccess(
+ OldValRes, SuccessRes, Addr, Cmp, NewVal,
+ *MF->getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp),
+ getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(),
+ I.getSuccessOrdering(), I.getFailureOrdering()));
+ return true;
+}
+
+bool IRTranslator::translateAtomicRMW(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
+
+ Register Res = getOrCreateVReg(I);
+ Register Addr = getOrCreateVReg(*I.getPointerOperand());
+ Register Val = getOrCreateVReg(*I.getValOperand());
+
+ unsigned Opcode = 0;
+ switch (I.getOperation()) {
+ default:
+ return false;
+ case AtomicRMWInst::Xchg:
+ Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
+ break;
+ case AtomicRMWInst::Add:
+ Opcode = TargetOpcode::G_ATOMICRMW_ADD;
+ break;
+ case AtomicRMWInst::Sub:
+ Opcode = TargetOpcode::G_ATOMICRMW_SUB;
+ break;
+ case AtomicRMWInst::And:
+ Opcode = TargetOpcode::G_ATOMICRMW_AND;
+ break;
+ case AtomicRMWInst::Nand:
+ Opcode = TargetOpcode::G_ATOMICRMW_NAND;
+ break;
+ case AtomicRMWInst::Or:
+ Opcode = TargetOpcode::G_ATOMICRMW_OR;
+ break;
+ case AtomicRMWInst::Xor:
+ Opcode = TargetOpcode::G_ATOMICRMW_XOR;
+ break;
+ case AtomicRMWInst::Max:
+ Opcode = TargetOpcode::G_ATOMICRMW_MAX;
+ break;
+ case AtomicRMWInst::Min:
+ Opcode = TargetOpcode::G_ATOMICRMW_MIN;
+ break;
+ case AtomicRMWInst::UMax:
+ Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
+ break;
+ case AtomicRMWInst::UMin:
+ Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
+ break;
+ case AtomicRMWInst::FAdd:
+ Opcode = TargetOpcode::G_ATOMICRMW_FADD;
+ break;
+ case AtomicRMWInst::FSub:
+ Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
+ break;
+ case AtomicRMWInst::FMax:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
+ break;
+ case AtomicRMWInst::FMin:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
+ break;
+ case AtomicRMWInst::UIncWrap:
+ Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP;
+ break;
+ case AtomicRMWInst::UDecWrap:
+ Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP;
+ break;
+ }
+
+ MIRBuilder.buildAtomicRMW(
+ Opcode, Res, Addr, Val,
+ *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, MRI->getType(Val), getMemOpAlign(I),
+ I.getAAMetadata(), nullptr, I.getSyncScopeID(),
+ I.getOrdering()));
+ return true;
+}
+
+bool IRTranslator::translateFence(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const FenceInst &Fence = cast<FenceInst>(U);
+ MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
+ Fence.getSyncScopeID());
+ return true;
+}
+
+bool IRTranslator::translateFreeze(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
+ const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));
+
+ assert(DstRegs.size() == SrcRegs.size() &&
+ "Freeze with different source and destination type?");
+
+ for (unsigned I = 0; I < DstRegs.size(); ++I) {
+ MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
+ }
+
+ return true;
+}
+
+void IRTranslator::finishPendingPhis() {
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ GISelObserverWrapper WrapperObserver(&Verifier);
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+#endif // ifndef NDEBUG
+ for (auto &Phi : PendingPHIs) {
+ const PHINode *PI = Phi.first;
+ ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+ MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
+ EntryBuilder->setDebugLoc(PI->getDebugLoc());
+#ifndef NDEBUG
+ Verifier.setCurrentInst(PI);
+#endif // ifndef NDEBUG
+
+ SmallSet<const MachineBasicBlock *, 16> SeenPreds;
+ for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
+ auto IRPred = PI->getIncomingBlock(i);
+ ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
+ for (auto *Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
+ if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
+ continue;
+ SeenPreds.insert(Pred);
+ for (unsigned j = 0; j < ValRegs.size(); ++j) {
+ MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
+ MIB.addUse(ValRegs[j]);
+ MIB.addMBB(Pred);
+ }
+ }
+ }
+ }
+}
+
+bool IRTranslator::translate(const Instruction &Inst) {
+ CurBuilder->setDebugLoc(Inst.getDebugLoc());
+ CurBuilder->setPCSections(Inst.getMetadata(LLVMContext::MD_pcsections));
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ if (TLI.fallBackToDAGISel(Inst))
+ return false;
+
+ switch (Inst.getOpcode()) {
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(Inst, *CurBuilder.get());
+#include "llvm/IR/Instruction.def"
+ default:
+ return false;
+ }
+}
+
+bool IRTranslator::translate(const Constant &C, Register Reg) {
+ // We only emit constants into the entry block from here. To prevent jumpy
+ // debug behaviour, drop the debug location.
+ if (auto CurrInstDL = CurBuilder->getDL())
+ EntryBuilder->setDebugLoc(DebugLoc());
+
+ if (auto CI = dyn_cast<ConstantInt>(&C))
+ EntryBuilder->buildConstant(Reg, *CI);
+ else if (auto CF = dyn_cast<ConstantFP>(&C))
+ EntryBuilder->buildFConstant(Reg, *CF);
+ else if (isa<UndefValue>(C))
+ EntryBuilder->buildUndef(Reg);
+ else if (isa<ConstantPointerNull>(C))
+ EntryBuilder->buildConstant(Reg, 0);
+ else if (auto GV = dyn_cast<GlobalValue>(&C))
+ EntryBuilder->buildGlobalValue(Reg, GV);
+ else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
+ if (!isa<FixedVectorType>(CAZ->getType()))
+ return false;
+ // Return the scalar if it is a <1 x Ty> vector.
+ unsigned NumElts = CAZ->getElementCount().getFixedValue();
+ if (NumElts == 1)
+ return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder);
+ SmallVector<Register, 4> Ops;
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant &Elt = *CAZ->getElementValue(I);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder->buildBuildVector(Reg, Ops);
+ } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CV->getNumElements() == 1)
+ return translateCopy(C, *CV->getElementAsConstant(0), *EntryBuilder);
+ SmallVector<Register, 4> Ops;
+ for (unsigned i = 0; i < CV->getNumElements(); ++i) {
+ Constant &Elt = *CV->getElementAsConstant(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder->buildBuildVector(Reg, Ops);
+ } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
+ switch(CE->getOpcode()) {
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(*CE, *EntryBuilder.get());
+#include "llvm/IR/Instruction.def"
+ default:
+ return false;
+ }
+ } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
+ if (CV->getNumOperands() == 1)
+ return translateCopy(C, *CV->getOperand(0), *EntryBuilder);
+ SmallVector<Register, 4> Ops;
+ for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
+ Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
+ }
+ EntryBuilder->buildBuildVector(Reg, Ops);
+ } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
+ EntryBuilder->buildBlockAddress(Reg, BA);
+ } else
+ return false;
+
+ return true;
+}
+
+bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
+ MachineBasicBlock &MBB) {
+ for (auto &BTB : SL->BitTestCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!BTB.Emitted)
+ emitBitTestHeader(BTB, BTB.Parent);
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the mbb we wish to insert the code into
+ MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
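+ // For illustration: if the tests cover every value of the range already
+ // checked by the header (say values 0-3 against a range check of [0, 3]),
+ // the final test cannot fail, so its predecessor can branch straight to the
+ // final test's target.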
+
+ MachineBasicBlock *NextMBB;
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
+
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // We need to record the replacement phi edge here that normally
+ // happens in emitBitTestCase before we delete the case, otherwise the
+ // phi edge will be lost.
+ addMachineCFGPred({BTB.Parent->getBasicBlock(),
+ BTB.Cases[ej - 1].TargetBB->getBasicBlock()},
+ MBB);
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+ // This is "default" BB. We have two jumps to it. From "header" BB and from
+ // last "case" BB, unless the latter was skipped.
+ CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
+ BTB.Default->getBasicBlock()};
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
+ }
+ }
+ SL->BitTestCases.clear();
+
+ for (auto &JTCase : SL->JTCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!JTCase.first.Emitted)
+ emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
+
+ emitJumpTable(JTCase.second, JTCase.second.MBB);
+ }
+ SL->JTCases.clear();
+
+ for (auto &SwCase : SL->SwitchCases)
+ emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
+ SL->SwitchCases.clear();
+
+ // Check if we need to generate stack-protector guard checks.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ if (SP.shouldEmitSDCheck(BB)) {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ bool FunctionBasedInstrumentation =
+ TLI.getSSPStackGuardCheck(*MF->getFunction().getParent());
+ SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation);
+ }
+ // Handle stack protector.
+ if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
+ return false;
+ } else if (SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This saves us from having to deal with live-ins and many other
+ // register allocation issues caused by splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector(
+ ParentMBB, *MF->getSubtarget().getInstrInfo());
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ if (!emitSPDescriptorParent(SPDescriptor, ParentMBB))
+ return false;
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB))
+ return false;
+ }
+
+ // Clear the Per-BB State.
+ SPDescriptor.resetPerBBState();
+ }
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+ CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
+ // First create the loads to the guard/stack slot for the comparison.
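+ // The emitted check is roughly: load the value stored in the protector
+ // slot, load the reference guard value, compare them with G_ICMP ne, and
+ // branch to the failure MBB if they differ, otherwise to the success MBB.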
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
+
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
+
+ Register Guard;
+ Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
+ Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ Register GuardVal =
+ CurBuilder
+ ->buildLoad(PtrMemTy, StackSlotPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile)
+ .getReg(0);
+
+ if (TLI.useStackGuardXorFP()) {
+ LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
+ return false;
+ }
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ // This path is currently untestable on GlobalISel, since the only platform
+ // that needs this seems to be Windows, and we fall back on that currently.
+ // The code still lives here in case that changes.
+ // Silence warning about unused variable until the code below that uses
+ // 'GuardCheckFn' is enabled.
+ (void)GuardCheckFn;
+ return false;
+#if 0
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+ ISD::ArgFlagsTy Flags;
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Flags.setInReg();
+ CallLowering::ArgInfo GuardArgInfo(
+ {GuardVal, FnTy->getParamType(0), {Flags}});
+
+ CallLowering::CallLoweringInfo Info;
+ Info.OrigArgs.push_back(GuardArgInfo);
+ Info.CallConv = GuardCheckFn->getCallingConv();
+ Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0);
+ Info.OrigRet = {Register(), FnTy->getReturnType()};
+ if (!CLI->lowerCall(MIRBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
+ return false;
+ }
+ return true;
+#endif
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ if (TLI.useLoadStackGuardNode()) {
+ Guard =
+ MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
+ getStackGuard(Guard, *CurBuilder);
+ } else {
+ // TODO: test using android subtarget when we support @llvm.thread.pointer.
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ Register GuardPtr = getOrCreateVReg(*IRGuard);
+
+ Guard = CurBuilder
+ ->buildLoad(PtrMemTy, GuardPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile)
+ .getReg(0);
+ }
+
+ // Perform the comparison.
+ auto Cmp =
+ CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
+ // If the guard/stackslot do not equal, branch to failure MBB.
+ CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
+ // Otherwise branch to success MBB.
+ CurBuilder->buildBr(*SPD.getSuccessMBB());
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *FailureBB) {
+ CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+
+ const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
+ const char *Name = TLI.getLibcallName(Libcall);
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
+ 0};
+ if (!CLI->lowerCall(*CurBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
+ return false;
+ }
+
+ // On PS4/PS5, the "return address" must still be within the calling
+ // function, even if it's at the very end, so emit an explicit TRAP here.
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ const TargetMachine &TM = MF->getTarget();
+ if (TM.getTargetTriple().isPS() || TM.getTargetTriple().isWasm()) {
+ LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
+ return false;
+ }
+ return true;
+}
+
+void IRTranslator::finalizeFunction() {
+ // Release the memory used by the different maps we
+ // needed during the translation.
+ PendingPHIs.clear();
+ VMap.reset();
+ FrameIndices.clear();
+ MachinePreds.clear();
+ // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
+ // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
+ // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
+ EntryBuilder.reset();
+ CurBuilder.reset();
+ FuncInfo.clear();
+ SPDescriptor.resetPerFunctionState();
+}
+
+/// Returns true if a BasicBlock \p BB within a variadic function contains a
+/// variadic musttail call.
+static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
+ if (!IsVarArg)
+ return false;
+
+ // Walk the block backwards, because tail calls usually only appear at the end
+ // of a block.
+ return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ return CI && CI->isMustTailCall();
+ });
+}
+
+bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
+ MF = &CurMF;
+ const Function &F = MF->getFunction();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ // Set the CSEConfig and run the analysis.
+ GISelCSEInfo *CSEInfo = nullptr;
+ TPC = &getAnalysis<TargetPassConfig>();
+ bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
+ ? EnableCSEInIRTranslator
+ : TPC->isGISelCSEEnabled();
+
+ if (EnableCSE) {
+ EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
+ CSEInfo = &Wrapper.get(TPC->getCSEConfig());
+ EntryBuilder->setCSEInfo(CSEInfo);
+ CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
+ CurBuilder->setCSEInfo(CSEInfo);
+ } else {
+ EntryBuilder = std::make_unique<MachineIRBuilder>();
+ CurBuilder = std::make_unique<MachineIRBuilder>();
+ }
+ CLI = MF->getSubtarget().getCallLowering();
+ CurBuilder->setMF(*MF);
+ EntryBuilder->setMF(*MF);
+ MRI = &MF->getRegInfo();
+ DL = &F.getParent()->getDataLayout();
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+ const TargetMachine &TM = MF->getTarget();
+ TM.resetTargetOptions(F);
+ EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
+ FuncInfo.MF = MF;
+ if (EnableOpts) {
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ } else {
+ AA = nullptr;
+ FuncInfo.BPI = nullptr;
+ }
+
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ MF->getFunction());
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
+
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+
+ SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
+ SL->init(TLI, TM, *DL);
+
+
+ assert(PendingPHIs.empty() && "stale PHIs");
+
+ // Targets that want to use big endian can enable it using
+ // enableBigEndian().
+ if (!DL->isLittleEndian() && !CLI->enableBigEndian()) {
+ // Currently we don't properly handle big endian code.
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ F.getSubprogram(), &F.getEntryBlock());
+ R << "unable to translate in big endian mode";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ }
+
+ // Release the per-function state when we return, whether we succeeded or not.
+ auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
+
+ // Setup a separate basic-block for the arguments and constants
+ MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
+ MF->push_back(EntryBB);
+ EntryBuilder->setMBB(*EntryBB);
+
+ DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
+ SwiftError.setFunction(CurMF);
+ SwiftError.createEntriesInEntryBlock(DbgLoc);
+
+ bool IsVarArg = F.isVarArg();
+ bool HasMustTailInVarArgFn = false;
+
+ // Create all blocks, in IR order, to preserve the layout.
+ for (const BasicBlock &BB: F) {
+ auto *&MBB = BBToMBB[&BB];
+
+ MBB = MF->CreateMachineBasicBlock(&BB);
+ MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));
+
+ if (!HasMustTailInVarArgFn)
+ HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
+ }
+
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
+
+ // Make our arguments/constants entry block fallthrough to the IR entry block.
+ EntryBB->addSuccessor(&getMBB(F.front()));
+
+ if (CLI->fallBackToDAGISel(*MF)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ F.getSubprogram(), &F.getEntryBlock());
+ R << "unable to lower function: " << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
+
+ // Lower the actual args into this basic block.
+ SmallVector<ArrayRef<Register>, 8> VRegArgs;
+ for (const Argument &Arg: F.args()) {
+ if (DL->getTypeStoreSize(Arg.getType()).isZero())
+ continue; // Don't handle zero sized types.
+ ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
+ VRegArgs.push_back(VRegs);
+
+ if (Arg.hasSwiftErrorAttr()) {
+ assert(VRegs.size() == 1 && "Too many vregs for Swift error");
+ SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
+ }
+ }
+
+ if (!CLI->lowerFormalArguments(*EntryBuilder, F, VRegArgs, FuncInfo)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ F.getSubprogram(), &F.getEntryBlock());
+ R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
+
+ // Need to visit defs before uses when translating instructions.
+ GISelObserverWrapper WrapperObserver;
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ {
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ WrapperObserver.addObserver(&Verifier);
+#endif // ifndef NDEBUG
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+ RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
+ for (const BasicBlock *BB : RPOT) {
+ MachineBasicBlock &MBB = getMBB(*BB);
+ // Set the insertion point of all the following translations to
+ // the end of this basic block.
+ CurBuilder->setMBB(MBB);
+ HasTailCall = false;
+ for (const Instruction &Inst : *BB) {
+ // If we translated a tail call in the last step, then we know
+ // everything after the call is either a return, or something that is
+ // handled by the call itself. (E.g. a lifetime marker or assume
+ // intrinsic.) In this case, we should stop translating the block and
+ // move on.
+ if (HasTailCall)
+ break;
+#ifndef NDEBUG
+ Verifier.setCurrentInst(&Inst);
+#endif // ifndef NDEBUG
+ if (translate(Inst))
+ continue;
+
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
+
+ if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
+
+ R << ": '" << InstStr.str() << "'";
+ }
+
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
+
+ if (!finalizeBasicBlock(*BB, MBB)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ BB->getTerminator()->getDebugLoc(), BB);
+ R << "unable to translate basic block";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
+ }
+#ifndef NDEBUG
+ WrapperObserver.removeObserver(&Verifier);
+#endif
+ }
+
+ finishPendingPhis();
+
+ SwiftError.propagateVRegs();
+
+ // Merge the argument lowering and constants block with its single
+ // successor, the LLVM-IR entry block. We want the basic block to
+ // be maximal.
+ assert(EntryBB->succ_size() == 1 &&
+ "Custom BB used for lowering should have only one successor");
+ // Get the successor of the current entry block.
+ MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
+ assert(NewEntryBB.pred_size() == 1 &&
+ "LLVM-IR entry block has a predecessor!?");
+ // Move all the instructions from the current entry block to the
+ // new entry block.
+ NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
+ EntryBB->end());
+
+ // Update the live-in information for the new entry block.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
+ NewEntryBB.addLiveIn(LiveIn);
+ NewEntryBB.sortUniqueLiveIns();
+
+ // Get rid of the now empty basic block.
+ EntryBB->removeSuccessor(&NewEntryBB);
+ MF->remove(EntryBB);
+ MF->deleteMachineBasicBlock(EntryBB);
+
+ assert(&MF->front() == &NewEntryBB &&
+ "New entry wasn't next in the list of basic block!");
+
+ // Initialize stack protector information.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
new file mode 100644
index 000000000000..3925611f1485
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -0,0 +1,687 @@
+//===-- lib/CodeGen/GlobalISel/InlineAsmLowering.cpp ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering from LLVM IR inline asm to MIR INLINEASM
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "inline-asm-lowering"
+
+using namespace llvm;
+
+void InlineAsmLowering::anchor() {}
+
+namespace {
+
+/// GISelAsmOperandInfo - This contains information for each constraint that we
+/// are lowering.
+class GISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// Regs - If this is a register or register class operand, this
+ /// contains the set of assigned registers corresponding to the operand.
+ SmallVector<Register, 1> Regs;
+
+ explicit GISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &Info)
+ : TargetLowering::AsmOperandInfo(Info) {}
+};
+
+using GISelAsmOperandInfoVector = SmallVector<GISelAsmOperandInfo, 16>;
+
+class ExtraFlags {
+ unsigned Flags = 0;
+
+public:
+ explicit ExtraFlags(const CallBase &CB) {
+ const InlineAsm *IA = cast<InlineAsm>(CB.getCalledOperand());
+ if (IA->hasSideEffects())
+ Flags |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ Flags |= InlineAsm::Extra_IsAlignStack;
+ if (CB.isConvergent())
+ Flags |= InlineAsm::Extra_IsConvergent;
+ Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+ }
+
+ void update(const TargetLowering::AsmOperandInfo &OpInfo) {
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an Other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for Other constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ Flags |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ Flags |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ unsigned get() const { return Flags; }
+};
+
+} // namespace
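+
+// A minimal usage sketch of ExtraFlags (operand names hypothetical): the
+// constructor seeds the flag word from the call site, update() conservatively
+// widens it per constraint, and get() yields the immediate that is attached
+// to the INLINEASM built below.
+//
+//   ExtraFlags EF(Call);       // e.g. Extra_HasSideEffects for "sideeffect" asm
+//   for (const auto &OpInfo : ConstraintOperands)
+//     EF.update(OpInfo);       // memory/"other" constraints add MayLoad/MayStore
+//   unsigned ExtraInfoImm = EF.get();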
+
+/// Assign virtual/physical registers for the specified register operand.
+static void getRegistersForValue(MachineFunction &MF,
+ MachineIRBuilder &MIRBuilder,
+ GISelAsmOperandInfo &OpInfo,
+ GISelAsmOperandInfo &RefOpInfo) {
+
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // No work to do for memory operations.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ return;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ Register AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is unset only on failure. Return immediately.
+ if (!RC)
+ return;
+
+ // No need to allocate a matching input constraint since the constraint it's
+ // matching to has already been allocated.
+ if (OpInfo.isMatchingInputConstraint())
+ return;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs =
+ TLI.getNumRegisters(MF.getFunction().getContext(), OpInfo.ConstraintVT);
+
+ // If this is a constraint for a specific physical register, but the type of
+ // the operand requires more than one register to be passed, we allocate the
+ // required amount of physical registers, starting from the selected physical
+ // register.
+ // For this, first retrieve a register iterator for the given register class
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Advance the iterator to the assigned register (if set)
+ if (AssignedReg) {
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "AssignedReg should be a member of provided RC");
+ }
+
+ // Finally, assign the registers. If the AssignedReg isn't set, create virtual
+ // registers with the provided register class
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
+ OpInfo.Regs.push_back(R);
+ }
+}
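+
+// Worked example (register names hypothetical, assuming the class enumerates
+// W0, W1, ... in order): for a specific-physreg constraint such as "{w0}"
+// whose value needs two registers, the iterator is advanced to W0 and
+// OpInfo.Regs becomes {W0, W1}. For a plain class constraint such as "r",
+// AssignedReg stays unset and the same number of fresh virtual registers of
+// that class is created instead.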
+
+/// Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ case TargetLowering::C_Address:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering *TLI) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI->getConstraintType(OpInfo.Codes[i]);
+
+ // Indirect 'other' or 'immediate' constraints are not allowed.
+ if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
+ CType == TargetLowering::C_Register ||
+ CType == TargetLowering::C_RegisterClass))
+ continue;
+
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ // FIXME: prefer immediate constraints if the target allows it
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // If this constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
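+
+// Worked example: for a two-letter alternative such as "ri", 'r' maps to
+// C_RegisterClass (generality 2) while 'i' maps to C_Other/C_Immediate
+// (generality 0), so 'r' is chosen and the operand is forced into a register
+// even when it is a constant; that is the case the FIXME above refers to.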
+
+static void computeConstraintToUse(const TargetLowering *TLI,
+ TargetLowering::AsmOperandInfo &OpInfo) {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ chooseConstraint(OpInfo, TLI);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *Val = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(Val) || isa<ConstantInt>(Val) || isa<Function>(Val))
+ return;
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = TLI->LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
+ unsigned Flag = I.getOperand(OpIdx).getImm();
+ return InlineAsm::getNumOperandRegisters(Flag);
+}
+
+static bool buildAnyextOrCopy(Register Dst, Register Src,
+ MachineIRBuilder &MIRBuilder) {
+ const TargetRegisterInfo *TRI =
+ MIRBuilder.getMF().getSubtarget().getRegisterInfo();
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ auto SrcTy = MRI->getType(Src);
+ if (!SrcTy.isValid()) {
+ LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n");
+ return false;
+ }
+ unsigned SrcSize = TRI->getRegSizeInBits(Src, *MRI);
+ unsigned DstSize = TRI->getRegSizeInBits(Dst, *MRI);
+
+ if (DstSize < SrcSize) {
+ LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n");
+ return false;
+ }
+
+ // Attempt to anyext small scalar sources.
+ if (DstSize > SrcSize) {
+ if (!SrcTy.isScalar()) {
+ LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of"
+ "destination register class\n");
+ return false;
+ }
+ Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0);
+ }
+
+ MIRBuilder.buildCopy(Dst, Src);
+ return true;
+}
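+
+// For example (sizes illustrative): copying an s16 source into a destination
+// constrained to 32 bits builds a G_ANYEXT to s32 followed by a COPY; equal
+// sizes produce a plain COPY; a source wider than the destination, or a
+// non-scalar source that would need extension, makes the helper fail and the
+// inline asm lowering bails out.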
+
+bool InlineAsmLowering::lowerInlineAsm(
+ MachineIRBuilder &MIRBuilder, const CallBase &Call,
+ std::function<ArrayRef<Register>(const Value &Val)> GetOrCreateVRegs)
+ const {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ GISelAsmOperandInfoVector ConstraintOperands;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(DL, TRI, Call);
+
+ ExtraFlags ExtraInfo(Call);
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(GISelAsmOperandInfo(T));
+ GISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Compute the value type for each operand.
+ if (OpInfo.hasArg()) {
+ OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo));
+
+ if (isa<BasicBlock>(OpInfo.CallOperandVal)) {
+ LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n");
+ return false;
+ }
+
+ Type *OpTy = OpInfo.CallOperandVal->getType();
+
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (OpInfo.isIndirect) {
+ OpTy = Call.getParamElementType(ArgNo);
+ assert(OpTy && "Indirect operand must have elementtype attribute");
+ }
+
+ // FIXME: Support aggregate input operands
+ if (!OpTy->isSingleValueType()) {
+ LLVM_DEBUG(
+ dbgs() << "Aggregate input operands are not supported yet\n");
+ return false;
+ }
+
+ OpInfo.ConstraintVT =
+ TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT();
+ ++ArgNo;
+ } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
+ OpInfo.ConstraintVT =
+ TLI->getSimpleValueType(DL, STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT =
+ TLI->getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
+ }
+ ++ResNo;
+ } else {
+ assert(OpInfo.Type != InlineAsm::isLabel &&
+ "GlobalISel currently doesn't support callbr");
+ OpInfo.ConstraintVT = MVT::Other;
+ }
+
+ if (OpInfo.ConstraintVT == MVT::i64x8)
+ return false;
+
+ // Compute the constraint code and ConstraintType to use.
+ computeConstraintToUse(TLI, OpInfo);
+
+ // The selected constraint type might expose new side effects.
+ ExtraInfo.update(OpInfo);
+ }
+
+ // At this point, all operand types are decided.
+ // Create the MachineInstr, but don't insert it yet since input
+ // operands still need to insert instructions before this one
+ auto Inst = MIRBuilder.buildInstrNoInsert(TargetOpcode::INLINEASM)
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo.get());
+
+ // Starting from this operand, a flag followed by its register(s) is added to
+ // Inst for each constraint; StartIdx lets matching input constraints locate
+ // those operands later.
+ unsigned StartIdx = Inst->getNumOperands();
+
+ // Collects the output operands for later processing
+ GISelAsmOperandInfoVector OutputOperands;
+
+ for (auto &OpInfo : ConstraintOperands) {
+ GISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : OpInfo;
+
+ // Assign registers for register operands
+ getRegistersForValue(MF, MIRBuilder, OpInfo, RefOpInfo);
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ unsigned ConstraintID =
+ TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM instruction to know about this
+ // output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ Inst.addImm(OpFlags);
+ ArrayRef<Register> SourceRegs =
+ GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(
+ SourceRegs.size() == 1 &&
+ "Expected the memory output to fit into a single virtual register");
+ Inst.addReg(SourceRegs[0]);
+ } else {
+ // Otherwise, this outputs to a register (directly for C_Register,
+ // C_RegisterClass, or C_Other).
+ assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Other);
+
+ // Find a register that we can use.
+ if (OpInfo.Regs.empty()) {
+ LLVM_DEBUG(dbgs()
+ << "Couldn't allocate output register for constraint\n");
+ return false;
+ }
+
+ // Add information to the INLINEASM instruction to know that this
+ // register is set.
+ unsigned Flag = InlineAsm::getFlagWord(
+ OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ OpInfo.Regs.size());
+ if (OpInfo.Regs.front().isVirtual()) {
+ // Put the register class of the virtual registers in the flag word.
+ // That way, later passes can recompute register class constraints for
+ // inline assembly as well as normal instructions. Don't do this for
+ // tied operands that can use the regclass information from the def.
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ Inst.addImm(Flag);
+
+ for (Register Reg : OpInfo.Regs) {
+ Inst.addReg(Reg,
+ RegState::Define | getImplRegState(Reg.isPhysical()) |
+ (OpInfo.isEarlyClobber ? RegState::EarlyClobber : 0));
+ }
+
+ // Remember this output operand for later processing
+ OutputOperands.push_back(OpInfo);
+ }
+
+ break;
+ case InlineAsm::isInput:
+ case InlineAsm::isLabel: {
+ if (OpInfo.isMatchingInputConstraint()) {
+ unsigned DefIdx = OpInfo.getMatchedOperand();
+ // Find operand with register def that corresponds to DefIdx.
+ unsigned InstFlagIdx = StartIdx;
+ for (unsigned i = 0; i < DefIdx; ++i)
+ InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1;
+ assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag");
+
+ unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm();
+ if (InlineAsm::isMemKind(MatchedOperandFlag)) {
+ LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not "
+ "supported. This should be target specific.\n");
+ return false;
+ }
+ if (!InlineAsm::isRegDefKind(MatchedOperandFlag) &&
+ !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) {
+ LLVM_DEBUG(dbgs() << "Unknown matching constraint\n");
+ return false;
+ }
+
+ // We want to tie input to register in next operand.
+ unsigned DefRegIdx = InstFlagIdx + 1;
+ Register Def = Inst->getOperand(DefRegIdx).getReg();
+
+ ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(SrcRegs.size() == 1 && "Single register is expected here");
+
+ // When Def is physreg: use given input.
+ Register In = SrcRegs[0];
+ // When Def is vreg: copy input to new vreg with same reg class as Def.
+ if (Def.isVirtual()) {
+ In = MRI->createVirtualRegister(MRI->getRegClass(Def));
+ if (!buildAnyextOrCopy(In, SrcRegs[0], MIRBuilder))
+ return false;
+ }
+
+ // Add Flag and input register operand (In) to Inst. Tie In to Def.
+ unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
+ unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
+ Inst.addImm(Flag);
+ Inst.addReg(In);
+ Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect) {
+ LLVM_DEBUG(dbgs() << "Indirect input operands with unknown constraint "
+ "not supported yet\n");
+ return false;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+
+ std::vector<MachineOperand> Ops;
+ if (!lowerAsmOperandForConstraint(OpInfo.CallOperandVal,
+ OpInfo.ConstraintCode, Ops,
+ MIRBuilder)) {
+ LLVM_DEBUG(dbgs() << "Don't support constraint: "
+ << OpInfo.ConstraintCode << " yet\n");
+ return false;
+ }
+
+ assert(Ops.size() > 0 &&
+ "Expected constraint to be lowered to at least one operand");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned OpFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ Inst.addImm(OpFlags);
+ Inst.add(Ops);
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+
+ if (!OpInfo.isIndirect) {
+ LLVM_DEBUG(dbgs()
+ << "Cannot indirectify memory input operands yet\n");
+ return false;
+ }
+
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+
+ unsigned ConstraintID =
+ TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ Inst.addImm(OpFlags);
+ ArrayRef<Register> SourceRegs =
+ GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(
+ SourceRegs.size() == 1 &&
+ "Expected the memory input to fit into a single virtual register");
+ Inst.addReg(SourceRegs[0]);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+
+ if (OpInfo.isIndirect) {
+ LLVM_DEBUG(dbgs() << "Can't handle indirect register inputs yet "
+ "for constraint '"
+ << OpInfo.ConstraintCode << "'\n");
+ return false;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.Regs.empty()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Couldn't allocate input register for register constraint\n");
+ return false;
+ }
+
+ unsigned NumRegs = OpInfo.Regs.size();
+ ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(NumRegs == SourceRegs.size() &&
+ "Expected the number of input registers to match the number of "
+ "source registers");
+
+ if (NumRegs > 1) {
+ LLVM_DEBUG(dbgs() << "Input operands with multiple input registers are "
+ "not supported yet\n");
+ return false;
+ }
+
+ unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
+ if (OpInfo.Regs.front().isVirtual()) {
+ // Put the register class of the virtual registers in the flag word.
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+ Inst.addImm(Flag);
+ if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
+ return false;
+ Inst.addReg(OpInfo.Regs[0]);
+ break;
+ }
+
+ case InlineAsm::isClobber: {
+
+ unsigned NumRegs = OpInfo.Regs.size();
+ if (NumRegs > 0) {
+ unsigned Flag =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs);
+ Inst.addImm(Flag);
+
+ for (Register Reg : OpInfo.Regs) {
+ Inst.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Reg.isPhysical()));
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
+ Inst.addMetadata(SrcLoc);
+
+ // All inputs are handled, insert the instruction now
+ MIRBuilder.insertInstr(Inst);
+
+ // Finally, copy the output operands into the output registers
+ ArrayRef<Register> ResRegs = GetOrCreateVRegs(Call);
+ if (ResRegs.size() != OutputOperands.size()) {
+ LLVM_DEBUG(dbgs() << "Expected the number of output registers to match the "
+ "number of destination registers\n");
+ return false;
+ }
+ for (unsigned int i = 0, e = ResRegs.size(); i < e; i++) {
+ GISelAsmOperandInfo &OpInfo = OutputOperands[i];
+
+ if (OpInfo.Regs.empty())
+ continue;
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass: {
+ if (OpInfo.Regs.size() > 1) {
+ LLVM_DEBUG(dbgs() << "Output operands with multiple defining "
+ "registers are not supported yet\n");
+ return false;
+ }
+
+ Register SrcReg = OpInfo.Regs[0];
+ unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
+ LLT ResTy = MRI->getType(ResRegs[i]);
+ if (ResTy.isScalar() && ResTy.getSizeInBits() < SrcSize) {
+ // First copy the non-typed virtual register into a generic virtual
+ // register
+ Register Tmp1Reg =
+ MRI->createGenericVirtualRegister(LLT::scalar(SrcSize));
+ MIRBuilder.buildCopy(Tmp1Reg, SrcReg);
+ // Need to truncate the result of the register
+ MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg);
+ } else if (ResTy.getSizeInBits() == SrcSize) {
+ MIRBuilder.buildCopy(ResRegs[i], SrcReg);
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled output operand with "
+ "mismatched register size\n");
+ return false;
+ }
+
+ break;
+ }
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ LLVM_DEBUG(
+ dbgs() << "Cannot lower target specific output constraints yet\n");
+ return false;
+ case TargetLowering::C_Memory:
+ break; // Already handled.
+ case TargetLowering::C_Address:
+ break; // Silence warning.
+ case TargetLowering::C_Unknown:
+ LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n");
+ return false;
+ }
+ }
+
+ return true;
+}
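+
+// Schematic of the operands built above (not exact MIR syntax; vreg names
+// hypothetical):
+//
+//   INLINEASM &"<asm string>"
+//     <extra-info imm>                       ; from ExtraFlags
+//     <Kind_RegDef flag imm>, def %outvreg   ; one group per output constraint
+//     <Kind_RegUse flag imm>, use %invreg    ; one group per input constraint
+//     <srcloc metadata>                      ; if present on the call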
+
+bool InlineAsmLowering::lowerAsmOperandForConstraint(
+ Value *Val, StringRef Constraint, std::vector<MachineOperand> &Ops,
+ MachineIRBuilder &MIRBuilder) const {
+ if (Constraint.size() > 1)
+ return false;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default:
+ return false;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // immediate integer with a known value.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "expected immediate to fit into 64-bits");
+ // Boolean constants should be zero-extended, others are sign-extended
+ bool IsBool = CI->getBitWidth() == 1;
+ int64_t ExtVal = IsBool ? CI->getZExtValue() : CI->getSExtValue();
+ Ops.push_back(MachineOperand::CreateImm(ExtVal));
+ return true;
+ }
+ return false;
+ }
+}
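+
+// Worked example: for constraint 'n' with an i32 constant 42 this pushes
+// MachineOperand::CreateImm(42); an i1 true becomes 1 (zero-extended). Any
+// non-ConstantInt value, or a multi-letter constraint, returns false and the
+// caller reports the constraint as unsupported.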
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
new file mode 100644
index 000000000000..9bbef11067ae
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -0,0 +1,330 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelect class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGenCoverage.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "instruction-select"
+
+using namespace llvm;
+
+#ifdef LLVM_GISEL_COV_PREFIX
+static cl::opt<std::string>
+ CoveragePrefix("gisel-coverage-prefix", cl::init(LLVM_GISEL_COV_PREFIX),
+ cl::desc("Record GlobalISel rule coverage files of this "
+ "prefix if instrumentation was generated"));
+#else
+static const std::string CoveragePrefix;
+#endif
+
+char InstructionSelect::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
+ "Select target instructions out of generic instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
+ "Select target instructions out of generic instructions",
+ false, false)
+
+InstructionSelect::InstructionSelect(CodeGenOpt::Level OL)
+ : MachineFunctionPass(ID), OptLevel(OL) {}
+
+// In order not to crash when calling getAnalysis during testing with -run-pass
+// we use the default opt level here instead of None, so that the addRequired()
+// calls are made in getAnalysisUsage().
+InstructionSelect::InstructionSelect()
+ : MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {}
+
+void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+
+ if (OptLevel != CodeGenOpt::None) {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ }
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
+
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+
+ CodeGenOpt::Level OldOptLevel = OptLevel;
+ auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; });
+ OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
+ : MF.getTarget().getOptLevel();
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ if (OptLevel != CodeGenOpt::None) {
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ if (PSI && PSI->hasProfileSummary())
+ BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+ }
+
+ CodeGenCoverage CoverageInfo;
+ assert(ISel && "Cannot work without InstructionSelector");
+ ISel->setupMF(MF, KB, &CoverageInfo, PSI, BFI);
+
+ // An optimization remark emitter. Used to report failures.
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
+ // FIXME: There are many other MF/MFI fields we need to initialize.
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+#ifndef NDEBUG
+ // Check that our input is fully legal: we require the function to have the
+ // Legalized property, so it should be.
+ // FIXME: This should be in the MachineVerifier, as the RegBankSelected
+ // property check already is.
+ if (!DisableGISelLegalityCheck)
+ if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "instruction is not legal", *MI);
+ return false;
+ }
+ // FIXME: We could introduce new blocks and will need to fix the outer loop.
+ // Until then, keep track of the number of blocks to assert that we don't.
+ const size_t NumBlocks = MF.size();
+#endif
+ // Keep track of selected blocks, so we can delete unreachable ones later.
+ DenseSet<MachineBasicBlock *> SelectedBlocks;
+
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ ISel->CurMBB = MBB;
+ SelectedBlocks.insert(MBB);
+ if (MBB->empty())
+ continue;
+
+ // Select instructions in reverse order within the block. We permit
+ // erasing, so we manually iterate and recognize the begin (rend) case.
+ bool ReachedBegin = false;
+ for (auto MII = std::prev(MBB->end()), Begin = MBB->begin();
+ !ReachedBegin;) {
+#ifndef NDEBUG
+ // Keep track of the insertion range for debug printing.
+ const auto AfterIt = std::next(MII);
+#endif
+ // Select this instruction.
+ MachineInstr &MI = *MII;
+
+ // And have our iterator point to the next instruction, if there is one.
+ if (MII == Begin)
+ ReachedBegin = true;
+ else
+ --MII;
+
+ LLVM_DEBUG(dbgs() << "Selecting: \n " << MI);
+
+ // We could have folded this instruction away already, making it dead.
+ // If so, erase it.
+ if (isTriviallyDead(MI, MRI)) {
+ LLVM_DEBUG(dbgs() << "Is dead; erasing.\n");
+ salvageDebugInfo(MRI, MI);
+ MI.eraseFromParent();
+ continue;
+ }
+
+ // Eliminate hints or G_CONSTANT_FOLD_BARRIER.
+ if (isPreISelGenericOptimizationHint(MI.getOpcode()) ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_FOLD_BARRIER) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
+
+ // At this point, the destination register class of the op may have
+ // been decided.
+ //
+ // Propagate that through to the source register.
+ const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg);
+ if (DstRC)
+ MRI.setRegClass(SrcReg, DstRC);
+ assert(canReplaceReg(DstReg, SrcReg, MRI) &&
+ "Must be able to replace dst with src!");
+ MI.eraseFromParent();
+ MRI.replaceRegWith(DstReg, SrcReg);
+ continue;
+ }
+
+ if (MI.getOpcode() == TargetOpcode::G_INVOKE_REGION_START) {
+ MI.eraseFromParent();
+ continue;
+ }
+
+ if (!ISel->select(MI)) {
+ // FIXME: It would be nice to dump all inserted instructions. It's
+ // not obvious how, esp. considering select() can insert after MI.
+ reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
+ return false;
+ }
+
+ // Dump the range of instructions that MI expanded into.
+ LLVM_DEBUG({
+ auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);
+ dbgs() << "Into:\n";
+ for (auto &InsertedMI : make_range(InsertedBegin, AfterIt))
+ dbgs() << " " << InsertedMI;
+ dbgs() << '\n';
+ });
+ }
+ }
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ if (!SelectedBlocks.contains(&MBB)) {
+ // This is an unreachable block and therefore hasn't been selected, since
+ // the main selection loop above uses a postorder block traversal.
+ // Delete all of its instructions.
+ MBB.clear();
+ // Don't delete the block in case the block has its address taken or is
+ // still being referenced by a phi somewhere.
+ continue;
+ }
+ // Try to find redundant copies between vregs of the same register class.
+ bool ReachedBegin = false;
+ for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) {
+ // Examine this instruction.
+ MachineInstr &MI = *MII;
+
+ // And have our iterator point to the next instruction, if there is one.
+ if (MII == Begin)
+ ReachedBegin = true;
+ else
+ --MII;
+ if (MI.getOpcode() != TargetOpcode::COPY)
+ continue;
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (SrcReg.isVirtual() && DstReg.isVirtual()) {
+ auto SrcRC = MRI.getRegClass(SrcReg);
+ auto DstRC = MRI.getRegClass(DstReg);
+ if (SrcRC == DstRC) {
+ MRI.replaceRegWith(DstReg, SrcReg);
+ MI.eraseFromParent();
+ }
+ }
+ }
+ }
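+
+ // For example (register class name hypothetical), a pair left over from
+ // selection such as
+ //   %1:gpr = COPY %0:gpr
+ // is erased by the loop above and every use of %1 is rewritten to %0.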
+
+#ifndef NDEBUG
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ // Now that selection is complete, there are no more generic vregs. Verify
+ // that the size of the now-constrained vreg is unchanged and that it has a
+ // register class.
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register VReg = Register::index2VirtReg(I);
+
+ MachineInstr *MI = nullptr;
+ if (!MRI.def_empty(VReg))
+ MI = &*MRI.def_instr_begin(VReg);
+ else if (!MRI.use_empty(VReg)) {
+ MI = &*MRI.use_instr_begin(VReg);
+ // Debug value instruction is permitted to use undefined vregs.
+ if (MI->isDebugValue())
+ continue;
+ }
+ if (!MI)
+ continue;
+
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg);
+ if (!RC) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has no regclass after selection", *MI);
+ return false;
+ }
+
+ const LLT Ty = MRI.getType(VReg);
+ if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) {
+ reportGISelFailure(
+ MF, TPC, MORE, "gisel-select",
+ "VReg's low-level type and register class have different sizes", *MI);
+ return false;
+ }
+ }
+
+ if (MF.size() != NumBlocks) {
+ MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure",
+ MF.getFunction().getSubprogram(),
+ /*MBB=*/nullptr);
+ R << "inserting blocks is not supported yet";
+ reportGISelFailure(MF, TPC, MORE, R);
+ return false;
+ }
+#endif
+ // Determine if there are any calls in this machine function. Ported from
+ // SelectionDAG.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (const auto &MBB : MF) {
+ if (MFI.hasCalls() && MF.hasInlineAsm())
+ break;
+
+ for (const auto &MI : MBB) {
+ if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm())
+ MFI.setHasCalls(true);
+ if (MI.isInlineAsm())
+ MF.setHasInlineAsm(true);
+ }
+ }
+
+ // FIXME: FinalizeISel pass calls finalizeLowering, so it's called twice.
+ auto &TLI = *MF.getSubtarget().getTargetLowering();
+ TLI.finalizeLowering(MF);
+
+ LLVM_DEBUG({
+ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":";
+ for (auto RuleID : CoverageInfo.covered())
+ dbgs() << " id" << RuleID;
+ dbgs() << "\n\n";
+ });
+ CoverageInfo.emit(CoveragePrefix,
+ TLI.getTargetMachine().getTarget().getBackendName());
+
+ // If we successfully selected the function nothing is going to use the vreg
+ // types after us (otherwise MIRPrinter would need them). Make sure the types
+ // disappear.
+ MRI.clearVirtRegTypes();
+
+ // FIXME: Should we accurately track changes?
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
new file mode 100644
index 000000000000..c48591cc2f02
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -0,0 +1,16 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+
+namespace llvm {
+
+// vtable anchor
+InstructionSelector::~InstructionSelector() = default;
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
new file mode 100644
index 000000000000..8cfb1b786c24
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -0,0 +1,383 @@
+//===- lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp - Legalizer ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement an interface to specify and query how an illegal operation on a
+// given type should be expanded.
+//
+// Issues to be resolved:
+// + Make it fast.
+// + Support weird types like i3, <7 x i3>, ...
+// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include <map>
+
+using namespace llvm;
+using namespace LegacyLegalizeActions;
+
+#define DEBUG_TYPE "legalizer-info"
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegacyLegalizeAction Action) {
+ switch (Action) {
+ case Legal:
+ OS << "Legal";
+ break;
+ case NarrowScalar:
+ OS << "NarrowScalar";
+ break;
+ case WidenScalar:
+ OS << "WidenScalar";
+ break;
+ case FewerElements:
+ OS << "FewerElements";
+ break;
+ case MoreElements:
+ OS << "MoreElements";
+ break;
+ case Bitcast:
+ OS << "Bitcast";
+ break;
+ case Lower:
+ OS << "Lower";
+ break;
+ case Libcall:
+ OS << "Libcall";
+ break;
+ case Custom:
+ OS << "Custom";
+ break;
+ case Unsupported:
+ OS << "Unsupported";
+ break;
+ case NotFound:
+ OS << "NotFound";
+ break;
+ }
+ return OS;
+}
+
+LegacyLegalizerInfo::LegacyLegalizerInfo() {
+ // Set defaults.
+ // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
+ // fundamental load/store Jakob proposed, once loads & stores are supported.
+ setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}});
+
+ setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
+
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall);
+ setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}});
+}
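+
+// Roughly, the strategies installed above mean the following (an informal
+// summary of the strategy names used above, not a specification):
+// widenToLargerTypesAndNarrowToLargest widens a scalar to the next explicitly
+// specified larger size and narrows anything beyond the largest specified
+// size, while narrowToSmallerAndUnsupportedIfTooSmall narrows a scalar to the
+// next explicitly specified smaller size and marks anything below the
+// smallest specified size Unsupported.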
+
+void LegacyLegalizerInfo::computeTables() {
+ assert(TablesInitialized == false);
+
+ for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
+ const unsigned Opcode = FirstOp + OpcodeIdx;
+ for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
+ ++TypeIdx) {
+ // 0. Collect information specified through the setAction API, i.e.
+ // for specific bit sizes.
+ // For scalar types:
+ SizeAndActionsVec ScalarSpecifiedActions;
+ // For pointer types:
+ std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
+ // For vector types:
+ std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
+ for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
+ const LLT Type = LLT2Action.first;
+ const LegacyLegalizeAction Action = LLT2Action.second;
+
+ auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
+ if (Type.isPointer())
+ AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
+ SizeAction);
+ else if (Type.isVector())
+ ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
+ .push_back(SizeAction);
+ else
+ ScalarSpecifiedActions.push_back(SizeAction);
+ }
+
+ // 1. Handle scalar types
+ {
+ // Decide how to handle bit sizes for which no explicit specification
+ // was given.
+ SizeChangeStrategy S = &unsupportedForDifferentSizes;
+ if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
+ ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+ S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
+ llvm::sort(ScalarSpecifiedActions);
+ checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
+ setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
+ }
+
+ // 2. Handle pointer types
+ for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
+ llvm::sort(PointerSpecifiedActions.second);
+ checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
+ // For pointer types, we assume that there isn't a meaningful way
+ // to change the number of bits used in the pointer.
+ setPointerAction(
+ Opcode, TypeIdx, PointerSpecifiedActions.first,
+ unsupportedForDifferentSizes(PointerSpecifiedActions.second));
+ }
+
+ // 3. Handle vector types
+ SizeAndActionsVec ElementSizesSeen;
+ for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
+ llvm::sort(VectorSpecifiedActions.second);
+ const uint16_t ElementSize = VectorSpecifiedActions.first;
+ ElementSizesSeen.push_back({ElementSize, Legal});
+ checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
+ // For vector types, we assume that the best way to adapt the number of
+ // elements is to widen to the next larger number of elements for which the
+ // vector type is legal, unless there is no such type. In that case,
+ // legalize towards a vector type with a smaller number of elements.
+ SizeAndActionsVec NumElementsActions;
+ for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
+ assert(BitsizeAndAction.first % ElementSize == 0);
+ const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
+ NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
+ }
+ setVectorNumElementAction(
+ Opcode, TypeIdx, ElementSize,
+ moreToWiderTypesAndLessToWidest(NumElementsActions));
+ }
+ llvm::sort(ElementSizesSeen);
+ SizeChangeStrategy VectorElementSizeChangeStrategy =
+ &unsupportedForDifferentSizes;
+ if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
+ VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+ VectorElementSizeChangeStrategy =
+ VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
+ setScalarInVectorAction(
+ Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
+ }
+ }
+
+ TablesInitialized = true;
+}
+
+// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
+// probably going to need specialized lookup structures for various types before
+// we have any hope of doing well with something like <13 x i3>. Even the common
+// cases should do better than what we have now.
+std::pair<LegacyLegalizeAction, LLT>
+LegacyLegalizerInfo::getAspectAction(const InstrAspect &Aspect) const {
+ assert(TablesInitialized && "backend forgot to call computeTables");
+ // These *have* to be implemented for now, they're the fundamental basis of
+ // how everything else is transformed.
+ if (Aspect.Type.isScalar() || Aspect.Type.isPointer())
+ return findScalarLegalAction(Aspect);
+ assert(Aspect.Type.isVector());
+ return findVectorLegalAction(Aspect);
+}
+
+LegacyLegalizerInfo::SizeAndActionsVec
+LegacyLegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
+ const SizeAndActionsVec &v, LegacyLegalizeAction IncreaseAction,
+ LegacyLegalizeAction DecreaseAction) {
+ SizeAndActionsVec result;
+ unsigned LargestSizeSoFar = 0;
+ if (v.size() >= 1 && v[0].first != 1)
+ result.push_back({1, IncreaseAction});
+ for (size_t i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ LargestSizeSoFar = v[i].first;
+ if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) {
+ result.push_back({LargestSizeSoFar + 1, IncreaseAction});
+ LargestSizeSoFar = v[i].first + 1;
+ }
+ }
+ result.push_back({LargestSizeSoFar + 1, DecreaseAction});
+ return result;
+}
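+
+// Worked example: with v = {{32, Legal}, {64, Legal}}, IncreaseAction =
+// WidenScalar and DecreaseAction = NarrowScalar, the result is
+//   {{1, WidenScalar}, {32, Legal}, {33, WidenScalar}, {64, Legal},
+//    {65, NarrowScalar}}
+// i.e. sizes below 32 widen, sizes 33..63 widen (resolved to 64 by
+// findAction below), and sizes above 64 narrow.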
+
+LegacyLegalizerInfo::SizeAndActionsVec
+LegacyLegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest(
+ const SizeAndActionsVec &v, LegacyLegalizeAction DecreaseAction,
+ LegacyLegalizeAction IncreaseAction) {
+ SizeAndActionsVec result;
+ if (v.size() == 0 || v[0].first != 1)
+ result.push_back({1, IncreaseAction});
+ for (size_t i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) {
+ result.push_back({v[i].first + 1, DecreaseAction});
+ }
+ }
+ return result;
+}
+
+LegacyLegalizerInfo::SizeAndAction
+LegacyLegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
+ assert(Size >= 1);
+ // Find the last element in Vec that has a bitsize equal to or smaller than
+ // the requested bit size.
+ // That is the element just before the first element that is bigger than Size.
+ auto It = partition_point(
+ Vec, [=](const SizeAndAction &A) { return A.first <= Size; });
+ assert(It != Vec.begin() && "Does Vec not start with size 1?");
+ int VecIdx = It - Vec.begin() - 1;
+
+ LegacyLegalizeAction Action = Vec[VecIdx].second;
+ switch (Action) {
+ case Legal:
+ case Bitcast:
+ case Lower:
+ case Libcall:
+ case Custom:
+ return {Size, Action};
+ case FewerElements:
+ // FIXME: is this special case still needed and correct?
+ // Special case for scalarization:
+ if (Vec == SizeAndActionsVec({{1, FewerElements}}))
+ return {1, FewerElements};
+ [[fallthrough]];
+ case NarrowScalar: {
+ // The following needs to be a loop, as for now, we do allow needing to
+ // go over "Unsupported" bit sizes before finding a legalizable bit size.
+ // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8,
+ // we need to iterate over s9, and then to s32 to return (s32, Legal).
+ // If we want to get rid of the below loop, we should have stronger asserts
+ // when building the SizeAndActionsVecs, probably not allowing
+ // "Unsupported" unless at the ends of the vector.
+ for (int i = VecIdx - 1; i >= 0; --i)
+ if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+ Vec[i].second != Unsupported)
+ return {Vec[i].first, Action};
+ llvm_unreachable("");
+ }
+ case WidenScalar:
+ case MoreElements: {
+ // See above, the following needs to be a loop, at least for now.
+ for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i)
+ if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+ Vec[i].second != Unsupported)
+ return {Vec[i].first, Action};
+ llvm_unreachable("");
+ }
+ case Unsupported:
+ return {Size, Unsupported};
+ case NotFound:
+ llvm_unreachable("NotFound");
+ }
+ llvm_unreachable("Action has an unknown enum value");
+}
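+
+// Worked examples against the vector produced in the example above,
+// {{1, WidenScalar}, {32, Legal}, {33, WidenScalar}, {64, Legal},
+//  {65, NarrowScalar}}:
+//   findAction(Vec, 32)  == {32, Legal}
+//   findAction(Vec, 48)  == {64, WidenScalar}   // widen 48-bit scalars to 64
+//   findAction(Vec, 128) == {64, NarrowScalar}  // narrow anything past 64 to 64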
+
+std::pair<LegacyLegalizeAction, LLT>
+LegacyLegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
+ assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return {NotFound, LLT()};
+ const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
+ if (Aspect.Type.isPointer() &&
+ AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
+ AddrSpace2PointerActions[OpcodeIdx].end()) {
+ return {NotFound, LLT()};
+ }
+ const SmallVector<SizeAndActionsVec, 1> &Actions =
+ Aspect.Type.isPointer()
+ ? AddrSpace2PointerActions[OpcodeIdx]
+ .find(Aspect.Type.getAddressSpace())
+ ->second
+ : ScalarActions[OpcodeIdx];
+ if (Aspect.Idx >= Actions.size())
+ return {NotFound, LLT()};
+ const SizeAndActionsVec &Vec = Actions[Aspect.Idx];
+ // FIXME: speed up this search, e.g. by using a results cache for repeated
+ // queries?
+ auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits());
+ return {SizeAndAction.second,
+ Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first)
+ : LLT::pointer(Aspect.Type.getAddressSpace(),
+ SizeAndAction.first)};
+}
+
+std::pair<LegacyLegalizeAction, LLT>
+LegacyLegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
+ assert(Aspect.Type.isVector());
+ // First legalize the vector element size, then legalize the number of
+ // lanes in the vector.
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return {NotFound, Aspect.Type};
+ const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
+ const unsigned TypeIdx = Aspect.Idx;
+ if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
+ return {NotFound, Aspect.Type};
+ const SizeAndActionsVec &ElemSizeVec =
+ ScalarInVectorActions[OpcodeIdx][TypeIdx];
+
+ LLT IntermediateType;
+ auto ElementSizeAndAction =
+ findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits());
+ IntermediateType = LLT::fixed_vector(Aspect.Type.getNumElements(),
+ ElementSizeAndAction.first);
+ if (ElementSizeAndAction.second != Legal)
+ return {ElementSizeAndAction.second, IntermediateType};
+
+ auto i = NumElements2Actions[OpcodeIdx].find(
+ IntermediateType.getScalarSizeInBits());
+ if (i == NumElements2Actions[OpcodeIdx].end()) {
+ return {NotFound, IntermediateType};
+ }
+ const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx];
+ auto NumElementsAndAction =
+ findAction(NumElementsVec, IntermediateType.getNumElements());
+ return {NumElementsAndAction.second,
+ LLT::fixed_vector(NumElementsAndAction.first,
+ IntermediateType.getScalarSizeInBits())};
+}
+
+unsigned LegacyLegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const {
+ assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode");
+ return Opcode - FirstOp;
+}
+
+LegacyLegalizeActionStep
+LegacyLegalizerInfo::getAction(const LegalityQuery &Query) const {
+ for (unsigned i = 0; i < Query.Types.size(); ++i) {
+ auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
+ if (Action.first != Legal) {
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action="
+ << Action.first << ", " << Action.second << "\n");
+ return {Action.first, i, Action.second};
+ } else
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
+ }
+ LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n");
+ return {Legal, 0, LLT{}};
+}
+
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
new file mode 100644
index 000000000000..2c77ed8b0600
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -0,0 +1,213 @@
+//===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A library of predicate factories to use for LegalityPredicate.
+//
+//===----------------------------------------------------------------------===//
+
+// Enable optimizations to work around MSVC debug mode bug in 32-bit:
+// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html
+// FIXME: Remove this when the issue is closed.
+#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)
+// We have to disable runtime checks in order to enable optimizations. This is
+// done for the entire file because the problem is actually observed in STL
+// template functions.
+#pragma runtime_checks("", off)
+#pragma optimize("gs", on)
+#endif
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+using namespace llvm;
+
+LegalityPredicate LegalityPredicates::typeIs(unsigned TypeIdx, LLT Type) {
+ return
+ [=](const LegalityQuery &Query) { return Query.Types[TypeIdx] == Type; };
+}
+
+LegalityPredicate
+LegalityPredicates::typeInSet(unsigned TypeIdx,
+ std::initializer_list<LLT> TypesInit) {
+ SmallVector<LLT, 4> Types = TypesInit;
+ return [=](const LegalityQuery &Query) {
+ return llvm::is_contained(Types, Query.Types[TypeIdx]);
+ };
+}
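+
+// A typical use from a target's rule definitions, sketched (rule-set API names
+// taken from LegalizerInfo; opcode and sizes illustrative):
+//   getActionDefinitionsBuilder(TargetOpcode::G_ADD)
+//       .legalIf(LegalityPredicates::typeInSet(0, {LLT::scalar(32),
+//                                                  LLT::scalar(64)}));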
+
+LegalityPredicate LegalityPredicates::typePairInSet(
+ unsigned TypeIdx0, unsigned TypeIdx1,
+ std::initializer_list<std::pair<LLT, LLT>> TypesInit) {
+ SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit;
+ return [=](const LegalityQuery &Query) {
+ std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]};
+ return llvm::is_contained(Types, Match);
+ };
+}
+
+LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
+ unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
+ std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
+ SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit;
+ return [=](const LegalityQuery &Query) {
+ TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
+ Query.MMODescrs[MMOIdx].MemoryTy,
+ Query.MMODescrs[MMOIdx].AlignInBits};
+ return llvm::any_of(TypesAndMemDesc,
+ [=](const TypePairAndMemDesc &Entry) -> bool {
+ return Match.isCompatible(Entry);
+ });
+ };
+}
+
+LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isScalar();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isVector(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isVector();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isPointer();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
+ unsigned AddrSpace) {
+ return [=](const LegalityQuery &Query) {
+ LLT Ty = Query.Types[TypeIdx];
+ return Ty.isPointer() && Ty.getAddressSpace() == AddrSpace;
+ };
+}
+
+LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx,
+ LLT EltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && QueryTy.getElementType() == EltTy;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarNarrowerThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarWiderThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::smallerThan(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() <
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
+LegalityPredicate LegalityPredicates::largerThan(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() >
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.getScalarSizeInBits() < Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltWiderThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.getScalarSizeInBits() > Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return !isPowerOf2_32(QueryTy.getScalarSizeInBits());
+ };
+}
+
+LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() % Size != 0;
+ };
+}
+
+LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() &&
+ !llvm::has_single_bit<uint32_t>(QueryTy.getSizeInBits());
+ };
+}
+
+LegalityPredicate LegalityPredicates::sizeIs(unsigned TypeIdx, unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].getSizeInBits() == Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() ==
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
+LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
+ return [=](const LegalityQuery &Query) {
+ return !llvm::has_single_bit<uint32_t>(
+ Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
+ };
+}
+
+LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
+ return !MemTy.isByteSized() ||
+ !llvm::has_single_bit<uint32_t>(MemTy.getSizeInBytes());
+ };
+}
+
+LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
+ };
+}
+
+LegalityPredicate LegalityPredicates::atomicOrderingAtLeastOrStrongerThan(
+ unsigned MMOIdx, AtomicOrdering Ordering) {
+ return [=](const LegalityQuery &Query) {
+ return isAtLeastOrStrongerThan(Query.MMODescrs[MMOIdx].Ordering, Ordering);
+ };
+}
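
A short sketch (illustrative, not from this patch) of how a target's ruleset typically consumes these predicate factories. MyPredicateRules is a hypothetical class; the builder methods used (getActionDefinitionsBuilder, legalFor, legalIf, clampScalar) are the standard LegalizerInfo API these predicates plug into, and legalFor is itself built on typeInSet().

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;
using namespace LegalityPredicates;

// Hypothetical target ruleset; real code lives in a <Target>LegalizerInfo
// constructor.
struct MyPredicateRules : public LegalizerInfo {
  MyPredicateRules() {
    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);
    const LLT V4S32 = LLT::fixed_vector(4, 32);
    getActionDefinitionsBuilder(TargetOpcode::G_AND)
        .legalFor({S32, S64, V4S32})          // built on typeInSet()
        .legalIf(all(isVector(0), elementTypeIs(0, S32)))
        .clampScalar(0, S32, S64);
    // Targets finish by computing the legacy tables.
    getLegacyLegalizerInfo().computeTables();
  }
};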
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
new file mode 100644
index 000000000000..25c1db91b05d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -0,0 +1,112 @@
+//===- lib/CodeGen/GlobalISel/LegalizeMutations.cpp - Mutations -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A library of mutation factories to use for LegalizeMutation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+using namespace llvm;
+
+LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, LLT Ty) {
+ return
+ [=](const LegalityQuery &Query) { return std::make_pair(TypeIdx, Ty); };
+}
+
+LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx, Query.Types[FromTypeIdx]);
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+ LLT NewEltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ ElementCount NewEltCount =
+ NewTy.isVector() ? NewTy.getElementCount() : ElementCount::getFixed(1);
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+ LLT NewEltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
+ : ElementCount::getFixed(1);
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits());
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
+ unsigned Min) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits =
+ std::max(1u << Log2_32_Ceil(Ty.getScalarSizeInBits()), Min);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
+ };
+}
+
+LegalizeMutation
+LegalizeMutations::widenScalarOrEltToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits = alignTo(Ty.getScalarSizeInBits(), Size);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
+ };
+}
+
+LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
+ unsigned Min) {
+ return [=](const LegalityQuery &Query) {
+ const LLT VecTy = Query.Types[TypeIdx];
+ unsigned NewNumElements =
+ std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min);
+ return std::make_pair(
+ TypeIdx, LLT::fixed_vector(NewNumElements, VecTy.getElementType()));
+ };
+}
+
+LegalizeMutation LegalizeMutations::scalarize(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx, Query.Types[TypeIdx].getElementType());
+ };
+}
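
A companion sketch (illustrative, not from this patch): a mutation is always paired with a predicate to form a conditional rule, here through the standard widenScalarIf builder method. MyWideningRules is a hypothetical class standing in for a target's LegalizerInfo subclass.

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Hypothetical target ruleset; real code lives in a <Target>LegalizerInfo
// constructor.
struct MyWideningRules : public LegalizerInfo {
  MyWideningRules() {
    const LLT S32 = LLT::scalar(32);
    getActionDefinitionsBuilder(TargetOpcode::G_MUL)
        // Widen any scalar narrower than 32 bits to exactly 32 bits.
        .widenScalarIf(LegalityPredicates::scalarNarrowerThan(0, 32),
                       LegalizeMutations::changeTo(0, S32))
        // Then round odd-sized scalars up to the next power of two, but
        // never below 32 bits.
        .widenScalarIf(LegalityPredicates::sizeNotPow2(0),
                       LegalizeMutations::widenScalarOrEltToNextPow2(0, 32));
    getLegacyLegalizerInfo().computeTables();
  }
};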
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
new file mode 100644
index 000000000000..aecbe0b7604c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -0,0 +1,385 @@
+//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the Legalizer pass, which drives the
+/// LegalizerHelper to legalize individual instructions during the primary
+/// legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+
+#define DEBUG_TYPE "legalizer"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ EnableCSEInLegalizer("enable-cse-in-legalizer",
+ cl::desc("Should enable CSE in Legalizer"),
+ cl::Optional, cl::init(false));
+
+// This is a temporary hack; it should be removed soon.
+static cl::opt<bool> AllowGInsertAsArtifact(
+ "allow-ginsert-as-artifact",
+ cl::desc("Allow G_INSERT to be considered an artifact. Hack around AMDGPU "
+ "test infinite loops."),
+ cl::Optional, cl::init(true));
+
+enum class DebugLocVerifyLevel {
+ None,
+ Legalizations,
+ LegalizationsAndArtifactCombiners,
+};
+#ifndef NDEBUG
+static cl::opt<DebugLocVerifyLevel> VerifyDebugLocs(
+ "verify-legalizer-debug-locs",
+ cl::desc("Verify that debug locations are handled"),
+ cl::values(
+ clEnumValN(DebugLocVerifyLevel::None, "none", "No verification"),
+ clEnumValN(DebugLocVerifyLevel::Legalizations, "legalizations",
+ "Verify legalizations"),
+ clEnumValN(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners,
+ "legalizations+artifactcombiners",
+ "Verify legalizations and artifact combines")),
+ cl::init(DebugLocVerifyLevel::Legalizations));
+#else
+// Always disable it for release builds by preventing the observer from being
+// installed.
+static const DebugLocVerifyLevel VerifyDebugLocs = DebugLocVerifyLevel::None;
+#endif
+
+char Legalizer::ID = 0;
+INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
+                      "Legalize a function's Machine IR", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
+                    "Legalize a function's Machine IR", false,
+ false)
+
+Legalizer::Legalizer() : MachineFunctionPass(ID) { }
+
+void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void Legalizer::init(MachineFunction &MF) {
+}
+
+static bool isArtifact(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ return true;
+ case TargetOpcode::G_INSERT:
+ return AllowGInsertAsArtifact;
+ }
+}
+using InstListTy = GISelWorkList<256>;
+using ArtifactListTy = GISelWorkList<128>;
+
+namespace {
+class LegalizerWorkListManager : public GISelChangeObserver {
+ InstListTy &InstList;
+ ArtifactListTy &ArtifactList;
+#ifndef NDEBUG
+ SmallVector<MachineInstr *, 4> NewMIs;
+#endif
+
+public:
+ LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
+ : InstList(Insts), ArtifactList(Arts) {}
+
+ void createdOrChangedInstr(MachineInstr &MI) {
+ // Only legalize pre-isel generic instructions.
+    // The legalization process can generate target-specific pseudo
+    // instructions with generic types. Don't record them.
+ if (isPreISelGenericOpcode(MI.getOpcode())) {
+ if (isArtifact(MI))
+ ArtifactList.insert(&MI);
+ else
+ InstList.insert(&MI);
+ }
+ }
+
+ void createdInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(NewMIs.push_back(&MI));
+ createdOrChangedInstr(MI);
+ }
+
+ void printNewInstrs() {
+ LLVM_DEBUG({
+ for (const auto *MI : NewMIs)
+ dbgs() << ".. .. New MI: " << *MI;
+ NewMIs.clear();
+ });
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Erasing: " << MI);
+ InstList.remove(&MI);
+ ArtifactList.remove(&MI);
+ }
+
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Changing MI: " << MI);
+ }
+
+ void changedInstr(MachineInstr &MI) override {
+ // When insts change, we want to revisit them to legalize them again.
+ // We'll consider them the same as created.
+ LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
+ createdOrChangedInstr(MI);
+ }
+};
+} // namespace
+
+Legalizer::MFResult
+Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
+ ArrayRef<GISelChangeObserver *> AuxObservers,
+ LostDebugLocObserver &LocObserver,
+ MachineIRBuilder &MIRBuilder,
+ GISelKnownBits *KB) {
+ MIRBuilder.setMF(MF);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Populate worklists.
+ InstListTy InstList;
+ ArtifactListTy ArtifactList;
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ // Perform legalization bottom up so we can DCE as we legalize.
+ // Traverse BB in RPOT and within each basic block, add insts top down,
+ // so when we pop_back_val in the legalization process, we traverse bottom-up.
+ for (auto *MBB : RPOT) {
+ if (MBB->empty())
+ continue;
+ for (MachineInstr &MI : *MBB) {
+ // Only legalize pre-isel generic instructions: others don't have types
+ // and are assumed to be legal.
+ if (!isPreISelGenericOpcode(MI.getOpcode()))
+ continue;
+ if (isArtifact(MI))
+ ArtifactList.deferred_insert(&MI);
+ else
+ InstList.deferred_insert(&MI);
+ }
+ }
+ ArtifactList.finalize();
+ InstList.finalize();
+
+ // This observer keeps the worklists updated.
+ LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
+  // We want the WorkListObserver as well as all the auxiliary observers (e.g.
+  // CSEInfo) to observe all changes, so use the wrapper observer.
+ GISelObserverWrapper WrapperObserver(&WorkListObserver);
+ for (GISelChangeObserver *Observer : AuxObservers)
+ WrapperObserver.addObserver(Observer);
+
+ // Now install the observer as the delegate to MF.
+ // This will keep all the observers notified about new insertions/deletions.
+ RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
+ LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
+ LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
+ bool Changed = false;
+ SmallVector<MachineInstr *, 128> RetryList;
+ do {
+ LLVM_DEBUG(dbgs() << "=== New Iteration ===\n");
+ assert(RetryList.empty() && "Expected no instructions in RetryList");
+ unsigned NumArtifacts = ArtifactList.size();
+ while (!InstList.empty()) {
+ MachineInstr &MI = *InstList.pop_back_val();
+ assert(isPreISelGenericOpcode(MI.getOpcode()) &&
+ "Expecting generic opcode");
+ if (isTriviallyDead(MI, MRI)) {
+ salvageDebugInfo(MRI, MI);
+ eraseInstr(MI, MRI, &LocObserver);
+ continue;
+ }
+
+ // Do the legalization for this instruction.
+ auto Res = Helper.legalizeInstrStep(MI, LocObserver);
+ // Error out if we couldn't legalize this instruction. We may want to
+ // fall back to DAG ISel instead in the future.
+ if (Res == LegalizerHelper::UnableToLegalize) {
+ // Move illegal artifacts to RetryList instead of aborting because
+ // legalizing InstList may generate artifacts that allow
+        // ArtifactCombiner to combine them away.
+ if (isArtifact(MI)) {
+ LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n");
+ assert(NumArtifacts == 0 &&
+ "Artifacts are only expected in instruction list starting the "
+ "second iteration, but each iteration starting second must "
+ "start with an empty artifacts list");
+ (void)NumArtifacts;
+ RetryList.push_back(&MI);
+ continue;
+ }
+ Helper.MIRBuilder.stopObservingChanges();
+ return {Changed, &MI};
+ }
+ WorkListObserver.printNewInstrs();
+ LocObserver.checkpoint();
+ Changed |= Res == LegalizerHelper::Legalized;
+ }
+ // Try to combine the instructions in RetryList again if there
+ // are new artifacts. If not, stop legalizing.
+ if (!RetryList.empty()) {
+ if (!ArtifactList.empty()) {
+ while (!RetryList.empty())
+ ArtifactList.insert(RetryList.pop_back_val());
+ } else {
+ LLVM_DEBUG(dbgs() << "No new artifacts created, not retrying!\n");
+ Helper.MIRBuilder.stopObservingChanges();
+ return {Changed, RetryList.front()};
+ }
+ }
+ LocObserver.checkpoint();
+ while (!ArtifactList.empty()) {
+ MachineInstr &MI = *ArtifactList.pop_back_val();
+ assert(isPreISelGenericOpcode(MI.getOpcode()) &&
+ "Expecting generic opcode");
+ if (isTriviallyDead(MI, MRI)) {
+ salvageDebugInfo(MRI, MI);
+ eraseInstr(MI, MRI, &LocObserver);
+ continue;
+ }
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+ LLVM_DEBUG(dbgs() << "Trying to combine: " << MI);
+ if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
+ WrapperObserver)) {
+ WorkListObserver.printNewInstrs();
+ eraseInstrs(DeadInstructions, MRI, &LocObserver);
+ LocObserver.checkpoint(
+ VerifyDebugLocs ==
+ DebugLocVerifyLevel::LegalizationsAndArtifactCombiners);
+ Changed = true;
+ continue;
+ }
+ // If this was not an artifact (that could be combined away), this might
+ // need special handling. Add it to InstList, so when it's processed
+ // there, it has to be legal or specially handled.
+ else {
+ LLVM_DEBUG(dbgs() << ".. Not combined, moving to instructions list\n");
+ InstList.insert(&MI);
+ }
+ }
+ } while (!InstList.empty());
+
+ return {Changed, /*FailedOn*/ nullptr};
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
+ init(MF);
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
+ std::unique_ptr<MachineIRBuilder> MIRBuilder;
+ GISelCSEInfo *CSEInfo = nullptr;
+ bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
+ ? EnableCSEInLegalizer
+ : TPC.isGISelCSEEnabled();
+ if (EnableCSE) {
+ MIRBuilder = std::make_unique<CSEMIRBuilder>();
+ CSEInfo = &Wrapper.get(TPC.getCSEConfig());
+ MIRBuilder->setCSEInfo(CSEInfo);
+ } else
+ MIRBuilder = std::make_unique<MachineIRBuilder>();
+
+ SmallVector<GISelChangeObserver *, 1> AuxObservers;
+ if (EnableCSE && CSEInfo) {
+ // We want CSEInfo in addition to WorkListObserver to observe all changes.
+ AuxObservers.push_back(CSEInfo);
+ }
+ assert(!CSEInfo || !errorToBool(CSEInfo->verify()));
+ LostDebugLocObserver LocObserver(DEBUG_TYPE);
+ if (VerifyDebugLocs > DebugLocVerifyLevel::None)
+ AuxObservers.push_back(&LocObserver);
+
+ // This allows Known Bits Analysis in the legalizer.
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
+ const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
+ MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, LocObserver,
+ *MIRBuilder, KB);
+
+ if (Result.FailedOn) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", *Result.FailedOn);
+ return false;
+ }
+
+ if (LocObserver.getNumLostDebugLocs()) {
+ MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc",
+ MF.getFunction().getSubprogram(),
+ /*MBB=*/&*MF.begin());
+ R << "lost "
+ << ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs())
+ << " debug locations during pass";
+ reportGISelWarning(MF, TPC, MORE, R);
+ // Example remark:
+ // --- !Missed
+ // Pass: gisel-legalize
+ // Name: GISelFailure
+ // DebugLoc: { File: '.../legalize-urem.mir', Line: 1, Column: 0 }
+ // Function: test_urem_s32
+ // Args:
+ // - String: 'lost '
+ // - NumLostDebugLocs: '1'
+ // - String: ' debug locations during pass'
+ // ...
+ }
+
+ // If for some reason CSE was not enabled, make sure that we invalidate the
+ // CSEInfo object (as we currently declare that the analysis is preserved).
+  // The next time get() is called on the wrapper, it will force recomputation
+  // of the analysis.
+ if (!EnableCSE)
+ Wrapper.setComputed(false);
+ return Result.Changed;
+}
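
For orientation (illustrative, not part of this patch): this pass is normally scheduled from a target's TargetPassConfig. MyTargetPassConfig below is a hypothetical subclass; addLegalizeMachineIR() is the standard GlobalISel hook it overrides. The cl::opt flags declared near the top of this file (-enable-cse-in-legalizer, -allow-ginsert-as-artifact, and, in asserts builds, -verify-legalizer-debug-locs) tune the pass from the command line.

#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/TargetPassConfig.h"

namespace {
class MyTargetPassConfig : public llvm::TargetPassConfig {
public:
  MyTargetPassConfig(llvm::LLVMTargetMachine &TM, llvm::PassManagerBase &PM)
      : llvm::TargetPassConfig(TM, PM) {}

  // Standard GlobalISel hook: insert the Legalizer between the IRTranslator
  // and RegBankSelect stages of the pipeline.
  bool addLegalizeMachineIR() override {
    addPass(new llvm::Legalizer());
    return false; // false means "no error"; the pipeline continues.
  }
};
} // namespace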
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
new file mode 100644
index 000000000000..f0da0d88140f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -0,0 +1,8119 @@
+//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LegalizerHelper class, which legalizes
+/// individual instructions; the Legalizer wrapper pass that drives the
+/// primary legalization lives in Legalizer.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <numeric>
+#include <optional>
+
+#define DEBUG_TYPE "legalizer"
+
+using namespace llvm;
+using namespace LegalizeActions;
+using namespace MIPatternMatch;
+
+/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
+///
+/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
+/// with any leftover piece as type \p LeftoverTy
+///
+/// Returns -1 in the first element of the pair if the breakdown is not
+/// satisfiable.
+static std::pair<int, int>
+getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned Size = OrigTy.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned NumParts = Size / NarrowSize;
+ unsigned LeftoverSize = Size - NumParts * NarrowSize;
+ assert(Size > NarrowSize);
+
+ if (LeftoverSize == 0)
+ return {NumParts, 0};
+
+ if (NarrowTy.isVector()) {
+ unsigned EltSize = OrigTy.getScalarSizeInBits();
+ if (LeftoverSize % EltSize != 0)
+ return {-1, -1};
+ LeftoverTy = LLT::scalarOrVector(
+ ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
+ } else {
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ }
+
+ int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
+ return std::make_pair(NumParts, NumLeftover);
+}
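
A quick worked example of the breakdown above (illustrative, not from the patch): splitting s96 by s32 yields NumParts = 3 with no leftover, i.e. {3, 0}; splitting s96 by s64 yields one s64 part plus an s32 leftover, i.e. {1, 1} with LeftoverTy = s32.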
+
+static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
+  if (!Ty.isScalar())
+ return nullptr;
+
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return Type::getHalfTy(Ctx);
+ case 32:
+ return Type::getFloatTy(Ctx);
+ case 64:
+ return Type::getDoubleTy(Ctx);
+ case 80:
+ return Type::getX86_FP80Ty(Ctx);
+ case 128:
+ return Type::getFP128Ty(Ctx);
+ default:
+ return nullptr;
+ }
+}
+
+LegalizerHelper::LegalizerHelper(MachineFunction &MF,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &Builder)
+ : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
+ LI(*MF.getSubtarget().getLegalizerInfo()),
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
+
+LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &B, GISelKnownBits *KB)
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) {
+ LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
+
+ MIRBuilder.setInstrAndDebugLoc(MI);
+
+ if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
+ MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+ return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
+ auto Step = LI.getAction(MI, MRI);
+ switch (Step.Action) {
+ case Legal:
+ LLVM_DEBUG(dbgs() << ".. Already legal\n");
+ return AlreadyLegal;
+ case Libcall:
+ LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
+ return libcall(MI, LocObserver);
+ case NarrowScalar:
+ LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
+ return narrowScalar(MI, Step.TypeIdx, Step.NewType);
+ case WidenScalar:
+ LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
+ return widenScalar(MI, Step.TypeIdx, Step.NewType);
+ case Bitcast:
+ LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
+ return bitcast(MI, Step.TypeIdx, Step.NewType);
+ case Lower:
+ LLVM_DEBUG(dbgs() << ".. Lower\n");
+ return lower(MI, Step.TypeIdx, Step.NewType);
+ case FewerElements:
+ LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
+ return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
+ case MoreElements:
+ LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
+ return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
+ case Custom:
+ LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
+ return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
+ default:
+ LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
+ return UnableToLegalize;
+ }
+}
+
+void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs) {
+ for (int i = 0; i < NumParts; ++i)
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
+ LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverRegs) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned RegSize = RegTy.getSizeInBits();
+ unsigned MainSize = MainTy.getSizeInBits();
+ unsigned NumParts = RegSize / MainSize;
+ unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+ // Use an unmerge when possible.
+ if (LeftoverSize == 0) {
+ for (unsigned I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+ return true;
+ }
+
+ // Perform irregular split. Leftover is last element of RegPieces.
+ if (MainTy.isVector()) {
+ SmallVector<Register, 8> RegPieces;
+ extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
+ for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
+ VRegs.push_back(RegPieces[i]);
+ LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
+ LeftoverTy = MRI.getType(LeftoverRegs[0]);
+ return true;
+ }
+
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ // For irregular sizes, extract the individual parts.
+ for (unsigned I = 0; I != NumParts; ++I) {
+ Register NewReg = MRI.createGenericVirtualRegister(MainTy);
+ VRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+ }
+
+ for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+ Offset += LeftoverSize) {
+ Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+ LeftoverRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, Offset);
+ }
+
+ return true;
+}
+
+void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
+ SmallVectorImpl<Register> &VRegs) {
+ LLT RegTy = MRI.getType(Reg);
+ assert(RegTy.isVector() && "Expected a vector type");
+
+ LLT EltTy = RegTy.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ unsigned RegNumElts = RegTy.getNumElements();
+ unsigned LeftoverNumElts = RegNumElts % NumElts;
+ unsigned NumNarrowTyPieces = RegNumElts / NumElts;
+
+ // Perfect split without leftover
+ if (LeftoverNumElts == 0)
+ return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
+
+  // Irregular split. Provide direct access to all elements for the artifact
+  // combiner by unmerging to individual elements, then build vectors with
+  // NumElts elements each; the remaining element(s) become the leftover.
+ SmallVector<Register, 8> Elts;
+ extractParts(Reg, EltTy, RegNumElts, Elts);
+
+ unsigned Offset = 0;
+ // Requested sub-vectors of NarrowTy.
+ for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
+ VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
+ }
+
+ // Leftover element(s).
+ if (LeftoverNumElts == 1) {
+ VRegs.push_back(Elts[Offset]);
+ } else {
+ LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
+ ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
+ VRegs.push_back(
+ MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
+ }
+}
+
+void LegalizerHelper::insertParts(Register DstReg,
+ LLT ResultTy, LLT PartTy,
+ ArrayRef<Register> PartRegs,
+ LLT LeftoverTy,
+ ArrayRef<Register> LeftoverRegs) {
+ if (!LeftoverTy.isValid()) {
+ assert(LeftoverRegs.empty());
+
+ if (!ResultTy.isVector()) {
+ MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
+ return;
+ }
+
+ if (PartTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, PartRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, PartRegs);
+ return;
+ }
+
+ // Merge sub-vectors with different number of elements and insert into DstReg.
+ if (ResultTy.isVector()) {
+ assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
+ SmallVector<Register, 8> AllRegs;
+ for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
+ AllRegs.push_back(Reg);
+ return mergeMixedSubvectors(DstReg, AllRegs);
+ }
+
+ SmallVector<Register> GCDRegs;
+ LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
+ for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
+ extractGCDType(GCDRegs, GCDTy, PartReg);
+ LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
+ buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
+}
+
+void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
+ Register Reg) {
+ LLT Ty = MRI.getType(Reg);
+ SmallVector<Register, 8> RegElts;
+ extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+ Elts.append(RegElts);
+}
+
+/// Merge \p PartRegs with different types into \p DstReg.
+void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
+ ArrayRef<Register> PartRegs) {
+ SmallVector<Register, 8> AllElts;
+ for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
+ appendVectorElts(AllElts, PartRegs[i]);
+
+ Register Leftover = PartRegs[PartRegs.size() - 1];
+ if (MRI.getType(Leftover).isScalar())
+ AllElts.push_back(Leftover);
+ else
+ appendVectorElts(AllElts, Leftover);
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
+}
+
+/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
+static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
+ const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+
+ const int StartIdx = Regs.size();
+ const int NumResults = MI.getNumOperands() - 1;
+ Regs.resize(Regs.size() + NumResults);
+ for (int I = 0; I != NumResults; ++I)
+ Regs[StartIdx + I] = MI.getOperand(I).getReg();
+}
+
+void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+ LLT GCDTy, Register SrcReg) {
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy == GCDTy) {
+ // If the source already evenly divides the result type, we don't need to do
+ // anything.
+ Parts.push_back(SrcReg);
+ } else {
+ // Need to split into common type sized pieces.
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ getUnmergeResults(Parts, *Unmerge);
+ }
+}
+
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+ LLT NarrowTy, Register SrcReg) {
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+ extractGCDType(Parts, GCDTy, SrcReg);
+ return GCDTy;
+}
+
+LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
+ SmallVectorImpl<Register> &VRegs,
+ unsigned PadStrategy) {
+ LLT LCMTy = getLCMType(DstTy, NarrowTy);
+
+ int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+ int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
+ int NumOrigSrc = VRegs.size();
+
+ Register PadReg;
+
+ // Get a value we can use to pad the source value if the sources won't evenly
+ // cover the result type.
+ if (NumOrigSrc < NumParts * NumSubParts) {
+ if (PadStrategy == TargetOpcode::G_ZEXT)
+ PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
+ else if (PadStrategy == TargetOpcode::G_ANYEXT)
+ PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+ else {
+ assert(PadStrategy == TargetOpcode::G_SEXT);
+
+ // Shift the sign bit of the low register through the high register.
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
+ PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
+ }
+ }
+
+ // Registers for the final merge to be produced.
+ SmallVector<Register, 4> Remerge(NumParts);
+
+ // Registers needed for intermediate merges, which will be merged into a
+ // source for Remerge.
+ SmallVector<Register, 4> SubMerge(NumSubParts);
+
+ // Once we've fully read off the end of the original source bits, we can reuse
+ // the same high bits for remaining padding elements.
+ Register AllPadReg;
+
+ // Build merges to the LCM type to cover the original result type.
+ for (int I = 0; I != NumParts; ++I) {
+ bool AllMergePartsArePadding = true;
+
+ // Build the requested merges to the requested type.
+ for (int J = 0; J != NumSubParts; ++J) {
+ int Idx = I * NumSubParts + J;
+ if (Idx >= NumOrigSrc) {
+ SubMerge[J] = PadReg;
+ continue;
+ }
+
+ SubMerge[J] = VRegs[Idx];
+
+ // There are meaningful bits here we can't reuse later.
+ AllMergePartsArePadding = false;
+ }
+
+ // If we've filled up a complete piece with padding bits, we can directly
+ // emit the natural sized constant if applicable, rather than a merge of
+ // smaller constants.
+ if (AllMergePartsArePadding && !AllPadReg) {
+ if (PadStrategy == TargetOpcode::G_ANYEXT)
+ AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+ else if (PadStrategy == TargetOpcode::G_ZEXT)
+ AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
+
+ // If this is a sign extension, we can't materialize a trivial constant
+ // with the right type and have to produce a merge.
+ }
+
+ if (AllPadReg) {
+ // Avoid creating additional instructions if we're just adding additional
+ // copies of padding bits.
+ Remerge[I] = AllPadReg;
+ continue;
+ }
+
+ if (NumSubParts == 1)
+ Remerge[I] = SubMerge[0];
+ else
+ Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
+
+ // In the sign extend padding case, re-use the first all-signbit merge.
+ if (AllMergePartsArePadding && !AllPadReg)
+ AllPadReg = Remerge[I];
+ }
+
+ VRegs = std::move(Remerge);
+ return LCMTy;
+}
+
+void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
+ ArrayRef<Register> RemergeRegs) {
+ LLT DstTy = MRI.getType(DstReg);
+
+ // Create the merge to the widened source, and extract the relevant bits into
+ // the result.
+
+ if (DstTy == LCMTy) {
+ MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
+ return;
+ }
+
+ auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
+ if (DstTy.isScalar() && LCMTy.isScalar()) {
+ MIRBuilder.buildTrunc(DstReg, Remerge);
+ return;
+ }
+
+ if (LCMTy.isVector()) {
+ unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
+ SmallVector<Register, 8> UnmergeDefs(NumDefs);
+ UnmergeDefs[0] = DstReg;
+ for (unsigned I = 1; I != NumDefs; ++I)
+ UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
+
+ MIRBuilder.buildUnmerge(UnmergeDefs,
+ MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
+ return;
+ }
+
+ llvm_unreachable("unhandled case");
+}
+
+static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
+#define RTLIBCASE_INT(LibcallPrefix) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return RTLIB::LibcallPrefix##32; \
+ case 64: \
+ return RTLIB::LibcallPrefix##64; \
+ case 128: \
+ return RTLIB::LibcallPrefix##128; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
+
+#define RTLIBCASE(LibcallPrefix) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return RTLIB::LibcallPrefix##32; \
+ case 64: \
+ return RTLIB::LibcallPrefix##64; \
+ case 80: \
+ return RTLIB::LibcallPrefix##80; \
+ case 128: \
+ return RTLIB::LibcallPrefix##128; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
+
+ switch (Opcode) {
+ case TargetOpcode::G_MUL:
+ RTLIBCASE_INT(MUL_I);
+ case TargetOpcode::G_SDIV:
+ RTLIBCASE_INT(SDIV_I);
+ case TargetOpcode::G_UDIV:
+ RTLIBCASE_INT(UDIV_I);
+ case TargetOpcode::G_SREM:
+ RTLIBCASE_INT(SREM_I);
+ case TargetOpcode::G_UREM:
+ RTLIBCASE_INT(UREM_I);
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ RTLIBCASE_INT(CTLZ_I);
+ case TargetOpcode::G_FADD:
+ RTLIBCASE(ADD_F);
+ case TargetOpcode::G_FSUB:
+ RTLIBCASE(SUB_F);
+ case TargetOpcode::G_FMUL:
+ RTLIBCASE(MUL_F);
+ case TargetOpcode::G_FDIV:
+ RTLIBCASE(DIV_F);
+ case TargetOpcode::G_FEXP:
+ RTLIBCASE(EXP_F);
+ case TargetOpcode::G_FEXP2:
+ RTLIBCASE(EXP2_F);
+ case TargetOpcode::G_FREM:
+ RTLIBCASE(REM_F);
+ case TargetOpcode::G_FPOW:
+ RTLIBCASE(POW_F);
+ case TargetOpcode::G_FMA:
+ RTLIBCASE(FMA_F);
+ case TargetOpcode::G_FSIN:
+ RTLIBCASE(SIN_F);
+ case TargetOpcode::G_FCOS:
+ RTLIBCASE(COS_F);
+ case TargetOpcode::G_FLOG10:
+ RTLIBCASE(LOG10_F);
+ case TargetOpcode::G_FLOG:
+ RTLIBCASE(LOG_F);
+ case TargetOpcode::G_FLOG2:
+ RTLIBCASE(LOG2_F);
+ case TargetOpcode::G_FLDEXP:
+ RTLIBCASE(LDEXP_F);
+ case TargetOpcode::G_FCEIL:
+ RTLIBCASE(CEIL_F);
+ case TargetOpcode::G_FFLOOR:
+ RTLIBCASE(FLOOR_F);
+ case TargetOpcode::G_FMINNUM:
+ RTLIBCASE(FMIN_F);
+ case TargetOpcode::G_FMAXNUM:
+ RTLIBCASE(FMAX_F);
+ case TargetOpcode::G_FSQRT:
+ RTLIBCASE(SQRT_F);
+ case TargetOpcode::G_FRINT:
+ RTLIBCASE(RINT_F);
+ case TargetOpcode::G_FNEARBYINT:
+ RTLIBCASE(NEARBYINT_F);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ RTLIBCASE(ROUNDEVEN_F);
+ }
+ llvm_unreachable("Unknown libcall function");
+}
+
+/// True if an instruction is in tail position in its caller. Intended for
+/// legalizing libcalls as tail calls when possible.
+static bool isLibCallInTailPosition(MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const Function &F = MBB.getParent()->getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
+ AttributeList CallerAttrs = F.getAttributes();
+ if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
+ .removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
+ .hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
+ CallerAttrs.hasRetAttr(Attribute::SExt))
+ return false;
+
+ // Only tail call if the following instruction is a standard return or if we
+ // have a `thisreturn` callee, and a sequence like:
+ //
+ // G_MEMCPY %0, %1, %2
+ // $x0 = COPY %0
+ // RET_ReallyLR implicit $x0
+ auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
+ if (Next != MBB.instr_end() && Next->isCopy()) {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unsupported opcode");
+ case TargetOpcode::G_BZERO:
+ return false;
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ break;
+ }
+
+ Register VReg = MI.getOperand(0).getReg();
+ if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
+ return false;
+
+ Register PReg = Next->getOperand(0).getReg();
+ if (!PReg.isPhysical())
+ return false;
+
+ auto Ret = next_nodbg(Next, MBB.instr_end());
+ if (Ret == MBB.instr_end() || !Ret->isReturn())
+ return false;
+
+ if (Ret->getNumImplicitOperands() != 1)
+ return false;
+
+ if (PReg != Ret->getOperand(0).getReg())
+ return false;
+
+ // Skip over the COPY that we just validated.
+ Next = Ret;
+ }
+
+ if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
+ return false;
+
+ return true;
+}
+
+LegalizerHelper::LegalizeResult
+llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
+ const CallLowering::ArgInfo &Result,
+ ArrayRef<CallLowering::ArgInfo> Args,
+ const CallingConv::ID CC) {
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = CC;
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = Result;
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
+ return LegalizerHelper::UnableToLegalize;
+
+ return LegalizerHelper::Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
+ const CallLowering::ArgInfo &Result,
+ ArrayRef<CallLowering::ArgInfo> Args) {
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ const char *Name = TLI.getLibcallName(Libcall);
+ const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
+ return createLibcall(MIRBuilder, Name, Result, Args, CC);
+}
+
+// Useful for libcalls where all operands have the same type.
+static LegalizerHelper::LegalizeResult
+simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
+ Type *OpType) {
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+
+ // FIXME: What does the original arg index mean here?
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ Args.push_back({MO.getReg(), OpType, 0});
+ return createLibcall(MIRBuilder, Libcall,
+ {MI.getOperand(0).getReg(), OpType, 0}, Args);
+}
+
+LegalizerHelper::LegalizeResult
+llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstr &MI, LostDebugLocObserver &LocObserver) {
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+ // Add all the args, except for the last which is an imm denoting 'tail'.
+ for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
+ Register Reg = MI.getOperand(i).getReg();
+
+    // Need to derive an IR type for call lowering.
+ LLT OpLLT = MRI.getType(Reg);
+ Type *OpTy = nullptr;
+ if (OpLLT.isPointer())
+ OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+ else
+ OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
+ Args.push_back({Reg, OpTy, 0});
+ }
+
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ RTLIB::Libcall RTLibcall;
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_BZERO:
+ RTLibcall = RTLIB::BZERO;
+ break;
+ case TargetOpcode::G_MEMCPY:
+ RTLibcall = RTLIB::MEMCPY;
+ Args[0].Flags[0].setReturned();
+ break;
+ case TargetOpcode::G_MEMMOVE:
+ RTLibcall = RTLIB::MEMMOVE;
+ Args[0].Flags[0].setReturned();
+ break;
+ case TargetOpcode::G_MEMSET:
+ RTLibcall = RTLIB::MEMSET;
+ Args[0].Flags[0].setReturned();
+ break;
+ default:
+ llvm_unreachable("unsupported opcode");
+ }
+ const char *Name = TLI.getLibcallName(RTLibcall);
+
+ // Unsupported libcall on the target.
+ if (!Name) {
+ LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
+ << MIRBuilder.getTII().getName(Opc) << "\n");
+ return LegalizerHelper::UnableToLegalize;
+ }
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
+ Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
+ isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
+
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
+ return LegalizerHelper::UnableToLegalize;
+
+ if (Info.LoweredTailCall) {
+ assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+
+ // Check debug locations before removing the return.
+ LocObserver.checkpoint(true);
+
+ // We must have a return following the call (or debug insts) to get past
+ // isLibCallInTailPosition.
+ do {
+ MachineInstr *Next = MI.getNextNode();
+ assert(Next &&
+ (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
+ "Expected instr following MI to be return or debug inst?");
+ // We lowered a tail call, so the call is now the return from the block.
+ // Delete the old return.
+ Next->eraseFromParent();
+ } while (MI.getNextNode());
+
+ // We expect to lose the debug location from the return.
+ LocObserver.checkpoint(false);
+ }
+
+ return LegalizerHelper::Legalized;
+}
+
+static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
+ Type *FromType) {
+ auto ToMVT = MVT::getVT(ToType);
+ auto FromMVT = MVT::getVT(FromType);
+
+ switch (Opcode) {
+ case TargetOpcode::G_FPEXT:
+ return RTLIB::getFPEXT(FromMVT, ToMVT);
+ case TargetOpcode::G_FPTRUNC:
+ return RTLIB::getFPROUND(FromMVT, ToMVT);
+ case TargetOpcode::G_FPTOSI:
+ return RTLIB::getFPTOSINT(FromMVT, ToMVT);
+ case TargetOpcode::G_FPTOUI:
+ return RTLIB::getFPTOUINT(FromMVT, ToMVT);
+ case TargetOpcode::G_SITOFP:
+ return RTLIB::getSINTTOFP(FromMVT, ToMVT);
+ case TargetOpcode::G_UITOFP:
+ return RTLIB::getUINTTOFP(FromMVT, ToMVT);
+ }
+ llvm_unreachable("Unsupported libcall function");
+}
+
+static LegalizerHelper::LegalizeResult
+conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
+ Type *FromType) {
+ RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
+ return createLibcall(MIRBuilder, Libcall,
+ {MI.getOperand(0).getReg(), ToType, 0},
+ {{MI.getOperand(1).getReg(), FromType, 0}});
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ Type *HLTy = IntegerType::get(Ctx, Size);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
+ return UnableToLegalize;
+ }
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC: {
+ Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
+ Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
+ if (!FromTy || !ToTy)
+ return UnableToLegalize;
+    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ // FIXME: Support other types
+ unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
+ return UnableToLegalize;
+ LegalizeResult Status = conversionLibcall(
+ MI, MIRBuilder,
+ ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
+ FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ // FIXME: Support other types
+ unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
+ return UnableToLegalize;
+ LegalizeResult Status = conversionLibcall(
+ MI, MIRBuilder,
+ ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
+ FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_BZERO:
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET: {
+ LegalizeResult Result =
+ createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
+ if (Result != Legalized)
+ return Result;
+ MI.eraseFromParent();
+ return Result;
+ }
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
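A final sketch (illustrative, not from this patch) of how an operation ends up on the libcall path above: a hypothetical target marks it Libcall through the standard libcallFor builder method, after which G_FREM on s32/s64 is expanded by simpleLibcall() via getRTLibDesc() (typically to fmodf/fmod).

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Hypothetical target ruleset; real code lives in a <Target>LegalizerInfo
// constructor.
struct MyLibcallRules : public LegalizerInfo {
  MyLibcallRules() {
    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);
    // The action returned for these types is Libcall, which routes the
    // instruction to LegalizerHelper::libcall() above.
    getActionDefinitionsBuilder(TargetOpcode::G_FREM).libcallFor({S32, S64});
    getLegacyLegalizerInfo().computeTables();
  }
};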
+LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ // If SizeOp0 is not an exact multiple of NarrowSize, emit
+ // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
+ // FIXME: Although this would also be legal for the general case, it causes
+ // a lot of regressions in the emitted code (superfluous COPYs, artifact
+ // combines not being hit). This seems to be a problem related to the
+ // artifact combiner.
+ if (SizeOp0 % NarrowSize != 0) {
+ LLT ImplicitTy = NarrowTy;
+ if (DstTy.isVector())
+ ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
+
+ Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
+ MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ int NumParts = SizeOp0 / NarrowSize;
+
+ SmallVector<Register, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i)
+ DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
+
+ if (DstTy.isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const APInt &Val = MI.getOperand(1).getCImm()->getValue();
+ unsigned TotalSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts = TotalSize / NarrowSize;
+
+ SmallVector<Register, 4> PartRegs;
+ for (int I = 0; I != NumParts; ++I) {
+ unsigned Offset = I * NarrowSize;
+ auto K = MIRBuilder.buildConstant(NarrowTy,
+ Val.lshr(Offset).trunc(NarrowSize));
+ PartRegs.push_back(K.getReg(0));
+ }
+
+ LLT LeftoverTy;
+ unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
+ SmallVector<Register, 1> LeftoverRegs;
+ if (LeftoverBits != 0) {
+ LeftoverTy = LLT::scalar(LeftoverBits);
+ auto K = MIRBuilder.buildConstant(
+ LeftoverTy,
+ Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
+ LeftoverRegs.push_back(K.getReg(0));
+ }
+
+ insertParts(MI.getOperand(0).getReg(),
+ Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ return narrowScalarExt(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_TRUNC: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
+ LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
+ return UnableToLegalize;
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
+ MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ case TargetOpcode::G_FREEZE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // Should widen scalar first
+ if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
+ return UnableToLegalize;
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+ SmallVector<Register, 8> Parts;
+ for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
+ Parts.push_back(
+ MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
+ }
+
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_UMULH:
+ return narrowScalarMul(MI, NarrowTy);
+ case TargetOpcode::G_EXTRACT:
+ return narrowScalarExtract(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_INSERT:
+ return narrowScalarInsert(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_LOAD: {
+ auto &LoadMI = cast<GLoad>(MI);
+ Register DstReg = LoadMI.getDstReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
+ MIRBuilder.buildAnyExt(DstReg, TmpReg);
+ LoadMI.eraseFromParent();
+ return Legalized;
+ }
+
+ return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
+ }
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
+ auto &LoadMI = cast<GExtLoad>(MI);
+ Register DstReg = LoadMI.getDstReg();
+ Register PtrReg = LoadMI.getPointerReg();
+
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = LoadMI.getMMO();
+ unsigned MemSize = MMO.getSizeInBits();
+
+ if (MemSize == NarrowSize) {
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ } else if (MemSize < NarrowSize) {
+ MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
+ } else if (MemSize > NarrowSize) {
+ // FIXME: Need to split the load.
+ return UnableToLegalize;
+ }
+
+ if (isa<GZExtLoad>(LoadMI))
+ MIRBuilder.buildZExt(DstReg, TmpReg);
+ else
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+
+ LoadMI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ auto &StoreMI = cast<GStore>(MI);
+
+ Register SrcReg = StoreMI.getValueReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+
+ int NumParts = SizeOp0 / NarrowSize;
+ unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
+ unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
+ if (SrcTy.isVector() && LeftoverBits != 0)
+ return UnableToLegalize;
+
+ if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildTrunc(TmpReg, SrcReg);
+ MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
+ StoreMI.eraseFromParent();
+ return Legalized;
+ }
+
+ return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
+ }
+ case TargetOpcode::G_SELECT:
+ return narrowScalarSelect(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
+ // Legalize bitwise operation:
+ // A = BinOp<Ty> B, C
+ // into:
+ // B1, ..., BN = G_UNMERGE_VALUES B
+ // C1, ..., CN = G_UNMERGE_VALUES C
+ // A1 = BinOp<Ty/N> B1, C1
+ // ...
+ // AN = BinOp<Ty/N> BN, CN
+ // A = G_MERGE_VALUES A1, ..., AN
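+ //
+ // For example (illustrative), narrowing an s64 G_AND with NarrowTy = s32:
+ //   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b:_(s64)
+ //   %c0:_(s32), %c1:_(s32) = G_UNMERGE_VALUES %c:_(s64)
+ //   %a0:_(s32) = G_AND %b0, %c0
+ //   %a1:_(s32) = G_AND %b1, %c1
+ //   %a:_(s64) = G_MERGE_VALUES %a0, %a1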
+ return narrowScalarBasic(MI, TypeIdx, NarrowTy);
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ return narrowScalarShift(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP:
+ if (TypeIdx == 1)
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTPOP:
+ return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
+ default:
+ return UnableToLegalize;
+ }
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INTTOPTR:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRTOINT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PHI: {
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
+ unsigned NumParts = SizeOp0 / NarrowSize;
+ SmallVector<Register, 2> DstRegs(NumParts);
+ SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
+ Observer.changingInstr(MI);
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
+ extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
+ SrcRegs[i / 2]);
+ }
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, MI);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
+ MachineInstrBuilder MIB =
+ MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
+ for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
+ MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
+ }
+ MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
+ Observer.changedInstr(MI);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx != 2)
+ return UnableToLegalize;
+
+ int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, OpIdx);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_ICMP: {
+ Register LHS = MI.getOperand(2).getReg();
+ LLT SrcTy = MRI.getType(LHS);
+ uint64_t SrcSize = SrcTy.getSizeInBits();
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ // TODO: Handle the non-equality case for weird sizes.
+ if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
+ return UnableToLegalize;
+
+ LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+ SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+ if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+ LHSLeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+ SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+ if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+ RHSPartRegs, RHSLeftoverRegs))
+ return UnableToLegalize;
+
+ // We now have the LHS and RHS of the compare split into narrow-type
+ // registers, plus potentially some leftover type.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT ResTy = MRI.getType(Dst);
+ if (ICmpInst::isEquality(Pred)) {
+ // For each part on the LHS and RHS, keep track of the result of XOR-ing
+ // them together. For each equal part, the result should be all 0s. For
+ // each non-equal part, we'll get at least one 1.
+ auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+ SmallVector<Register, 4> Xors;
+ for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
+ Xors.push_back(Xor);
+ }
+
+ // Build a G_XOR for each leftover register. Each G_XOR must be widened
+ // to the desired narrow type so that we can OR them together later.
+ SmallVector<Register, 4> WidenedXors;
+ for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
+ LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
+ buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
+ /* PadStrategy = */ TargetOpcode::G_ZEXT);
+ Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+ }
+
+ // Now, for each part we broke up, we know whether the parts are equal
+ // based on the G_XOR results. OR them all together and compare against
+ // 0 to get the final result.
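+ // For example (illustrative), an s128 equality compare split into s64
+ // parts:
+ //   %x0:_(s64) = G_XOR %lhs0, %rhs0
+ //   %x1:_(s64) = G_XOR %lhs1, %rhs1
+ //   %or:_(s64) = G_OR %x0, %x1
+ //   %dst = G_ICMP eq, %or, 0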
+ assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
+ auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
+ for (unsigned I = 2, E = Xors.size(); I < E; ++I)
+ Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
+ MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
+ } else {
+ // TODO: Handle non-power-of-two types.
+ assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+ assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+ Register LHSL = LHSPartRegs[0];
+ Register LHSH = LHSPartRegs[1];
+ Register RHSL = RHSPartRegs[0];
+ Register RHSH = RHSPartRegs[1];
+ MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
+ MachineInstrBuilder CmpHEQ =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
+ MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
+ ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
+ MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SEXT_INREG: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+
+ // So long as the new type has more bits than the bits we're extending, we
+ // don't need to break it apart.
+ if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
+ Observer.changingInstr(MI);
+ // We don't lose any non-extension bits by truncating the src and
+ // sign-extending the dst.
+ MachineOperand &MO1 = MI.getOperand(1);
+ auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
+ MO1.setReg(TruncMIB.getReg(0));
+
+ MachineOperand &MO2 = MI.getOperand(0);
+ Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildSExt(MO2, DstExt);
+ MO2.setReg(DstExt);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ // Break it apart. Components below the extension point are unmodified. The
+ // component containing the extension point becomes a narrower SEXT_INREG.
+ // Components above it are ashr'd from the component containing the
+ // extension point.
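+ //
+ // For example (illustrative), narrowing an s96 G_SEXT_INREG of 40 bits
+ // with NarrowTy = s32:
+ //   %p0:_(s32), %p1:_(s32), %p2:_(s32) = G_UNMERGE_VALUES %src:_(s96)
+ //   %p1_sext:_(s32) = G_SEXT_INREG %p1, 8
+ //   %p2_sext:_(s32) = G_ASHR %p1_sext, 31
+ //   %dst:_(s96) = G_MERGE_VALUES %p0, %p1_sext, %p2_sext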
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
+
+ // List the registers where the destination will be scattered.
+ SmallVector<Register, 2> DstRegs;
+ // List the registers where the source will be split.
+ SmallVector<Register, 2> SrcRegs;
+
+ // Create all the temporary registers.
+ for (int i = 0; i < NumParts; ++i) {
+ Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+ SrcRegs.push_back(SrcReg);
+ }
+
+ // Explode the big arguments into smaller chunks.
+ MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
+
+ Register AshrCstReg =
+ MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
+ .getReg(0);
+ Register FullExtensionReg = 0;
+ Register PartialExtensionReg = 0;
+
+ // Do the operation on each small part.
+ for (int i = 0; i < NumParts; ++i) {
+ if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
+ DstRegs.push_back(SrcRegs[i]);
+ else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
+ assert(PartialExtensionReg &&
+ "Expected to visit partial extension before full");
+ if (FullExtensionReg) {
+ DstRegs.push_back(FullExtensionReg);
+ continue;
+ }
+ DstRegs.push_back(
+ MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
+ .getReg(0));
+ FullExtensionReg = DstRegs.back();
+ } else {
+ DstRegs.push_back(
+ MIRBuilder
+ .buildInstr(
+ TargetOpcode::G_SEXT_INREG, {NarrowTy},
+ {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
+ .getReg(0));
+ PartialExtensionReg = DstRegs.back();
+ }
+ }
+
+ // Gather the destination registers into the final destination.
+ Register DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_BITREVERSE: {
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ unsigned NumParts = SizeOp0 / NarrowSize;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {SrcRegs[NumParts - 1 - i]});
+ DstRegs.push_back(DstPart.getReg(0));
+ }
+
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
+
+ Observer.changedInstr(MI);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_PTRMASK: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, 2);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI:
+ return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_FPEXT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP:
+ return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
+ }
+}
+
+Register LegalizerHelper::coerceToScalar(Register Val) {
+ LLT Ty = MRI.getType(Val);
+ if (Ty.isScalar())
+ return Val;
+
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ LLT NewTy = LLT::scalar(Ty.getSizeInBits());
+ if (Ty.isPointer()) {
+ if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
+ return Register();
+ return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
+ }
+
+ Register NewVal = Val;
+
+ assert(Ty.isVector());
+ LLT EltTy = Ty.getElementType();
+ if (EltTy.isPointer())
+ NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
+ return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
+}
+
+void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx, unsigned ExtOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
+ MO.setReg(ExtB.getReg(0));
+}
+
+void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
+ MO.setReg(ExtB.getReg(0));
+}
+
+void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx, unsigned TruncOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
+ MO.setReg(DstExt);
+}
+
+void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
+ unsigned OpIdx, unsigned ExtOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
+ MO.setReg(DstTrunc);
+}
+
+void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ Register Dst = MO.getReg();
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MO.setReg(DstExt);
+ MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
+}
+
+void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ SmallVector<Register, 8> Regs;
+ MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
+}
+
+void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
+}
+
+void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register CastDst = MRI.createGenericVirtualRegister(CastTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildBitcast(MO, CastDst);
+ MO.setReg(CastDst);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ LLT SrcTy = MRI.getType(Src1Reg);
+ const int DstSize = DstTy.getSizeInBits();
+ const int SrcSize = SrcTy.getSizeInBits();
+ const int WideSize = WideTy.getSizeInBits();
+ const int NumMerge = (DstSize + WideSize - 1) / WideSize;
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned NumSrc = MI.getNumOperands() - 1;
+ unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
+
+ if (WideSize >= DstSize) {
+ // Directly pack the bits in the target type.
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ const unsigned Offset = (I - 1) * PartSize;
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
+
+ auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
+
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
+ MRI.createGenericVirtualRegister(WideTy);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+ auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ }
+
+ if (WideSize > DstSize)
+ MIRBuilder.buildTrunc(DstReg, ResultReg);
+ else if (DstTy.isPointer())
+ MIRBuilder.buildIntToPtr(DstReg, ResultReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Unmerge the original values to the GCD type, and recombine to the next
+ // multiple greater than the original type.
+ //
+ // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
+ // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
+ // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
+ // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
+ // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
+ // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
+ // %12:_(s12) = G_MERGE_VALUES %10, %11
+ //
+ // Padding with undef if necessary:
+ //
+ // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
+ // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
+ // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
+ // %7:_(s2) = G_IMPLICIT_DEF
+ // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
+ // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
+ // %10:_(s12) = G_MERGE_VALUES %8, %9
+
+ const int GCD = std::gcd(SrcSize, WideSize);
+ LLT GCDTy = LLT::scalar(GCD);
+
+ SmallVector<Register, 8> Parts;
+ SmallVector<Register, 8> NewMergeRegs;
+ SmallVector<Register, 8> Unmerges;
+ LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
+
+ // Decompose the original operands if they don't evenly divide.
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ Register SrcReg = MO.getReg();
+ if (GCD == SrcSize) {
+ Unmerges.push_back(SrcReg);
+ } else {
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
+ Unmerges.push_back(Unmerge.getReg(J));
+ }
+ }
+
+ // Pad with undef to the next size that is a multiple of the requested size.
+ if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
+ Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+ for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
+ Unmerges.push_back(UndefReg);
+ }
+
+ const int PartsPerGCD = WideSize / GCD;
+
+ // Build merges of each piece.
+ ArrayRef<Register> Slicer(Unmerges);
+ for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
+ auto Merge =
+ MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
+ NewMergeRegs.push_back(Merge.getReg(0));
+ }
+
+ // A truncate may be necessary if the requested type doesn't evenly divide the
+ // original result type.
+ if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
+ MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
+ } else {
+ auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
+ MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ int NumDst = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst0Reg);
+ if (!DstTy.isScalar())
+ return UnableToLegalize;
+
+ if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
+ if (SrcTy.isPointer()) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
+ LLVM_DEBUG(
+ dbgs() << "Not casting non-integral address space integer\n");
+ return UnableToLegalize;
+ }
+
+ SrcTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
+ }
+
+ // Widen SrcTy to WideTy. This does not affect the result, but since the
+ // user requested this size, it is probably better handled than SrcTy and
+ // should reduce the total number of legalization artifacts.
+ if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+ SrcTy = WideTy;
+ SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
+ }
+
+ // There's no unmerge type to target. Directly extract the bits from the
+ // source type.
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
+ for (int I = 1; I != NumDst; ++I) {
+ auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
+ auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Extend the source to a wider type.
+ LLT LCMTy = getLCMType(SrcTy, WideTy);
+
+ Register WideSrc = SrcReg;
+ if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
+ // TODO: If this is an integral address space, cast to integer and anyext.
+ if (SrcTy.isPointer()) {
+ LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
+ return UnableToLegalize;
+ }
+
+ WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
+
+ // Create a sequence of unmerges and merges to the original results. Since we
+ // may have widened the source, we will need to pad the results with dead defs
+ // to cover the source register.
+ // e.g. widen s48 to s64:
+ // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
+ //
+ // =>
+ // %4:_(s192) = G_ANYEXT %0:_(s96)
+ // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
+ // ; unpack to GCD type, with extra dead defs
+ // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
+ // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
+ // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
+ // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
+ // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
+ const LLT GCDTy = getGCDType(WideTy, DstTy);
+ const int NumUnmerge = Unmerge->getNumOperands() - 1;
+ const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
+
+ // Directly unmerge to the destination without going through a GCD type
+ // if possible
+ if (PartsPerRemerge == 1) {
+ const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J) {
+ int Idx = I * PartsPerUnmerge + J;
+ if (Idx < NumDst)
+ MIB.addDef(MI.getOperand(Idx).getReg());
+ else {
+ // Create dead def for excess components.
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ }
+ }
+
+ MIB.addUse(Unmerge.getReg(I));
+ }
+ } else {
+ SmallVector<Register, 16> Parts;
+ for (int J = 0; J != NumUnmerge; ++J)
+ extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
+
+ SmallVector<Register, 8> RemergeParts;
+ for (int I = 0; I != NumDst; ++I) {
+ for (int J = 0; J < PartsPerRemerge; ++J) {
+ const int Idx = I * PartsPerRemerge + J;
+ RemergeParts.emplace_back(Parts[Idx]);
+ }
+
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
+ RemergeParts.clear();
+ }
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned Offset = MI.getOperand(2).getImm();
+
+ if (TypeIdx == 0) {
+ if (SrcTy.isVector() || DstTy.isVector())
+ return UnableToLegalize;
+
+ SrcOp Src(SrcReg);
+ if (SrcTy.isPointer()) {
+ // Extracts from pointers can be handled only if they are really just
+ // simple integers.
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+ return UnableToLegalize;
+
+ LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
+ SrcTy = SrcAsIntTy;
+ }
+
+ if (DstTy.isPointer())
+ return UnableToLegalize;
+
+ if (Offset == 0) {
+ // Avoid a shift in the degenerate case.
+ MIRBuilder.buildTrunc(DstReg,
+ MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Do a shift in the source type.
+ LLT ShiftTy = SrcTy;
+ if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+ Src = MIRBuilder.buildAnyExt(WideTy, Src);
+ ShiftTy = WideTy;
+ }
+
+ auto LShr = MIRBuilder.buildLShr(
+ ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
+ MIRBuilder.buildTrunc(DstReg, LShr);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (SrcTy.isScalar()) {
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (!SrcTy.isVector())
+ return UnableToLegalize;
+
+ if (DstTy != SrcTy.getElementType())
+ return UnableToLegalize;
+
+ if (Offset % SrcTy.getScalarSizeInBits() != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+ MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
+ Offset);
+ widenScalarDst(MI, WideTy.getScalarType(), 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 0 || WideTy.isVector())
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ unsigned Opcode;
+ unsigned ExtOpcode;
+ std::optional<Register> CarryIn;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SADDO:
+ Opcode = TargetOpcode::G_ADD;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ break;
+ case TargetOpcode::G_SSUBO:
+ Opcode = TargetOpcode::G_SUB;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ break;
+ case TargetOpcode::G_UADDO:
+ Opcode = TargetOpcode::G_ADD;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ break;
+ case TargetOpcode::G_USUBO:
+ Opcode = TargetOpcode::G_SUB;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ break;
+ case TargetOpcode::G_SADDE:
+ Opcode = TargetOpcode::G_UADDE;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ case TargetOpcode::G_SSUBE:
+ Opcode = TargetOpcode::G_USUBE;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ case TargetOpcode::G_UADDE:
+ Opcode = TargetOpcode::G_UADDE;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ case TargetOpcode::G_USUBE:
+ Opcode = TargetOpcode::G_USUBE;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ }
+
+ if (TypeIdx == 1) {
+ unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
+
+ Observer.changingInstr(MI);
+ if (CarryIn)
+ widenScalarSrc(MI, WideTy, 4, BoolExtOp);
+ widenScalarDst(MI, WideTy, 1);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
+ auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
+ // Do the arithmetic in the larger type.
+ Register NewOp;
+ if (CarryIn) {
+ LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
+ NewOp = MIRBuilder
+ .buildInstr(Opcode, {WideTy, CarryOutTy},
+ {LHSExt, RHSExt, *CarryIn})
+ .getReg(0);
+ } else {
+ NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
+ }
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
+ auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
+ // There is no overflow if the ExtOp is the same as NewOp.
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
+ MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
+ MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT;
+ // We can convert this to:
+ // 1. Any extend iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB|SHL]SAT
+ // 4. L/ASHR by M-N
+ //
+ // It may be more efficient to lower this to a min and a max operation in
+ // the higher precision arithmetic if the promoted operation isn't legal,
+ // but this decision is up to the target's lowering request.
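+ //
+ // For example (illustrative), widening an s8 G_SADDSAT to s32:
+ //   %a:_(s32) = G_ANYEXT %lhs:_(s8)
+ //   %b:_(s32) = G_ANYEXT %rhs:_(s8)
+ //   %sa:_(s32) = G_SHL %a, 24
+ //   %sb:_(s32) = G_SHL %b, 24
+ //   %sat:_(s32) = G_SADDSAT %sa, %sb
+ //   %shr:_(s32) = G_ASHR %sat, 24
+ //   %dst:_(s8) = G_TRUNC %shr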
+ Register DstReg = MI.getOperand(0).getReg();
+
+ unsigned NewBits = WideTy.getScalarSizeInBits();
+ unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
+
+ // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
+ // must not left shift the RHS to preserve the shift amount.
+ auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
+ auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
+ : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
+ auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
+ auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
+ auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
+
+ auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
+ {ShiftL, ShiftR}, MI.getFlags());
+
+ // Use a shift that will preserve the number of sign bits when the trunc is
+ // folded away.
+ auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
+ : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
+
+ MIRBuilder.buildTrunc(DstReg, Result);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
+ auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
+ LLT SrcTy = MRI.getType(LHS);
+ LLT OverflowTy = MRI.getType(OriginalOverflow);
+ unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
+
+ // To determine if the result overflowed in the larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
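+ // For example (illustrative), widening an s16 G_UMULO to s32: both inputs
+ // are zero-extended to s32 and multiplied there; overflow is then detected
+ // by checking that the high 16 bits of the s32 product are zero. The wide
+ // multiply's own overflow bit only needs to be checked when WideTy is
+ // narrower than twice the original width.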
+ unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
+ auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
+
+ auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
+ {LeftOperand, RightOperand});
+ auto Mul = Mulo->getOperand(0);
+ MIRBuilder.buildTrunc(Result, Mul);
+
+ MachineInstrBuilder ExtResult;
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ if (IsSigned) {
+ // For signed, overflow occurred when the high part does not sign-extend
+ // the low part.
+ ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
+ } else {
+ // Unsigned overflow occurred when the high part does not zero-extend the
+ // low part.
+ ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
+ }
+
+ // The multiplication cannot overflow if WideTy is >= 2 * the original
+ // width, so we don't need to check the overflow result of the wider-type
+ // Mulo.
+ if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
+ auto Overflow =
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
+ // Finally check if the multiplication in the larger type itself overflowed.
+ MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
+ } else {
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_SUB:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR:
+ case TargetOpcode::G_ATOMICRMW_MIN:
+ case TargetOpcode::G_ATOMICRMW_MAX:
+ case TargetOpcode::G_ATOMICRMW_UMIN:
+ case TargetOpcode::G_ATOMICRMW_UMAX:
+ assert(TypeIdx == 0 && "atomicrmw with second scalar type");
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ assert(TypeIdx == 1 &&
+ "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_EXTRACT:
+ return widenScalarExtract(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_INSERT:
+ return widenScalarInsert(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_MERGE_VALUES:
+ return widenScalarMergeValues(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ return widenScalarMulo(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_SSHLSAT:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_USHLSAT:
+ return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP: {
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ // First extend the input.
+ unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
+ MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
+ ? TargetOpcode::G_ANYEXT
+ : TargetOpcode::G_ZEXT;
+ auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
+ LLT CurTy = MRI.getType(SrcReg);
+ unsigned NewOpc = MI.getOpcode();
+ if (NewOpc == TargetOpcode::G_CTTZ) {
+ // The count is the same in the larger type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
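+ // For example (illustrative), widening an s8 G_CTTZ to s32: OR the widened
+ // source with 0x100 so that a zero input produces 8, matching the s8
+ // result, and then use G_CTTZ_ZERO_UNDEF.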
+ auto TopBit =
+ APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
+ MIBSrc = MIRBuilder.buildOr(
+ WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+ // Now we know the operand is non-zero, use the more relaxed opcode.
+ NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
+ }
+
+ // Perform the operation at the larger size.
+ auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
+ // This is already the correct result for CTPOP and CTTZs
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
+ MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ // The correct result is NewOp - (difference between WideTy and CurTy).
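+ // For example (illustrative), an s8 G_CTLZ widened to s32 must subtract
+ // 32 - 8 = 24 from the wide count.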
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+ MIBNewOp = MIRBuilder.buildSub(
+ WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
+ }
+
+ MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_BSWAP: {
+ Observer.changingInstr(MI);
+ Register DstReg = MI.getOperand(0).getReg();
+
+ Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+ MI.getOperand(0).setReg(DstExt);
+
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ LLT Ty = MRI.getType(DstReg);
+ unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+ MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
+ MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
+
+ MIRBuilder.buildTrunc(DstReg, ShrReg);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_BITREVERSE: {
+ Observer.changingInstr(MI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ MI.getOperand(0).setReg(DstExt);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
+ auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shift);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FREEZE:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_ABS:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_SUB:
+ // Perform the operation at the larger width (any extension is fine here,
+ // high bits don't affect the result) and then truncate the result back to
+ // the original type.
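+ //
+ // For example (illustrative), widening an s8 G_ADD to s32:
+ //   %a:_(s32) = G_ANYEXT %lhs:_(s8)
+ //   %b:_(s32) = G_ANYEXT %rhs:_(s8)
+ //   %sum:_(s32) = G_ADD %a, %b
+ //   %dst:_(s8) = G_TRUNC %sum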
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
+ }
+
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_SHL:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ assert(TypeIdx == 1);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ }
+
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_SDIVREM:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ widenScalarDst(MI, WideTy);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
+ TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+
+ widenScalarSrc(MI, WideTy, 1, CvtOp);
+ widenScalarDst(MI, WideTy);
+ } else {
+ assert(TypeIdx == 1);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ }
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_UDIVREM:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
+ widenScalarDst(MI, WideTy);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_SELECT:
+ Observer.changingInstr(MI);
+ if (TypeIdx == 0) {
+ // Perform operation at larger width (any extension is fine here, high
+ // bits don't affect the result) and then truncate the result back to the
+ // original type.
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
+ // Explicit extension is required here since high bits affect the result.
+ widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
+ }
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_SITOFP:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_UITOFP:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_STORE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (!Ty.isScalar())
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+
+ unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
+ TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
+ widenScalarSrc(MI, WideTy, 0, ExtType);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ MachineOperand &SrcMO = MI.getOperand(1);
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
+ MRI.getType(MI.getOperand(0).getReg()));
+ assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
+ ExtOpc == TargetOpcode::G_ANYEXT) &&
+ "Illegal Extend");
+ const APInt &SrcVal = SrcMO.getCImm()->getValue();
+ const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
+ ? SrcVal.sext(WideTy.getSizeInBits())
+ : SrcVal.zext(WideTy.getSizeInBits());
+ Observer.changingInstr(MI);
+ SrcMO.setCImm(ConstantInt::get(Ctx, Val));
+
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ // To avoid changing the bits of the constant due to extension to a larger
+ // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
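+ // For example (illustrative), an s16 G_FCONSTANT half 1.0 becomes an s16
+ // G_CONSTANT 0x3C00, whose result is then widened like any other integer
+ // constant.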
+ MachineOperand &SrcMO = MI.getOperand(1);
+ APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
+ widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_BRCOND:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_FCMP:
+ Observer.changingInstr(MI);
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy);
+ else {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
+ }
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_ICMP:
+ Observer.changingInstr(MI);
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy);
+ else {
+ unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
+ MI.getOperand(1).getPredicate()))
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+ widenScalarSrc(MI, WideTy, 2, ExtOpcode);
+ widenScalarSrc(MI, WideTy, 3, ExtOpcode);
+ }
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_PTR_ADD:
+ assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_PHI: {
+ assert(TypeIdx == 0 && "Expecting only Idx 0");
+
+ Observer.changingInstr(MI);
+ for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
+ widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
+ }
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ if (TypeIdx == 0) {
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ Observer.changingInstr(MI);
+
+ widenScalarSrc(
+ MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
+ TargetOpcode::G_ANYEXT);
+
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx != 2)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ // TODO: Probably should be zext
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
+
+ widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideVecTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 2) {
+ Observer.changingInstr(MI);
+ // TODO: Probably should be zext
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+ }
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ assert(TypeIdx == 0);
+ Observer.changingInstr(MI);
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
+
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_FPOWI:
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP: {
+ if (TypeIdx == 0) {
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 1) {
+ // For some reason SelectionDAG tries to promote to a libcall without
+ // actually changing the integer type for promotion.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+ }
+ case TargetOpcode::G_FFREXP: {
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ } else {
+ widenScalarDst(MI, WideTy, 1);
+ }
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRTOINT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ Observer.changingInstr(MI);
+
+ const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+ widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
+
+ // Avoid changing the result vector type if the source element type was
+ // requested.
+ if (TypeIdx == 1) {
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+ } else {
+ widenScalarDst(MI, WideTy, 0);
+ }
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_SEXT_INREG:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRMASK: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ }
+}
+
+static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
+ MachineIRBuilder &B, Register Src, LLT Ty) {
+ auto Unmerge = B.buildUnmerge(Ty, Src);
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ Pieces.push_back(Unmerge.getReg(I));
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFConstant(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+ LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ Align Alignment = Align(DL.getABITypeAlign(
+ getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
+
+ auto Addr = MIRBuilder.buildConstantPool(
+ AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
+ MI.getOperand(1).getFPImm(), Alignment));
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+ MRI.getType(Dst), Alignment);
+
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ if (SrcTy.isVector()) {
+ LLT SrcEltTy = SrcTy.getElementType();
+ SmallVector<Register, 8> SrcRegs;
+
+ if (DstTy.isVector()) {
+ int NumDstElt = DstTy.getNumElements();
+ int NumSrcElt = SrcTy.getNumElements();
+
+ LLT DstEltTy = DstTy.getElementType();
+ LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
+ LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
+
+ // If there's an element size mismatch, insert intermediate casts to match
+ // the result element type.
+ if (NumSrcElt < NumDstElt) { // Source element type is larger.
+ // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
+ //
+ // =>
+ //
+ // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
+ // %4:_(<2 x s8>) = G_BITCAST %2
+ // %5:_(<2 x s8>) = G_BITCAST %3
+ // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
+ DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
+ SrcPartTy = SrcEltTy;
+ } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
+ //
+ // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
+ //
+ // =>
+ //
+ // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
+ // %4:_(s16) = G_BITCAST %2
+ // %5:_(s16) = G_BITCAST %3
+ // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
+ SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
+ DstCastTy = DstEltTy;
+ }
+
+ getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
+ for (Register &SrcReg : SrcRegs)
+ SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
+ } else
+ getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
+
+ MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (DstTy.isVector()) {
+ SmallVector<Register, 8> SrcRegs;
+ getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
+ MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+/// Figure out the bit offset into a register when coercing a vector index for
+/// the wide element type. This is only for the case when promoting a vector to
+/// one with larger elements.
+///
+/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
+/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
+static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
+ Register Idx,
+ unsigned NewEltSize,
+ unsigned OldEltSize) {
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ LLT IdxTy = B.getMRI()->getType(Idx);
+
+ // Now figure out the amount we need to shift to get the target bits.
+ auto OffsetMask = B.buildConstant(
+ IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
+ auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
+ return B.buildShl(IdxTy, OffsetIdx,
+ B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
+}
+
+/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
+/// is casting to a vector with a smaller element size, perform multiple element
+/// extracts and merge the results. If this is coercing to a vector with larger
+/// elements, index the bitcasted vector and extract the target element with bit
+/// operations. This is intended to force the indexing in the native register
+/// size for architectures that can dynamically index the register file.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
+
+ LLT SrcEltTy = SrcVecTy.getElementType();
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
+ unsigned OldNumElts = SrcVecTy.getNumElements();
+
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
+
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = SrcEltTy.getSizeInBits();
+ if (NewNumElts > OldNumElts) {
+ // Decreasing the vector element size
+ //
+ // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1))))
+ //
+ if (NewNumElts % OldNumElts != 0)
+ return UnableToLegalize;
+
+ // Type of the intermediate result vector.
+ const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
+ LLT MidTy =
+ LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
+
+ auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
+
+ SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
+ auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
+
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
+ auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
+ auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
+ NewOps[I] = Elt.getReg(0);
+ }
+
+ auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
+ MIRBuilder.buildBitcast(Dst, NewVec);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ // Increasing the vector element size.
+ // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
+ //
+ // =>
+ //
+ // %cast = G_BITCAST %vec
+ // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
+ // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
+ // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
+ // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
+ // %elt_bits = G_LSHR %wide_elt, %offset_bits
+ // %elt = G_TRUNC %elt_bits
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register WideElt = CastVec;
+ if (CastTy.isVector()) {
+ WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ // Shift the wide element to get the target element.
+ auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
+ MIRBuilder.buildTrunc(Dst, ExtractedBits);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+/// Emit code to insert \p InsertReg into \p TargetReg at bit offset
+/// \p OffsetBits, while preserving the other bits in \p TargetReg:
+///
+/// (ZExt(InsertReg) << Offset) |
+///   (TargetReg & ~(LowBitsMask(InsertReg.size()) << Offset))
+static Register buildBitFieldInsert(MachineIRBuilder &B,
+ Register TargetReg, Register InsertReg,
+ Register OffsetBits) {
+ LLT TargetTy = B.getMRI()->getType(TargetReg);
+ LLT InsertTy = B.getMRI()->getType(InsertReg);
+ auto ZextVal = B.buildZExt(TargetTy, InsertReg);
+ auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
+
+ // Produce a bitmask of the value to insert
+ auto EltMask = B.buildConstant(
+ TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
+ InsertTy.getSizeInBits()));
+ // Shift it into position
+ auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
+ auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
+
+ // Clear out the bits in the wide element
+ auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
+
+ // The value to insert has all zeros already, so stick it into the masked
+ // wide element.
+ return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
+}
+
+/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
+/// is increasing the element size, perform the indexing in the target element
+/// type, and use bit operations to insert at the element position. This is
+/// intended for architectures that can dynamically index the register file and
+/// want to force indexing in the native register size.
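+///
+/// Illustrative sketch (the types are examples only): inserting an s8 element
+/// into <8 x s8> with CastTy = <2 x s32> bitcasts the vector to <2 x s32>,
+/// extracts the containing s32 element at index (idx / 4), replaces the byte
+/// at bit offset ((idx % 4) * 8) using shift/and/or, re-inserts the s32
+/// element, and bitcasts the result back to <8 x s8>.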
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
+ MI.getFirst4RegLLTs();
+ LLT VecTy = DstTy;
+
+ LLT VecEltTy = VecTy.getElementType();
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = VecEltTy.getSizeInBits();
+
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
+ unsigned OldNumElts = VecTy.getNumElements();
+
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register ExtractedElt = CastVec;
+ if (CastTy.isVector()) {
+ ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
+ Val, OffsetBits);
+ if (CastTy.isVector()) {
+ InsertedElt = MIRBuilder.buildInsertVectorElement(
+ CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
+ }
+
+ MIRBuilder.buildBitcast(Dst, InsertedElt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
+ // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
+ Register DstReg = LoadMI.getDstReg();
+ Register PtrReg = LoadMI.getPointerReg();
+ LLT DstTy = MRI.getType(DstReg);
+ MachineMemOperand &MMO = LoadMI.getMMO();
+ LLT MemTy = MMO.getMemoryType();
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
+
+ if (MemSizeInBits != MemStoreSizeInBits) {
+ if (MemTy.isVector())
+ return UnableToLegalize;
+
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
+
+ Register LoadReg = DstReg;
+ LLT LoadTy = DstTy;
+
+ // If this wasn't already an extending load, we need to widen the result
+ // register to avoid creating a load with a narrower result than the source.
+ if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
+ LoadTy = WideMemTy;
+ LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
+ }
+
+ if (isa<GSExtLoad>(LoadMI)) {
+ auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+ MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
+ } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
+ auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from Wide thus automatically gives zext from MemVT.
+ MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
+ } else {
+ MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
+ }
+
+ if (DstTy != LoadTy)
+ MIRBuilder.buildTrunc(DstReg, LoadReg);
+
+ LoadMI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Big endian lowering not implemented.
+ if (MIRBuilder.getDataLayout().isBigEndian())
+ return UnableToLegalize;
+
+ // This load needs splitting into power of 2 sized loads.
+ //
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ // This load needs splitting into power of 2 sized loads.
+ LargeSplitSize = llvm::bit_floor(MemSizeInBits);
+ SmallSplitSize = MemSizeInBits - LargeSplitSize;
+ } else {
+ // This is already a power of 2, but we still need to split this in half.
+ //
+ // Assume we're being asked to decompose an unaligned load.
+ // TODO: If this requires multiple splits, handle them all at once.
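+ // e.g. (illustrative) an s32 load with insufficient alignment is split
+ // below into two s16 halves that are recombined with a shift and an or.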
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize;
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
+
+ if (MemTy.isVector()) {
+ // TODO: Handle vector extloads
+ if (MemTy != DstTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
+ }
+
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
+ PtrReg, *LargeMMO);
+
+ auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
+ LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
+ SmallPtr, *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+
+ if (AnyExtTy == DstTy)
+ MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
+ else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or});
+ } else {
+ assert(DstTy.isPointer() && "expected pointer");
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+
+ // FIXME: We currently consider this to be illegal for non-integral address
+ // spaces, but we still need a way to reinterpret the bits.
+ MIRBuilder.buildIntToPtr(DstReg, Or);
+ }
+
+ LoadMI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
+ // Lower a non-power of 2 store into multiple pow-2 stores.
+ // E.g. split an i24 store into an i16 store + i8 store.
+ // We do this by first extending the stored value to the next largest power
+ // of 2 type, and then using truncating stores to store the components.
+ // By doing this, as with G_LOAD, we generate an extend that can be
+ // artifact-combined away instead of leaving behind extracts.
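+ //
+ // e.g. (illustrative) a 3-byte store of %val:_(s24) to %ptr becomes roughly:
+ //   %ext:_(s32) = G_ANYEXT %val
+ //   G_STORE %ext, %ptr :: (store (s16))
+ //   %hi:_(s32) = G_LSHR %ext, 16
+ //   %ptr2:_(p0) = G_PTR_ADD %ptr, 2
+ //   G_STORE %hi, %ptr2 :: (store (s8))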
+ Register SrcReg = StoreMI.getValueReg();
+ Register PtrReg = StoreMI.getPointerReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand &MMO = **StoreMI.memoperands_begin();
+ LLT MemTy = MMO.getMemoryType();
+
+ unsigned StoreWidth = MemTy.getSizeInBits();
+ unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
+
+ if (StoreWidth != StoreSizeInBits) {
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ LLT WideTy = LLT::scalar(StoreSizeInBits);
+
+ if (StoreSizeInBits > SrcTy.getSizeInBits()) {
+ // Avoid creating a store with a narrower source than result.
+ SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
+ SrcTy = WideTy;
+ }
+
+ auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
+
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
+ MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
+ StoreMI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (MemTy.isVector()) {
+ // TODO: Handle vector trunc stores
+ if (MemTy != SrcTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
+ }
+
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
+ SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+ } else {
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
+
+ // Extend to the next pow-2. If this store was itself the result of lowering,
+ // e.g. an s56 store being broken into s32 + s24, we might have a stored type
+ // that's wider than the stored size.
+ unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
+ const LLT NewSrcTy = LLT::scalar(AnyExtSize);
+
+ if (SrcTy.isPointer()) {
+ const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
+ }
+
+ auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
+
+ // Obtain the smaller value by shifting away the larger value.
+ auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
+
+ // Generate the PtrAdd and truncating stores.
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
+
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
+ MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
+ StoreMI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Not sure how to interpret a bitcast of an extending load.
+ if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ bitcastDst(MI, CastTy, 0);
+ MMO.setType(CastTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Not sure how to interpret a bitcast of a truncating store.
+ if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ bitcastSrc(MI, CastTy, 0);
+ MMO.setType(CastTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_SELECT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
+ LLVM_DEBUG(
+ dbgs() << "bitcast action not implemented for vector select\n");
+ return UnableToLegalize;
+ }
+
+ Observer.changingInstr(MI);
+ bitcastSrc(MI, CastTy, 2);
+ bitcastSrc(MI, CastTy, 3);
+ bitcastDst(MI, CastTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
+ Observer.changingInstr(MI);
+ bitcastSrc(MI, CastTy, 1);
+ bitcastSrc(MI, CastTy, 2);
+ bitcastDst(MI, CastTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
+ default:
+ return UnableToLegalize;
+ }
+}
+
+// Legalize an instruction by changing the opcode in place.
+void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
+ Observer.changedInstr(MI);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
+ using namespace TargetOpcode;
+
+ switch(MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_FCONSTANT:
+ return lowerFConstant(MI);
+ case TargetOpcode::G_BITCAST:
+ return lowerBitcast(MI);
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
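+ // rem = x - (x / y) * y, using the signed or unsigned division as
+ // appropriate.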
+ auto Quot =
+ MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
+ {MI.getOperand(1), MI.getOperand(2)});
+
+ auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
+ MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ return lowerSADDO_SSUBO(MI);
+ case TargetOpcode::G_UMULH:
+ case TargetOpcode::G_SMULH:
+ return lowerSMULH_UMULH(MI);
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
+ // result.
+ auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
+ LLT Ty = MRI.getType(Res);
+
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
+ ? TargetOpcode::G_SMULH
+ : TargetOpcode::G_UMULH;
+
+ Observer.changingInstr(MI);
+ const auto &TII = MIRBuilder.getTII();
+ MI.setDesc(TII.get(TargetOpcode::G_MUL));
+ MI.removeOperand(1);
+ Observer.changedInstr(MI);
+
+ auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+
+ // Move insert point forward so we can use the Res register if needed.
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ // For *signed* multiply, overflow is detected by checking:
+ // (hi != (lo >> bitwidth-1))
+ if (Opcode == TargetOpcode::G_SMULH) {
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
+ auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
+ } else {
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
+ }
+ return Legalized;
+ }
+ case TargetOpcode::G_FNEG: {
+ auto [Res, SubByReg] = MI.getFirst2Regs();
+ LLT Ty = MRI.getType(Res);
+
+ // TODO: Handle vector types once we are able to
+ // represent them.
+ if (Ty.isVector())
+ return UnableToLegalize;
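+ // fneg(x) = x ^ sign-mask: flipping only the sign bit negates an IEEE
+ // float without otherwise touching the value.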
+ auto SignMask =
+ MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
+ MIRBuilder.buildXor(Res, SubByReg, SignMask);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_STRICT_FSUB: {
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Res);
+
+ // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
+ auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
+
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
+ MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
+ else
+ MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FMAD:
+ return lowerFMad(MI);
+ case TargetOpcode::G_FFLOOR:
+ return lowerFFloor(MI);
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return lowerIntrinsicRound(MI);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ // Since round even is the assumed rounding mode for unconstrained FP
+ // operations, rint and roundeven are the same operation.
+ changeOpcode(MI, TargetOpcode::G_FRINT);
+ return Legalized;
+ }
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
+ MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
+ **MI.memoperands_begin());
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return lowerLoad(cast<GAnyLoad>(MI));
+ case TargetOpcode::G_STORE:
+ return lowerStore(cast<GStore>(MI));
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTPOP:
+ return lowerBitCount(MI);
+ case G_UADDO: {
+ auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
+
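+ // res = lhs + rhs; the carry-out is set when the unsigned addition wraps,
+ // i.e. when res < rhs.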
+ MIRBuilder.buildAdd(Res, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_UADDE: {
+ auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
+ LLT Ty = MRI.getType(Res);
+
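+ // res = lhs + rhs + zext(carry-in); the carry-out here is computed as
+ // res < lhs (unsigned).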
+ auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
+ auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
+ MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_USUBO: {
+ auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
+
+ MIRBuilder.buildSub(Res, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_USUBE: {
+ auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
+ const LLT CondTy = MRI.getType(BorrowOut);
+ const LLT Ty = MRI.getType(Res);
+
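+ // res = lhs - rhs - zext(borrow-in); the borrow-out is borrow-in when
+ // lhs == rhs, and otherwise lhs < rhs (unsigned).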
+ auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
+ auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
+ MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
+
+ auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
+ auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
+ MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_UITOFP:
+ return lowerUITOFP(MI);
+ case G_SITOFP:
+ return lowerSITOFP(MI);
+ case G_FPTOUI:
+ return lowerFPTOUI(MI);
+ case G_FPTOSI:
+ return lowerFPTOSI(MI);
+ case G_FPTRUNC:
+ return lowerFPTRUNC(MI);
+ case G_FPOWI:
+ return lowerFPOWI(MI);
+ case G_SMIN:
+ case G_SMAX:
+ case G_UMIN:
+ case G_UMAX:
+ return lowerMinMax(MI);
+ case G_FCOPYSIGN:
+ return lowerFCopySign(MI);
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ return lowerFMinNumMaxNum(MI);
+ case G_MERGE_VALUES:
+ return lowerMergeValues(MI);
+ case G_UNMERGE_VALUES:
+ return lowerUnmergeValues(MI);
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(MI.getOperand(2).isImm() && "Expected immediate");
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
+ LLT DstTy = MRI.getType(DstReg);
+ Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
+
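+ // Lower to a pair of shifts: shift left so that the sign bit of the
+ // SizeInBits-wide value lands in the MSB, then arithmetic-shift right by
+ // the same amount to replicate it.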
+ auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
+ MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
+ MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return lowerExtractInsertVectorElt(MI);
+ case G_SHUFFLE_VECTOR:
+ return lowerShuffleVector(MI);
+ case G_DYN_STACKALLOC:
+ return lowerDynStackAlloc(MI);
+ case G_EXTRACT:
+ return lowerExtract(MI);
+ case G_INSERT:
+ return lowerInsert(MI);
+ case G_BSWAP:
+ return lowerBswap(MI);
+ case G_BITREVERSE:
+ return lowerBitreverse(MI);
+ case G_READ_REGISTER:
+ case G_WRITE_REGISTER:
+ return lowerReadWriteRegister(MI);
+ case G_UADDSAT:
+ case G_USUBSAT: {
+ // Try to make a reasonable guess about which lowering strategy to use. The
+ // target can override this by requesting custom lowering and calling the
+ // implementation functions directly.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (LI.isLegalOrCustom({G_UMIN, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SADDSAT:
+ case G_SSUBSAT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ // FIXME: It would probably make more sense to see if G_SADDO is preferred,
+ // since it's a shorter expansion. However, we would need to figure out the
+ // preferred boolean type for the carry out for the query.
+ if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SSHLSAT:
+ case G_USHLSAT:
+ return lowerShlSat(MI);
+ case G_ABS:
+ return lowerAbsToAddXor(MI);
+ case G_SELECT:
+ return lowerSelect(MI);
+ case G_IS_FPCLASS:
+ return lowerISFPCLASS(MI);
+ case G_SDIVREM:
+ case G_UDIVREM:
+ return lowerDIVREM(MI);
+ case G_FSHL:
+ case G_FSHR:
+ return lowerFunnelShift(MI);
+ case G_ROTL:
+ case G_ROTR:
+ return lowerRotate(MI);
+ case G_MEMSET:
+ case G_MEMCPY:
+ case G_MEMMOVE:
+ return lowerMemCpyFamily(MI);
+ case G_MEMCPY_INLINE:
+ return lowerMemcpyInline(MI);
+ GISEL_VECREDUCE_CASES_NONSEQ
+ return lowerVectorReduction(MI);
+ }
+}
+
+Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
+ Align MinAlign) const {
+ // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
+ // datalayout for the preferred alignment. Also there should be a target hook
+ // for this to allow targets to reduce the alignment and ignore the
+ // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
+ // the type.
+ return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
+}
+
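+/// Create a stack temporary of \p Bytes bytes with at least \p Alignment and
+/// return a G_FRAME_INDEX pointing at it. \p PtrInfo is set to the fixed-stack
+/// pointer info for the new frame index.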
+MachineInstrBuilder
+LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
+ MachinePointerInfo &PtrInfo) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
+
+ unsigned AddrSpace = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+
+ PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
+ return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
+}
+
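+/// Clamp a dynamic vector index into the valid range [0, NumElts - 1] so that
+/// the scaled address computed from it stays in bounds: a mask for a
+/// power-of-2 element count, otherwise an unsigned min.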
+static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
+ LLT VecTy) {
+ int64_t IdxVal;
+ if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
+ return IdxReg;
+
+ LLT IdxTy = B.getMRI()->getType(IdxReg);
+ unsigned NElts = VecTy.getNumElements();
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
+ return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
+ }
+
+ return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
+ .getReg(0);
+}
+
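+/// Compute the address of element \p Index of the in-memory vector of type
+/// \p VecTy starting at \p VecPtr: clamp the index, scale it by the element's
+/// size in bytes, and add the result to the base pointer.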
+Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
+ Register Index) {
+ LLT EltTy = VecTy.getElementType();
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltTy.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
+
+ LLT IdxTy = MRI.getType(Index);
+ auto Mul = MIRBuilder.buildMul(IdxTy, Index,
+ MIRBuilder.buildConstant(IdxTy, EltSize));
+
+ LLT PtrTy = MRI.getType(VecPtr);
+ return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
+}
+
+#ifndef NDEBUG
+/// Check that all vector operands have same number of elements. Other operands
+/// should be listed in NonVecOp.
+static bool hasSameNumEltsOnAllVectorOperands(
+ GenericMachineInstr &MI, MachineRegisterInfo &MRI,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ if (MI.getNumMemOperands() != 0)
+ return false;
+
+ LLT VecTy = MRI.getType(MI.getReg(0));
+ if (!VecTy.isVector())
+ return false;
+ unsigned NumElts = VecTy.getNumElements();
+
+ for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
+
+ LLT Ty = MRI.getType(Op.getReg());
+ if (!Ty.isVector()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
+
+ if (Ty.getNumElements() != NumElts)
+ return false;
+ }
+
+ return true;
+}
+#endif
+
+/// Fill \p DstOps with DstOps that, combined, have the same number of elements
+/// as \p Ty. These DstOps are either scalars when \p NumElts = 1 or vectors
+/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
+/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
+static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
+ unsigned NumElts) {
+ LLT LeftoverTy;
+ assert(Ty.isVector() && "Expected vector type");
+ LLT EltTy = Ty.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ int NumParts, NumLeftover;
+ std::tie(NumParts, NumLeftover) =
+ getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
+
+ assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
+ for (int i = 0; i < NumParts; ++i) {
+ DstOps.push_back(NarrowTy);
+ }
+
+ if (LeftoverTy.isValid()) {
+ assert(NumLeftover == 1 && "expected exactly one leftover");
+ DstOps.push_back(LeftoverTy);
+ }
+}
+
+/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
+/// made from \p Op, depending on the operand type.
+static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
+ MachineOperand &Op) {
+ for (unsigned i = 0; i < N; ++i) {
+ if (Op.isReg())
+ Ops.push_back(Op.getReg());
+ else if (Op.isImm())
+ Ops.push_back(Op.getImm());
+ else if (Op.isPredicate())
+ Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
+ else
+ llvm_unreachable("Unsupported type");
+ }
+}
+
+// Handle splitting vector operations which need to have the same number of
+// elements in each type index, but each type index may have a different element
+// type.
+//
+// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+//
+// Also handles some irregular breakdown cases, e.g.
+// <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+// s64 = G_SHL s64, s32
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorMultiEltType(
+ GenericMachineInstr &MI, unsigned NumElts,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
+ "Non-compatible opcode or not specified non-vector operands");
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
+
+ // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
+ // Build instructions with DstOps so that an instruction found by CSE can be
+ // reused directly. CSE copies the found instruction into the given vreg when
+ // building with a vreg destination.
+ SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
+ // Output registers will be taken from created instructions.
+ SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
+ for (unsigned i = 0; i < NumDefs; ++i) {
+ makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
+ }
+
+ // Split vector input operands into sub-vectors with NumElts elts + Leftover.
+ // Operands listed in NonVecOpIndices will be used as is without splitting;
+ // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
+ // scalar condition (op 1), immediate in sext_inreg (op 2).
+ SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ ++UseIdx, ++UseNo) {
+ if (is_contained(NonVecOpIndices, UseIdx)) {
+ broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
+ MI.getOperand(UseIdx));
+ } else {
+ SmallVector<Register, 8> SplitPieces;
+ extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+ for (auto Reg : SplitPieces)
+ InputOpsPieces[UseNo].push_back(Reg);
+ }
+ }
+
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
+
+ // Take i-th piece of each input operand split and build sub-vector/scalar
+ // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ SmallVector<DstOp, 2> Defs;
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ Defs.push_back(OutputOpsPieces[DstNo][i]);
+
+ SmallVector<SrcOp, 3> Uses;
+ for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
+ Uses.push_back(InputOpsPieces[InputNo][i]);
+
+ auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ OutputRegs[DstNo].push_back(I.getReg(DstNo));
+ }
+
+ // Merge small outputs into MI's output for each def operand.
+ if (NumLeftovers) {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
+ } else {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
+ unsigned NumElts) {
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
+
+ SmallVector<DstOp, 8> OutputOpsPieces;
+ SmallVector<Register, 8> OutputRegs;
+ makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
+
+ // Instructions that perform register split will be inserted in basic block
+ // where register is defined (basic block is in the next operand).
+ SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ UseIdx += 2, ++UseNo) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
+ extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
+ }
+
+ // Build PHIs with fewer elements.
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
+ MIRBuilder.setInsertPt(*MI.getParent(), MI);
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
+ Phi.addDef(
+ MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
+ OutputRegs.push_back(Phi.getReg(0));
+
+ for (unsigned j = 0; j < NumInputs / 2; ++j) {
+ Phi.addUse(InputOpsPieces[j][i]);
+ Phi.add(MI.getOperand(1 + j * 2 + 1));
+ }
+ }
+
+ // Merge small outputs into MI's def.
+ if (NumLeftovers) {
+ mergeMixedSubvectors(MI.getReg(0), OutputRegs);
+ } else {
+ MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ const int NumDst = MI.getNumOperands() - 1;
+ const Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ if (TypeIdx != 1 || NarrowTy == DstTy)
+ return UnableToLegalize;
+
+ // Requires compatible types. Otherwise SrcReg should have been defined by a
+ // merge-like instruction that would get artifact-combined. Most likely the
+ // instruction that defines SrcReg has to perform more/fewer-elements
+ // legalization compatible with NarrowTy.
+ assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+
+ if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
+ return UnableToLegalize;
+
+ // This is most likely DstTy (smaller than register size) packed in SrcTy
+ // (larger than register size), and since the unmerge was not combined it will
+ // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
+ // NarrowTy (register size) pieces first, then unpack each NarrowTy piece into
+ // DstTy pieces.
+
+ // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
+ //
+ // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
+ // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
+ // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
+ const int NumUnmerge = Unmerge->getNumOperands() - 1;
+ const int PartsPerUnmerge = NumDst / NumUnmerge;
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J)
+ MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
+ MIB.addUse(Unmerge.getReg(I));
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ // Requires compatible types. Otherwise the user of DstReg did not perform an
+ // unmerge that should have been artifact-combined. Most likely the instruction
+ // that uses DstReg has to do more/fewer-elements legalization compatible with
+ // NarrowTy.
+ assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if (NarrowTy == SrcTy)
+ return UnableToLegalize;
+
+ // This attempts to lower part of an LCMTy merge/unmerge sequence. Its
+ // intended use is for old MIR tests. Since the changes to more/fewer-elements
+ // legalization, it should no longer be possible to generate MIR like this
+ // when starting from LLVM IR, because the LCMTy approach was replaced with
+ // merge/unmerge to vector elements.
+ if (TypeIdx == 1) {
+ assert(SrcTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
+ return UnableToLegalize;
+ // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
+ //
+ // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
+ // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
+ // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
+ // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
+ // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
+ // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
+
+ SmallVector<Register, 8> Elts;
+ LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
+ for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
+ auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
+ for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
+ Elts.push_back(Unmerge.getReg(j));
+ }
+
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumNarrowTyElts = NarrowTy.getNumElements();
+ unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
+ for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
+ ++i, Offset += NumNarrowTyElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
+ NarrowTyElts.push_back(
+ MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
+ }
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ assert(TypeIdx == 0 && "Bad type index");
+ if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
+ (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
+ return UnableToLegalize;
+
+ // This is most likely SrcTy (smaller than register size) packed in DstTy
+ // (larger than register size), and since the merge was not combined it will
+ // be lowered to bit-sequence packing into a register. Merge SrcTy into
+ // NarrowTy (register size) pieces first, then merge each NarrowTy piece into
+ // DstTy.
+
+ // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
+ //
+ // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
+ // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
+ // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+ unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
+ unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
+ for (unsigned i = 0; i < NumParts; ++i) {
+ SmallVector<Register, 8> Sources;
+ for (unsigned j = 0; j < NumElts; ++j)
+ Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
+ NarrowTyElts.push_back(
+ MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
+ }
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowVecTy) {
+ auto [DstReg, SrcVec] = MI.getFirst2Regs();
+ Register InsertVal;
+ bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+
+ assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
+ if (IsInsert)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ // TODO: Handle total scalarization case.
+ if (!NarrowVecTy.isVector())
+ return UnableToLegalize;
+
+ LLT VecTy = MRI.getType(SrcVec);
+
+ // If the index is a constant, we can really break this down as you would
+ // expect, and index into the target size pieces.
+ int64_t IdxVal;
+ auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
+ if (MaybeCst) {
+ IdxVal = MaybeCst->Value.getSExtValue();
+ // Avoid out of bounds indexing the pieces.
+ if (IdxVal >= VecTy.getNumElements()) {
+ MIRBuilder.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ SmallVector<Register, 8> VecParts;
+ LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+
+ // Build a sequence of NarrowTy pieces in VecParts for this operand.
+ LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+ TargetOpcode::G_ANYEXT);
+
+ unsigned NewNumElts = NarrowVecTy.getNumElements();
+
+ LLT IdxTy = MRI.getType(Idx);
+ int64_t PartIdx = IdxVal / NewNumElts;
+ auto NewIdx =
+ MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
+
+ if (IsInsert) {
+ LLT PartTy = MRI.getType(VecParts[PartIdx]);
+
+ // Use the adjusted index to insert into one of the subvectors.
+ auto InsertPart = MIRBuilder.buildInsertVectorElement(
+ PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+ VecParts[PartIdx] = InsertPart.getReg(0);
+
+ // Recombine the inserted subvector with the others to reform the result
+ // vector.
+ buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+ } else {
+ MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // With a variable index, we can't perform the operation in a smaller type, so
+ // we're forced to expand this.
+ //
+ // TODO: We could emit a chain of compare/select to figure out which piece to
+ // index.
+ return lowerExtractInsertVectorElt(MI);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ // This implementation doesn't work for atomics. Give up instead of doing
+ // something invalid.
+ if (LdStMI.isAtomic())
+ return UnableToLegalize;
+
+ bool IsLoad = isa<GLoad>(LdStMI);
+ Register ValReg = LdStMI.getReg(0);
+ Register AddrReg = LdStMI.getPointerReg();
+ LLT ValTy = MRI.getType(ValReg);
+
+ // FIXME: Do we need a distinct NarrowMemory legalize action?
+ if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
+ LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
+ return UnableToLegalize;
+ }
+
+ int NumParts = -1;
+ int NumLeftover = -1;
+ LLT LeftoverTy;
+ SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
+ if (IsLoad) {
+ std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+ } else {
+ if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+ NarrowLeftoverRegs)) {
+ NumParts = NarrowRegs.size();
+ NumLeftover = NarrowLeftoverRegs.size();
+ }
+ }
+
+ if (NumParts == -1)
+ return UnableToLegalize;
+
+ LLT PtrTy = MRI.getType(AddrReg);
+ const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ unsigned TotalSize = ValTy.getSizeInBits();
+
+ // Split the load/store into PartTy sized pieces starting at Offset. If this
+ // is a load, return the new registers in ValRegs. For a store, each element
+ // of ValRegs should be PartTy. Returns the next offset that needs to be
+ // handled.
+ bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
+ auto MMO = LdStMI.getMMO();
+ auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
+ unsigned NumParts, unsigned Offset) -> unsigned {
+ MachineFunction &MF = MIRBuilder.getMF();
+ unsigned PartSize = PartTy.getSizeInBits();
+ for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+ ++Idx) {
+ unsigned ByteOffset = Offset / 8;
+ Register NewAddrReg;
+
+ MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
+
+ if (IsLoad) {
+ Register Dst = MRI.createGenericVirtualRegister(PartTy);
+ ValRegs.push_back(Dst);
+ MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
+ } else {
+ MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
+ }
+ Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
+ }
+
+ return Offset;
+ };
+
+ unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
+ unsigned HandledOffset =
+ splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
+
+ // Handle the rest of the register if this isn't an even type breakdown.
+ if (LeftoverTy.isValid())
+ splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
+
+ if (IsLoad) {
+ insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
+ LeftoverTy, NarrowLeftoverRegs);
+ }
+
+ LdStMI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ using namespace TargetOpcode;
+ GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
+ unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+
+ switch (MI.getOpcode()) {
+ case G_IMPLICIT_DEF:
+ case G_TRUNC:
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_ADD:
+ case G_SUB:
+ case G_MUL:
+ case G_PTR_ADD:
+ case G_SMULH:
+ case G_UMULH:
+ case G_FADD:
+ case G_FMUL:
+ case G_FSUB:
+ case G_FNEG:
+ case G_FABS:
+ case G_FCANONICALIZE:
+ case G_FDIV:
+ case G_FREM:
+ case G_FMA:
+ case G_FMAD:
+ case G_FPOW:
+ case G_FEXP:
+ case G_FEXP2:
+ case G_FLOG:
+ case G_FLOG2:
+ case G_FLOG10:
+ case G_FLDEXP:
+ case G_FNEARBYINT:
+ case G_FCEIL:
+ case G_FFLOOR:
+ case G_FRINT:
+ case G_INTRINSIC_ROUND:
+ case G_INTRINSIC_ROUNDEVEN:
+ case G_INTRINSIC_TRUNC:
+ case G_FCOS:
+ case G_FSIN:
+ case G_FSQRT:
+ case G_BSWAP:
+ case G_BITREVERSE:
+ case G_SDIV:
+ case G_UDIV:
+ case G_SREM:
+ case G_UREM:
+ case G_SDIVREM:
+ case G_UDIVREM:
+ case G_SMIN:
+ case G_SMAX:
+ case G_UMIN:
+ case G_UMAX:
+ case G_ABS:
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ case G_FMINNUM_IEEE:
+ case G_FMAXNUM_IEEE:
+ case G_FMINIMUM:
+ case G_FMAXIMUM:
+ case G_FSHL:
+ case G_FSHR:
+ case G_ROTL:
+ case G_ROTR:
+ case G_FREEZE:
+ case G_SADDSAT:
+ case G_SSUBSAT:
+ case G_UADDSAT:
+ case G_USUBSAT:
+ case G_UMULO:
+ case G_SMULO:
+ case G_SHL:
+ case G_LSHR:
+ case G_ASHR:
+ case G_SSHLSAT:
+ case G_USHLSAT:
+ case G_CTLZ:
+ case G_CTLZ_ZERO_UNDEF:
+ case G_CTTZ:
+ case G_CTTZ_ZERO_UNDEF:
+ case G_CTPOP:
+ case G_FCOPYSIGN:
+ case G_ZEXT:
+ case G_SEXT:
+ case G_ANYEXT:
+ case G_FPEXT:
+ case G_FPTRUNC:
+ case G_SITOFP:
+ case G_UITOFP:
+ case G_FPTOSI:
+ case G_FPTOUI:
+ case G_INTTOPTR:
+ case G_PTRTOINT:
+ case G_ADDRSPACE_CAST:
+ case G_UADDO:
+ case G_USUBO:
+ case G_UADDE:
+ case G_USUBE:
+ case G_SADDO:
+ case G_SSUBO:
+ case G_SADDE:
+ case G_SSUBE:
+ case G_STRICT_FADD:
+ case G_STRICT_FSUB:
+ case G_STRICT_FMUL:
+ case G_STRICT_FMA:
+ case G_STRICT_FLDEXP:
+ case G_FFREXP:
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
+ case G_ICMP:
+ case G_FCMP:
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
+ case G_IS_FPCLASS:
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
+ case G_SELECT:
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
+ case G_PHI:
+ return fewerElementsVectorPhi(GMI, NumElts);
+ case G_UNMERGE_VALUES:
+ return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
+ case G_BUILD_VECTOR:
+ assert(TypeIdx == 0 && "not a vector type index");
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_CONCAT_VECTORS:
+ if (TypeIdx != 1) // TODO: This probably does work as expected already.
+ return UnableToLegalize;
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
+ case G_LOAD:
+ case G_STORE:
+ return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
+ case G_SEXT_INREG:
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
+ GISEL_VECREDUCE_CASES_NONSEQ
+ return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
+ case G_SHUFFLE_VECTOR:
+ return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
+ default:
+ return UnableToLegalize;
+ }
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
+ MI.getFirst3RegLLTs();
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ // The shuffle should be canonicalized by now.
+ if (DstTy != Src1Ty)
+ return UnableToLegalize;
+ if (DstTy != Src2Ty)
+ return UnableToLegalize;
+
+ if (!isPowerOf2_32(DstTy.getNumElements()))
+ return UnableToLegalize;
+
+ // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
+ // Further legalization attempts will be needed to split it further.
+ NarrowTy =
+ DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
+ unsigned NewElts = NarrowTy.getNumElements();
+
+ SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
+ extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
+ extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
+ Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
+ SplitSrc2Regs[1]};
+
+ Register Hi, Lo;
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ Register &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool UseBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = Mask[FirstMaskIdx + MaskOffset];
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= std::size(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= std::size(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ UseBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (UseBuildVector) {
+ LLT EltTy = NarrowTy.getElementType();
+ SmallVector<Register, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = Mask[FirstMaskIdx + MaskOffset];
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= std::size(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(MIRBuilder
+ .buildExtractVectorElement(
+ EltTy, Inputs[Input],
+ MIRBuilder.buildConstant(LLT::scalar(32), Idx))
+ .getReg(0));
+ }
+
+ // Construct the Lo/Hi output using a G_BUILD_VECTOR.
+ Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+ } else {
+ Register Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ Register Op1 = InputUsed[1] == -1U
+ ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
+ : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
+ }
+
+ Ops.clear();
+ }
+
+ MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+static unsigned getScalarOpcForReduction(unsigned Opc) {
+ unsigned ScalarOpc;
+ switch (Opc) {
+ case TargetOpcode::G_VECREDUCE_FADD:
+ ScalarOpc = TargetOpcode::G_FADD;
+ break;
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ ScalarOpc = TargetOpcode::G_FMUL;
+ break;
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ ScalarOpc = TargetOpcode::G_FMAXNUM;
+ break;
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ ScalarOpc = TargetOpcode::G_FMINNUM;
+ break;
+ case TargetOpcode::G_VECREDUCE_ADD:
+ ScalarOpc = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_VECREDUCE_MUL:
+ ScalarOpc = TargetOpcode::G_MUL;
+ break;
+ case TargetOpcode::G_VECREDUCE_AND:
+ ScalarOpc = TargetOpcode::G_AND;
+ break;
+ case TargetOpcode::G_VECREDUCE_OR:
+ ScalarOpc = TargetOpcode::G_OR;
+ break;
+ case TargetOpcode::G_VECREDUCE_XOR:
+ ScalarOpc = TargetOpcode::G_XOR;
+ break;
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ ScalarOpc = TargetOpcode::G_SMAX;
+ break;
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ ScalarOpc = TargetOpcode::G_SMIN;
+ break;
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ ScalarOpc = TargetOpcode::G_UMAX;
+ break;
+ case TargetOpcode::G_VECREDUCE_UMIN:
+ ScalarOpc = TargetOpcode::G_UMIN;
+ break;
+ default:
+ llvm_unreachable("Unhandled reduction");
+ }
+ return ScalarOpc;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
+ Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
+ "Sequential reductions not expected");
+
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ // The semantics of the normal non-sequential reductions allow us to freely
+ // re-associate the operation.
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+
+ if (NarrowTy.isVector() &&
+ (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
+ return UnableToLegalize;
+
+ unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+ SmallVector<Register> SplitSrcs;
+ // If NarrowTy is a scalar then we're being asked to scalarize.
+ const unsigned NumParts =
+ NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
+ : SrcTy.getNumElements();
+
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ if (NarrowTy.isScalar()) {
+ if (DstTy != NarrowTy)
+ return UnableToLegalize; // FIXME: handle implicit extensions.
+
+ if (isPowerOf2_32(NumParts)) {
+ // Generate a tree of scalar operations to reduce the critical path.
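+ // E.g. with NumParts == 4 this builds (s0 op s1) op (s2 op s3) rather than
+ // the sequential ((s0 op s1) op s2) op s3, halving the dependency chain.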
+ SmallVector<Register> PartialResults;
+ unsigned NumPartsLeft = NumParts;
+ while (NumPartsLeft > 1) {
+ for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
+ PartialResults.emplace_back(
+ MIRBuilder
+ .buildInstr(ScalarOpc, {NarrowTy},
+ {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
+ .getReg(0));
+ }
+ SplitSrcs = PartialResults;
+ PartialResults.clear();
+ NumPartsLeft = SplitSrcs.size();
+ }
+ assert(SplitSrcs.size() == 1);
+ MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // If we can't generate a tree, then just do sequential operations.
+ Register Acc = SplitSrcs[0];
+ for (unsigned Idx = 1; Idx < NumParts; ++Idx)
+ Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
+ .getReg(0);
+ MIRBuilder.buildCopy(DstReg, Acc);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ SmallVector<Register> PartialReductions;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ PartialReductions.push_back(
+ MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
+ }
+
+ // If the types involved are powers of 2, we can generate intermediate vector
+ // ops before generating a final reduction operation.
+ if (isPowerOf2_32(SrcTy.getNumElements()) &&
+ isPowerOf2_32(NarrowTy.getNumElements())) {
+ return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
+ }
+
+ Register Acc = PartialReductions[0];
+ for (unsigned Part = 1; Part < NumParts; ++Part) {
+ if (Part == NumParts - 1) {
+ MIRBuilder.buildInstr(ScalarOpc, {DstReg},
+ {Acc, PartialReductions[Part]});
+ } else {
+ Acc = MIRBuilder
+ .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
+ .getReg(0);
+ }
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
+ LLT SrcTy, LLT NarrowTy,
+ unsigned ScalarOpc) {
+ SmallVector<Register> SplitSrcs;
+ // Split the sources into NarrowTy size pieces.
+ extractParts(SrcReg, NarrowTy,
+ SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
+ // We're going to do a tree reduction using vector operations until we have
+ // one NarrowTy size value left.
+ while (SplitSrcs.size() > 1) {
+ SmallVector<Register> PartialRdxs;
+ for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
+ Register LHS = SplitSrcs[Idx];
+ Register RHS = SplitSrcs[Idx + 1];
+ // Create the intermediate vector op.
+ Register Res =
+ MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
+ PartialRdxs.push_back(Res);
+ }
+ SplitSrcs = std::move(PartialRdxs);
+ }
+ // Finally generate the requested NarrowTy based reduction.
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(SplitSrcs[0]);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+ const LLT HalfTy, const LLT AmtTy) {
+
+ Register InL = MRI.createGenericVirtualRegister(HalfTy);
+ Register InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
+
+ if (Amt.isZero()) {
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ LLT NVT = HalfTy;
+ unsigned NVTBits = HalfTy.getSizeInBits();
+ unsigned VTBits = 2 * NVTBits;
+
+ SrcOp Lo(Register(0)), Hi(Register(0));
+ if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = MIRBuilder.buildShl(NVT, InL,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ } else if (Amt == NVTBits) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = InL;
+ } else {
+ Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
+ auto OrLHS =
+ MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
+ auto OrRHS = MIRBuilder.buildLShr(
+ NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+ Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ }
+ } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildLShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
+ }
+ } else {
+ if (Amt.ugt(VTBits)) {
+ Hi = Lo = MIRBuilder.buildAShr(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
+ }
+ }
+
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+// TODO: Optimize if constant shift amount.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
+ LLT RequestedTy) {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, RequestedTy, 2);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ Register Amt = MI.getOperand(2).getReg();
+ LLT ShiftAmtTy = MRI.getType(Amt);
+ const unsigned DstEltSize = DstTy.getScalarSizeInBits();
+ if (DstEltSize % 2 != 0)
+ return UnableToLegalize;
+
+ // Ignore the input type. We can only go to exactly half the size of the
+ // input. If that isn't small enough, the resulting pieces will be further
+ // legalized.
+ const unsigned NewBitSize = DstEltSize / 2;
+ const LLT HalfTy = LLT::scalar(NewBitSize);
+ const LLT CondTy = LLT::scalar(1);
+
+ if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
+ return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
+ ShiftAmtTy);
+ }
+
+ // TODO: Expand with known bits.
+
+ // Handle the fully general expansion by an unknown amount.
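+ // For illustration, a 64-bit shift split into 32-bit halves: if Amt < 32
+ // (IsShort), a G_SHL computes Lo = InL << Amt and
+ // Hi = (InH << Amt) | (InL >> (32 - Amt)); otherwise Lo = 0 and
+ // Hi = InL << (Amt - 32). The extra IsZero select forwards InH unchanged when
+ // Amt == 0, where the (32 - Amt) shift amount would be out of range.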
+ auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
+
+ Register InL = MRI.createGenericVirtualRegister(HalfTy);
+ Register InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
+
+ auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
+ auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
+
+ auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
+ auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
+ auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
+
+ Register ResultRegs[2];
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL: {
+ // Short: ShAmt < NewBitSize
+ auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
+
+ auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+ auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
+
+ // Long: ShAmt >= NewBitSize
+ auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
+ auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
+
+ auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
+ auto Hi = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
+
+ auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
+
+ // Long: ShAmt >= NewBitSize
+ MachineInstrBuilder HiL;
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
+ HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
+ }
+ auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
+ {InH, AmtExcess}); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ default:
+ llvm_unreachable("not a shift");
+ }
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy) {
+ assert(TypeIdx == 0 && "Expecting only Idx 0");
+
+ Observer.changingInstr(MI);
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ moreElementsVectorSrc(MI, MoreTy, I);
+ }
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_LOAD: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_STRICT_FADD:
+ case TargetOpcode::G_STRICT_FSUB:
+ case TargetOpcode::G_STRICT_FMUL: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_STRICT_FMA:
+ case TargetOpcode::G_FSHR:
+ case TargetOpcode::G_FSHL: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_EXTRACT:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INSERT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_SEXT_INREG:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_SELECT: {
+ auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
+ if (TypeIdx == 1) {
+ if (!CondTy.isScalar() ||
+ DstTy.getElementCount() != MoreTy.getElementCount())
+ return UnableToLegalize;
+
+ // This is turning a scalar select of vectors into a vector
+ // select. Broadcast the select condition.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ShufSplat.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (CondTy.isVector())
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return UnableToLegalize;
+ case TargetOpcode::G_PHI:
+ return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_BUILD_VECTOR: {
+ SmallVector<SrcOp, 8> Elts;
+ for (auto Op : MI.uses()) {
+ Elts.push_back(Op.getReg());
+ }
+
+ for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
+ Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
+ }
+
+ MIRBuilder.buildDeleteTrailingVectorElements(
+ MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_TRUNC: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ LLT SrcTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(1).getReg()).getElementType());
+ moreElementsVectorSrc(MI, SrcTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ default:
+ return UnableToLegalize;
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ unsigned MaskNumElts = Mask.size();
+ unsigned SrcNumElts = SrcTy.getNumElements();
+ LLT DestEltTy = DstTy.getElementType();
+
+ if (MaskNumElts == SrcNumElts)
+ return Legalized;
+
+ if (MaskNumElts < SrcNumElts) {
+ // Extend mask to match new destination vector size with
+ // undef values.
+ SmallVector<int, 16> NewMask(Mask);
+ for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
+ NewMask.push_back(-1);
+
+ moreElementsVectorDst(MI, SrcTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
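+ // E.g. a 6-element mask over <4 x s32> sources pads to 8 elements: each
+ // source is concatenated with one undef <4 x s32> (NumConcat == 2), mask
+ // indices >= 4 are shifted up by 4, and the two extra result elements are
+ // discarded by the extract loop below.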
+
+ // Create new source vectors by concatenating the initial
+ // source vectors with undefined vectors of the same size.
+ auto Undef = MIRBuilder.buildUndef(SrcTy);
+ SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
+ SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
+ MOps1[0] = MI.getOperand(1).getReg();
+ MOps2[0] = MI.getOperand(2).getReg();
+
+ auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
+ auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
+ for (unsigned I = 0; I != MaskNumElts; ++I) {
+ int Idx = Mask[I];
+ if (Idx >= static_cast<int>(SrcNumElts))
+ Idx += PaddedMaskNumElts - SrcNumElts;
+ MappedOps[I] = Idx;
+ }
+
+ // If we got more elements than required, extract subvector.
+ if (MaskNumElts != PaddedMaskNumElts) {
+ auto Shuffle =
+ MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
+
+ SmallVector<Register, 16> Elts(MaskNumElts);
+ for (unsigned I = 0; I < MaskNumElts; ++I) {
+ Elts[I] =
+ MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
+ .getReg(0);
+ }
+ MIRBuilder.buildBuildVector(DstReg, Elts);
+ } else {
+ MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
+ }
+
+ MI.eraseFromParent();
+ return LegalizerHelper::LegalizeResult::Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
+ unsigned int TypeIdx, LLT MoreTy) {
+ auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ unsigned NumElts = DstTy.getNumElements();
+ unsigned WidenNumElts = MoreTy.getNumElements();
+
+ if (DstTy.isVector() && Src1Ty.isVector() &&
+ DstTy.getNumElements() != Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI);
+ }
+
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ // Expect a canonicalized shuffle.
+ if (DstTy != Src1Ty || DstTy != Src2Ty)
+ return UnableToLegalize;
+
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned I = 0; I != NumElts; ++I) {
+ int Idx = Mask[I];
+ if (Idx < static_cast<int>(NumElts))
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned I = NumElts; I != WidenNumElts; ++I)
+ NewMask.push_back(-1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
+ ArrayRef<Register> Src1Regs,
+ ArrayRef<Register> Src2Regs,
+ LLT NarrowTy) {
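+ // Schoolbook multiplication on NarrowTy-sized limbs. For example, with source
+ // parts {a1, a0} x {b1, b0} and two destination parts this emits
+ // Dst[0] = mul(a0, b0) and Dst[1] = mul(a1, b0) + mul(a0, b1) + umulh(a0, b0);
+ // carries out of the top part are dropped since no higher part is produced.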
+ MachineIRBuilder &B = MIRBuilder;
+ unsigned SrcParts = Src1Regs.size();
+ unsigned DstParts = DstRegs.size();
+
+ unsigned DstIdx = 0; // Low bits of the result.
+ Register FactorSum =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
+ DstRegs[DstIdx] = FactorSum;
+
+ unsigned CarrySumPrevDstIdx;
+ SmallVector<Register, 4> Factors;
+
+ for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
+ // Collect low parts of muls for DstIdx.
+ for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
+ i <= std::min(DstIdx, SrcParts - 1); ++i) {
+ MachineInstrBuilder Mul =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
+ Factors.push_back(Mul.getReg(0));
+ }
+ // Collect high parts of muls from previous DstIdx.
+ for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+ i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+ MachineInstrBuilder Umulh =
+ B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+ Factors.push_back(Umulh.getReg(0));
+ }
+ // Add CarrySum from additions calculated for previous DstIdx.
+ if (DstIdx != 1) {
+ Factors.push_back(CarrySumPrevDstIdx);
+ }
+
+ Register CarrySum;
+ // Add all factors and accumulate all carries into CarrySum.
+ if (DstIdx != DstParts - 1) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+ FactorSum = Uaddo.getReg(0);
+ CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
+ FactorSum = Uaddo.getReg(0);
+ MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
+ CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+ }
+ } else {
+ // The value for the next index is not calculated, so neither is CarrySum.
+ FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i)
+ FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
+ }
+
+ CarrySumPrevDstIdx = CarrySum;
+ DstRegs[DstIdx] = FactorSum;
+ Factors.clear();
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstType = MRI.getType(DstReg);
+ // FIXME: add support for vector types
+ if (DstType.isVector())
+ return UnableToLegalize;
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned OpO, OpE, OpF;
+ switch (Opcode) {
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_ADD:
+ OpO = TargetOpcode::G_UADDO;
+ OpE = TargetOpcode::G_UADDE;
+ OpF = TargetOpcode::G_UADDE;
+ if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
+ OpF = TargetOpcode::G_SADDE;
+ break;
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SUB:
+ OpO = TargetOpcode::G_USUBO;
+ OpE = TargetOpcode::G_USUBE;
+ OpF = TargetOpcode::G_USUBE;
+ if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
+ OpF = TargetOpcode::G_SSUBE;
+ break;
+ default:
+ llvm_unreachable("Unexpected add/sub opcode!");
+ }
+
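+ // For example, narrowing a 128-bit G_ADD to 64-bit parts emits a G_UADDO for
+ // the low half and a G_UADDE (consuming the low half's carry-out) for the
+ // high half; the signed overflow variants only differ in using G_SADDE or
+ // G_SSUBE for the final part (OpF).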
+ // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
+ unsigned NumDefs = MI.getNumExplicitDefs();
+ Register Src1 = MI.getOperand(NumDefs).getReg();
+ Register Src2 = MI.getOperand(NumDefs + 1).getReg();
+ Register CarryDst, CarryIn;
+ if (NumDefs == 2)
+ CarryDst = MI.getOperand(1).getReg();
+ if (MI.getNumOperands() == NumDefs + 3)
+ CarryIn = MI.getOperand(NumDefs + 2).getReg();
+
+ LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT LeftoverTy, DummyTy;
+ SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
+ extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
+ extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
+
+ int NarrowParts = Src1Regs.size();
+ for (int I = 0, E = Src1Left.size(); I != E; ++I) {
+ Src1Regs.push_back(Src1Left[I]);
+ Src2Regs.push_back(Src2Left[I]);
+ }
+ DstRegs.reserve(Src1Regs.size());
+
+ for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
+ Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ // Forward the final carry-out to the destination register
+ if (i == e - 1 && CarryDst)
+ CarryOut = CarryDst;
+
+ if (!CarryIn) {
+ MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
+ {Src1Regs[i], Src2Regs[i]});
+ } else if (i == e - 1) {
+ MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
+ {Src1Regs[i], Src2Regs[i], CarryIn});
+ } else {
+ MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
+ {Src1Regs[i], Src2Regs[i], CarryIn});
+ }
+
+ DstRegs.push_back(DstReg);
+ CarryIn = CarryOut;
+ }
+ insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
+ ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
+ ArrayRef(DstRegs).drop_front(NarrowParts));
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
+ auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
+
+ LLT Ty = MRI.getType(DstReg);
+ if (Ty.isVector())
+ return UnableToLegalize;
+
+ unsigned Size = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
+
+ unsigned NumParts = Size / NarrowSize;
+ bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
+ unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
+
+ SmallVector<Register, 2> Src1Parts, Src2Parts;
+ SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
+ extractParts(Src1, NarrowTy, NumParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumParts, Src2Parts);
+ multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
+
+ // Take only high half of registers if this is high mul.
+ ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
+
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // If all finite floats fit into the narrowed integer type, we can just swap
+ // out the result type. This is practically only useful for conversions from
+ // half to at least 16-bits, so just handle the one case.
+ if (SrcTy.getScalarType() != LLT::scalar(16) ||
+ NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0,
+ IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+ int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ // FIXME: add support for when SizeOp1 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp1 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp1 / NarrowSize;
+
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ Register OpReg = MI.getOperand(0).getReg();
+ uint64_t OpStart = MI.getOperand(2).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+ // Compute how the extracted range overlaps this narrow part: ExtractOffset
+ // is the offset of the overlap within the part, and SegSize is the number of
+ // overlapping bits.
+ int64_t ExtractOffset;
+ uint64_t SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ Register SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else if (DstRegs.size() > 1)
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
+ else
+ MIRBuilder.buildCopy(DstReg, DstRegs[0]);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT LeftoverTy;
+ extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
+ LeftoverRegs);
+
+ for (Register Reg : LeftoverRegs)
+ SrcRegs.push_back(Reg);
+
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+ Register OpReg = MI.getOperand(2).getReg();
+ uint64_t OpStart = MI.getOperand(3).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
+ unsigned DstStart = I * NarrowSize;
+
+ if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is defined by this insert, forward the new
+ // value.
+ DstRegs.push_back(OpReg);
+ continue;
+ }
+
+ Register SrcReg = SrcRegs[I];
+ if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
+ // The leftover reg is smaller than NarrowTy, so we need to extend it.
+ SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
+ }
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcReg);
+ continue;
+ }
+
+ // Compute how the inserted value overlaps this narrow part: ExtractOffset is
+ // the offset within OpReg to take bits from, InsertOffset is where they land
+ // in this part, and SegSize is the number of overlapping bits.
+ int64_t ExtractOffset, InsertOffset;
+ uint64_t SegSize;
+ if (OpStart < DstStart) {
+ InsertOffset = 0;
+ ExtractOffset = DstStart - OpStart;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+ } else {
+ InsertOffset = OpStart - DstStart;
+ ExtractOffset = 0;
+ SegSize =
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ }
+
+ Register SegReg = OpReg;
+ if (ExtractOffset != 0 || SegSize != OpSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+ }
+
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
+ DstRegs.push_back(DstReg);
+ }
+
+ uint64_t WideSize = DstRegs.size() * NarrowSize;
+ Register DstReg = MI.getOperand(0).getReg();
+ if (WideSize > RegTy.getSizeInBits()) {
+ Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
+ MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
+ MIRBuilder.buildTrunc(DstReg, MergeReg);
+ } else
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ assert(MI.getNumOperands() == 3 && TypeIdx == 0);
+
+ SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
+ SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src0Regs, Src0LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
+ Src1Regs, Src1LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {Src0Regs[I], Src1Regs[I]});
+ DstRegs.push_back(Inst.getReg(0));
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(
+ MI.getOpcode(),
+ {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
+ DstLeftoverRegs.push_back(Inst.getReg(0));
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
+
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ SmallVector<Register, 8> Parts;
+ LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
+ LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register CondReg = MI.getOperand(1).getReg();
+ LLT CondTy = MRI.getType(CondReg);
+ if (CondTy.isVector()) // TODO: Handle vselect
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
+ SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src1Regs, Src1LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
+ Src2Regs, Src2LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Select = MIRBuilder.buildSelect(NarrowTy,
+ CondReg, Src1Regs[I], Src2Regs[I]);
+ DstRegs.push_back(Select.getReg(0));
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Select = MIRBuilder.buildSelect(
+ LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
+ DstLeftoverRegs.push_back(Select.getReg(0));
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
+
+ MachineIRBuilder &B = MIRBuilder;
+ auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
+ // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
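+ // E.g. for a 64-bit source split into 32-bit halves, Hi == 0 and Lo == 1
+ // gives 32 + ctlz(1) == 32 + 31 == 63, matching ctlz of the full value.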
+ auto C_0 = B.buildConstant(NarrowTy, 0);
+ auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ UnmergeSrc.getReg(1), C_0);
+ auto LoCTLZ = IsUndef ?
+ B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
+ B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
+ auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
+ auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
+ auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
+ B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
+
+ MachineIRBuilder &B = MIRBuilder;
+ auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
+ // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
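+ // E.g. for a 64-bit source split into 32-bit halves, Lo == 0 and Hi == 1
+ // gives cttz(1) + 32 == 0 + 32 == 32, matching cttz of the full value.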
+ auto C_0 = B.buildConstant(NarrowTy, 0);
+ auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ UnmergeSrc.getReg(0), C_0);
+ auto HiCTTZ = IsUndef ?
+ B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
+ B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
+ auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
+ auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
+ auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
+ B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
+
+ auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
+ auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
+ MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ MachineIRBuilder &B = MIRBuilder;
+ Register ExpReg = MI.getOperand(2).getReg();
+ LLT ExpTy = MRI.getType(ExpReg);
+
+ unsigned ClampSize = NarrowTy.getScalarSizeInBits();
+
+ // Clamp the exponent to the range of the target type.
+ auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
+ auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
+ auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
+ auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
+
+ auto Trunc = B.buildTrunc(NarrowTy, Clamp);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Trunc.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitCount(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ const auto &TII = MIRBuilder.getTII();
+ auto isSupported = [this](const LegalityQuery &Q) {
+ auto QAction = LI.getAction(Q).Action;
+ return QAction == Legal || QAction == Libcall || QAction == Custom;
+ };
+ switch (Opc) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ // This trivially expands to CTLZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTLZ: {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned Len = SrcTy.getSizeInBits();
+
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
+ // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
+ auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
+ auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
+ auto ICmp = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
+ auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
+ MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we do this:
+ // NewLen = NextPowerOf2(Len);
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // Up to NewLen/2
+ // return Len - popcount(x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
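+ // E.g. for a 16-bit x = 0x0100 the or-shift smearing yields 0x01FF,
+ // popcount(0x01FF) == 9, and 16 - 9 == 7 == ctlz(0x0100).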
+ Register Op = SrcReg;
+ unsigned NewLen = PowerOf2Ceil(Len);
+ for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
+ auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
+ auto MIBOp = MIRBuilder.buildOr(
+ SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
+ Op = MIBOp.getReg(0);
+ }
+ auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
+ MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
+ MIBPop);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
+ // This trivially expands to CTTZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ: {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+
+ unsigned Len = SrcTy.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
+ // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
+ // zero.
+ auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
+ auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
+ auto ICmp = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
+ auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
+ MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
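+ // E.g. x = 0b01100: ~x & (x - 1) == 0b00011, and popcount(0b00011) == 2 ==
+ // cttz(x).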
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
+ auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
+ auto MIBTmp = MIRBuilder.buildAnd(
+ SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
+ if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
+ MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
+ MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
+ MI.getOperand(1).setReg(MIBTmp.getReg(0));
+ return Legalized;
+ }
+ case TargetOpcode::G_CTPOP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
+ unsigned Size = Ty.getSizeInBits();
+ MachineIRBuilder &B = MIRBuilder;
+
+ // Count set bits in blocks of 2 bits. Default approach would be
+ // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
+ // We use the following formula instead:
+ // B2Count = val - { (val >> 1) & 0x55555555 }
+ // since it gives the same result in blocks of 2 with one instruction fewer.
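+ // E.g. for a single 2-bit block val == 0b11: val - ((val >> 1) & 0b01) ==
+ // 3 - 1 == 2, the number of set bits.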
+ auto C_1 = B.buildConstant(Ty, 1);
+ auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
+ APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
+ auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
+ auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
+ auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
+
+ // To get the count in blocks of 4, add the values from adjacent blocks of 2.
+ // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
+ auto C_2 = B.buildConstant(Ty, 2);
+ auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
+ APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
+ auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
+ auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
+ auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
+ auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
+
+ // For the count in blocks of 8 bits we don't have to mask the high 4 bits
+ // before the addition, since the count value sits in the range {0,...,8} and
+ // 4 bits are enough to hold it. After the addition the high 4 bits still hold
+ // a count of set bits from the high 4-bit block; set them to zero to get the
+ // 8-bit result.
+ // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
+ auto C_4 = B.buildConstant(Ty, 4);
+ auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
+ auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
+ APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
+ auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
+ auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
+
+ assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
+ // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
+ // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
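+ // E.g. for Size == 32 with per-byte counts c3..c0 in B8Count, multiplying by
+ // 0x01010101 leaves c0 + c1 + c2 + c3 in the top byte, which the final shift
+ // moves into the low bits.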
+ auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
+ auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
+
+ // Shift count result from 8 high bits to low bits.
+ auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+// Check that (every element of) Reg is undef or not an exact multiple of BW.
+static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
+ Register Reg, unsigned BW) {
+ return matchUnaryPredicate(
+ MRI, Reg,
+ [=](const Constant *C) {
+ // Null constant here means an undef.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
+ return !CI || CI->getValue().urem(BW) != 0;
+ },
+ /*AllowUndefs*/ true);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShTy = MRI.getType(Z);
+
+ unsigned BW = Ty.getScalarSizeInBits();
+
+ if (!isPowerOf2_32(BW))
+ return UnableToLegalize;
+
+ const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+ unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+
+ if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+ // fshl X, Y, Z -> fshr X, Y, -Z
+ // fshr X, Y, Z -> fshl X, Y, -Z
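+ // E.g. with an 8-bit type and Z == 3, fshl X, Y, 3 becomes fshr X, Y, 5,
+ // since -3 is congruent to 5 modulo the bit width.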
+ auto Zero = MIRBuilder.buildConstant(ShTy, 0);
+ Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
+ } else {
+ // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+ // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
+ auto One = MIRBuilder.buildConstant(ShTy, 1);
+ if (IsFSHL) {
+ Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+ X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
+ } else {
+ X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+ Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
+ }
+
+ Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
+ }
+
+ MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShTy = MRI.getType(Z);
+
+ const unsigned BW = Ty.getScalarSizeInBits();
+ const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+
+ Register ShX, ShY;
+ Register ShAmt, InvShAmt;
+
+ // FIXME: Emit optimized urem by constant instead of letting it expand later.
+ if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
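+ // E.g. for BW == 32 and C == 8, fshl X, Y, 8 == (X << 8) | (Y >> 24).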
+ auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+ ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+ InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
+ ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
+ ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+ auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ auto NotZ = MIRBuilder.buildNot(ShTy, Z);
+ InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
+ } else {
+ auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+ ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+ InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
+ }
+
+ auto One = MIRBuilder.buildConstant(ShTy, 1);
+ if (IsFSHL) {
+ ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
+ auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
+ ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
+ } else {
+ auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
+ ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
+ ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
+ }
+ }
+
+ MIRBuilder.buildOr(Dst, ShX, ShY);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
+ // These operations approximately do the following (while avoiding undefined
+ // shifts by BW):
+ // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
+
+ bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+ unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+
+ // TODO: Use smarter heuristic that accounts for vector legalization.
+ if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
+ return lowerFunnelShiftAsShifts(MI);
+
+ // This only works for powers of 2, fallback to shifts if it fails.
+ LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
+ if (Result == UnableToLegalize)
+ return lowerFunnelShiftAsShifts(MI);
+ return Result;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
+ auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
+ bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
+ unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
+ auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
+ MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
+
+ unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
+ bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
+
+ MIRBuilder.setInstrAndDebugLoc(MI);
+
+ // If a rotate in the other direction is supported, use it.
+ unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
+ if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
+ isPowerOf2_32(EltSizeInBits))
+ return lowerRotateWithReverseRotate(MI);
+
+ // If a funnel shift is supported, use it.
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ bool IsFShLegal = false;
+ if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+ LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+ auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+ Register R3) {
+ MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+ MI.eraseFromParent();
+ return Legalized;
+ };
+ // If a funnel shift in the other direction is supported, use it.
+ if (IsFShLegal) {
+ return buildFunnelShift(FShOpc, Dst, Src, Amt);
+ } else if (isPowerOf2_32(EltSizeInBits)) {
+ Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+ return buildFunnelShift(RevFsh, Dst, Src, Amt);
+ }
+ }
+
+ auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
+ unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
+ unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
+ auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
+ Register ShVal;
+ Register RevShiftVal;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
+ // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
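+ // E.g. for w == 32 and c == 40, rotl x, 40 == (x << 8) | (x >> 24), since
+ // 40 & 31 == 8 and -40 & 31 == 24.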
+ auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
+ auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
+ ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
+ auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
+ RevShiftVal =
+ MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
+ } else {
+ // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
+ // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
+ auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
+ auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
+ ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
+ auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
+ auto One = MIRBuilder.buildConstant(AmtTy, 1);
+ auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
+ RevShiftVal =
+ MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
+ }
+ MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
+// representation.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+ // unsigned cul2f(ulong u) {
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ // }
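+  // Illustrative walk-through for u = 1: lz = 63, e = 127 + 63 - 63 = 127,
+  // (u << lz) & 0x7fff... is 0, so t = 0, v = 127 << 23 = 0x3f800000, r = 0,
+  // and the result is as_float(0x3f800000) = 1.0f as expected.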
+
+ auto Zero32 = MIRBuilder.buildConstant(S32, 0);
+ auto Zero64 = MIRBuilder.buildConstant(S64, 0);
+
+ auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
+
+ auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
+ auto Sub = MIRBuilder.buildSub(S32, K, LZ);
+
+ auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
+ auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
+
+ auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
+ auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
+
+ auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
+
+ auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
+ auto T = MIRBuilder.buildAnd(S64, U, Mask1);
+
+ auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
+ auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
+ auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
+
+ auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
+ auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
+ auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
+ auto One = MIRBuilder.buildConstant(S32, 1);
+
+ auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
+ auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
+ auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
+ MIRBuilder.buildAdd(Dst, V, R);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+ if (SrcTy == LLT::scalar(1)) {
+ auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
+ auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
+ MIRBuilder.buildSelect(Dst, Src, True, False);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (SrcTy != LLT::scalar(64))
+ return UnableToLegalize;
+
+ if (DstTy == LLT::scalar(32)) {
+ // TODO: SelectionDAG has several alternative expansions to port which may
+    // be more reasonable depending on the available instructions. If a target
+ // has sitofp, does not have CTLZ, or can efficiently use f64 as an
+ // intermediate type, this is probably worse.
+ return lowerU64ToF32BitOps(MI);
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ if (SrcTy == S1) {
+ auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
+ auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
+ MIRBuilder.buildSelect(Dst, Src, True, False);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (SrcTy != S64)
+ return UnableToLegalize;
+
+ if (DstTy == S32) {
+ // signed cl2f(long l) {
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ // }
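+    // E.g. for l = -1: s = -1, (l + s) ^ s = (-2) ^ (-1) = 1, cul2f(1) = 1.0f,
+    // and since s != 0 the result is negated to -1.0f.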
+ Register L = Src;
+ auto SignBit = MIRBuilder.buildConstant(S64, 63);
+ auto S = MIRBuilder.buildAShr(S64, L, SignBit);
+
+ auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
+ auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
+ auto R = MIRBuilder.buildUITOFP(S32, Xor);
+
+ auto RNeg = MIRBuilder.buildFNeg(S32, R);
+ auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
+ MIRBuilder.buildConstant(S64, 0));
+ MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ if (SrcTy != S64 && SrcTy != S32)
+ return UnableToLegalize;
+ if (DstTy != S32 && DstTy != S64)
+ return UnableToLegalize;
+
+ // FPTOSI gives same result as FPTOUI for positive signed integers.
+  // FPTOUI needs to deal with fp values that convert to unsigned integers
+  // greater than or equal to 2^31 for float or 2^63 for double; call this
+  // threshold 2^Exp for brevity.
+
+ APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
+ APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
+ : APFloat::IEEEdouble(),
+ APInt::getZero(SrcTy.getSizeInBits()));
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
+
+ MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
+
+ MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+  // For fp values greater than or equal to the Threshold (2^Exp), we use
+  // FPTOSI on (Value - 2^Exp) and add 2^Exp back by setting the highest bit
+  // of the result to 1.
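+  // Rough example for f32 -> u32: for a value near 3.0e9, FPTOSI(Value - 2^31)
+  // yields roughly 852516352, and XOR-ing in 0x80000000 restores ~3.0e9, which
+  // is unrepresentable as a signed i32 but fine as an unsigned one.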
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
+ MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
+
+ const LLT S1 = LLT::scalar(1);
+
+ MachineInstrBuilder FCMP =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
+ MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
+ return UnableToLegalize;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
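+  // The magic numbers below follow the IEEE-754 single layout: 1 sign bit,
+  // 8 exponent bits (mask 0x7F800000, bias 127) and 23 mantissa bits
+  // (mask 0x007FFFFF), with 0x00800000 re-adding the implicit leading one.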
+
+ unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
+
+ auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
+ auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
+
+ auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
+ auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
+
+ auto SignMask = MIRBuilder.buildConstant(SrcTy,
+ APInt::getSignMask(SrcEltBits));
+ auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
+ auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
+ auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
+ Sign = MIRBuilder.buildSExt(DstTy, Sign);
+
+ auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
+ auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
+ auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
+
+ auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
+ R = MIRBuilder.buildZExt(DstTy, R);
+
+ auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
+ auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
+ auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
+ auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
+
+ auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
+ auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
+
+ const LLT S1 = LLT::scalar(1);
+ auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
+ S1, Exponent, ExponentLoBit);
+
+ R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
+
+ auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
+ auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
+
+ auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
+
+ auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
+ S1, Exponent, ZeroSrcTy);
+
+ auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
+ MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// f64 -> f16 conversion using round-to-nearest-even rounding mode.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+
+ auto [Dst, Src] = MI.getFirst2Regs();
+ assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
+ MRI.getType(Src).getScalarType() == LLT::scalar(64));
+
+ if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
+ return UnableToLegalize;
+
+ if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
+ unsigned Flags = MI.getFlags();
+ auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
+ MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ const unsigned ExpMask = 0x7ff;
+ const unsigned ExpBiasf64 = 1023;
+ const unsigned ExpBiasf16 = 15;
+
+ auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
+ Register U = Unmerge.getReg(0);
+ Register UH = Unmerge.getReg(1);
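+  // U holds the low 32 bits of the f64 value and UH the high 32 bits (sign,
+  // 11-bit exponent and the top 20 mantissa bits).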
+
+ auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
+ E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
+
+ // Subtract the fp64 exponent bias (1023) to get the real exponent and
+ // add the f16 bias (15) to get the biased exponent for the f16 format.
+ E = MIRBuilder.buildAdd(
+ S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
+
+ auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
+ M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
+
+ auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
+ MIRBuilder.buildConstant(S32, 0x1ff));
+ MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
+
+ auto Zero = MIRBuilder.buildConstant(S32, 0);
+ auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
+ auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
+ M = MIRBuilder.buildOr(S32, M, Lo40Set);
+
+ // (M != 0 ? 0x0200 : 0) | 0x7c00;
+ auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
+ auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
+ auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
+
+ auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
+ auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
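+  // I is the f16 pattern used when the input is Inf or NaN: 0x7c00 is the
+  // all-ones f16 exponent, and the extra 0x0200 bit keeps the mantissa nonzero
+  // so a NaN input stays a NaN rather than collapsing to infinity.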
+
+ // N = M | (E << 12);
+ auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
+ auto N = MIRBuilder.buildOr(S32, M, EShl12);
+
+ // B = clamp(1-E, 0, 13);
+ auto One = MIRBuilder.buildConstant(S32, 1);
+ auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
+ auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
+ B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
+
+ auto SigSetHigh = MIRBuilder.buildOr(S32, M,
+ MIRBuilder.buildConstant(S32, 0x1000));
+
+ auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
+ auto D0 = MIRBuilder.buildShl(S32, D, B);
+
+ auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
+ D0, SigSetHigh);
+ auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
+ D = MIRBuilder.buildOr(S32, D, D1);
+
+ auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
+ auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
+
+ auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
+ V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
+
+ auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
+ MIRBuilder.buildConstant(S32, 3));
+ auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
+
+ auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
+ MIRBuilder.buildConstant(S32, 5));
+ auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
+
+ V1 = MIRBuilder.buildOr(S32, V0, V1);
+ V = MIRBuilder.buildAdd(S32, V, V1);
+
+ auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
+ E, MIRBuilder.buildConstant(S32, 30));
+ V = MIRBuilder.buildSelect(S32, CmpEGt30,
+ MIRBuilder.buildConstant(S32, 0x7c00), V);
+
+ auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
+ E, MIRBuilder.buildConstant(S32, 1039));
+ V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
+
+ // Extract the sign bit.
+ auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
+ Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
+
+ // Insert the sign bit
+ V = MIRBuilder.buildOr(S32, Sign, V);
+
+ MIRBuilder.buildTrunc(Dst, V);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
+ auto [DstTy, SrcTy] = MI.getFirst2LLTs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S16 = LLT::scalar(16);
+
+ if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
+ return lowerFPTRUNC_F64_TO_F16(MI);
+
+ return UnableToLegalize;
+}
+
+// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
+// multiplication tree.
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Dst);
+
+ auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
+ MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_SMIN:
+ return CmpInst::ICMP_SLT;
+ case TargetOpcode::G_SMAX:
+ return CmpInst::ICMP_SGT;
+ case TargetOpcode::G_UMIN:
+ return CmpInst::ICMP_ULT;
+ case TargetOpcode::G_UMAX:
+ return CmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("not in integer min/max");
+ }
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
+
+ const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+ LLT CmpType = MRI.getType(Dst).changeElementSize(1);
+
+ auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
+ MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
+ auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
+ const int Src0Size = Src0Ty.getScalarSizeInBits();
+ const int Src1Size = Src1Ty.getScalarSizeInBits();
+
+ auto SignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getSignMask(Src0Size));
+
+ auto NotSignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+
+ Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
+ Register And1;
+ if (Src0Ty == Src1Ty) {
+ And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
+ } else if (Src0Size > Src1Size) {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
+ auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
+ auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
+ And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
+ auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
+ auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
+ And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
+ }
+
+ // Be careful about setting nsz/nnan/ninf on every instruction, since the
+ // constants are a nan and -0.0, but the final result should preserve
+ // everything.
+ unsigned Flags = MI.getFlags();
+ MIRBuilder.buildOr(Dst, And0, And1, Flags);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
+ unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
+ TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Dst);
+
+ if (!MI.getFlag(MachineInstr::FmNoNans)) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+
+    // Note this must be done here, and not as an optimization combine, in the
+    // absence of a dedicated quiet-snan instruction, as we're using an
+    // omni-purpose G_FCANONICALIZE.
+ if (!isKnownNeverSNaN(Src0, MRI))
+ Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
+
+ if (!isKnownNeverSNaN(Src1, MRI))
+ Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
+ }
+
+ // If there are no nans, it's safe to simply replace this with the non-IEEE
+ // version.
+ MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
+ // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned Flags = MI.getFlags();
+
+ auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
+ Flags);
+ MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
+ auto [DstReg, X] = MI.getFirst2Regs();
+ const unsigned Flags = MI.getFlags();
+ const LLT Ty = MRI.getType(DstReg);
+ const LLT CondTy = Ty.changeElementSize(1);
+
+ // round(x) =>
+ // t = trunc(x);
+ // d = fabs(x - t);
+ // o = copysign(1.0f, x);
+ // return t + (d >= 0.5 ? o : 0.0);
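+  // E.g. x = 2.5: t = 2.0, d = 0.5, o = 1.0 => 3.0, and x = -2.5 gives -3.0,
+  // i.e. halfway cases round away from zero as round() requires.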
+
+ auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
+
+ auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
+ auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+ auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
+ auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
+
+ auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
+ Flags);
+ auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
+
+ MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
+ unsigned Flags = MI.getFlags();
+ LLT Ty = MRI.getType(DstReg);
+ const LLT CondTy = Ty.changeElementSize(1);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
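+  // E.g. src = -1.25: trunc gives -1.0, and since src is negative and differs
+  // from the truncated value, adding -1.0 yields the floor -2.0.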
+
+ auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+
+ auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
+ SrcReg, Zero, Flags);
+ auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
+ SrcReg, Trunc, Flags);
+ auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
+ auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
+
+ MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
+ const unsigned NumOps = MI.getNumOperands();
+ auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
+ unsigned PartSize = Src0Ty.getSizeInBits();
+
+ LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ const unsigned Offset = (I - 1) * PartSize;
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
+
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
+ MRI.createGenericVirtualRegister(WideTy);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+ auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ }
+
+ if (DstTy.isPointer()) {
+ if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
+ DstTy.getAddressSpace())) {
+ LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
+ return UnableToLegalize;
+ }
+
+ MIRBuilder.buildIntToPtr(DstReg, ResultReg);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
+ const unsigned NumDst = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(NumDst).getReg();
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst0Reg);
+ if (DstTy.isPointer())
+ return UnableToLegalize; // TODO
+
+ SrcReg = coerceToScalar(SrcReg);
+ if (!SrcReg)
+ return UnableToLegalize;
+
+ // Expand scalarizing unmerge as bitcast to integer and shift.
+ LLT IntTy = MRI.getType(SrcReg);
+
+ MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Offset = DstSize;
+ for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+/// Lower a vector extract or insert by writing the vector to a stack temporary
+/// and reloading the element or vector.
+///
+/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
+/// =>
+/// %stack_temp = G_FRAME_INDEX
+/// G_STORE %vec, %stack_temp
+/// %idx = clamp(%idx, %vec.getNumElements())
+/// %element_ptr = G_PTR_ADD %stack_temp, %idx
+/// %dst = G_LOAD %element_ptr
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register InsertVal;
+ if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ LLT VecTy = MRI.getType(SrcVec);
+ LLT EltTy = VecTy.getElementType();
+ unsigned NumElts = VecTy.getNumElements();
+
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
+ SmallVector<Register, 8> SrcRegs;
+ extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+
+ if (InsertVal) {
+ SrcRegs[IdxVal] = MI.getOperand(2).getReg();
+ MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
+ } else {
+ MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (!EltTy.isByteSized()) { // Not implemented.
+ LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
+ return UnableToLegalize;
+ }
+
+ unsigned EltBytes = EltTy.getSizeInBytes();
+ Align VecAlign = getStackTemporaryAlignment(VecTy);
+ Align EltAlign;
+
+ MachinePointerInfo PtrInfo;
+ auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
+ VecAlign, PtrInfo);
+ MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
+
+ // Get the pointer to the element, and be sure not to hit undefined behavior
+ // if the index is out of bounds.
+ Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
+
+ if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ int64_t Offset = IdxVal * EltBytes;
+ PtrInfo = PtrInfo.getWithOffset(Offset);
+ EltAlign = commonAlignment(VecAlign, Offset);
+ } else {
+ // We lose information with a variable offset.
+ EltAlign = getStackTemporaryAlignment(EltTy);
+ PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
+ }
+
+ if (InsertVal) {
+ // Write the inserted element
+ MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
+
+ // Reload the whole vector.
+ MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
+ } else {
+ MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
+ auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
+ MI.getFirst3RegLLTs();
+ LLT IdxTy = LLT::scalar(32);
+
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+
+ if (DstTy.isScalar()) {
+ if (Src0Ty.isVector())
+ return UnableToLegalize;
+
+ // This is just a SELECT.
+ assert(Mask.size() == 1 && "Expected a single mask element");
+ Register Val;
+ if (Mask[0] < 0 || Mask[0] > 1)
+ Val = MIRBuilder.buildUndef(DstTy).getReg(0);
+ else
+ Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
+ MIRBuilder.buildCopy(DstReg, Val);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ Register Undef;
+ SmallVector<Register, 32> BuildVec;
+ LLT EltTy = DstTy.getElementType();
+
+ for (int Idx : Mask) {
+ if (Idx < 0) {
+ if (!Undef.isValid())
+ Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
+ BuildVec.push_back(Undef);
+ continue;
+ }
+
+ if (Src0Ty.isScalar()) {
+ BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
+ } else {
+ int NumElts = Src0Ty.getNumElements();
+ Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
+ int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
+ auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
+ auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
+ BuildVec.push_back(Extract.getReg(0));
+ }
+ }
+
+ MIRBuilder.buildBuildVector(DstReg, BuildVec);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+ const auto &MF = *MI.getMF();
+ const auto &TFI = *MF.getSubtarget().getFrameLowering();
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
+
+ LLT PtrTy = MRI.getType(Dst);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
+ SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
+
+ // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
+ // have to generate an extra instruction to negate the alloc and then use
+ // G_PTR_ADD to add the negative offset.
+ auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
+ if (Alignment > Align(1)) {
+ APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
+ AlignMask.negate();
+ auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
+ Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
+ }
+
+ SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+ MIRBuilder.buildCopy(SPReg, SPTmp);
+ MIRBuilder.buildCopy(Dst, SPTmp);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtract(MachineInstr &MI) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ unsigned Offset = MI.getOperand(2).getImm();
+
+ // Extract sub-vector or one element
+ if (SrcTy.isVector()) {
+ unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
+ (Offset + DstSize <= SrcTy.getSizeInBits())) {
+ // Unmerge and allow access to each Src element for the artifact combiner.
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
+
+      // Take the element(s) we need to extract and copy them, merging if
+      // there is more than one.
+ SmallVector<Register, 8> SubVectorElts;
+ for (unsigned Idx = Offset / SrcEltSize;
+ Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
+ SubVectorElts.push_back(Unmerge.getReg(Idx));
+ }
+ if (SubVectorElts.size() == 1)
+ MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
+ else
+ MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
+ if (DstTy.isScalar() &&
+ (SrcTy.isScalar() ||
+ (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
+ LLT SrcIntTy = SrcTy;
+ if (!SrcTy.isScalar()) {
+ SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
+ }
+
+ if (Offset == 0)
+ MIRBuilder.buildTrunc(DstReg, SrcReg);
+ else {
+ auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
+ auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shr);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
+ auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
+ uint64_t Offset = MI.getOperand(3).getImm();
+
+ LLT DstTy = MRI.getType(Src);
+ LLT InsertTy = MRI.getType(InsertSrc);
+
+ // Insert sub-vector or one element
+ if (DstTy.isVector() && !InsertTy.isPointer()) {
+ LLT EltTy = DstTy.getElementType();
+ unsigned EltSize = EltTy.getSizeInBits();
+ unsigned InsertSize = InsertTy.getSizeInBits();
+
+ if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
+ (Offset + InsertSize <= DstTy.getSizeInBits())) {
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
+ SmallVector<Register, 8> DstElts;
+ unsigned Idx = 0;
+      // Elements from Src that come before the insert start Offset.
+ for (; Idx < Offset / EltSize; ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ // Replace elements in Src with elements from InsertSrc
+ if (InsertTy.getSizeInBits() > EltSize) {
+ auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
+ for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
+ ++Idx, ++i) {
+ DstElts.push_back(UnmergeInsertSrc.getReg(i));
+ }
+ } else {
+ DstElts.push_back(InsertSrc);
+ ++Idx;
+ }
+
+ // Remaining elements from Src after insert
+ for (; Idx < DstTy.getNumElements(); ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
+ if (InsertTy.isVector() ||
+ (DstTy.isVector() && DstTy.getElementType() != InsertTy))
+ return UnableToLegalize;
+
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if ((DstTy.isPointer() &&
+ DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
+ (InsertTy.isPointer() &&
+ DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
+ LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
+ return UnableToLegalize;
+ }
+
+ LLT IntDstTy = DstTy;
+
+ if (!DstTy.isScalar()) {
+ IntDstTy = LLT::scalar(DstTy.getSizeInBits());
+ Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
+ }
+
+ if (!InsertTy.isScalar()) {
+ const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
+ InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
+ }
+
+ Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
+ if (Offset != 0) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
+ ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
+ }
+
+ APInt MaskVal = APInt::getBitsSetWithWrap(
+ DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
+
+ auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
+ auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
+ auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+
+ MIRBuilder.buildCast(Dst, Or);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
+ auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
+ MI.getFirst4RegLLTs();
+ const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
+
+ LLT Ty = Dst0Ty;
+ LLT BoolTy = Dst1Ty;
+
+ if (IsAdd)
+ MIRBuilder.buildAdd(Dst0, LHS, RHS);
+ else
+ MIRBuilder.buildSub(Dst0, LHS, RHS);
+
+ // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
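+  // E.g. for i8 addition 100 + 50: the result wraps to -106, which is less
+  // than LHS while RHS is non-negative, so the XOR of the two compares
+  // reports overflow; 10 + 50 = 60 trips neither compare and reports none.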
+ auto ResultLowerThanLHS =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
+ auto ConditionRHS = MIRBuilder.buildICmp(
+ IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
+
+ MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Res);
+ bool IsSigned;
+ bool IsAdd;
+ unsigned BaseOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected addsat/subsat opcode");
+ case TargetOpcode::G_UADDSAT:
+ IsSigned = false;
+ IsAdd = true;
+ BaseOp = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_SADDSAT:
+ IsSigned = true;
+ IsAdd = true;
+ BaseOp = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_USUBSAT:
+ IsSigned = false;
+ IsAdd = false;
+ BaseOp = TargetOpcode::G_SUB;
+ break;
+ case TargetOpcode::G_SSUBSAT:
+ IsSigned = true;
+ IsAdd = false;
+ BaseOp = TargetOpcode::G_SUB;
+ break;
+ }
+
+ if (IsSigned) {
+ // sadd.sat(a, b) ->
+ // hi = 0x7fffffff - smax(a, 0)
+ // lo = 0x80000000 - smin(a, 0)
+ // a + smin(smax(lo, b), hi)
+ // ssub.sat(a, b) ->
+ // lo = smax(a, -1) - 0x7fffffff
+ // hi = smin(a, -1) - 0x80000000
+ // a - smin(smax(lo, b), hi)
+ // TODO: AMDGPU can use a "median of 3" instruction here:
+ // a +/- med3(lo, b, hi)
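+    // E.g. i8 sadd.sat(100, 100): hi = 127 - 100 = 27, lo = -128 - 0 = -128,
+    // b clamps to 27, and 100 + 27 = 127, the saturated maximum.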
+ uint64_t NumBits = Ty.getScalarSizeInBits();
+ auto MaxVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
+ auto MinVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+ MachineInstrBuilder Hi, Lo;
+ if (IsAdd) {
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
+ Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
+ } else {
+ auto NegOne = MIRBuilder.buildConstant(Ty, -1);
+ Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
+ MaxVal);
+ Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
+ MinVal);
+ }
+ auto RHSClamped =
+ MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
+ MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
+ } else {
+ // uadd.sat(a, b) -> a + umin(~a, b)
+ // usub.sat(a, b) -> a - umin(a, b)
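+    // E.g. u8 uadd.sat(200, 100): ~200 = 55, umin(55, 100) = 55, and
+    // 200 + 55 = 255 saturates exactly at the type maximum.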
+ Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
+ auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
+ MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Res);
+ LLT BoolTy = Ty.changeElementSize(1);
+ bool IsSigned;
+ bool IsAdd;
+ unsigned OverflowOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected addsat/subsat opcode");
+ case TargetOpcode::G_UADDSAT:
+ IsSigned = false;
+ IsAdd = true;
+ OverflowOp = TargetOpcode::G_UADDO;
+ break;
+ case TargetOpcode::G_SADDSAT:
+ IsSigned = true;
+ IsAdd = true;
+ OverflowOp = TargetOpcode::G_SADDO;
+ break;
+ case TargetOpcode::G_USUBSAT:
+ IsSigned = false;
+ IsAdd = false;
+ OverflowOp = TargetOpcode::G_USUBO;
+ break;
+ case TargetOpcode::G_SSUBSAT:
+ IsSigned = true;
+ IsAdd = false;
+ OverflowOp = TargetOpcode::G_SSUBO;
+ break;
+ }
+
+ auto OverflowRes =
+ MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
+ Register Tmp = OverflowRes.getReg(0);
+ Register Ov = OverflowRes.getReg(1);
+ MachineInstrBuilder Clamp;
+ if (IsSigned) {
+ // sadd.sat(a, b) ->
+ // {tmp, ov} = saddo(a, b)
+    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
+ // ssub.sat(a, b) ->
+ // {tmp, ov} = ssubo(a, b)
+    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
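+    // E.g. i32 sadd.sat(INT_MAX, 1): saddo wraps tmp to INT_MIN and sets ov,
+    // so the clamp (tmp >>s 31) + 0x80000000 = -1 + INT_MIN = INT_MAX is
+    // selected, which is the correctly saturated result.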
+ uint64_t NumBits = Ty.getScalarSizeInBits();
+ auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
+ auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
+ auto MinVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+ Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
+ } else {
+ // uadd.sat(a, b) ->
+ // {tmp, ov} = uaddo(a, b)
+ // ov ? 0xffffffff : tmp
+ // usub.sat(a, b) ->
+ // {tmp, ov} = usubo(a, b)
+ // ov ? 0 : tmp
+ Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
+ }
+ MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShlSat(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
+ "Expected shlsat opcode!");
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Res);
+ LLT BoolTy = Ty.changeElementSize(1);
+
+ unsigned BW = Ty.getScalarSizeInBits();
+ auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
+ auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
+ : MIRBuilder.buildLShr(Ty, Result, RHS);
+
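+  // Shifting the result back down reproduces LHS exactly when no bits were
+  // lost; if it differs, the shift overflowed and the saturated value is
+  // selected below.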
+ MachineInstrBuilder SatVal;
+ if (IsSigned) {
+ auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
+ auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
+ auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
+ MIRBuilder.buildConstant(Ty, 0));
+ SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
+ } else {
+ SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
+ }
+ auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
+ MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT Ty = MRI.getType(Src);
+ unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
+ unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
+
+ // Swap most and least significant byte, set remaining bytes in Res to zero.
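+  // E.g. for an s32 0xAABBCCDD this first step produces 0xDD0000AA; the loop
+  // below then moves the 0xBB and 0xCC bytes into place to give 0xDDCCBBAA.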
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
+ auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
+ auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
+ auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
+
+ // Set i-th high/low byte in Res to i-th low/high byte from Src.
+ for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
+ // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
+ APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
+ auto Mask = MIRBuilder.buildConstant(Ty, APMask);
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
+ // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
+ auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
+ auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
+ Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
+ // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
+ auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
+ auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
+ Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
+ }
+ Res.getInstr()->getOperand(0).setReg(Dst);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// { (Src & Mask) >> N } | { (Src << N) & Mask }
+static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
+ MachineInstrBuilder Src, APInt Mask) {
+ const LLT Ty = Dst.getLLTTy(*B.getMRI());
+ MachineInstrBuilder C_N = B.buildConstant(Ty, N);
+ MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
+ auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
+ auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
+ return B.buildOr(Dst, LHS, RHS);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT Ty = MRI.getType(Src);
+ unsigned Size = Ty.getSizeInBits();
+
+ MachineInstrBuilder BSWAP =
+ MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
+
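+  // G_BSWAP reverses the byte order; the three SwapN steps below then reverse
+  // the bit order within each byte, giving a full bit reversal overall.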
+ // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+ // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+ // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+ MachineInstrBuilder Swap4 =
+ SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
+
+ // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
+  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
+ MachineInstrBuilder Swap2 =
+ SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
+
+ // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
+  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
+  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
+ SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
+ int NameOpIdx = IsRead ? 1 : 0;
+ int ValRegIndex = IsRead ? 0 : 1;
+
+ Register ValReg = MI.getOperand(ValRegIndex).getReg();
+ const LLT Ty = MRI.getType(ValReg);
+ const MDString *RegStr = cast<MDString>(
+ cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
+
+ Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
+ if (!PhysReg.isValid())
+ return UnableToLegalize;
+
+ if (IsRead)
+ MIRBuilder.buildCopy(ValReg, PhysReg);
+ else
+ MIRBuilder.buildCopy(PhysReg, ValReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
+ unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ Register Result = MI.getOperand(0).getReg();
+ LLT OrigTy = MRI.getType(Result);
+ auto SizeInBits = OrigTy.getScalarSizeInBits();
+ LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
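+  // E.g. for s32 operands: sign- or zero-extend to s64, multiply, shift the
+  // product right by 32 and truncate to recover the high half of the result.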
+
+ auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
+ auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
+ auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
+ unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
+ auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
+ MIRBuilder.buildTrunc(Result, Shifted);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
+
+ if (Mask == fcNone) {
+ MIRBuilder.buildConstant(DstReg, 0);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ if (Mask == fcAllFlags) {
+ MIRBuilder.buildConstant(DstReg, 1);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
+ // version
+
+ unsigned BitSize = SrcTy.getScalarSizeInBits();
+ const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+
+ LLT IntTy = LLT::scalar(BitSize);
+ if (SrcTy.isVector())
+ IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
+ auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
+
+ // Various masks.
+ APInt SignBit = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+ APInt ExpMask = Inf;
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+ APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
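+  // For f32 these work out to: SignBit = 0x80000000, Inf = ExpMask =
+  // 0x7f800000, AllOneMantissa = 0x007fffff and QNaNBitMask = 0x00400000.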
+
+ auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
+ auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
+ auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
+ auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
+ auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
+
+ auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
+ auto Sign =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
+
+ auto Res = MIRBuilder.buildConstant(DstTy, 0);
+ // Clang doesn't support capture of structured bindings:
+ LLT DstTyCopy = DstTy;
+ const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
+ Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
+ };
+
+ // Tests that involve more than one class should be processed first.
+ if ((Mask & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) u< exp_mask
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+ ExpMaskC));
+ Mask &= ~fcFinite;
+ } else if ((Mask & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V u< exp_mask
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
+ ExpMaskC));
+ Mask &= ~fcPosFinite;
+ } else if ((Mask & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
+ auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+ ExpMaskC);
+ auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
+ appendToRes(And);
+ Mask &= ~fcNegFinite;
+ }
+
+ if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ // TODO: Handle inverted case
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ ExpBits, ZeroC));
+ Mask &= ~PartialCheck;
+ }
+ }
+
+ // Check for individual classes.
+ if (FPClassTest PartialCheck = Mask & fcZero) {
+ if (PartialCheck == fcPosZero)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, ZeroC));
+ else if (PartialCheck == fcZero)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
+ else // fcNegZero
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, SignBitC));
+ }
+
+ if (FPClassTest PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
+
+ if (FPClassTest PartialCheck = Mask & fcInf) {
+ if (PartialCheck == fcPosInf)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, InfC));
+ else if (PartialCheck == fcInf)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
+ else { // fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, NegInfC));
+ }
+ }
+
+ if (FPClassTest PartialCheck = Mask & fcNan) {
+ auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) u> int(inf)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
+ InfWithQnanBitC));
+ } else { // fcSNan
+ // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
+ // abs(V) u< (unsigned(Inf) | quiet_bit)
+ auto IsNan =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
+ auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
+ Abs, InfWithQnanBitC);
+ appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
+ }
+ }
+
+ if (FPClassTest PartialCheck = Mask & fcNormal) {
+ // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
+ // (max_exp-1))
+ APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+ auto ExpMinusOne = MIRBuilder.buildSub(
+ IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
+ APInt MaxExpMinusOne = ExpMask - ExpLSB;
+ auto NormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
+ MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
+ if (PartialCheck == fcNegNormal)
+ NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
+ else if (PartialCheck == fcPosNormal) {
+ auto PosSign = MIRBuilder.buildXor(
+ DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
+ NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
+ }
+ appendToRes(NormalRes);
+ }
+
+ MIRBuilder.buildCopy(DstReg, Res);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
+ // Implement vector G_SELECT in terms of XOR, AND, OR.
+ auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
+ MI.getFirst4RegLLTs();
+ if (!DstTy.isVector())
+ return UnableToLegalize;
+
+ bool IsEltPtr = DstTy.getElementType().isPointer();
+ if (IsEltPtr) {
+ LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
+ LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
+ Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
+ Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
+ DstTy = NewTy;
+ }
+
+ if (MaskTy.isScalar()) {
+ // Turn the scalar condition into a vector condition mask.
+
+ Register MaskElt = MaskReg;
+
+ // The condition was potentially zero extended before, but we want a sign
+ // extended boolean.
+ if (MaskTy != LLT::scalar(1))
+ MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
+
+ // Continue the sign extension (or truncate) to match the data type.
+ MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
+ MaskElt).getReg(0);
+
+ // Generate a vector splat idiom.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+ MaskReg = ShufSplat.getReg(0);
+ MaskTy = DstTy;
+ }
+
+ if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
+ return UnableToLegalize;
+ }
+
+ auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
+ auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
+ auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
+ if (IsEltPtr) {
+ auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
+ MIRBuilder.buildIntToPtr(DstReg, Or);
+ } else {
+ MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
+ // Split DIVREM into individual instructions.
+ unsigned Opcode = MI.getOpcode();
+
+ MIRBuilder.buildInstr(
+ Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
+ : TargetOpcode::G_UDIV,
+ {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
+ MIRBuilder.buildInstr(
+ Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
+ : TargetOpcode::G_UREM,
+ {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_ASHR %a, scalar_size-1
+ // %v2 = G_ADD %a, %v1
+ // %res = G_XOR %v2, %v1
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register OpReg = MI.getOperand(1).getReg();
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
+ auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
+ auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
+ MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_CONSTANT 0
+ // %v2 = G_SUB %v1, %a
+ // %res = G_SMAX %a, %v2
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
+ auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
+ auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
+ MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // The source could be a scalar if the IR type was <1 x sN>.
+ if (SrcTy.isScalar()) {
+ if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
+ return UnableToLegalize; // FIXME: handle extension.
+ // This can be just a plain copy.
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
+// Returns a list of types to use for memory op lowering in MemOps. A partial
+// port of findOptimalMemOpLowering in TargetLowering.
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ const TargetLowering &TLI) {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+ return false;
+
+ LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
+
+ if (Ty == LLT()) {
+ // Use the largest scalar type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ Ty = LLT::scalar(64);
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
+ assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
+ // FIXME: check for the largest legal type we can load/store to.
+ }
+
+ unsigned NumMemOps = 0;
+ uint64_t Size = Op.size();
+ while (Size) {
+ unsigned TySize = Ty.getSizeInBytes();
+ while (TySize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+ LLT NewTy = Ty;
+ // FIXME: check for mem op safety and legality of the types. Not all of
+ // SDAGisms map cleanly to GISel concepts.
+ if (NewTy.isVector())
+ NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
+ NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
+ unsigned NewTySize = NewTy.getSizeInBytes();
+ assert(NewTySize > 0 && "Could not find appropriate type");
+
+ // If the new LLT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ unsigned Fast;
+ // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
+ MVT VT = getMVTForLLT(Ty);
+ if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ TySize = Size;
+ else {
+ Ty = NewTy;
+ TySize = NewTySize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(Ty);
+ Size -= TySize;
+ }
+
+ return true;
+}
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+// Get a vectorized representation of the memset value operand, GISel edition.
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NumBits = Ty.getScalarSizeInBits();
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ if (!Ty.isVector() && ValVRegAndVal) {
+ APInt Scalar = ValVRegAndVal->Value.trunc(8);
+ APInt SplatVal = APInt::getSplat(NumBits, Scalar);
+ return MIB.buildConstant(Ty, SplatVal).getReg(0);
+ }
+
+ // Extend the byte value to the larger type, and then multiply by a magic
+ // value 0x010101... in order to replicate it across every byte.
+ // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
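+  // E.g. for an s32 result, a byte value of 0xAB is zero-extended and
+  // multiplied by 0x01010101, giving 0xABABABAB.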
+ if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
+ return MIB.buildConstant(Ty, 0).getReg(0);
+ }
+
+ LLT ExtType = Ty.getScalarType();
+ auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
+ if (NumBits > 8) {
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ auto MagicMI = MIB.buildConstant(ExtType, Magic);
+ Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
+ }
+
+ // For vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+
+ return Val;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
+ uint64_t KnownLen, Align Alignment,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memset length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
+
+ if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+ MemOp::Set(KnownLen, DstAlignCanChange,
+ Alignment,
+ /*IsZeroMemset=*/IsZeroVal,
+ /*IsVolatile=*/IsVolatile),
+ DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ MachineIRBuilder MIB(MI);
+ // Find the largest store and generate the bit pattern for it.
+ LLT LargestTy = MemOps[0];
+ for (unsigned i = 1; i < MemOps.size(); i++)
+ if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
+ LargestTy = MemOps[i];
+
+ // The memset stored value is always defined as an s8, so in order to make it
+ // work with larger store types we need to repeat the bit pattern across the
+ // wider type.
+ Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
+
+ if (!MemSetValue)
+ return UnableToLegalize;
+
+ // Generate the stores. For each store type in the list, we generate the
+ // matching store of that type to the destination address.
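+  // E.g. an 11-byte memset lowered to {s64, s16, s8} emits stores at offsets
+  // 0, 8 and 10; an overlapping tail type instead has its offset backed up so
+  // that it still ends at the last byte.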
+ LLT PtrTy = MRI.getType(Dst);
+ unsigned DstOff = 0;
+ unsigned Size = KnownLen;
+ for (unsigned I = 0; I < MemOps.size(); I++) {
+ LLT Ty = MemOps[I];
+ unsigned TySize = Ty.getSizeInBytes();
+ if (TySize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(I == MemOps.size() - 1 && I != 0);
+ DstOff -= TySize - Size;
+ }
+
+    // If this store is smaller than the largest store, see whether we can get
+    // the smaller value for free with a truncate.
+ Register Value = MemSetValue;
+ if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
+ MVT VT = getMVTForLLT(Ty);
+ MVT LargestVT = getMVTForLLT(LargestTy);
+ if (!LargestTy.isVector() && !Ty.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
+ else
+ Value = getMemsetValue(Val, Ty, MIB);
+ if (!Value)
+ return UnableToLegalize;
+ }
+
+ auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
+
+ Register Ptr = Dst;
+ if (DstOff != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
+ Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+
+ MIB.buildStore(Value, Ptr, *StoreMMO);
+ DstOff += Ty.getSizeInBytes();
+ Size -= TySize;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
+
+ const auto *MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ // FIXME: support dynamically sized G_MEMCPY_INLINE
+ assert(LenVRegAndVal &&
+ "inline memcpy with dynamic size is not yet supported");
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ Align DstAlign = DstMMO.getBaseAlign();
+ Align SrcAlign = SrcMMO.getBaseAlign();
+
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+ return lowerMemcpy(MI, Dst, Src, KnownLen,
+ std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ Align Alignment = std::min(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ // FIXME: infer better src pointer alignment like SelectionDAG does here.
+ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+ // if the memcpy is in a tail call position.
+
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ IsVolatile),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+  // Now we need to emit a load/store pair for each of the types we've
+  // collected. I.e. for each type, generate a load of that width from the
+  // source pointer, and then generate a corresponding store of the loaded
+  // value to the dest buffer. This can result in a sequence of loads and
+  // stores of mixed types, depending on what the target specifies as good
+  // types to use.
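+  // E.g. a 12-byte copy lowered to {s64, s32} becomes an s64 load/store at
+  // offset 0 followed by an s32 load/store at offset 8.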
+ unsigned CurrOffset = 0;
+ unsigned Size = KnownLen;
+ for (auto CopyTy : MemOps) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ if (CopyTy.getSizeInBytes() > Size)
+ CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+ // Construct MMOs for the accesses.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ Register Offset;
+ if (CurrOffset != 0) {
+ LLT SrcTy = MRI.getType(Src);
+ Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
+ .getReg(0);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
+ }
+ auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+ // Create the store.
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ Size -= CopyTy.getSizeInBytes();
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memmove length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ Align Alignment = std::min(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+  // to a bug in its findOptimalMemOpLowering implementation. For now do the
+ // same thing here.
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+  // Memmove requires that we perform all the loads before issuing any of the
+  // stores, since the source and destination ranges may overlap.
+  // Apart from that, this loop is doing much the same thing as the memcpy
+  // codegen function.
+ unsigned CurrOffset = 0;
+ SmallVector<Register, 16> LoadVals;
+ for (auto CopyTy : MemOps) {
+ // Construct MMO for the load.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ if (CurrOffset != 0) {
+ LLT SrcTy = MRI.getType(Src);
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
+ }
+ LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+
+ CurrOffset = 0;
+ for (unsigned I = 0; I < MemOps.size(); ++I) {
+ LLT CopyTy = MemOps[I];
+ // Now store the values loaded.
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
+ // This combine is fairly complex so it's not written with a separate
+ // matcher function.
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) &&
+ "Expected memcpy like instruction");
+
+ auto MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+
+ Align DstAlign = MemOp->getBaseAlign();
+ Align SrcAlign;
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
+
+ if (Opc != TargetOpcode::G_MEMSET) {
+ assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
+ MemOp = *(++MMOIt);
+ SrcAlign = MemOp->getBaseAlign();
+ }
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ if (!LenVRegAndVal)
+ return UnableToLegalize;
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ bool IsVolatile = MemOp->isVolatile();
+ if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return UnableToLegalize;
+
+ if (MaxLen && KnownLen > MaxLen)
+ return UnableToLegalize;
+
+ if (Opc == TargetOpcode::G_MEMCPY) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
+ IsVolatile);
+ }
+ if (Opc == TargetOpcode::G_MEMMOVE)
+ return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (Opc == TargetOpcode::G_MEMSET)
+ return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
+ return UnableToLegalize;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
new file mode 100644
index 000000000000..1f2e481c63e0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -0,0 +1,435 @@
+//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement an interface to specify and query how an illegal operation on a
+// given type should be expanded.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace LegalizeActions;
+
+#define DEBUG_TYPE "legalizer-info"
+
+cl::opt<bool> llvm::DisableGISelLegalityCheck(
+ "disable-gisel-legality-check",
+ cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
+ cl::Hidden);
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
+ switch (Action) {
+ case Legal:
+ OS << "Legal";
+ break;
+ case NarrowScalar:
+ OS << "NarrowScalar";
+ break;
+ case WidenScalar:
+ OS << "WidenScalar";
+ break;
+ case FewerElements:
+ OS << "FewerElements";
+ break;
+ case MoreElements:
+ OS << "MoreElements";
+ break;
+ case Bitcast:
+ OS << "Bitcast";
+ break;
+ case Lower:
+ OS << "Lower";
+ break;
+ case Libcall:
+ OS << "Libcall";
+ break;
+ case Custom:
+ OS << "Custom";
+ break;
+ case Unsupported:
+ OS << "Unsupported";
+ break;
+ case NotFound:
+ OS << "NotFound";
+ break;
+ case UseLegacyRules:
+ OS << "UseLegacyRules";
+ break;
+ }
+ return OS;
+}
+
+raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
+ OS << Opcode << ", Tys={";
+ for (const auto &Type : Types) {
+ OS << Type << ", ";
+ }
+ OS << "}, Opcode=";
+
+ OS << Opcode << ", MMOs={";
+ for (const auto &MMODescr : MMODescrs) {
+ OS << MMODescr.MemoryTy << ", ";
+ }
+ OS << "}";
+
+ return OS;
+}
+
+#ifndef NDEBUG
+// Make sure the rule won't (trivially) loop forever.
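+// A rule that mutates a type index to the very same type it matched would
+// send the instruction through the legalizer again with an identical query,
+// so such mutations are rejected for actions that are expected to change the
+// type.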
+static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
+ const std::pair<unsigned, LLT> &Mutation) {
+ switch (Rule.getAction()) {
+ case Legal:
+ case Custom:
+ case Lower:
+ case MoreElements:
+ case FewerElements:
+ break;
+ default:
+ return Q.Types[Mutation.first] != Mutation.second;
+ }
+ return true;
+}
+
+// Make sure the returned mutation makes sense for the match type.
+static bool mutationIsSane(const LegalizeRule &Rule,
+ const LegalityQuery &Q,
+ std::pair<unsigned, LLT> Mutation) {
+ // If the user wants a custom mutation, then we can't really say much about
+ // it. Return true, and trust that they're doing the right thing.
+ if (Rule.getAction() == Custom || Rule.getAction() == Legal)
+ return true;
+
+ const unsigned TypeIdx = Mutation.first;
+ const LLT OldTy = Q.Types[TypeIdx];
+ const LLT NewTy = Mutation.second;
+
+ switch (Rule.getAction()) {
+ case FewerElements:
+ if (!OldTy.isVector())
+ return false;
+ [[fallthrough]];
+ case MoreElements: {
+ // MoreElements can go from scalar to vector.
+ const ElementCount OldElts = OldTy.isVector() ?
+ OldTy.getElementCount() : ElementCount::getFixed(1);
+ if (NewTy.isVector()) {
+ if (Rule.getAction() == FewerElements) {
+ // Make sure the element count really decreased.
+ if (ElementCount::isKnownGE(NewTy.getElementCount(), OldElts))
+ return false;
+ } else {
+ // Make sure the element count really increased.
+ if (ElementCount::isKnownLE(NewTy.getElementCount(), OldElts))
+ return false;
+ }
+ } else if (Rule.getAction() == MoreElements)
+ return false;
+
+ // Make sure the element type didn't change.
+ return NewTy.getScalarType() == OldTy.getScalarType();
+ }
+ case NarrowScalar:
+ case WidenScalar: {
+ if (OldTy.isVector()) {
+ // Number of elements should not change.
+ if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements())
+ return false;
+ } else {
+      // Both types must be scalars.
+ if (NewTy.isVector())
+ return false;
+ }
+
+ if (Rule.getAction() == NarrowScalar) {
+ // Make sure the size really decreased.
+ if (NewTy.getScalarSizeInBits() >= OldTy.getScalarSizeInBits())
+ return false;
+ } else {
+ // Make sure the size really increased.
+ if (NewTy.getScalarSizeInBits() <= OldTy.getScalarSizeInBits())
+ return false;
+ }
+
+ return true;
+ }
+ case Bitcast: {
+ return OldTy != NewTy && OldTy.getSizeInBits() == NewTy.getSizeInBits();
+ }
+ default:
+ return true;
+ }
+}
+#endif
+
+LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
+ LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
+ dbgs() << "\n");
+ if (Rules.empty()) {
+ LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
+ return {LegalizeAction::UseLegacyRules, 0, LLT{}};
+ }
+ for (const LegalizeRule &Rule : Rules) {
+ if (Rule.match(Query)) {
+ LLVM_DEBUG(dbgs() << ".. match\n");
+ std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query);
+ LLVM_DEBUG(dbgs() << ".. .. " << Rule.getAction() << ", "
+ << Mutation.first << ", " << Mutation.second << "\n");
+ assert(mutationIsSane(Rule, Query, Mutation) &&
+ "legality mutation invalid for match");
+ assert(hasNoSimpleLoops(Rule, Query, Mutation) && "Simple loop detected");
+ return {Rule.getAction(), Mutation.first, Mutation.second};
+ } else
+ LLVM_DEBUG(dbgs() << ".. no match\n");
+ }
+ LLVM_DEBUG(dbgs() << ".. unsupported\n");
+ return {LegalizeAction::Unsupported, 0, LLT{}};
+}
+
+bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const {
+#ifndef NDEBUG
+ if (Rules.empty()) {
+ LLVM_DEBUG(
+ dbgs() << ".. type index coverage check SKIPPED: no rules defined\n");
+ return true;
+ }
+ const int64_t FirstUncovered = TypeIdxsCovered.find_first_unset();
+ if (FirstUncovered < 0) {
+ LLVM_DEBUG(dbgs() << ".. type index coverage check SKIPPED:"
+ " user-defined predicate detected\n");
+ return true;
+ }
+ const bool AllCovered = (FirstUncovered >= NumTypeIdxs);
+ if (NumTypeIdxs > 0)
+ LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered
+ << ", " << (AllCovered ? "OK" : "FAIL") << "\n");
+ return AllCovered;
+#else
+ return true;
+#endif
+}
+
+bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const {
+#ifndef NDEBUG
+ if (Rules.empty()) {
+ LLVM_DEBUG(
+ dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n");
+ return true;
+ }
+ const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset();
+ if (FirstUncovered < 0) {
+ LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:"
+ " user-defined predicate detected\n");
+ return true;
+ }
+ const bool AllCovered = (FirstUncovered >= NumImmIdxs);
+ LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered
+ << ", " << (AllCovered ? "OK" : "FAIL") << "\n");
+ return AllCovered;
+#else
+ return true;
+#endif
+}
+
+/// Helper function to get LLT for the given type index.
+static LLT getTypeFromTypeIdx(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, unsigned OpIdx,
+ unsigned TypeIdx) {
+ assert(TypeIdx < MI.getNumOperands() && "Unexpected TypeIdx");
+  // G_UNMERGE_VALUES has a variable number of operands, but there is only
+ // one source type and one destination type as all destinations must be the
+ // same type. So, get the last operand if TypeIdx == 1.
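+  // E.g. for %a:s32, %b:s32 = G_UNMERGE_VALUES %x:s64, type index 1 describes
+  // %x, which is always the trailing operand.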
+ if (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && TypeIdx == 1)
+ return MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
+ return MRI.getType(MI.getOperand(OpIdx).getReg());
+}
+
+unsigned LegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const {
+ assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode");
+ return Opcode - FirstOp;
+}
+
+unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const {
+ unsigned OpcodeIdx = getOpcodeIdxForOpcode(Opcode);
+ if (unsigned Alias = RulesForOpcode[OpcodeIdx].getAlias()) {
+ LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias
+ << "\n");
+ OpcodeIdx = getOpcodeIdxForOpcode(Alias);
+ assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases");
+ }
+
+ return OpcodeIdx;
+}
+
+const LegalizeRuleSet &
+LegalizerInfo::getActionDefinitions(unsigned Opcode) const {
+ unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode);
+ return RulesForOpcode[OpcodeIdx];
+}
+
+LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(unsigned Opcode) {
+ unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode);
+ auto &Result = RulesForOpcode[OpcodeIdx];
+ assert(!Result.isAliasedByAnother() && "Modifying this opcode will modify aliases");
+ return Result;
+}
+
+LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
+ std::initializer_list<unsigned> Opcodes) {
+ unsigned Representative = *Opcodes.begin();
+
+ assert(Opcodes.size() >= 2 &&
+ "Initializer list must have at least two opcodes");
+
+ for (unsigned Op : llvm::drop_begin(Opcodes))
+ aliasActionDefinitions(Representative, Op);
+
+ auto &Return = getActionDefinitionsBuilder(Representative);
+ Return.setIsAliasedByAnother();
+ return Return;
+}
+
+void LegalizerInfo::aliasActionDefinitions(unsigned OpcodeTo,
+ unsigned OpcodeFrom) {
+ assert(OpcodeTo != OpcodeFrom && "Cannot alias to self");
+ assert(OpcodeTo >= FirstOp && OpcodeTo <= LastOp && "Unsupported opcode");
+ const unsigned OpcodeFromIdx = getOpcodeIdxForOpcode(OpcodeFrom);
+ RulesForOpcode[OpcodeFromIdx].aliasTo(OpcodeTo);
+}
+
+LegalizeActionStep
+LegalizerInfo::getAction(const LegalityQuery &Query) const {
+ LegalizeActionStep Step = getActionDefinitions(Query.Opcode).apply(Query);
+ if (Step.Action != LegalizeAction::UseLegacyRules) {
+ return Step;
+ }
+
+ return getLegacyLegalizerInfo().getAction(Query);
+}
+
+LegalizeActionStep
+LegalizerInfo::getAction(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ SmallVector<LLT, 8> Types;
+ SmallBitVector SeenTypes(8);
+ ArrayRef<MCOperandInfo> OpInfo = MI.getDesc().operands();
+ // FIXME: probably we'll need to cache the results here somehow?
+ for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
+ if (!OpInfo[i].isGenericType())
+ continue;
+
+ // We must only record actions once for each TypeIdx; otherwise we'd
+ // try to legalize operands multiple times down the line.
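+    // E.g. G_ADD uses type index 0 for its def and both of its uses, so only
+    // the first occurrence contributes an entry to Types.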
+ unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
+ if (SeenTypes[TypeIdx])
+ continue;
+
+ SeenTypes.set(TypeIdx);
+
+ LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx);
+ Types.push_back(Ty);
+ }
+
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
+ for (const auto &MMO : MI.memoperands())
+ MemDescrs.push_back({*MMO});
+
+ return getAction({MI.getOpcode(), Types, MemDescrs});
+}
+
+bool LegalizerInfo::isLegal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ return getAction(MI, MRI).Action == Legal;
+}
+
+bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ auto Action = getAction(MI, MRI).Action;
+ // If the action is custom, it may not necessarily modify the instruction,
+ // so we have to assume it's legal.
+ return Action == Legal || Action == Custom;
+}
+
+unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
+ return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+}
+
+/// \pre Type indices of every opcode form a dense set starting from 0.
+void LegalizerInfo::verify(const MCInstrInfo &MII) const {
+#ifndef NDEBUG
+ std::vector<unsigned> FailedOpcodes;
+ for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) {
+ const MCInstrDesc &MCID = MII.get(Opcode);
+ const unsigned NumTypeIdxs = std::accumulate(
+ MCID.operands().begin(), MCID.operands().end(), 0U,
+ [](unsigned Acc, const MCOperandInfo &OpInfo) {
+ return OpInfo.isGenericType()
+ ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc)
+ : Acc;
+ });
+ const unsigned NumImmIdxs = std::accumulate(
+ MCID.operands().begin(), MCID.operands().end(), 0U,
+ [](unsigned Acc, const MCOperandInfo &OpInfo) {
+ return OpInfo.isGenericImm()
+ ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc)
+ : Acc;
+ });
+ LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode
+ << "): " << NumTypeIdxs << " type ind"
+ << (NumTypeIdxs == 1 ? "ex" : "ices") << ", "
+ << NumImmIdxs << " imm ind"
+ << (NumImmIdxs == 1 ? "ex" : "ices") << "\n");
+ const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode);
+ if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs))
+ FailedOpcodes.push_back(Opcode);
+ else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs))
+ FailedOpcodes.push_back(Opcode);
+ }
+ if (!FailedOpcodes.empty()) {
+ errs() << "The following opcodes have ill-defined legalization rules:";
+ for (unsigned Opcode : FailedOpcodes)
+ errs() << " " << MII.getName(Opcode);
+ errs() << "\n";
+
+ report_fatal_error("ill-defined LegalizerInfo"
+ ", try -debug-only=legalizer-info for details");
+ }
+#endif
+}
+
+#ifndef NDEBUG
+// FIXME: This should be in the MachineVerifier, but it can't use the
+// LegalizerInfo as it's currently in the separate GlobalISel library.
+// Note that RegBankSelected property already checked in the verifier
+// has the same layering problem, but we only use inline methods so
+// end up not needing to link against the GlobalISel library.
+const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
+ if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) &&
+ !MLI->isLegalOrCustom(MI, MRI))
+ return &MI;
+ }
+ return nullptr;
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
new file mode 100644
index 000000000000..49f40495d6fc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -0,0 +1,971 @@
+//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the LoadStoreOpt optimization pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "loadstore-opt"
+
+using namespace llvm;
+using namespace ore;
+using namespace MIPatternMatch;
+
+STATISTIC(NumStoresMerged, "Number of stores merged");
+
+const unsigned MaxStoreSizeToForm = 128;
+
+char LoadStoreOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+
+LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+LoadStoreOpt::LoadStoreOpt()
+ : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
+
+void LoadStoreOpt::init(MachineFunction &MF) {
+ this->MF = &MF;
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TLI = MF.getSubtarget().getTargetLowering();
+ LI = MF.getSubtarget().getLegalizerInfo();
+ Builder.setMF(MF);
+ IsPreLegalizer = !MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ InstsToErase.clear();
+}
+
+void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.setPreservesAll();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
+ MachineRegisterInfo &MRI) {
+ BaseIndexOffset Info;
+ Register PtrAddRHS;
+ if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
+ Info.BaseReg = Ptr;
+ Info.IndexReg = Register();
+ Info.IsIndexSignExt = false;
+ return Info;
+ }
+
+ auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
+ if (RHSCst)
+ Info.Offset = RHSCst->Value.getSExtValue();
+
+ // Just recognize a simple case for now. In future we'll need to match
+ // indexing patterns for base + index + constant.
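+  // E.g. for %p = G_PTR_ADD %base, %idx we record BaseReg = %base and
+  // IndexReg = %idx, with Offset filled in above when %idx is a constant.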
+ Info.IndexReg = PtrAddRHS;
+ Info.IsIndexSignExt = false;
+ return Info;
+}
+
+bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
+ const MachineInstr &MI2,
+ bool &IsAlias,
+ MachineRegisterInfo &MRI) {
+ auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
+ auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
+ if (!LdSt1 || !LdSt2)
+ return false;
+
+ BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
+ BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
+
+ if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+ return false;
+
+ int64_t Size1 = LdSt1->getMemSize();
+ int64_t Size2 = LdSt2->getMemSize();
+
+ int64_t PtrDiff;
+ if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
+ PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+ // If the size of memory access is unknown, do not use it to do analysis.
+ // One example of unknown size memory access is to load/store scalable
+ // vector objects on the stack.
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ if (PtrDiff >= 0 &&
+ Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(Size1 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + Size2) <= 0);
+ return true;
+ }
+ return false;
+ }
+
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
+ auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+ if (!Base0Def || !Base1Def)
+ return false; // Couldn't tell anything.
+
+ if (Base0Def->getOpcode() != Base1Def->getOpcode())
+ return false;
+
+ if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
+    // If the bases are different frame indices and at least one of them is
+    // not a fixed object (i.e. it comes from an alloca), the stack objects
+    // cannot overlap, so there is no alias.
+ if (Base0Def != Base1Def &&
+ (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
+ !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // This implementation is a lot more primitive than the SDAG one for now.
+ // FIXME: what about constant pools?
+ if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
+ auto GV0 = Base0Def->getOperand(1).getGlobal();
+ auto GV1 = Base1Def->getOperand(1).getGlobal();
+ if (GV0 != GV1) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // Can't tell anything about aliasing.
+ return false;
+}
+
+bool GISelAddressing::instMayAlias(const MachineInstr &MI,
+ const MachineInstr &Other,
+ MachineRegisterInfo &MRI,
+ AliasAnalysis *AA) {
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ bool IsAtomic;
+ Register BasePtr;
+ int64_t Offset;
+ uint64_t NumBytes;
+ MachineMemOperand *MMO;
+ };
+
+ auto getCharacteristics =
+ [&](const MachineInstr *MI) -> MemUseCharacteristics {
+ if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
+ Register BaseReg;
+ int64_t Offset = 0;
+ // No pre/post-inc addressing modes are considered here, unlike in SDAG.
+ if (!mi_match(LS->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
+ BaseReg = LS->getPointerReg();
+ Offset = 0;
+ }
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ LS->getMMO().getMemoryType().getSizeInBytes());
+ return {LS->isVolatile(), LS->isAtomic(), BaseReg,
+ Offset /*base offset*/, Size, &LS->getMMO()};
+ }
+ // FIXME: support recognizing lifetime instructions.
+ // Default.
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, Register(),
+ (int64_t)0 /*offset*/, 0 /*size*/,
+ (MachineMemOperand *)nullptr};
+ };
+ MemUseCharacteristics MUC0 = getCharacteristics(&MI),
+ MUC1 = getCharacteristics(&Other);
+
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias.
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+ }
+
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // FIXME: port the alignment based alias analysis from SDAG's isAlias().
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ uint64_t Size0 = MUC0.NumBytes;
+ uint64_t Size1 = MUC1.NumBytes;
+ if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0 != MemoryLocation::UnknownSize &&
+ Size1 != MemoryLocation::UnknownSize) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
+ if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ MUC0.MMO->getAAInfo()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ MUC1.MMO->getAAInfo())))
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// Returns true if the instruction creates an unavoidable hazard that
+/// forces a boundary between store merge candidates.
+static bool isInstHardMergeHazard(MachineInstr &MI) {
+ return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
+}
+
+bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
+ // Try to merge all the stores in the vector, splitting into separate segments
+ // as necessary.
+ assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
+ LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
+ LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ // Ensure the legal store info is computed for this address space.
+ initializeStoreMergeTargetInfo(AS);
+ const auto &LegalSizes = LegalStoreSizes[AS];
+
+#ifndef NDEBUG
+ for (auto *StoreMI : StoresToMerge)
+ assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
+#endif
+
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ bool AnyMerged = false;
+ do {
+ unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
+ unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
+ // Compute the biggest store we can generate to handle the number of stores.
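+    // E.g. four adjacent s8 stores give MaxSizeBits = 32; if a 32-bit store
+    // is not legal or can't be merged to, we retry at 16 bits and merge only
+    // two of the stores in this round.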
+ unsigned MergeSizeBits;
+ for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
+ LLT StoreTy = LLT::scalar(MergeSizeBits);
+ EVT StoreEVT =
+ getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
+ if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
+ TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
+ (TLI->isTypeLegal(StoreEVT)))
+ break; // We can generate a MergeSize bits store.
+ }
+ if (MergeSizeBits <= OrigTy.getSizeInBits())
+ return AnyMerged; // No greater merge.
+
+ unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
+ // Perform the actual merging.
+ SmallVector<GStore *, 8> SingleMergeStores(
+ StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
+ AnyMerged |= doSingleStoreMerge(SingleMergeStores);
+ StoresToMerge.erase(StoresToMerge.begin(),
+ StoresToMerge.begin() + NumStoresToMerge);
+ } while (StoresToMerge.size() > 1);
+ return AnyMerged;
+}
+
+bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
+ MachineFunction &MF) const {
+ auto Action = LI->getAction(Query).Action;
+ // If the instruction is unsupported, it can't be legalized at all.
+ if (Action == LegalizeActions::Unsupported)
+ return false;
+ return IsPreLegalizer || Action == LegalizeAction::Legal;
+}
+
+bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
+ assert(Stores.size() > 1);
+ // We know that all the stores are consecutive and there are no aliasing
+ // operations in the range. However, the values that are being stored may be
+ // generated anywhere before each store. To ensure we have the values
+ // available, we materialize the wide value and new store at the place of the
+ // final store in the merge sequence.
+ GStore *FirstStore = Stores[0];
+ const unsigned NumStores = Stores.size();
+ LLT SmallTy = MRI->getType(FirstStore->getValueReg());
+ LLT WideValueTy =
+ LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue());
+
+ // For each store, compute pairwise merged debug locs.
+ DebugLoc MergedLoc = Stores.front()->getDebugLoc();
+ for (auto *Store : drop_begin(Stores))
+ MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc());
+
+ Builder.setInstr(*Stores.back());
+ Builder.setDebugLoc(MergedLoc);
+
+ // If all of the store values are constants, then create a wide constant
+ // directly. Otherwise, we need to generate some instructions to merge the
+ // existing values together into a wider type.
+ SmallVector<APInt, 8> ConstantVals;
+ for (auto *Store : Stores) {
+ auto MaybeCst =
+ getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
+ if (!MaybeCst) {
+ ConstantVals.clear();
+ break;
+ }
+ ConstantVals.emplace_back(MaybeCst->Value);
+ }
+
+ Register WideReg;
+ auto *WideMMO =
+ MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
+ if (ConstantVals.empty()) {
+ // Mimic the SDAG behaviour here and don't try to do anything for unknown
+ // values. In future, we should also support the cases of loads and
+ // extracted vector elements.
+ return false;
+ }
+
+ assert(ConstantVals.size() == NumStores);
+ // Check if our wide constant is legal.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
+ return false;
+ APInt WideConst(WideValueTy.getSizeInBits(), 0);
+ for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
+ // Insert the smaller constant into the corresponding position in the
+ // wider one.
+ WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
+ }
+ WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
+ auto NewStore =
+ Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
+ (void) NewStore;
+ LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
+ << " stores into merged store: " << *NewStore);
+ LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;);
+ NumStoresMerged += Stores.size();
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
+ FirstStore->getDebugLoc(),
+ FirstStore->getParent());
+ R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
+ << NV("OrigWidth", SmallTy.getSizeInBytes())
+ << " bytes into a single store of "
+ << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
+ return R;
+ });
+
+ for (auto *MI : Stores)
+ InstsToErase.insert(MI);
+ return true;
+}
+
+bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
+ if (C.Stores.size() < 2) {
+ C.reset();
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
+ << " stores, starting with " << *C.Stores[0]);
+ // We know that the stores in the candidate are adjacent.
+ // Now we need to check if any potential aliasing instructions recorded
+ // during the search alias with load/stores added to the candidate after.
+ // For example, if we have the candidate:
+ // C.Stores = [ST1, ST2, ST3, ST4]
+ // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
+ // ST2, then we would have recorded it into the PotentialAliases structure
+ // with the associated index value of "1". Then we see ST3 and ST4 and add
+ // them to the candidate group. We know that LD1 does not alias with ST1 or
+ // ST2, since we already did that check. However we don't yet know if it
+ // may alias ST3 and ST4, so we perform those checks now.
+ SmallVector<GStore *> StoresToMerge;
+
+ auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
+ for (auto AliasInfo : reverse(C.PotentialAliases)) {
+ MachineInstr *PotentialAliasOp = AliasInfo.first;
+ unsigned PreCheckedIdx = AliasInfo.second;
+ if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
+ // Once our store index is lower than the index associated with the
+ // potential alias, we know that we've already checked for this alias
+ // and all of the earlier potential aliases too.
+ return false;
+ }
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
+ }
+ return false;
+ };
+ // Start from the last store in the group, and check if it aliases with any
+ // of the potential aliasing operations in the list.
+ for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
+ auto *CheckStore = C.Stores[StoreIdx];
+ if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
+ continue;
+ StoresToMerge.emplace_back(CheckStore);
+ }
+
+ LLVM_DEBUG(dbgs() << StoresToMerge.size()
+ << " stores remaining after alias checks. Merging...\n");
+
+ // Now we've checked for aliasing hazards, merge any stores left.
+ C.reset();
+ if (StoresToMerge.size() < 2)
+ return false;
+ return mergeStores(StoresToMerge);
+}
+
+bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
+ StoreMergeCandidate &C) {
+ if (C.Stores.empty())
+ return false;
+ return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
+ return instMayAlias(MI, *OtherMI, *MRI, AA);
+ });
+}
+
+void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
+ PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
+}
+
+bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
+ StoreMergeCandidate &C) {
+  // Check if the given store writes to an address adjacent to the candidate
+  // and meets the other requirements for merging.
+ LLT ValueTy = MRI->getType(StoreMI.getValueReg());
+ LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
+
+ // Only handle scalars.
+ if (!ValueTy.isScalar())
+ return false;
+
+ // Don't allow truncating stores for now.
+ if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
+ return false;
+
+ // Avoid adding volatile or ordered stores to the candidate. We already have a
+  // check for this in instMayAlias(), but that only gets called later, when
+  // checking against potential aliasing hazards.
+ if (!StoreMI.isSimple())
+ return false;
+
+ Register StoreAddr = StoreMI.getPointerReg();
+ auto BIO = getPointerInfo(StoreAddr, *MRI);
+ Register StoreBase = BIO.BaseReg;
+ uint64_t StoreOffCst = BIO.Offset;
+ if (C.Stores.empty()) {
+ // This is the first store of the candidate.
+ // If the offset can't possibly allow for a lower addressed store with the
+ // same base, don't bother adding it.
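+    // The block is walked bottom-up, so a candidate only grows toward lower
+    // addresses; a first store less than one value-width above offset zero
+    // leaves no room for an adjacent lower store.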
+ if (StoreOffCst < ValueTy.getSizeInBytes())
+ return false;
+ C.BasePtr = StoreBase;
+ C.CurrentLowestOffset = StoreOffCst;
+ C.Stores.emplace_back(&StoreMI);
+ LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
+ << StoreMI);
+ return true;
+ }
+
+ // Check the store is the same size as the existing ones in the candidate.
+ if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
+ ValueTy.getSizeInBits())
+ return false;
+
+ if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
+ PtrTy.getAddressSpace())
+ return false;
+
+  // There are other stores in the candidate. Check that this store writes to
+  // the next lowest adjacent address.
+ if (C.BasePtr != StoreBase)
+ return false;
+ if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
+ static_cast<uint64_t>(StoreOffCst))
+ return false;
+
+ // This writes to an adjacent address. Allow it.
+ C.Stores.emplace_back(&StoreMI);
+ C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
+ LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
+ return true;
+}
+
+bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ // Walk through the block bottom-up, looking for merging candidates.
+ StoreMergeCandidate Candidate;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (InstsToErase.contains(&MI))
+ continue;
+
+ if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
+ // We have a G_STORE. Add it to the candidate if it writes to an adjacent
+ // address.
+ if (!addStoreToCandidate(*StoreMI, Candidate)) {
+ // Store wasn't eligible to be added. May need to record it as a
+ // potential alias.
+ if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+ Candidate.addPotentialAlias(*StoreMI);
+ }
+ continue;
+ }
+
+ // If we don't have any stores yet, this instruction can't pose a problem.
+ if (Candidate.Stores.empty())
+ continue;
+
+ // We're dealing with some other kind of instruction.
+ if (isInstHardMergeHazard(MI)) {
+ Changed |= processMergeCandidate(Candidate);
+ Candidate.Stores.clear();
+ continue;
+ }
+
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ if (operationAliasesWithCandidate(MI, Candidate)) {
+ // We have a potential alias, so process the current candidate if we can
+ // and then continue looking for a new candidate.
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+
+ // Record this instruction as a potential alias for future stores that are
+ // added to the candidate.
+ Candidate.addPotentialAlias(MI);
+ }
+
+ // Process any candidate left after finishing searching the entire block.
+ Changed |= processMergeCandidate(Candidate);
+
+ // Erase instructions now that we're no longer iterating over the block.
+ for (auto *MI : InstsToErase)
+ MI->eraseFromParent();
+ InstsToErase.clear();
+ return Changed;
+}
+
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return std::nullopt;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return std::nullopt;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return std::nullopt;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return std::nullopt;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return std::nullopt;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the targets
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores) {
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI->getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+    // Record this store's memory offset against its byte offset in the
+    // combined value, and stop the search if that slot has already been filled.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ if (FoundStores.size() == 1)
+ return false;
+ // We didn't find enough stores to merge into the size of the original
+ // source value, but we may be able to generate a smaller store if we
+ // truncate the source value.
+ WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
+ }
+
+ unsigned NumStoresFound = FoundStores.size();
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+  // Check that a store of the wide type is both allowed and fast on the target.
+ unsigned Fast = 0;
+ bool Allowed = TLI->allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresFound; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
+ return false;
+
+ Builder.setInstrAndDebugLoc(StoreMI);
+
+ if (WideStoreTy != MRI->getType(WideSrcVal))
+ WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);
+
+ if (NeedBswap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
+ LowestIdxStore->getMMO().getPointerInfo(),
+ LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : FoundStores) {
+ ST->eraseFromParent();
+ DeletedStores.insert(ST);
+ }
+ return true;
+}
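+
+// Illustrative sketch of the offset check above (made-up values; little-endian
+// target and a LowestIdxOffset of 0 are assumed): when an s32 value is stored
+// as four s8 pieces, OffsetMap maps each byte index of the wide value to the
+// memory offset it was stored at:
+//   OffsetMap = {0, 1, 2, 3}  -> native order, emit one plain s32 store.
+//   OffsetMap = {3, 2, 1, 0}  -> reversed order, emit G_BSWAP + s32 store.
+// Two s16 halves stored in swapped order ({2, 0} rather than {0, 2}) need no
+// byte swap; a G_ROTR by half the width (16 bits) reorders the halves instead.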
+
+bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
+ bool Changed = false;
+ SmallVector<GStore *, 16> Stores;
+ SmallPtrSet<GStore *, 8> DeletedStores;
+  // Walk the block bottom-up so that the later stores, which can see all of
+  // the earlier candidates, are tried first.
+ for (MachineInstr &MI : llvm::reverse(BB))
+ if (auto *StoreMI = dyn_cast<GStore>(&MI))
+ Stores.emplace_back(StoreMI);
+
+ for (auto *StoreMI : Stores) {
+ if (DeletedStores.count(StoreMI))
+ continue;
+ if (mergeTruncStore(*StoreMI, DeletedStores))
+ Changed = true;
+ }
+ return Changed;
+}
+
+bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
+ bool Changed = false;
+  for (auto &BB : MF) {
+ Changed |= mergeBlockStores(BB);
+ Changed |= mergeTruncStoresBlock(BB);
+ }
+
+ // Erase all dead instructions left over by the merging.
+ if (Changed) {
+ for (auto &BB : MF) {
+ for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
+ if (isTriviallyDead(I, *MRI))
+ I.eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
+ // Query the legalizer info to record what store types are legal.
+  // We record this because we don't want to bother trying to merge stores into
+  // illegal ones, which would just be split apart again by the legalizer.
+
+ if (LegalStoreSizes.count(AddrSpace)) {
+ assert(LegalStoreSizes[AddrSpace].any());
+ return; // Already cached sizes for this address space.
+ }
+
+ // Need to reserve at least MaxStoreSizeToForm + 1 bits.
+ BitVector LegalSizes(MaxStoreSizeToForm * 2);
+ const auto &LI = *MF->getSubtarget().getLegalizerInfo();
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ Type *IntPtrIRTy =
+ DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ // We assume that we're not going to be generating any stores wider than
+ // MaxStoreSizeToForm bits for now.
+ for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
+ LLT Ty = LLT::scalar(Size);
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ SmallVector<LLT> StoreTys({Ty, PtrTy});
+ LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
+ LegalizeActionStep ActionStep = LI.getAction(Q);
+ if (ActionStep.Action == LegalizeActions::Legal)
+ LegalSizes.set(Size);
+ }
+ assert(LegalSizes.any() && "Expected some store sizes to be legal!");
+ LegalStoreSizes[AddrSpace] = LegalSizes;
+}
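+
+// Illustrative sketch (assumed target, not a claim about any real subtarget):
+// if the legalizer reports G_STORE of s16, s32 and s64 as Legal in address
+// space 0, the loop above sets bits 16, 32 and 64 of the cached BitVector, so
+// a later LegalStoreSizes[0].test(64) allows the store-merging code to form a
+// single 64-bit store out of smaller adjacent ones.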
+
+bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
+ << '\n');
+
+ init(MF);
+ bool Changed = false;
+ Changed |= mergeFunctionStores(MF);
+
+ LegalStoreSizes.clear();
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
new file mode 100644
index 000000000000..55984423e5bc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -0,0 +1,220 @@
+//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the Localizer class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "localizer"
+
+using namespace llvm;
+
+char Localizer::ID = 0;
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
+
+Localizer::Localizer(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+Localizer::Localizer()
+ : Localizer([](const MachineFunction &) { return false; }) {}
+
+void Localizer::init(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
+
+void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
+ MachineBasicBlock *&InsertMBB) {
+ MachineInstr &MIUse = *MOUse.getParent();
+ InsertMBB = MIUse.getParent();
+ if (MIUse.isPHI())
+ InsertMBB = MIUse.getOperand(MOUse.getOperandNo() + 1).getMBB();
+ return InsertMBB == Def.getParent();
+}
+
+bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const {
+ MachineInstr *MI = Op.getParent();
+ if (!MI->isPHI())
+ return false;
+
+ Register SrcReg = Op.getReg();
+ for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) {
+ auto &MO = MI->getOperand(Idx);
+ if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg)
+ return true;
+ }
+ return false;
+}
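+
+// Illustrative example (made-up MIR): for a PHI such as
+//   %d:_(s32) = G_PHI %v(s32), %bb.1, %v(s32), %bb.2, %w(s32), %bb.3
+// the operand carrying %v from %bb.1 is non-unique because %v also arrives
+// from %bb.2, so localizeInterBlock below skips localizing that use rather
+// than cloning the defining instruction into both predecessors.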
+
+bool Localizer::localizeInterBlock(MachineFunction &MF,
+ LocalizedSetVecT &LocalizedInstrs) {
+ bool Changed = false;
+ DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+ // Since the IRTranslator only emits constants into the entry block, and the
+ // rest of the GISel pipeline generally emits constants close to their users,
+ // we only localize instructions in the entry block here. This might change if
+ // we start doing CSE across blocks.
+ auto &MBB = MF.front();
+ auto &TL = *MF.getSubtarget().getTargetLowering();
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (!TL.shouldLocalize(MI, TTI))
+ continue;
+ LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "More than one definition not supported yet");
+ Register Reg = MI.getOperand(0).getReg();
+ // Check if all the users of MI are local.
+    // We are going to invalidate the list of use operands, so we
+    // can't use a range-based iterator here.
+ for (MachineOperand &MOUse :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
+ // Check if the use is already local.
+ MachineBasicBlock *InsertMBB;
+ LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MOUse.getOperandNo() << '\n');
+ if (isLocalUse(MOUse, MI, InsertMBB)) {
+ // Even if we're in the same block, if the block is very large we could
+ // still have many long live ranges. Try to do intra-block localization
+ // too.
+ LocalizedInstrs.insert(&MI);
+ continue;
+ }
+
+      // If the use is a PHI operand whose incoming value is repeated, don't
+      // try to localize it. Doing so would cause unnecessary instruction bloat
+      // by duplicating into each predecessor block, when the existing
+      // definition is sufficient and allows for easier optimization later.
+ if (isNonUniquePhiValue(MOUse))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+ Changed = true;
+ auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+ auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+ if (NewVRegIt == MBBWithLocalDef.end()) {
+ // Create the localized instruction.
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+ LocalizedInstrs.insert(LocalizedMI);
+ MachineInstr &UseMI = *MOUse.getParent();
+ if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+ InsertMBB->insert(UseMI, LocalizedMI);
+ else
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+ LocalizedMI);
+
+ // Set a new register for the definition.
+ Register NewReg = MRI->cloneVirtualRegister(Reg);
+ LocalizedMI->getOperand(0).setReg(NewReg);
+ NewVRegIt =
+ MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+ LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ }
+ LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+ << '\n');
+ // Update the user reg.
+ MOUse.setReg(NewVRegIt->second);
+ }
+ }
+ return Changed;
+}
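+
+// Illustrative sketch of the inter-block step (register and block names are
+// made up): if the entry block defines %c:_(s64) = G_CONSTANT i64 42 and a use
+// sits in a later block %bb.3, the loop above clones the G_CONSTANT into
+// %bb.3, gives the clone a fresh vreg via cloneVirtualRegister, rewrites that
+// use to the new vreg, and caches the clone in MBBWithLocalDef so further uses
+// of %c within %bb.3 reuse it instead of triggering another clone.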
+
+bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
+ bool Changed = false;
+
+  // For each already-localized instruction which has multiple users, we scan
+  // the block top down from the current position until we hit one of them.
+
+ // FIXME: Consider doing inst duplication if live ranges are very long due to
+ // many users, but this case may be better served by regalloc improvements.
+
+ for (MachineInstr *MI : LocalizedInstrs) {
+ Register Reg = MI->getOperand(0).getReg();
+ MachineBasicBlock &MBB = *MI->getParent();
+ // All of the user MIs of this reg.
+ SmallPtrSet<MachineInstr *, 32> Users;
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+ if (!UseMI.isPHI())
+ Users.insert(&UseMI);
+ }
+    // If all the users were PHIs then they're not going to be in our block,
+    // so don't try to move this instruction.
+ if (Users.empty())
+ continue;
+
+ MachineBasicBlock::iterator II(MI);
+ ++II;
+ while (II != MBB.end() && !Users.count(&*II))
+ ++II;
+
+ assert(II != MBB.end() && "Didn't find the user in the MBB");
+ LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II
+ << '\n');
+
+ MI->removeFromParent();
+ MBB.insert(II, MI);
+ Changed = true;
+
+    // If the instruction (constant) being localized has a single user, we can
+    // propagate the debug location from that user.
+ if (Users.size() == 1) {
+ const auto &DefDL = MI->getDebugLoc();
+ const auto &UserDL = (*Users.begin())->getDebugLoc();
+
+ if ((!DefDL || DefDL.getLine() == 0) && UserDL && UserDL.getLine() != 0) {
+ MI->setDebugLoc(UserDL);
+ }
+ }
+ }
+ return Changed;
+}
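+
+// Illustrative sketch of the intra-block step (made-up MIR): if %c is defined
+// near the top of a block but first used much later,
+//   %c:_(s32) = G_CONSTANT i32 7
+//   ...                          ; many unrelated instructions
+//   %s:_(s32) = G_ADD %a, %c
+// the loop above unlinks the G_CONSTANT and reinserts it immediately before
+// the G_ADD, shrinking the live range of %c.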
+
+bool Localizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ // Don't run the pass if the target asked so.
+ if (DoNotRunPass(MF))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
+
+ init(MF);
+
+ // Keep track of the instructions we localized. We'll do a second pass of
+ // intra-block localization to further reduce live ranges.
+ LocalizedSetVecT LocalizedInstrs;
+
+ bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+ Changed |= localizeIntraBlock(LocalizedInstrs);
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp
new file mode 100644
index 000000000000..6d606e5550f1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp
@@ -0,0 +1,113 @@
+//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.cpp -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Tracks DebugLocs between checkpoints and verifies that they are transferred.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+
+using namespace llvm;
+
+#define LOC_DEBUG(X) DEBUG_WITH_TYPE(DebugType.str().c_str(), X)
+
+void LostDebugLocObserver::analyzeDebugLocations() {
+ if (LostDebugLocs.empty()) {
+ LOC_DEBUG(dbgs() << ".. No debug info was present\n");
+ return;
+ }
+ if (PotentialMIsForDebugLocs.empty()) {
+ LOC_DEBUG(
+ dbgs() << ".. No instructions to carry debug info (dead code?)\n");
+ return;
+ }
+
+ LOC_DEBUG(dbgs() << ".. Searching " << PotentialMIsForDebugLocs.size()
+ << " instrs for " << LostDebugLocs.size() << " locations\n");
+ SmallPtrSet<MachineInstr *, 4> FoundIn;
+ for (MachineInstr *MI : PotentialMIsForDebugLocs) {
+ if (!MI->getDebugLoc())
+ continue;
+ // Check this first in case there's a matching line-0 location on both input
+ // and output.
+ if (MI->getDebugLoc().getLine() == 0) {
+ LOC_DEBUG(
+ dbgs() << ".. Assuming line-0 location covers remainder (if any)\n");
+ return;
+ }
+ if (LostDebugLocs.erase(MI->getDebugLoc())) {
+ LOC_DEBUG(dbgs() << ".. .. found " << MI->getDebugLoc() << " in " << *MI);
+ FoundIn.insert(MI);
+ continue;
+ }
+ }
+ if (LostDebugLocs.empty())
+ return;
+
+ NumLostDebugLocs += LostDebugLocs.size();
+ LOC_DEBUG({
+ dbgs() << ".. Lost locations:\n";
+ for (const DebugLoc &Loc : LostDebugLocs) {
+ dbgs() << ".. .. ";
+ Loc.print(dbgs());
+ dbgs() << "\n";
+ }
+ dbgs() << ".. MIs with matched locations:\n";
+ for (MachineInstr *MI : FoundIn)
+ if (PotentialMIsForDebugLocs.erase(MI))
+ dbgs() << ".. .. " << *MI;
+ dbgs() << ".. Remaining MIs with unmatched/no locations:\n";
+ for (const MachineInstr *MI : PotentialMIsForDebugLocs)
+ dbgs() << ".. .. " << *MI;
+ });
+}
+
+void LostDebugLocObserver::checkpoint(bool CheckDebugLocs) {
+ if (CheckDebugLocs)
+ analyzeDebugLocations();
+ PotentialMIsForDebugLocs.clear();
+ LostDebugLocs.clear();
+}
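+
+// Typical use, as a sketch (the exact driver lives outside this file): a pass
+// that rewrites MIR installs this observer (e.g. via GISelObserverWrapper) and
+// calls checkpoint(true) after each transformation; any DebugLoc seen on erased
+// or changed instructions that does not reappear on a newly created instruction
+// is counted in NumLostDebugLocs and reported under this observer's debug type.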
+
+void LostDebugLocObserver::createdInstr(MachineInstr &MI) {
+ PotentialMIsForDebugLocs.insert(&MI);
+}
+
+static bool irTranslatorNeverAddsLocations(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return true;
+ }
+}
+
+void LostDebugLocObserver::erasingInstr(MachineInstr &MI) {
+ if (irTranslatorNeverAddsLocations(MI.getOpcode()))
+ return;
+
+ PotentialMIsForDebugLocs.erase(&MI);
+ if (MI.getDebugLoc())
+ LostDebugLocs.insert(MI.getDebugLoc());
+}
+
+void LostDebugLocObserver::changingInstr(MachineInstr &MI) {
+ if (irTranslatorNeverAddsLocations(MI.getOpcode()))
+ return;
+
+ PotentialMIsForDebugLocs.erase(&MI);
+ if (MI.getDebugLoc())
+ LostDebugLocs.insert(MI.getDebugLoc());
+}
+
+void LostDebugLocObserver::changedInstr(MachineInstr &MI) {
+ PotentialMIsForDebugLocs.insert(&MI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
new file mode 100644
index 000000000000..962b54ec5d6b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -0,0 +1,1318 @@
+//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the MachineIRBuilder class.
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+using namespace llvm;
+
+void MachineIRBuilder::setMF(MachineFunction &MF) {
+ State.MF = &MF;
+ State.MBB = nullptr;
+ State.MRI = &MF.getRegInfo();
+ State.TII = MF.getSubtarget().getInstrInfo();
+ State.DL = DebugLoc();
+ State.PCSections = nullptr;
+ State.II = MachineBasicBlock::iterator();
+ State.Observer = nullptr;
+}
+
+//------------------------------------------------------------------------------
+// Build instruction variants.
+//------------------------------------------------------------------------------
+
+MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
+ return BuildMI(getMF(), {getDL(), getPCSections()}, getTII().get(Opcode));
+}
+
+MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
+ getMBB().insert(getInsertPt(), MIB);
+ recordInsertion(MIB);
+ return MIB;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildDirectDbgValue(Register Reg, const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
+ return insertInstr(BuildMI(getMF(), getDL(),
+ getTII().get(TargetOpcode::DBG_VALUE),
+ /*IsIndirect*/ false, Reg, Variable, Expr));
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
+ return insertInstr(BuildMI(getMF(), getDL(),
+ getTII().get(TargetOpcode::DBG_VALUE),
+ /*IsIndirect*/ true, Reg, Variable, Expr));
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
+ return insertInstr(buildInstrNoInsert(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE);
+
+  auto *NumericConstant = [&]() -> const Constant * {
+ if (const auto *CE = dyn_cast<ConstantExpr>(&C))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ return CE->getOperand(0);
+ return &C;
+ }();
+
+ if (auto *CI = dyn_cast<ConstantInt>(NumericConstant)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getZExtValue());
+ } else if (auto *CFP = dyn_cast<ConstantFP>(NumericConstant)) {
+ MIB.addFPImm(CFP);
+ } else if (isa<ConstantPointerNull>(NumericConstant)) {
+ MIB.addImm(0);
+ } else {
+ // Insert $noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(Register());
+ }
+
+ MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
+ return insertInstr(MIB);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
+ assert(isa<DILabel>(Label) && "not a label");
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(State.DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_LABEL);
+
+ return MIB.addMetadata(Label);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res,
+ const SrcOp &Size,
+ Align Alignment) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type");
+ auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Size.addSrcToMIB(MIB);
+ MIB.addImm(Alignment.value());
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res,
+ int Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_FRAME_INDEX);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addFrameIndex(Idx);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
+ const GlobalValue *GV) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ assert(Res.getLLTTy(*getMRI()).getAddressSpace() ==
+ GV->getType()->getAddressSpace() &&
+ "address space mismatch");
+
+ auto MIB = buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addGlobalAddress(GV);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstantPool(const DstOp &Res,
+ unsigned Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT_POOL);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addConstantPoolIndex(Idx);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
+ unsigned JTI) {
+ return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
+ .addJumpTableIndex(JTI);
+}
+
+void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
+void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0,
+ const LLT Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0 && Res == Op1) && "type mismatch");
+}
+
+void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0,
+ const LLT Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
+MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() &&
+ Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
+ assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
+
+ return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
+}
+
+std::optional<MachineInstrBuilder>
+MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
+ const LLT ValueTy, uint64_t Value) {
+ assert(Res == 0 && "Res is a result argument");
+ assert(ValueTy.isScalar() && "invalid offset type");
+
+ if (Value == 0) {
+ Res = Op0;
+ return std::nullopt;
+ }
+
+ Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
+ auto Cst = buildConstant(ValueTy, Value);
+ return buildPtrAdd(Res, Op0, Cst.getReg(0));
+}
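+
+// Usage sketch (hypothetical caller, shown only to illustrate the contract):
+//   Register NewAddr;
+//   if (auto PtrAdd = MIRBuilder.materializePtrAdd(NewAddr, BaseReg,
+//                                                  LLT::scalar(64), Offset))
+//     ... // A G_PTR_ADD was emitted; NewAddr holds its result.
+//   else
+//     ... // Offset was 0, so NewAddr simply aliases BaseReg.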
+
+MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
+ const SrcOp &Op0,
+ uint32_t NumBits) {
+ LLT PtrTy = Res.getLLTTy(*getMRI());
+ LLT MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ Register MaskReg = getMRI()->createGenericVirtualRegister(MaskTy);
+ buildConstant(MaskReg, maskTrailingZeros<uint64_t>(NumBits));
+ return buildPtrMask(Res, Op0, MaskReg);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert(ResTy.isVector() && "Res non vector type");
+
+ SmallVector<Register, 8> Regs;
+ if (Op0Ty.isVector()) {
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ } else {
+ assert((ResTy.getSizeInBits() > Op0Ty.getSizeInBits()) &&
+           "Op0 is not narrower than Res");
+ Regs.push_back(Op0.getReg());
+ }
+ Register Undef =
+ buildUndef(Op0Ty.isVector() ? Op0Ty.getElementType() : Op0Ty).getReg(0);
+ unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
+ for (unsigned i = 0; i < NumberOfPadElts; ++i)
+ Regs.push_back(Undef);
+ return buildMergeLikeInstr(Res, Regs);
+}
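+
+// Illustrative sketch (assumed types): padding a <2 x s32> source out to a
+// <4 x s32> result unmerges the source into its two s32 elements, appends two
+// s32 G_IMPLICIT_DEF values, and rebuilds the result with G_BUILD_VECTOR. A
+// scalar s32 source instead contributes a single element plus three undefs.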
+
+MachineInstrBuilder
+MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
+ "Op0 has fewer elements");
+
+ SmallVector<Register, 8> Regs;
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
+ Regs.push_back(Unmerge.getReg(i));
+ return buildMergeLikeInstr(Res, Regs);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
+ return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
+ assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
+ return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
+ unsigned JTI,
+ Register IndexReg) {
+ assert(getMRI()->getType(TablePtr).isPointer() &&
+ "Table reg must be a pointer");
+ return buildInstr(TargetOpcode::G_BRJT)
+ .addUse(TablePtr)
+ .addJumpTableIndex(JTI)
+ .addUse(IndexReg);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::COPY, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ LLT Ty = Res.getLLTTy(*getMRI());
+ LLT EltTy = Ty.getScalarType();
+ assert(EltTy.getScalarSizeInBits() == Val.getBitWidth() &&
+ "creating constant with the wrong size");
+
+ if (Ty.isVector()) {
+ auto Const = buildInstr(TargetOpcode::G_CONSTANT)
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addCImm(&Val);
+ return buildSplatVector(Res, Const);
+ }
+
+ auto Const = buildInstr(TargetOpcode::G_CONSTANT);
+ Const->setDebugLoc(DebugLoc());
+ Res.addDefToMIB(*getMRI(), Const);
+ Const.addCImm(&Val);
+ return Const;
+}
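+
+// Illustrative sketch (assumed types): buildConstant with a <4 x s32> result
+// first materializes the value as a scalar s32 G_CONSTANT and then splats it
+// via buildSplatVector, i.e. a G_BUILD_VECTOR of four copies of that vreg; a
+// scalar destination gets a single G_CONSTANT directly.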
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ int64_t Val) {
+ auto IntN = IntegerType::get(getMF().getFunction().getContext(),
+ Res.getLLTTy(*getMRI()).getScalarSizeInBits());
+ ConstantInt *CI = ConstantInt::get(IntN, Val, true);
+ return buildConstant(Res, *CI);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ LLT Ty = Res.getLLTTy(*getMRI());
+ LLT EltTy = Ty.getScalarType();
+
+ assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
+ == EltTy.getSizeInBits() &&
+ "creating fconstant with the wrong size");
+
+ assert(!Ty.isPointer() && "invalid operand type");
+
+ if (Ty.isVector()) {
+ auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addFPImm(&Val);
+
+ return buildSplatVector(Res, Const);
+ }
+
+ auto Const = buildInstr(TargetOpcode::G_FCONSTANT);
+ Const->setDebugLoc(DebugLoc());
+ Res.addDefToMIB(*getMRI(), Const);
+ Const.addFPImm(&Val);
+ return Const;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const APInt &Val) {
+ ConstantInt *CI = ConstantInt::get(getMF().getFunction().getContext(), Val);
+ return buildConstant(Res, *CI);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ double Val) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ auto &Ctx = getMF().getFunction().getContext();
+ auto *CFP =
+ ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
+ return buildFConstant(Res, *CFP);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const APFloat &Val) {
+ auto &Ctx = getMF().getFunction().getContext();
+ auto *CFP = ConstantFP::get(Ctx, Val);
+ return buildFConstant(Res, *CFP);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst,
+ MachineBasicBlock &Dest) {
+ assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
+
+ auto MIB = buildInstr(TargetOpcode::G_BRCOND);
+ Tst.addSrcToMIB(MIB);
+ MIB.addMBB(&Dest);
+ return MIB;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+
+ LLT Ty = Dst.getLLTTy(*getMRI());
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo);
+ return buildLoad(Dst, Addr, *MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
+ const DstOp &Res,
+ const SrcOp &Addr,
+ MachineMemOperand &MMO) {
+ assert(Res.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+ assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+
+ auto MIB = buildInstr(Opcode);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset(
+ const DstOp &Dst, const SrcOp &BasePtr,
+ MachineMemOperand &BaseMMO, int64_t Offset) {
+ LLT LoadTy = Dst.getLLTTy(*getMRI());
+ MachineMemOperand *OffsetMMO =
+ getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy);
+
+ if (Offset == 0) // This may be a size or type changing load.
+ return buildLoad(Dst, BasePtr, *OffsetMMO);
+
+ LLT PtrTy = BasePtr.getLLTTy(*getMRI());
+ LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
+ auto ConstOffset = buildConstant(OffsetTy, Offset);
+ auto Ptr = buildPtrAdd(PtrTy, BasePtr, ConstOffset);
+ return buildLoad(Dst, Ptr, *OffsetMMO);
+}
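+
+// Usage sketch (names and types are assumed, not taken from a real caller):
+//   // Load 32 bits from BasePtr + 4 bytes, deriving the narrower MMO from
+//   // the original operand's MMO.
+//   auto Part = MIRBuilder.buildLoadFromOffset(LLT::scalar(32), BasePtr,
+//                                              OrigMMO, /*Offset=*/4);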
+
+MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
+ const SrcOp &Addr,
+ MachineMemOperand &MMO) {
+ assert(Val.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+ assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+
+ auto MIB = buildInstr(TargetOpcode::G_STORE);
+ Val.addSrcToMIB(MIB);
+ Addr.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ LLT Ty = Val.getLLTTy(*getMRI());
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo);
+ return buildStore(Val, Addr, *MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_SEXT, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
+}
+
+unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
+ const auto *TLI = getMF().getSubtarget().getTargetLowering();
+ switch (TLI->getBooleanContents(IsVec, IsFP)) {
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ return TargetOpcode::G_SEXT;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ return TargetOpcode::G_ZEXT;
+ default:
+ return TargetOpcode::G_ANYEXT;
+ }
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
+ const SrcOp &Op,
+ bool IsFP) {
+ unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
+ return buildInstr(ExtOp, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBoolExtInReg(const DstOp &Res,
+ const SrcOp &Op,
+ bool IsVector,
+ bool IsFP) {
+ const auto *TLI = getMF().getSubtarget().getTargetLowering();
+ switch (TLI->getBooleanContents(IsVector, IsFP)) {
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ return buildSExtInReg(Res, Op, 1);
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ return buildZExtInReg(Res, Op, 1);
+ case TargetLoweringBase::UndefinedBooleanContent:
+ return buildCopy(Res, Op);
+ }
+
+ llvm_unreachable("unexpected BooleanContent");
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
+ const DstOp &Res,
+ const SrcOp &Op) {
+ assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
+ TargetOpcode::G_SEXT == ExtOpc) &&
+ "Expecting Extending Opc");
+ assert(Res.getLLTTy(*getMRI()).isScalar() ||
+ Res.getLLTTy(*getMRI()).isVector());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ==
+ Op.getLLTTy(*getMRI()).isScalar());
+
+ unsigned Opcode = TargetOpcode::COPY;
+ if (Res.getLLTTy(*getMRI()).getSizeInBits() >
+ Op.getLLTTy(*getMRI()).getSizeInBits())
+ Opcode = ExtOpc;
+ else if (Res.getLLTTy(*getMRI()).getSizeInBits() <
+ Op.getLLTTy(*getMRI()).getSizeInBits())
+ Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(Res.getLLTTy(*getMRI()) == Op.getLLTTy(*getMRI()));
+
+ return buildInstr(Opcode, Res, Op);
+}
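+
+// Illustrative behaviour sketch (assumed types): with an s64 destination,
+//   buildSExtOrTrunc(s64, s32 source)  -> emits G_SEXT  (widening)
+//   buildSExtOrTrunc(s64, s64 source)  -> emits COPY    (same size)
+//   buildSExtOrTrunc(s64, s128 source) -> emits G_TRUNC (narrowing)
+// The convenience wrappers just below only pick the extension opcode and leave
+// the size comparison to buildExtOrTrunc above.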
+
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildZExtInReg(const DstOp &Res,
+ const SrcOp &Op,
+ int64_t ImmOp) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ auto Mask = buildConstant(
+ ResTy, APInt::getLowBitsSet(ResTy.getScalarSizeInBits(), ImmOp));
+ return buildAnd(Res, Op, Mask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
+ const SrcOp &Src) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
+ if (SrcTy == DstTy)
+ return buildCopy(Dst, Src);
+
+ unsigned Opcode;
+ if (SrcTy.isPointer() && DstTy.isScalar())
+ Opcode = TargetOpcode::G_PTRTOINT;
+ else if (DstTy.isPointer() && SrcTy.isScalar())
+ Opcode = TargetOpcode::G_INTTOPTR;
+ else {
+    assert(!SrcTy.isPointer() && !DstTy.isPointer() && "no G_ADDRCAST yet");
+ Opcode = TargetOpcode::G_BITCAST;
+ }
+
+ return buildInstr(Opcode, Dst, Src);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
+ const SrcOp &Src,
+ uint64_t Index) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
+
+#ifndef NDEBUG
+ assert(SrcTy.isValid() && "invalid operand type");
+ assert(DstTy.isValid() && "invalid operand type");
+ assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() &&
+ "extracting off end of register");
+#endif
+
+ if (DstTy.getSizeInBits() == SrcTy.getSizeInBits()) {
+ assert(Index == 0 && "insertion past the end of a register");
+ return buildCast(Dst, Src);
+ }
+
+ auto Extract = buildInstr(TargetOpcode::G_EXTRACT);
+ Dst.addDefToMIB(*getMRI(), Extract);
+ Src.addSrcToMIB(Extract);
+ Extract.addImm(Index);
+ return Extract;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
+}
+
+MachineInstrBuilder MachineIRBuilder::buildMergeValues(const DstOp &Res,
+ ArrayRef<Register> Ops) {
+  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ assert(TmpVec.size() > 1);
+ return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildMergeLikeInstr(const DstOp &Res,
+ ArrayRef<Register> Ops) {
+  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ assert(TmpVec.size() > 1);
+ return buildInstr(getOpcodeForMerge(Res, TmpVec), Res, TmpVec);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildMergeLikeInstr(const DstOp &Res,
+ std::initializer_list<SrcOp> Ops) {
+ assert(Ops.size() > 1);
+ return buildInstr(getOpcodeForMerge(Res, Ops), Res, Ops);
+}
+
+unsigned MachineIRBuilder::getOpcodeForMerge(const DstOp &DstOp,
+ ArrayRef<SrcOp> SrcOps) const {
+ if (DstOp.getLLTTy(*getMRI()).isVector()) {
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector())
+ return TargetOpcode::G_CONCAT_VECTORS;
+ return TargetOpcode::G_BUILD_VECTOR;
+ }
+
+ return TargetOpcode::G_MERGE_VALUES;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
+ const SrcOp &Op) {
+ unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
+ SmallVector<DstOp, 8> TmpVec(NumReg, Res);
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
+ ArrayRef<APInt> Ops) {
+ SmallVector<SrcOp> TmpVec;
+ TmpVec.reserve(Ops.size());
+ LLT EltTy = Res.getLLTTy(*getMRI()).getElementType();
+ for (const auto &Op : Ops)
+ TmpVec.push_back(buildConstant(EltTy, Op));
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
+ const SrcOp &Src) {
+ SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ if (TmpVec[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ Res.getLLTTy(*getMRI()).getElementType().getSizeInBits())
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+ const SrcOp &Src) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() &&
+ "Expected Src to match Dst elt ty");
+ auto UndefVec = buildUndef(DstTy);
+ auto Zero = buildConstant(LLT::scalar(64), 0);
+ auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
+ SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+ return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+ const SrcOp &Src1,
+ const SrcOp &Src2,
+ ArrayRef<int> Mask) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ LLT Src1Ty = Src1.getLLTTy(*getMRI());
+ LLT Src2Ty = Src2.getLLTTy(*getMRI());
+ assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
+ Mask.size());
+ assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+ DstTy.getElementType() == Src2Ty.getElementType());
+ (void)DstTy;
+ (void)Src1Ty;
+ (void)Src2Ty;
+ ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+ return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {Res}, {Src1, Src2})
+ .addShuffleMask(MaskAlloc);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
+ const SrcOp &Src,
+ const SrcOp &Op,
+ unsigned Index) {
+ assert(Index + Op.getLLTTy(*getMRI()).getSizeInBits() <=
+ Res.getLLTTy(*getMRI()).getSizeInBits() &&
+ "insertion past the end of a register");
+
+ if (Res.getLLTTy(*getMRI()).getSizeInBits() ==
+ Op.getLLTTy(*getMRI()).getSizeInBits()) {
+ return buildCast(Res, Op);
+ }
+
+ return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs,
+ bool HasSideEffects) {
+ auto MIB =
+ buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+ : TargetOpcode::G_INTRINSIC);
+ for (unsigned ResultReg : ResultRegs)
+ MIB.addDef(ResultReg);
+ MIB.addIntrinsicID(ID);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<DstOp> Results,
+ bool HasSideEffects) {
+ auto MIB =
+ buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+ : TargetOpcode::G_INTRINSIC);
+ for (DstOp Result : Results)
+ Result.addDefToMIB(*getMRI(), MIB);
+ MIB.addIntrinsicID(ID);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op,
+ std::optional<unsigned> Flags) {
+ return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1});
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1,
+ std::optional<unsigned> Flags) {
+
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst,
+ const SrcOp &Op0, const SrcOp &Op1,
+ std::optional<unsigned> Flags) {
+
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Elt, const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT, Res, {Val, Elt, Idx});
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT, Res, {Val, Idx});
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
+ Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal,
+ Register NewVal, MachineMemOperand &MMO) {
+#ifndef NDEBUG
+ LLT OldValResTy = getMRI()->getType(OldValRes);
+ LLT SuccessResTy = getMRI()->getType(SuccessRes);
+ LLT AddrTy = getMRI()->getType(Addr);
+ LLT CmpValTy = getMRI()->getType(CmpVal);
+ LLT NewValTy = getMRI()->getType(NewVal);
+ assert(OldValResTy.isScalar() && "invalid operand type");
+ assert(SuccessResTy.isScalar() && "invalid operand type");
+ assert(AddrTy.isPointer() && "invalid operand type");
+ assert(CmpValTy.isValid() && "invalid operand type");
+ assert(NewValTy.isValid() && "invalid operand type");
+ assert(OldValResTy == CmpValTy && "type mismatch");
+ assert(OldValResTy == NewValTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS)
+ .addDef(OldValRes)
+ .addDef(SuccessRes)
+ .addUse(Addr)
+ .addUse(CmpVal)
+ .addUse(NewVal)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
+ Register CmpVal, Register NewVal,
+ MachineMemOperand &MMO) {
+#ifndef NDEBUG
+ LLT OldValResTy = getMRI()->getType(OldValRes);
+ LLT AddrTy = getMRI()->getType(Addr);
+ LLT CmpValTy = getMRI()->getType(CmpVal);
+ LLT NewValTy = getMRI()->getType(NewVal);
+ assert(OldValResTy.isScalar() && "invalid operand type");
+ assert(AddrTy.isPointer() && "invalid operand type");
+ assert(CmpValTy.isValid() && "invalid operand type");
+ assert(NewValTy.isValid() && "invalid operand type");
+ assert(OldValResTy == CmpValTy && "type mismatch");
+ assert(OldValResTy == NewValTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG)
+ .addDef(OldValRes)
+ .addUse(Addr)
+ .addUse(CmpVal)
+ .addUse(NewVal)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
+ unsigned Opcode, const DstOp &OldValRes,
+ const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+
+#ifndef NDEBUG
+ LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
+ LLT AddrTy = Addr.getLLTTy(*getMRI());
+ LLT ValTy = Val.getLLTTy(*getMRI());
+ assert(OldValResTy.isScalar() && "invalid operand type");
+ assert(AddrTy.isPointer() && "invalid operand type");
+ assert(ValTy.isValid() && "invalid operand type");
+ assert(OldValResTy == ValTy && "type mismatch");
+ assert(MMO.isAtomic() && "not atomic mem operand");
+#endif
+
+ auto MIB = buildInstr(Opcode);
+ OldValRes.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ Val.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWXchg(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWAdd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWSub(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWAnd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWNand(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(Register OldValRes,
+ Register Addr,
+ Register Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWXor(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWMax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWMin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWUmax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFAdd(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
+ return buildInstr(TargetOpcode::G_FENCE)
+ .addImm(Ordering)
+ .addImm(Scope);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) {
+#ifndef NDEBUG
+ assert(getMRI()->getType(Res).isPointer() && "invalid res type");
+#endif
+
+ return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
+}
+
+void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy,
+ bool IsExtend) {
+#ifndef NDEBUG
+ if (DstTy.isVector()) {
+ assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
+ assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+ "different number of elements in a trunc/ext");
+ } else
+ assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
+
+ if (IsExtend)
+ assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+ "invalid narrowing extend");
+ else
+ assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+ "invalid widening trunc");
+#endif
+}
+
+void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy,
+ const LLT Op0Ty, const LLT Op1Ty) {
+#ifndef NDEBUG
+ assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+ "invalid operand type");
+ assert((ResTy == Op0Ty && ResTy == Op1Ty) && "type mismatch");
+ if (ResTy.isScalar() || ResTy.isPointer())
+ assert(TstTy.isScalar() && "type mismatch");
+ else
+ assert((TstTy.isScalar() ||
+ (TstTy.isVector() &&
+ TstTy.getNumElements() == Op0Ty.getNumElements())) &&
+ "type mismatch");
+#endif
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ std::optional<unsigned> Flags) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_SELECT: {
+ assert(DstOps.size() == 1 && "Invalid select");
+ assert(SrcOps.size() == 3 && "Invalid select");
+ validateSelectOp(
+ DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_ABS:
+ // All these are unary ops.
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateUnaryOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()));
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SSUBSAT: {
+ // All these are binary ops.
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 2 && "Invalid Srcs");
+ validateBinaryOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_USHLSAT:
+ case TargetOpcode::G_SSHLSAT: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 2 && "Invalid Srcs");
+ validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()), true);
+ break;
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_FPTRUNC: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()), false);
+ break;
+ }
+ case TargetOpcode::G_BITCAST: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ assert(DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "invalid bitcast");
+ break;
+ }
+ case TargetOpcode::COPY:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ // If the caller wants to add a subreg source it has to be done separately
+ // so we may not have any SrcOps at this point yet.
+ break;
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ assert(DstOps.size() == 1 && "Invalid Dst Operands");
+ assert(SrcOps.size() == 3 && "Invalid Src Operands");
+ // For F/ICMP, the first src operand is the predicate, followed by
+ // the two comparands.
+ assert(SrcOps[0].getSrcOpKind() == SrcOp::SrcType::Ty_Predicate &&
+ "Expecting predicate");
+ assert([&]() -> bool {
+ CmpInst::Predicate Pred = SrcOps[0].getPredicate();
+ return Opc == TargetOpcode::G_ICMP ? CmpInst::isIntPredicate(Pred)
+ : CmpInst::isFPPredicate(Pred);
+ }() && "Invalid predicate");
+ assert(SrcOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert([&]() -> bool {
+ LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI());
+ LLT DstTy = DstOps[0].getLLTTy(*getMRI());
+ if (Op0Ty.isScalar() || Op0Ty.isPointer())
+ return DstTy.isScalar();
+ else
+ return DstTy.isVector() &&
+ DstTy.getNumElements() == Op0Ty.getNumElements();
+ }() && "Type Mismatch");
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ assert(!DstOps.empty() && "Invalid trivial sequence");
+ assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
+ assert(llvm::all_of(DstOps,
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in output list");
+ assert((TypeSize::ScalarTy)DstOps.size() *
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ assert(SrcOps.size() >= 2 && "invalid trivial sequence");
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert((TypeSize::ScalarTy)SrcOps.size() *
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ assert(!DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "vectors should be built with G_CONCAT_VECTORS or G_BUILD_VECTOR");
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid Dst size");
+ assert(SrcOps.size() == 2 && "Invalid Src size");
+ assert(SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert((DstOps[0].getLLTTy(*getMRI()).isScalar() ||
+ DstOps[0].getLLTTy(*getMRI()).isPointer()) &&
+ "Invalid operand type");
+ assert(SrcOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand type");
+ assert(SrcOps[0].getLLTTy(*getMRI()).getElementType() ==
+ DstOps[0].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid dst size");
+ assert(SrcOps.size() == 3 && "Invalid src size");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert(DstOps[0].getLLTTy(*getMRI()).getElementType() ==
+ SrcOps[1].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index");
+ assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() ==
+ SrcOps[0].getLLTTy(*getMRI()).getNumElements() &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert((TypeSize::ScalarTy)SrcOps.size() *
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input scalars do not exactly cover the output vector register");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
+ "type mismatch in input list");
+ assert((TypeSize::ScalarTy)SrcOps.size() *
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input vectors do not exactly cover the output vector register");
+ break;
+ }
+ case TargetOpcode::G_UADDE: {
+ assert(DstOps.size() == 2 && "Invalid no of dst operands");
+ assert(SrcOps.size() == 3 && "Invalid no of src operands");
+ assert(DstOps[0].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert((DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())) &&
+ (DstOps[0].getLLTTy(*getMRI()) == SrcOps[1].getLLTTy(*getMRI())) &&
+ "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "type mismatch");
+ break;
+ }
+ }
+
+ auto MIB = buildInstr(Opc);
+ for (const DstOp &Op : DstOps)
+ Op.addDefToMIB(*getMRI(), MIB);
+ for (const SrcOp &Op : SrcOps)
+ Op.addSrcToMIB(MIB);
+ if (Flags)
+ MIB->setFlags(*Flags);
+ return MIB;
+}
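+
+// Minimal usage sketch for the validating buildInstr overload above
+// (illustrative; Op0 and Op1 stand for two already-created s32 virtual
+// registers in the current function):
+//   auto Sum = MIRBuilder.buildInstr(TargetOpcode::G_ADD, {LLT::scalar(32)},
+//                                    {Op0, Op1}, MachineInstr::NoSWrap);
+//   Register SumReg = Sum.getReg(0);
+// The DstOp/SrcOp lists are run through the per-opcode asserts above before
+// being appended to the freshly created instruction.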
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
new file mode 100644
index 000000000000..885a1056b2ea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -0,0 +1,1110 @@
+//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegBankSelect class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+
+#define DEBUG_TYPE "regbankselect"
+
+using namespace llvm;
+
+static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
+ cl::desc("Mode of the RegBankSelect pass"), cl::Hidden, cl::Optional,
+ cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
+ "Run the Fast mode (default mapping)"),
+ clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
+ "Use the Greedy mode (best local mapping)")));
+
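+// As a usage sketch (assuming a GlobalISel-capable target such as AArch64),
+// either mode can be forced from the command line:
+//   llc -global-isel -regbankselect-greedy foo.ll -o foo.s
+// When neither flag is given, the mode passed to the constructor below wins.
+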
+char RegBankSelect::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
+ "Assign register bank of generic virtual registers",
+ false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
+ "Assign register bank of generic virtual registers", false,
+ false)
+
+RegBankSelect::RegBankSelect(char &PassID, Mode RunningMode)
+ : MachineFunctionPass(PassID), OptMode(RunningMode) {
+ if (RegBankSelectMode.getNumOccurrences() != 0) {
+ OptMode = RegBankSelectMode;
+ if (RegBankSelectMode != RunningMode)
+ LLVM_DEBUG(dbgs() << "RegBankSelect mode overridden by command line\n");
+ }
+}
+
+void RegBankSelect::init(MachineFunction &MF) {
+ RBI = MF.getSubtarget().getRegBankInfo();
+ assert(RBI && "Cannot work without RegisterBankInfo");
+ MRI = &MF.getRegInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TPC = &getAnalysis<TargetPassConfig>();
+ if (OptMode != Mode::Fast) {
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ } else {
+ MBFI = nullptr;
+ MBPI = nullptr;
+ }
+ MIRBuilder.setMF(MF);
+ MORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+}
+
+void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (OptMode != Mode::Fast) {
+ // We could preserve the information from these two analyses but
+ // the APIs do not allow us to do so yet.
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ }
+ AU.addRequired<TargetPassConfig>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegBankSelect::assignmentMatch(
+ Register Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+ bool &OnlyAssign) const {
+ // By default we assume we will have to repair something.
+ OnlyAssign = false;
+ // Each part of a breakdown needs to end up in a different register.
+ // In other words, Reg's assignment does not match.
+ if (ValMapping.NumBreakDowns != 1)
+ return false;
+
+ const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+ // Reg has no assignment yet; a simple assignment will make the
+ // register bank match.
+ OnlyAssign = CurRegBank == nullptr;
+ LLVM_DEBUG(dbgs() << "Does assignment already match: ";
+ if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
+ dbgs() << " against ";
+ assert(DesiredRegBank && "The mapping must be valid");
+ dbgs() << *DesiredRegBank << '\n';);
+ return CurRegBank == DesiredRegBank;
+}
+
+bool RegBankSelect::repairReg(
+ MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
+ RegBankSelect::RepairingPlacement &RepairPt,
+ const iterator_range<SmallVectorImpl<Register>::const_iterator> &NewVRegs) {
+
+ assert(ValMapping.NumBreakDowns == (unsigned)size(NewVRegs) &&
+ "need new vreg for each breakdown");
+
+ // An empty range of new registers means no repairing.
+ assert(!NewVRegs.empty() && "We should not have to repair");
+
+ MachineInstr *MI;
+ if (ValMapping.NumBreakDowns == 1) {
+ // Assume we are repairing a use and thus, the original reg will be
+ // the source of the repairing.
+ Register Src = MO.getReg();
+ Register Dst = *NewVRegs.begin();
+
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(Src, Dst);
+
+ assert((RepairPt.getNumInsertPoints() == 1 || Dst.isPhysical()) &&
+ "We are about to create several defs for Dst");
+
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
+ .addDef(Dst)
+ .addUse(Src);
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << ':'
+ << printRegClassOrBank(Src, *MRI, TRI)
+ << " to: " << printReg(Dst) << ':'
+ << printRegClassOrBank(Dst, *MRI, TRI) << '\n');
+ } else {
+ // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
+ // sequence.
+ assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported");
+
+ LLT RegTy = MRI->getType(MO.getReg());
+ if (MO.isDef()) {
+ unsigned MergeOp;
+ if (RegTy.isVector()) {
+ if (ValMapping.NumBreakDowns == RegTy.getNumElements())
+ MergeOp = TargetOpcode::G_BUILD_VECTOR;
+ else {
+ assert(
+ (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns ==
+ RegTy.getSizeInBits()) &&
+ (ValMapping.BreakDown[0].Length % RegTy.getScalarSizeInBits() ==
+ 0) &&
+ "don't understand this value breakdown");
+
+ MergeOp = TargetOpcode::G_CONCAT_VECTORS;
+ }
+ } else
+ MergeOp = TargetOpcode::G_MERGE_VALUES;
+
+ auto MergeBuilder =
+ MIRBuilder.buildInstrNoInsert(MergeOp)
+ .addDef(MO.getReg());
+
+ for (Register SrcReg : NewVRegs)
+ MergeBuilder.addUse(SrcReg);
+
+ MI = MergeBuilder;
+ } else {
+ MachineInstrBuilder UnMergeBuilder =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES);
+ for (Register DefReg : NewVRegs)
+ UnMergeBuilder.addDef(DefReg);
+
+ UnMergeBuilder.addUse(MO.getReg());
+ MI = UnMergeBuilder;
+ }
+ }
+
+ if (RepairPt.getNumInsertPoints() != 1)
+ report_fatal_error("need testcase to support multiple insertion points");
+
+ // TODO:
+ // Check if MI is legal. If not, we need to legalize all the
+ // instructions we are going to insert.
+ std::unique_ptr<MachineInstr *[]> NewInstrs(
+ new MachineInstr *[RepairPt.getNumInsertPoints()]);
+ bool IsFirst = true;
+ unsigned Idx = 0;
+ for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+ MachineInstr *CurMI;
+ if (IsFirst)
+ CurMI = MI;
+ else
+ CurMI = MIRBuilder.getMF().CloneMachineInstr(MI);
+ InsertPt->insert(*CurMI);
+ NewInstrs[Idx++] = CurMI;
+ IsFirst = false;
+ }
+ // TODO:
+ // Legalize NewInstrs if need be.
+ return true;
+}
+
+uint64_t RegBankSelect::getRepairCost(
+ const MachineOperand &MO,
+ const RegisterBankInfo::ValueMapping &ValMapping) const {
+ assert(MO.isReg() && "We should only repair register operand");
+ assert(ValMapping.NumBreakDowns && "Nothing to map??");
+
+ bool IsSameNumOfValues = ValMapping.NumBreakDowns == 1;
+ const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
+ // If MO does not have a register bank, we should have just been
+ // able to set one unless we have to break the value down.
+ assert(CurRegBank || MO.isDef());
+
+ // Def: Val <- NewDefs
+ // Same number of values: copy
+ // Different number: Val = build_sequence Defs1, Defs2, ...
+ // Use: NewSources <- Val.
+ // Same number of values: copy.
+ // Different number: Src1, Src2, ... =
+ // extract_value Val, Src1Begin, Src1Len, Src2Begin, Src2Len, ...
+ // We should remember that this value is available somewhere else to
+ // coalesce the value.
+
+ if (ValMapping.NumBreakDowns != 1)
+ return RBI->getBreakDownCost(ValMapping, CurRegBank);
+
+ if (IsSameNumOfValues) {
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(CurRegBank, DesiredRegBank);
+ // TODO: It may be possible to actually avoid the copy.
+ // If we repair something where the source is defined by a copy
+ // and the source of that copy is on the right bank, we can reuse
+ // it for free.
+ // E.g.,
+ // RegToRepair<BankA> = copy AlternativeSrc<BankB>
+ // = op RegToRepair<BankA>
+ // We can simply propagate AlternativeSrc instead of copying RegToRepair
+ // into a new virtual register.
+ // We would also need to propagate this information in the
+ // repairing placement.
+ unsigned Cost = RBI->copyCost(*DesiredRegBank, *CurRegBank,
+ RBI->getSizeInBits(MO.getReg(), *MRI, *TRI));
+ // TODO: use a dedicated constant for ImpossibleCost.
+ if (Cost != std::numeric_limits<unsigned>::max())
+ return Cost;
+ // Return the legalization cost of that repairing.
+ }
+ return std::numeric_limits<unsigned>::max();
+}
+
+const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
+ MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
+ SmallVectorImpl<RepairingPlacement> &RepairPts) {
+ assert(!PossibleMappings.empty() &&
+ "Do not know how to map this instruction");
+
+ const RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
+ MappingCost Cost = MappingCost::ImpossibleCost();
+ SmallVector<RepairingPlacement, 4> LocalRepairPts;
+ for (const RegisterBankInfo::InstructionMapping *CurMapping :
+ PossibleMappings) {
+ MappingCost CurCost =
+ computeMapping(MI, *CurMapping, LocalRepairPts, &Cost);
+ if (CurCost < Cost) {
+ LLVM_DEBUG(dbgs() << "New best: " << CurCost << '\n');
+ Cost = CurCost;
+ BestMapping = CurMapping;
+ RepairPts.clear();
+ for (RepairingPlacement &RepairPt : LocalRepairPts)
+ RepairPts.emplace_back(std::move(RepairPt));
+ }
+ }
+ if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) {
+ // If none of the mappings worked, that means they are all impossible.
+ // Thus, pick the first one and set an impossible repairing point.
+ // It will trigger the failed isel mode.
+ BestMapping = *PossibleMappings.begin();
+ RepairPts.emplace_back(
+ RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible));
+ } else
+ assert(BestMapping && "No suitable mapping for instruction");
+ return *BestMapping;
+}
+
+void RegBankSelect::tryAvoidingSplit(
+ RegBankSelect::RepairingPlacement &RepairPt, const MachineOperand &MO,
+ const RegisterBankInfo::ValueMapping &ValMapping) const {
+ const MachineInstr &MI = *MO.getParent();
+ assert(RepairPt.hasSplit() && "We should not have to adjust for split");
+ // Splitting should only occur for PHIs or between terminators,
+ // because we only do local repairing.
+ assert((MI.isPHI() || MI.isTerminator()) && "Why do we split?");
+
+ assert(&MI.getOperand(RepairPt.getOpIdx()) == &MO &&
+ "Repairing placement does not match operand");
+
+ // If we need splitting for phis, that means it is because we
+ // could not find an insertion point before the terminators of
+ // the predecessor block for this argument. In other words,
+ // the input value is defined by one of the terminators.
+ assert((!MI.isPHI() || !MO.isDef()) && "Need split for phi def?");
+
+ // We split to repair the use of a phi or a terminator.
+ if (!MO.isDef()) {
+ if (MI.isTerminator()) {
+ assert(&MI != &(*MI.getParent()->getFirstTerminator()) &&
+ "Need to split for the first terminator?!");
+ } else {
+ // For the PHI case, the split may not actually be required.
+ // In the copy case, a phi is already a copy on the incoming edge,
+ // therefore there is no need to split.
+ if (ValMapping.NumBreakDowns == 1)
+ // This is already a copy; there is nothing to do.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
+ }
+ return;
+ }
+
+ // At this point, we need to repair a definition of a terminator.
+
+ // Technically we need to fix the def of MI on all outgoing
+ // edges of MI to keep the repairing local. In other words, we
+ // will create several definitions of the same register. This
+ // does not work for SSA unless that definition is a physical
+ // register.
+ // However, there are other cases where we can get away with
+ // that while still keeping the repairing local.
+ assert(MI.isTerminator() && MO.isDef() &&
+ "This code is for the def of a terminator");
+
+ // Since we use RPO traversal, if we need to repair a definition
+ // this means this definition could be:
+ // 1. Used by PHIs (i.e., this VReg has been visited as part of the
+ // uses of a phi.), or
+ // 2. Part of a target specific instruction (i.e., the target applied
+ // some register class constraints when creating the instruction.)
+ // If the constraints come from #2, the target said that another mapping
+ // is supported so we may just drop them. Indeed, if we do not change
+ // the number of registers holding that value, the uses will get fixed
+ // when we get to them.
+ // Uses in PHIs may have already been processed, though.
+ // If the constraints come from #1, then those are weak constraints and
+ // no actual uses may rely on them. However, the problem remains mainly
+ // the same as for #2. If the value stays in one register, we could
+ // just switch the register bank of the definition, but we would need to
+ // account for a repairing cost for each phi we silently change.
+ //
+ // In any case, if the value needs to be broken down into several
+ // registers, the repairing is not local anymore as we need to patch
+ // every uses to rebuild the value in just one register.
+ //
+ // To summarize:
+ // - If the value is in a physical register, we can do the split and
+ // fix locally.
+ // Otherwise if the value is in a virtual register:
+ // - If the value remains in one register, we do not have to split;
+ // just switching the register bank would do, but we need to account
+ // in the repairing cost for all the phis we changed.
+ // - If the value spans several registers, then we cannot do a local
+ // repairing.
+
+ // Check if this is a physical or virtual register.
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ // We are going to split every outgoing edges.
+ // Check that this is possible.
+ // FIXME: The machine representation is currently broken
+ // since it also allows several terminators in one basic block.
+ // Because of that we would technically need a way to get
+ // the targets of just one terminator to know which edges
+ // we have to split.
+ // Assert that we do not hit the ill-formed representation.
+
+ // If there are other terminators before that one, some of
+ // the outgoing edges may not be dominated by this definition.
+ assert(&MI == &(*MI.getParent()->getFirstTerminator()) &&
+ "Do not know which outgoing edges are relevant");
+ const MachineInstr *Next = MI.getNextNode();
+ assert((!Next || Next->isUnconditionalBranch()) &&
+ "Do not know where each terminator ends up");
+ if (Next)
+ // If the next terminator uses Reg, this means we have
+ // to split right after MI and thus we need a way to ask
+ // which outgoing edges are affected.
+ assert(!Next->readsRegister(Reg) && "Need to split between terminators");
+ // We will split all the edges and repair there.
+ } else {
+ // This is a virtual register defined by a terminator.
+ if (ValMapping.NumBreakDowns == 1) {
+ // There is nothing to repair, but we may actually be lying about
+ // the repairing cost because of the PHIs already processed,
+ // as stated above.
+ // The code will still be correct, though.
+ assert(false && "Repairing cost may not be accurate");
+ } else {
+ // We need to do non-local repairing. Basically, patch all
+ // the uses (i.e., phis) that we already processed.
+ // For now, just say this mapping is not possible.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Impossible);
+ }
+ }
+}
+
+RegBankSelect::MappingCost RegBankSelect::computeMapping(
+ MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+ SmallVectorImpl<RepairingPlacement> &RepairPts,
+ const RegBankSelect::MappingCost *BestCost) {
+ assert((MBFI || !BestCost) && "Costs comparison require MBFI");
+
+ if (!InstrMapping.isValid())
+ return MappingCost::ImpossibleCost();
+
+ // If mapped with InstrMapping, MI will have the recorded cost.
+ MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
+ bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
+ assert(!Saturated && "Possible mapping saturated the cost");
+ LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
+ LLVM_DEBUG(dbgs() << "With: " << InstrMapping << '\n');
+ RepairPts.clear();
+ if (BestCost && Cost > *BestCost) {
+ LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n");
+ return Cost;
+ }
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+
+ // Moreover, to realize this mapping, the register bank of each operand must
+ // match this mapping. In other words, we may need to locally reassign the
+ // register banks. Account for that repairing cost as well.
+ // In this context, local means in the surrounding of MI.
+ for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands();
+ OpIdx != EndOpIdx; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
+ const RegisterBankInfo::ValueMapping &ValMapping =
+ InstrMapping.getOperandMapping(OpIdx);
+ // If Reg is already properly mapped, this is free.
+ bool Assign;
+ if (assignmentMatch(Reg, ValMapping, Assign)) {
+ LLVM_DEBUG(dbgs() << "=> is free (match).\n");
+ continue;
+ }
+ if (Assign) {
+ LLVM_DEBUG(dbgs() << "=> is free (simple assignment).\n");
+ RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
+ RepairingPlacement::Reassign));
+ continue;
+ }
+
+ // Find the insertion point for the repairing code.
+ RepairPts.emplace_back(
+ RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Insert));
+ RepairingPlacement &RepairPt = RepairPts.back();
+
+ // If we need to split a basic block to materialize this insertion point,
+ // we may give a higher cost to this mapping.
+ // Nevertheless, we may get away with the split, so try that first.
+ if (RepairPt.hasSplit())
+ tryAvoidingSplit(RepairPt, MO, ValMapping);
+
+ // Check that the materialization of the repairing is possible.
+ if (!RepairPt.canMaterialize()) {
+ LLVM_DEBUG(dbgs() << "Mapping involves impossible repairing\n");
+ return MappingCost::ImpossibleCost();
+ }
+
+ // Account for the split cost and repair cost.
+ // Unless the cost is already saturated or we do not care about the cost.
+ if (!BestCost || Saturated)
+ continue;
+
+ // To get accurate information we need MBFI and MBPI.
+ // Thus, if we end up here this information should be here.
+ assert(MBFI && MBPI && "Cost computation requires MBFI and MBPI");
+
+ // FIXME: We will have to rework the repairing cost model.
+ // The repairing cost depends on the register bank that MO has.
+ // However, when we break down the value into different values,
+ // MO may not have a register bank while still needing repairing.
+ // For the fast mode, we don't compute the cost so that is fine,
+ // but still for the repairing code, we will have to make a choice.
+ // For the greedy mode, we should choose greedily what is the best
+ // choice based on the next use of MO.
+
+ // Sums up the repairing cost of MO at each insertion point.
+ uint64_t RepairCost = getRepairCost(MO, ValMapping);
+
+ // This is an impossible to repair cost.
+ if (RepairCost == std::numeric_limits<unsigned>::max())
+ return MappingCost::ImpossibleCost();
+
+ // Bias used for splitting: 5%.
+ const uint64_t PercentageForBias = 5;
+ uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
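+ // Worked example with illustrative numbers: for RepairCost == 30 the bias
+ // is (30 * 5 + 99) / 100 == 2, i.e. the 5% is rounded up so that any
+ // non-zero repair cost pays a non-zero penalty at a split point.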
+ // We should not need more than a couple of instructions to repair
+ // an assignment. In other words, the computation should not
+ // overflow because the repairing cost is free of basic block
+ // frequency.
+ assert(((RepairCost < RepairCost * PercentageForBias) &&
+ (RepairCost * PercentageForBias <
+ RepairCost * PercentageForBias + 99)) &&
+ "Repairing involves more than a billion instructions?!");
+ for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+ assert(InsertPt->canMaterialize() && "We should not have made it here");
+ // We will apply some basic block frequency scaling, and that uses uint64_t.
+ if (!InsertPt->isSplit())
+ Saturated = Cost.addLocalCost(RepairCost);
+ else {
+ uint64_t CostForInsertPt = RepairCost;
+ // Again, we shouldn't overflow here given that
+ // CostForInsertPt is frequency free at this point.
+ assert(CostForInsertPt + Bias > CostForInsertPt &&
+ "Repairing + split bias overflows");
+ CostForInsertPt += Bias;
+ uint64_t PtCost = InsertPt->frequency(*this) * CostForInsertPt;
+ // Check if we just overflowed.
+ if ((Saturated = PtCost < CostForInsertPt))
+ Cost.saturate();
+ else
+ Saturated = Cost.addNonLocalCost(PtCost);
+ }
+
+ // Stop looking into what it takes to repair, this is already
+ // too expensive.
+ if (BestCost && Cost > *BestCost) {
+ LLVM_DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");
+ return Cost;
+ }
+
+ // No need to accumulate more cost information.
+ // We need to still gather the repairing information though.
+ if (Saturated)
+ break;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Total cost is: " << Cost << "\n");
+ return Cost;
+}
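+
+// Informal summary of the cost model above (a reading of the code, kept as a
+// sketch rather than a specification):
+//   cost ~= BlockFreq(MI) * (mapping cost + local repairing cost)
+//           + sum over split points of InsertPointFreq * (repair cost + 5% bias)
+// with saturation instead of overflow at every step.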
+
+bool RegBankSelect::applyMapping(
+ MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+ SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
+ // OpdMapper will hold all the information needed for the rewriting.
+ RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
+
+ // First, place the repairing code.
+ for (RepairingPlacement &RepairPt : RepairPts) {
+ if (!RepairPt.canMaterialize() ||
+ RepairPt.getKind() == RepairingPlacement::Impossible)
+ return false;
+ assert(RepairPt.getKind() != RepairingPlacement::None &&
+ "This should not make its way in the list");
+ unsigned OpIdx = RepairPt.getOpIdx();
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ const RegisterBankInfo::ValueMapping &ValMapping =
+ InstrMapping.getOperandMapping(OpIdx);
+ Register Reg = MO.getReg();
+
+ switch (RepairPt.getKind()) {
+ case RepairingPlacement::Reassign:
+ assert(ValMapping.NumBreakDowns == 1 &&
+ "Reassignment should only be for simple mapping");
+ MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
+ break;
+ case RepairingPlacement::Insert:
+ // Don't insert additional instructions for debug instructions.
+ if (MI.isDebugInstr())
+ break;
+ OpdMapper.createVRegs(OpIdx);
+ if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)))
+ return false;
+ break;
+ default:
+ llvm_unreachable("Other kind should not happen");
+ }
+ }
+
+ // Second, rewrite the instruction.
+ LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
+ RBI->applyMapping(OpdMapper);
+
+ return true;
+}
+
+bool RegBankSelect::assignInstr(MachineInstr &MI) {
+ LLVM_DEBUG(dbgs() << "Assign: " << MI);
+
+ unsigned Opc = MI.getOpcode();
+ if (isPreISelGenericOptimizationHint(Opc)) {
+ assert((Opc == TargetOpcode::G_ASSERT_ZEXT ||
+ Opc == TargetOpcode::G_ASSERT_SEXT ||
+ Opc == TargetOpcode::G_ASSERT_ALIGN) &&
+ "Unexpected hint opcode!");
+ // The only correct mapping for these is to always use the source register
+ // bank.
+ const RegisterBank *RB =
+ RBI->getRegBank(MI.getOperand(1).getReg(), *MRI, *TRI);
+ // We can assume every instruction above this one has a selected register
+ // bank.
+ assert(RB && "Expected source register to have a register bank?");
+ LLVM_DEBUG(dbgs() << "... Hint always uses source's register bank.\n");
+ MRI->setRegBank(MI.getOperand(0).getReg(), *RB);
+ return true;
+ }
+
+ // Remember the repairing placement for all the operands.
+ SmallVector<RepairingPlacement, 4> RepairPts;
+
+ const RegisterBankInfo::InstructionMapping *BestMapping;
+ if (OptMode == RegBankSelect::Mode::Fast) {
+ BestMapping = &RBI->getInstrMapping(MI);
+ MappingCost DefaultCost = computeMapping(MI, *BestMapping, RepairPts);
+ (void)DefaultCost;
+ if (DefaultCost == MappingCost::ImpossibleCost())
+ return false;
+ } else {
+ RegisterBankInfo::InstructionMappings PossibleMappings =
+ RBI->getInstrPossibleMappings(MI);
+ if (PossibleMappings.empty())
+ return false;
+ BestMapping = &findBestMapping(MI, PossibleMappings, RepairPts);
+ }
+ // Make sure the mapping is valid for MI.
+ assert(BestMapping->verify(MI) && "Invalid instruction mapping");
+
+ LLVM_DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');
+
+ // After this call, MI may not be valid anymore.
+ // Do not use it.
+ return applyMapping(MI, *BestMapping, RepairPts);
+}
+
+bool RegBankSelect::assignRegisterBanks(MachineFunction &MF) {
+ // Walk the function and assign register banks to all operands.
+ // Use a RPOT to make sure all registers are assigned before we choose
+ // the best mapping of the current instruction.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // Set a sensible insertion point so that subsequent calls to
+ // MIRBuilder insert into this basic block.
+ MIRBuilder.setMBB(*MBB);
+ SmallVector<MachineInstr *> WorkList(
+ make_pointer_range(reverse(MBB->instrs())));
+
+ while (!WorkList.empty()) {
+ MachineInstr &MI = *WorkList.pop_back_val();
+
+ // Ignore target-specific post-isel instructions: they should use proper
+ // regclasses.
+ if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
+ continue;
+
+ // Ignore inline asm instructions: they should use physical
+ // registers/regclasses
+ if (MI.isInlineAsm())
+ continue;
+
+ // Ignore IMPLICIT_DEF which must have a regclass.
+ if (MI.isImplicitDef())
+ continue;
+
+ if (!assignInstr(MI)) {
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "unable to map instruction", MI);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool RegBankSelect::checkFunctionIsLegal(MachineFunction &MF) const {
+#ifndef NDEBUG
+ if (!DisableGISelLegalityCheck) {
+ if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "instruction is not legal", *MI);
+ return false;
+ }
+ }
+#endif
+ return true;
+}
+
+bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+ const Function &F = MF.getFunction();
+ Mode SaveOptMode = OptMode;
+ if (F.hasOptNone())
+ OptMode = Mode::Fast;
+ init(MF);
+
+#ifndef NDEBUG
+ if (!checkFunctionIsLegal(MF))
+ return false;
+#endif
+
+ assignRegisterBanks(MF);
+
+ OptMode = SaveOptMode;
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// Helper Classes Implementation
+//------------------------------------------------------------------------------
+RegBankSelect::RepairingPlacement::RepairingPlacement(
+ MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
+ RepairingPlacement::RepairingKind Kind)
+ // Default is, we are going to insert code to repair OpIdx.
+ : Kind(Kind), OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isReg() && "Trying to repair a non-reg operand");
+
+ if (Kind != RepairingKind::Insert)
+ return;
+
+ // Repairings for definitions happen after MI, uses happen before.
+ bool Before = !MO.isDef();
+
+ // Check if we are done with MI.
+ if (!MI.isPHI() && !MI.isTerminator()) {
+ addInsertPoint(MI, Before);
+ // We are done with the initialization.
+ return;
+ }
+
+ // Now, look for the special cases.
+ if (MI.isPHI()) {
+ // - PHIs must be the first instructions:
+ // * Before, we have to split the related incoming edge.
+ // * After, move the insertion point past the last phi.
+ if (!Before) {
+ MachineBasicBlock::iterator It = MI.getParent()->getFirstNonPHI();
+ if (It != MI.getParent()->end())
+ addInsertPoint(*It, /*Before*/ true);
+ else
+ addInsertPoint(*(--It), /*Before*/ false);
+ return;
+ }
+ // We repair a use of a phi, we may need to split the related edge.
+ MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
+ // Check if we can move the insertion point prior to the
+ // terminators of the predecessor.
+ Register Reg = MO.getReg();
+ MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
+ for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
+ if (It->modifiesRegister(Reg, &TRI)) {
+ // We cannot hoist the repairing code in the predecessor.
+ // Split the edge.
+ addInsertPoint(Pred, *MI.getParent());
+ return;
+ }
+ // At this point, we can insert in Pred.
+
+ // - If It is invalid, Pred is empty and we can insert in Pred
+ // wherever we want.
+ // - If It is valid, It is the first non-terminator, insert after It.
+ if (It == Pred.end())
+ addInsertPoint(Pred, /*Beginning*/ false);
+ else
+ addInsertPoint(*It, /*Before*/ false);
+ } else {
+ // - Terminators must be the last instructions:
+ // * Before, move the insert point before the first terminator.
+ // * After, we have to split the outgoing edges.
+ if (Before) {
+ // Check whether Reg is defined by any terminator.
+ MachineBasicBlock::reverse_iterator It = MI;
+ auto REnd = MI.getParent()->rend();
+
+ for (; It != REnd && It->isTerminator(); ++It) {
+ assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+ "copy insertion in middle of terminators not handled");
+ }
+
+ if (It == REnd) {
+ addInsertPoint(*MI.getParent()->begin(), true);
+ return;
+ }
+
+ // We are sure to be right before the first terminator.
+ addInsertPoint(*It, /*Before*/ false);
+ return;
+ }
+ // Make sure Reg is not redefined by other terminators, otherwise
+ // we do not know how to split.
+ for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
+ ++It != End;)
+ // The machine verifier should reject this kind of code.
+ assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+ "Do not know where to split");
+ // Split each outgoing edge.
+ MachineBasicBlock &Src = *MI.getParent();
+ for (auto &Succ : Src.successors())
+ addInsertPoint(Src, Succ);
+ }
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineInstr &MI,
+ bool Before) {
+ addInsertPoint(*new InstrInsertPoint(MI, Before));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &MBB,
+ bool Beginning) {
+ addInsertPoint(*new MBBInsertPoint(MBB, Beginning));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &Src,
+ MachineBasicBlock &Dst) {
+ addInsertPoint(*new EdgeInsertPoint(Src, Dst, P));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(
+ RegBankSelect::InsertPoint &Point) {
+ CanMaterialize &= Point.canMaterialize();
+ HasSplit |= Point.isSplit();
+ InsertPoints.emplace_back(&Point);
+}
+
+RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
+ bool Before)
+ : Instr(Instr), Before(Before) {
+ // Since we do not support splitting, we do not need to update
+ // liveness and such, so do not do anything with P.
+ assert((!Before || !Instr.isPHI()) &&
+ "Splitting before phis requires more points");
+ assert((!Before || !Instr.getNextNode() || !Instr.getNextNode()->isPHI()) &&
+ "Splitting between phis does not make sense");
+}
+
+void RegBankSelect::InstrInsertPoint::materialize() {
+ if (isSplit()) {
+ // Slice and return the beginning of the new block.
+ // If we need to split between the terminators, we theoretically
+ // need to know where the first and second set of terminators end
+ // to update the successors properly.
+ // Now, in practice, we should have a maximum of 2 branch
+ // instructions; one conditional and one unconditional. Therefore
+ // we know how to update the successor by looking at the target of
+ // the unconditional branch.
+ // If we end up splitting at some point, then, we should update
+ // the liveness information and such. I.e., we would need to
+ // access P here.
+ // The machine verifier should actually make sure such cases
+ // cannot happen.
+ llvm_unreachable("Not yet implemented");
+ }
+ // Otherwise the insertion point is just the current or next
+ // instruction depending on Before. I.e., there is nothing to do
+ // here.
+}
+
+bool RegBankSelect::InstrInsertPoint::isSplit() const {
+ // If the insertion point is after a terminator, we need to split.
+ if (!Before)
+ return Instr.isTerminator();
+ // If we insert before an instruction that is after a terminator,
+ // we are still after a terminator.
+ return Instr.getPrevNode() && Instr.getPrevNode()->isTerminator();
+}
+
+uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
+ // Even if we need to split, because we insert between terminators,
+ // this split has actually the same frequency as the instruction.
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
+}
+
+uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ return MBFI->getBlockFreq(&MBB).getFrequency();
+}
+
+void RegBankSelect::EdgeInsertPoint::materialize() {
+ // If we end up repairing twice at the same place before materializing the
+ // insertion point, we may think we have to split an edge twice.
+ // We should have a factory for the insert point such that identical points
+ // are the same instance.
+ assert(Src.isSuccessor(DstOrSplit) && DstOrSplit->isPredecessor(&Src) &&
+ "This point has already been split");
+ MachineBasicBlock *NewBB = Src.SplitCriticalEdge(DstOrSplit, P);
+ assert(NewBB && "Invalid call to materialize");
+ // We reuse the destination block to hold the information of the new block.
+ DstOrSplit = NewBB;
+}
+
+uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ if (WasMaterialized)
+ return MBFI->getBlockFreq(DstOrSplit).getFrequency();
+
+ const MachineBranchProbabilityInfo *MBPI =
+ P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
+ if (!MBPI)
+ return 1;
+ // The basic block will be on the edge.
+ return (MBFI->getBlockFreq(&Src) * MBPI->getEdgeProbability(&Src, DstOrSplit))
+ .getFrequency();
+}
+
+bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
+ // If this is not a critical edge, we should not have used this insert
+ // point. Indeed, either the successor or the predecessor would
+ // have done.
+ assert(Src.succ_size() > 1 && DstOrSplit->pred_size() > 1 &&
+ "Edge is not critical");
+ return Src.canSplitCriticalEdge(DstOrSplit);
+}
+
+RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+ : LocalFreq(LocalFreq.getFrequency()) {}
+
+bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
+ // Check if this overflows.
+ if (LocalCost + Cost < LocalCost) {
+ saturate();
+ return true;
+ }
+ LocalCost += Cost;
+ return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::addNonLocalCost(uint64_t Cost) {
+ // Check if this overflows.
+ if (NonLocalCost + Cost < NonLocalCost) {
+ saturate();
+ return true;
+ }
+ NonLocalCost += Cost;
+ return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::isSaturated() const {
+ return LocalCost == UINT64_MAX - 1 && NonLocalCost == UINT64_MAX &&
+ LocalFreq == UINT64_MAX;
+}
+
+void RegBankSelect::MappingCost::saturate() {
+ *this = ImpossibleCost();
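+ // The decrement below keeps a saturated cost distinguishable from (and
+ // cheaper than) ImpossibleCost(), matching the check in isSaturated().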
+ --LocalCost;
+}
+
+RegBankSelect::MappingCost RegBankSelect::MappingCost::ImpossibleCost() {
+ return MappingCost(UINT64_MAX, UINT64_MAX, UINT64_MAX);
+}
+
+bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
+ // Sort out the easy cases.
+ if (*this == Cost)
+ return false;
+ // If one is impossible to realize the other is cheaper unless it is
+ // impossible as well.
+ if ((*this == ImpossibleCost()) || (Cost == ImpossibleCost()))
+ return (*this == ImpossibleCost()) < (Cost == ImpossibleCost());
+ // If one is saturated the other is cheaper, unless it is saturated
+ // as well.
+ if (isSaturated() || Cost.isSaturated())
+ return isSaturated() < Cost.isSaturated();
+ // At this point we know both costs hold sensible values.
+
+ // If the two values have different base frequencies, there is not much
+ // we can do but scale everything.
+ // However, if they have the same base frequency we can avoid making
+ // complicated computation.
+ uint64_t ThisLocalAdjust;
+ uint64_t OtherLocalAdjust;
+ if (LLVM_LIKELY(LocalFreq == Cost.LocalFreq)) {
+
+ // At this point, we know the local costs are comparable.
+ // Handle the case that does not involve potential overflow first.
+ if (NonLocalCost == Cost.NonLocalCost)
+ // Since the non-local costs do not discriminate on the result,
+ // just compare the local costs.
+ return LocalCost < Cost.LocalCost;
+
+ // The base costs are comparable so we may only keep the relative
+ // value to increase our chances of avoiding overflows.
+ ThisLocalAdjust = 0;
+ OtherLocalAdjust = 0;
+ if (LocalCost < Cost.LocalCost)
+ OtherLocalAdjust = Cost.LocalCost - LocalCost;
+ else
+ ThisLocalAdjust = LocalCost - Cost.LocalCost;
+ } else {
+ ThisLocalAdjust = LocalCost;
+ OtherLocalAdjust = Cost.LocalCost;
+ }
+
+ // The non-local costs are comparable, just keep the relative value.
+ uint64_t ThisNonLocalAdjust = 0;
+ uint64_t OtherNonLocalAdjust = 0;
+ if (NonLocalCost < Cost.NonLocalCost)
+ OtherNonLocalAdjust = Cost.NonLocalCost - NonLocalCost;
+ else
+ ThisNonLocalAdjust = NonLocalCost - Cost.NonLocalCost;
+ // Scale everything to make them comparable.
+ uint64_t ThisScaledCost = ThisLocalAdjust * LocalFreq;
+ // Check for overflow on that operation.
+ bool ThisOverflows = ThisLocalAdjust && (ThisScaledCost < ThisLocalAdjust ||
+ ThisScaledCost < LocalFreq);
+ uint64_t OtherScaledCost = OtherLocalAdjust * Cost.LocalFreq;
+ // Check for overflow on the last operation.
+ bool OtherOverflows =
+ OtherLocalAdjust &&
+ (OtherScaledCost < OtherLocalAdjust || OtherScaledCost < Cost.LocalFreq);
+ // Add the non-local costs.
+ ThisOverflows |= ThisNonLocalAdjust &&
+ ThisScaledCost + ThisNonLocalAdjust < ThisNonLocalAdjust;
+ ThisScaledCost += ThisNonLocalAdjust;
+ OtherOverflows |= OtherNonLocalAdjust &&
+ OtherScaledCost + OtherNonLocalAdjust < OtherNonLocalAdjust;
+ OtherScaledCost += OtherNonLocalAdjust;
+ // If both overflow, we cannot compare without additional
+ // precision, e.g., APInt. Just give up on that case.
+ if (ThisOverflows && OtherOverflows)
+ return false;
+ // If one overflows but not the other, we can still compare.
+ if (ThisOverflows || OtherOverflows)
+ return ThisOverflows < OtherOverflows;
+ // Otherwise, just compare the values.
+ return ThisScaledCost < OtherScaledCost;
+}
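+
+// Worked example with illustrative numbers: take two costs sharing
+// LocalFreq == 8, one with LocalCost == 3 and NonLocalCost == 0, the other
+// with LocalCost == 1 and NonLocalCost == 20. The local costs reduce to a
+// relative adjustment of 2 on the first cost, so the comparison boils down to
+// 2 * 8 == 16 < 20, i.e. the first mapping is considered cheaper.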
+
+bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
+ return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost &&
+ LocalFreq == Cost.LocalFreq;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegBankSelect::MappingCost::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
+ if (*this == ImpossibleCost()) {
+ OS << "impossible";
+ return;
+ }
+ if (isSaturated()) {
+ OS << "saturated";
+ return;
+ }
+ OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
new file mode 100644
index 000000000000..080600d3cc98
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -0,0 +1,1381 @@
+//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file This file implements the utility functions used by the GlobalISel
+/// pipeline.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <numeric>
+#include <optional>
+
+#define DEBUG_TYPE "globalisel-utils"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+Register llvm::constrainRegToClass(MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, Register Reg,
+ const TargetRegisterClass &RegClass) {
+ if (!RBI.constrainGenericRegister(Reg, RegClass, MRI))
+ return MRI.createVirtualRegister(&RegClass);
+
+ return Reg;
+}
+
+Register llvm::constrainOperandRegClass(
+ const MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, MachineInstr &InsertPt,
+ const TargetRegisterClass &RegClass, MachineOperand &RegMO) {
+ Register Reg = RegMO.getReg();
+ // Assume physical registers are properly constrained.
+ assert(Reg.isVirtual() && "PhysReg not implemented");
+
+ // Save the old register class to check whether
+ // the change notifications will be required.
+ // TODO: A better approach would be to pass
+ // the observers to constrainRegToClass().
+ auto *OldRegClass = MRI.getRegClassOrNull(Reg);
+ Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
+ // If we created a new virtual register because the class is not compatible
+ // then create a copy between the new and the old register.
+ if (ConstrainedReg != Reg) {
+ MachineBasicBlock::iterator InsertIt(&InsertPt);
+ MachineBasicBlock &MBB = *InsertPt.getParent();
+ // FIXME: The copy needs to have the classes constrained for its operands.
+ // Use operand's regbank to get the class for old register (Reg).
+ if (RegMO.isUse()) {
+ BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), ConstrainedReg)
+ .addReg(Reg);
+ } else {
+ assert(RegMO.isDef() && "Must be a definition");
+ BuildMI(MBB, std::next(InsertIt), InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), Reg)
+ .addReg(ConstrainedReg);
+ }
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changingInstr(*RegMO.getParent());
+ }
+ RegMO.setReg(ConstrainedReg);
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changedInstr(*RegMO.getParent());
+ }
+ } else if (OldRegClass != MRI.getRegClassOrNull(Reg)) {
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ if (!RegMO.isDef()) {
+ MachineInstr *RegDef = MRI.getVRegDef(Reg);
+ Observer->changedInstr(*RegDef);
+ }
+ Observer->changingAllUsesOfReg(MRI, Reg);
+ Observer->finishedChangingAllUsesOfReg();
+ }
+ }
+ return ConstrainedReg;
+}
+
+Register llvm::constrainOperandRegClass(
+ const MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
+ MachineOperand &RegMO, unsigned OpIdx) {
+ Register Reg = RegMO.getReg();
+ // Assume physical registers are properly constrained.
+ assert(Reg.isVirtual() && "PhysReg not implemented");
+
+ const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
+ // Some of the target independent instructions, like COPY, may not impose any
+ // register class constraints on some of their operands: If it's a use, we can
+ // skip constraining as the instruction defining the register would constrain
+ // it.
+
+ if (OpRC) {
+ // Obtain the RC from incoming regbank if it is a proper sub-class. Operands
+ // can have multiple regbanks for a superclass that combine different
+ // register types (E.g., AMDGPU's VGPR and AGPR). The regbank ambiguity
+ // resolved by targets during regbankselect should not be overridden.
+ if (const auto *SubRC = TRI.getCommonSubClass(
+ OpRC, TRI.getConstrainedRegClassForOperand(RegMO, MRI)))
+ OpRC = SubRC;
+
+ OpRC = TRI.getAllocatableClass(OpRC);
+ }
+
+ if (!OpRC) {
+ assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&
+ "Register class constraint is required unless either the "
+ "instruction is target independent or the operand is a use");
+ // FIXME: Just bailing out like this here might not be enough, unless we
+ // expect the users of this function to do the right thing for PHIs and
+ // COPY:
+ // v1 = COPY v0
+ // v2 = COPY v1
+ // v1 here may end up not being constrained at all. Note that to reproduce
+ // the issue we likely need a destination pattern of a selection rule
+ // producing such extra copies, not just an input GMIR with them, since
+ // every existing target using selectImpl handles copies before calling it,
+ // so they never reach this function.
+ return Reg;
+ }
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *OpRC,
+ RegMO);
+}
+
+bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(!isPreISelGenericOpcode(I.getOpcode()) &&
+ "A selected instruction is expected");
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
+ MachineOperand &MO = I.getOperand(OpI);
+
+ // There's nothing to be done on non-register operands.
+ if (!MO.isReg())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n');
+ assert(MO.isReg() && "Unsupported non-reg operand");
+
+ Register Reg = MO.getReg();
+ // Physical registers don't need to be constrained.
+ if (Reg.isPhysical())
+ continue;
+
+ // Register operands with a value of 0 (e.g. predicate operands) don't need
+ // to be constrained.
+ if (Reg == 0)
+ continue;
+
+ // If the operand is a vreg, we should constrain its regclass, and only
+ // insert COPYs if that's impossible.
+ // constrainOperandRegClass does that for us.
+ constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI);
+
+ // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
+ // done.
+ if (MO.isUse()) {
+ int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
+ if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
+ I.tieOperands(DefIdx, OpI);
+ }
+ }
+ return true;
+}
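+
+// A minimal usage sketch (hypothetical, not taken from any particular
+// backend): a target's InstructionSelector typically mutates a generic
+// instruction into a target opcode and then calls the helper above to
+// constrain every register operand. The opcode name is only illustrative.
+//
+//   I.setDesc(TII.get(/* e.g. */ AArch64::ADDWrr));
+//   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);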
+
+bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
+ MachineRegisterInfo &MRI) {
+ // Give up if either DstReg or SrcReg is a physical register.
+ if (DstReg.isPhysical() || SrcReg.isPhysical())
+ return false;
+ // Give up if the types don't match.
+ if (MRI.getType(DstReg) != MRI.getType(SrcReg))
+ return false;
+ // Replace if either DstReg has no constraints or the register
+ // constraints match.
+ return !MRI.getRegClassOrRegBank(DstReg) ||
+ MRI.getRegClassOrRegBank(DstReg) == MRI.getRegClassOrRegBank(SrcReg);
+}
+
+bool llvm::isTriviallyDead(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ // FIXME: This logic is mostly duplicated with
+ // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in
+ // MachineInstr::isLabel?
+
+ // Don't delete frame allocation labels.
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+ return false;
+ // LIFETIME markers should be preserved even if they seem dead.
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END)
+ return false;
+
+ // If we can move an instruction, we can remove it. Otherwise, it has
+ // a side-effect of some sort.
+ bool SawStore = false;
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore) && !MI.isPHI())
+ return false;
+
+ // Instructions without side-effects are dead iff they only define dead vregs.
+ for (const auto &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg))
+ return false;
+ }
+ return true;
+}
+
+static void reportGISelDiagnostic(DiagnosticSeverity Severity,
+ MachineFunction &MF,
+ const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ bool IsFatal = Severity == DS_Error &&
+ TPC.isGlobalISelAbortEnabled();
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || IsFatal)
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (IsFatal)
+ report_fatal_error(Twine(R.getMsg()));
+ else
+ MORE.emit(R);
+}
+
+void llvm::reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ reportGISelDiagnostic(DS_Warning, MF, TPC, MORE, R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ reportGISelDiagnostic(DS_Error, MF, TPC, MORE, R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ const char *PassName, StringRef Msg,
+ const MachineInstr &MI) {
+ MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
+ MI.getDebugLoc(), MI.getParent());
+ R << Msg;
+ // Printing MI is expensive; only do it if expensive remarks are enabled.
+ if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName))
+ R << ": " << ore::MNV("Inst", MI);
+ reportGISelFailure(MF, TPC, MORE, R);
+}
+
+std::optional<APInt> llvm::getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
+ VReg, MRI, /*LookThroughInstrs*/ false);
+ assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
+ "Value found while looking through instrs");
+ if (!ValAndVReg)
+ return std::nullopt;
+ return ValAndVReg->Value;
+}
+
+std::optional<int64_t>
+llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
+ std::optional<APInt> Val = getIConstantVRegVal(VReg, MRI);
+ if (Val && Val->getBitWidth() <= 64)
+ return Val->getSExtValue();
+ return std::nullopt;
+}
+
+namespace {
+
+typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
+typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
+
+std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
+ GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
+ bool LookThroughAnyExt = false) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
+ MachineInstr *MI;
+
+ while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI) &&
+ LookThroughInstrs) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_ANYEXT:
+ if (!LookThroughAnyExt)
+ return std::nullopt;
+ [[fallthrough]];
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ SeenOpcodes.push_back(std::make_pair(
+ MI->getOpcode(),
+ MRI.getType(MI->getOperand(0).getReg()).getSizeInBits()));
+ VReg = MI->getOperand(1).getReg();
+ break;
+ case TargetOpcode::COPY:
+ VReg = MI->getOperand(1).getReg();
+ if (VReg.isPhysical())
+ return std::nullopt;
+ break;
+ case TargetOpcode::G_INTTOPTR:
+ VReg = MI->getOperand(1).getReg();
+ break;
+ default:
+ return std::nullopt;
+ }
+ }
+ if (!MI || !IsConstantOpcode(MI))
+ return std::nullopt;
+
+ std::optional<APInt> MaybeVal = getAPCstValue(MI);
+ if (!MaybeVal)
+ return std::nullopt;
+ APInt &Val = *MaybeVal;
+ while (!SeenOpcodes.empty()) {
+ std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
+ switch (OpcodeAndSize.first) {
+ case TargetOpcode::G_TRUNC:
+ Val = Val.trunc(OpcodeAndSize.second);
+ break;
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ Val = Val.sext(OpcodeAndSize.second);
+ break;
+ case TargetOpcode::G_ZEXT:
+ Val = Val.zext(OpcodeAndSize.second);
+ break;
+ }
+ }
+
+ return ValueAndVReg{Val, VReg};
+}
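+
+// A small worked example (illustrative MIR): given
+//   %c:_(s64) = G_CONSTANT i64 255
+//   %t:_(s32) = G_TRUNC %c
+// a look-through query on %t records the G_TRUNC, reaches the G_CONSTANT and
+// replays the truncation, yielding a 32-bit APInt with value 255 together
+// with the vreg of the G_CONSTANT (%c).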
+
+bool isIConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+bool isFConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_FCONSTANT;
+}
+
+bool isAnyConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
+}
+
+std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ return std::nullopt;
+}
+
+std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ if (CstVal.isFPImm())
+ return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ return std::nullopt;
+}
+
+} // end anonymous namespace
+
+std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
+ getCImmAsAPInt, LookThroughInstrs);
+}
+
+std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ bool LookThroughAnyExt) {
+ return getConstantVRegValWithLookThrough(
+ VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
+ LookThroughAnyExt);
+}
+
+std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ auto Reg = getConstantVRegValWithLookThrough(
+ VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+ if (!Reg)
+ return std::nullopt;
+ return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),
+ Reg->VReg};
+}
+
+const ConstantFP *
+llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
+ return nullptr;
+ return MI->getOperand(1).getFPImm();
+}
+
+std::optional<DefinitionAndSourceRegister>
+llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
+ Register DefSrcReg = Reg;
+ auto *DefMI = MRI.getVRegDef(Reg);
+ auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+ if (!DstTy.isValid())
+ return std::nullopt;
+ unsigned Opc = DefMI->getOpcode();
+ while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) {
+ Register SrcReg = DefMI->getOperand(1).getReg();
+ auto SrcTy = MRI.getType(SrcReg);
+ if (!SrcTy.isValid())
+ break;
+ DefMI = MRI.getVRegDef(SrcReg);
+ DefSrcReg = SrcReg;
+ Opc = DefMI->getOpcode();
+ }
+ return DefinitionAndSourceRegister{DefMI, DefSrcReg};
+}
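+
+// For example (illustrative MIR): with
+//   %a:_(s32) = G_ADD %x, %y
+//   %b:_(s32) = COPY %a
+//   %c:_(s32) = COPY %b
+// getDefSrcRegIgnoringCopies(%c, MRI) walks through both copies and returns
+// the G_ADD as the defining instruction and %a as the source register.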
+
+MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ std::optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ return DefSrcReg ? DefSrcReg->MI : nullptr;
+}
+
+Register llvm::getSrcRegIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ std::optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ return DefSrcReg ? DefSrcReg->Reg : Register();
+}
+
+MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
+ return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+}
+
+APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
+ if (Size == 32)
+ return APFloat(float(Val));
+ if (Size == 64)
+ return APFloat(Val);
+ if (Size != 16)
+ llvm_unreachable("Unsupported FPConstant size");
+ bool Ignored;
+ APFloat APF(Val);
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ return APF;
+}
+
+std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode,
+ const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false);
+ if (!MaybeOp2Cst)
+ return std::nullopt;
+
+ auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false);
+ if (!MaybeOp1Cst)
+ return std::nullopt;
+
+ const APInt &C1 = MaybeOp1Cst->Value;
+ const APInt &C2 = MaybeOp2Cst->Value;
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_PTR_ADD:
+ return C1 + C2;
+ case TargetOpcode::G_AND:
+ return C1 & C2;
+ case TargetOpcode::G_ASHR:
+ return C1.ashr(C2);
+ case TargetOpcode::G_LSHR:
+ return C1.lshr(C2);
+ case TargetOpcode::G_MUL:
+ return C1 * C2;
+ case TargetOpcode::G_OR:
+ return C1 | C2;
+ case TargetOpcode::G_SHL:
+ return C1 << C2;
+ case TargetOpcode::G_SUB:
+ return C1 - C2;
+ case TargetOpcode::G_XOR:
+ return C1 ^ C2;
+ case TargetOpcode::G_UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.udiv(C2);
+ case TargetOpcode::G_SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.sdiv(C2);
+ case TargetOpcode::G_UREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.urem(C2);
+ case TargetOpcode::G_SREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.srem(C2);
+ case TargetOpcode::G_SMIN:
+ return APIntOps::smin(C1, C2);
+ case TargetOpcode::G_SMAX:
+ return APIntOps::smax(C1, C2);
+ case TargetOpcode::G_UMIN:
+ return APIntOps::umin(C1, C2);
+ case TargetOpcode::G_UMAX:
+ return APIntOps::umax(C1, C2);
+ }
+
+ return std::nullopt;
+}
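+
+// For instance (illustrative): if %a and %b are defined by G_CONSTANT i32 6
+// and G_CONSTANT i32 3, ConstantFoldBinOp(TargetOpcode::G_MUL, %a, %b, MRI)
+// yields an APInt holding 18, while the division and remainder opcodes give
+// std::nullopt when the right-hand constant is zero.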
+
+std::optional<APFloat>
+llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
+ const Register Op2, const MachineRegisterInfo &MRI) {
+ const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
+ if (!Op2Cst)
+ return std::nullopt;
+
+ const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
+ if (!Op1Cst)
+ return std::nullopt;
+
+ APFloat C1 = Op1Cst->getValueAPF();
+ const APFloat &C2 = Op2Cst->getValueAPF();
+ switch (Opcode) {
+ case TargetOpcode::G_FADD:
+ C1.add(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FSUB:
+ C1.subtract(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FMUL:
+ C1.multiply(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FDIV:
+ C1.divide(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FREM:
+ C1.mod(C2);
+ return C1;
+ case TargetOpcode::G_FCOPYSIGN:
+ C1.copySign(C2);
+ return C1;
+ case TargetOpcode::G_FMINNUM:
+ return minnum(C1, C2);
+ case TargetOpcode::G_FMAXNUM:
+ return maxnum(C1, C2);
+ case TargetOpcode::G_FMINIMUM:
+ return minimum(C1, C2);
+ case TargetOpcode::G_FMAXIMUM:
+ return maximum(C1, C2);
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ // FIXME: These operations were unfortunately named. fminnum/fmaxnum do not
+ // follow the IEEE behavior for signaling nans and follow libm's fmin/fmax,
+ // and currently there isn't a nice wrapper in APFloat for the version with
+ // correct snan handling.
+ break;
+ default:
+ break;
+ }
+
+ return std::nullopt;
+}
+
+SmallVector<APInt>
+llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI) {
+ auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
+ if (!SrcVec2)
+ return SmallVector<APInt>();
+
+ auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ if (!SrcVec1)
+ return SmallVector<APInt>();
+
+ SmallVector<APInt> FoldedElements;
+ for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
+ auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
+ SrcVec2->getSourceReg(Idx), MRI);
+ if (!MaybeCst)
+ return SmallVector<APInt>();
+ FoldedElements.push_back(*MaybeCst);
+ }
+ return FoldedElements;
+}
+
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ bool SNaN) {
+ const MachineInstr *DefMI = MRI.getVRegDef(Val);
+ if (!DefMI)
+ return false;
+
+ const TargetMachine& TM = DefMI->getMF()->getTarget();
+ if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
+ return !FPVal->getValueAPF().isNaN() ||
+ (SNaN && !FPVal->getValueAPF().isSignaling());
+ }
+
+ if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (const auto &Op : DefMI->uses())
+ if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
+ return false;
+ return true;
+ }
+
+ switch (DefMI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ if (SNaN)
+ return true;
+
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
+ isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
+ (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
+ isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
+ }
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
+ isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
+ }
+ }
+
+ if (SNaN) {
+ // FP operations quiet signaling NaNs. For now, just handle the ones
+ // inserted during legalization.
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCANONICALIZE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
+Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
+ const MachinePointerInfo &MPO) {
+ auto PSV = dyn_cast_if_present<const PseudoSourceValue *>(MPO.V);
+ if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()),
+ MPO.Offset);
+ }
+
+ if (const Value *V = dyn_cast_if_present<const Value *>(MPO.V)) {
+ const Module *M = MF.getFunction().getParent();
+ return V->getPointerAlignment(M->getDataLayout());
+ }
+
+ return Align(1);
+}
+
+Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ MCRegister PhysReg,
+ const TargetRegisterClass &RC,
+ const DebugLoc &DL, LLT RegTy) {
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
+ if (LiveIn) {
+ MachineInstr *Def = MRI.getVRegDef(LiveIn);
+ if (Def) {
+ // FIXME: Should the verifier check this is in the entry block?
+ assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block");
+ return LiveIn;
+ }
+
+ // It's possible the incoming argument register and copy was added during
+ // lowering, but later deleted due to being/becoming dead. If this happens,
+ // re-insert the copy.
+ } else {
+ // The live in register was not present, so add it.
+ LiveIn = MF.addLiveIn(PhysReg, &RC);
+ if (RegTy.isValid())
+ MRI.setType(LiveIn, RegTy);
+ }
+
+ BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn)
+ .addReg(PhysReg);
+ if (!EntryMBB.isLiveIn(PhysReg))
+ EntryMBB.addLiveIn(PhysReg);
+ return LiveIn;
+}
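+
+// Hypothetical call-lowering use (the AArch64 names below are only an
+// example, not something this helper prescribes): materialize an incoming
+// argument register as a typed vreg copied in the entry block.
+//
+//   Register VReg = getFunctionLiveInPhysReg(MF, TII, AArch64::X0,
+//                                            AArch64::GPR64RegClass, DL,
+//                                            LLT::scalar(64));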
+
+std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode,
+ const Register Op1, uint64_t Imm,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI);
+ if (MaybeOp1Cst) {
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_SEXT_INREG: {
+ LLT Ty = MRI.getType(Op1);
+ return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits());
+ }
+ }
+ }
+ return std::nullopt;
+}
+
+std::optional<APFloat>
+llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
+ const MachineRegisterInfo &MRI) {
+ assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
+ if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) {
+ APFloat DstVal(getFltSemanticForLLT(DstTy));
+ DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,
+ APFloat::rmNearestTiesToEven);
+ return DstVal;
+ }
+ return std::nullopt;
+}
+
+std::optional<SmallVector<unsigned>>
+llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(Src);
+ SmallVector<unsigned> FoldedCTLZs;
+ auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> {
+ auto MaybeCst = getIConstantVRegVal(R, MRI);
+ if (!MaybeCst)
+ return std::nullopt;
+ return MaybeCst->countl_zero();
+ };
+ if (Ty.isVector()) {
+ // Try to constant fold each element.
+ auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BV)
+ return std::nullopt;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
+ FoldedCTLZs.emplace_back(*MaybeFold);
+ continue;
+ }
+ return std::nullopt;
+ }
+ return FoldedCTLZs;
+ }
+ if (auto MaybeCst = tryFoldScalar(Src)) {
+ FoldedCTLZs.emplace_back(*MaybeCst);
+ return FoldedCTLZs;
+ }
+ return std::nullopt;
+}
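+
+// For example (illustrative): if Src is an s32 G_CONSTANT with value 1, the
+// result is the single-element vector {31}; for a vector source the fold
+// succeeds only when the value comes from a G_BUILD_VECTOR whose elements
+// are all defined by G_CONSTANT.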
+
+bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB) {
+ std::optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ if (!DefSrcReg)
+ return false;
+
+ const MachineInstr &MI = *DefSrcReg->MI;
+ const LLT Ty = MRI.getType(Reg);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ const ConstantInt *CI = MI.getOperand(1).getCImm();
+ return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }
+ case TargetOpcode::G_SHL: {
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+
+ // TODO: Constant splat
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (*ConstLHS == 1)
+ return true;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (ConstLHS->isSignMask())
+ return true;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // TODO: Probably should have a recursion depth guard since you could have
+ // bitcasted vector elements.
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB))
+ return false;
+
+ return true;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ // Only handle constants, since we would need to know whether the number of
+ // leading zeros is greater than the truncation amount.
+ const unsigned BitWidth = Ty.getScalarSizeInBits();
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ auto Const = getIConstantVRegVal(MO.getReg(), MRI);
+ if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
+ return false;
+ }
+
+ return true;
+ }
+ default:
+ break;
+ }
+
+ if (!KB)
+ return false;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
+ KnownBits Known = KB->getKnownBits(Reg);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+}
+
+void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
+ AU.addPreserved<StackProtector>();
+}
+
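+// A few illustrative results of the rule implemented below (p0 is assumed to
+// be a 64-bit pointer; these are examples, not an exhaustive contract):
+//   getLCMType(<3 x s32>, <2 x s32>) == <6 x s32>
+//   getLCMType(<2 x s32>, s32)       == <2 x s32>
+//   getLCMType(s32, s64)             == s64
+//   getLCMType(p0, s32)              == p0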
+LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ const LLT OrigElt = OrigTy.getElementType();
+
+ if (TargetTy.isVector()) {
+ const LLT TargetElt = TargetTy.getElementType();
+
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCDElts =
+ std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements());
+ // Prefer the original element type.
+ ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements();
+ return LLT::vector(Mul.divideCoefficientBy(GCDElts),
+ OrigTy.getElementType());
+ }
+ } else {
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigTy;
+ }
+
+ unsigned LCMSize = std::lcm(OrigSize, TargetSize);
+ return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
+ }
+
+ if (TargetTy.isVector()) {
+ unsigned LCMSize = std::lcm(OrigSize, TargetSize);
+ return LLT::fixed_vector(LCMSize / OrigSize, OrigTy);
+ }
+
+ unsigned LCMSize = std::lcm(OrigSize, TargetSize);
+
+ // Preserve pointer types.
+ if (LCMSize == OrigSize)
+ return OrigTy;
+ if (LCMSize == TargetSize)
+ return TargetTy;
+
+ return LLT::scalar(LCMSize);
+}
+
+LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
+ if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy ||
+ (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits()))
+ return getLCMType(OrigTy, TargetTy);
+
+ unsigned OrigTyNumElts = OrigTy.getNumElements();
+ unsigned TargetTyNumElts = TargetTy.getNumElements();
+ if (OrigTyNumElts % TargetTyNumElts == 0)
+ return OrigTy;
+
+ unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts);
+ return LLT::scalarOrVector(ElementCount::getFixed(NumElts),
+ OrigTy.getElementType());
+}
+
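+// Some illustrative results of the computation below (examples only):
+//   getGCDType(<4 x s32>, <2 x s32>) == <2 x s32>
+//   getGCDType(<3 x s16>, s32)       == s16
+//   getGCDType(s64, s32)             == s32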
+LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ LLT OrigElt = OrigTy.getElementType();
+ if (TargetTy.isVector()) {
+ LLT TargetElt = TargetTy.getElementType();
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCD = std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements());
+ return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt);
+ }
+ } else {
+ // If the source is a vector of pointers, return a pointer element.
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigElt;
+ }
+
+ unsigned GCD = std::gcd(OrigSize, TargetSize);
+ if (GCD == OrigElt.getSizeInBits())
+ return OrigElt;
+
+ // If we can't produce the original element type, we have to use a smaller
+ // scalar.
+ if (GCD < OrigElt.getSizeInBits())
+ return LLT::scalar(GCD);
+ return LLT::fixed_vector(GCD / OrigElt.getSizeInBits(), OrigElt);
+ }
+
+ if (TargetTy.isVector()) {
+ // Try to preserve the original element type.
+ LLT TargetElt = TargetTy.getElementType();
+ if (TargetElt.getSizeInBits() == OrigSize)
+ return OrigTy;
+ }
+
+ unsigned GCD = std::gcd(OrigSize, TargetSize);
+ return LLT::scalar(GCD);
+}
+
+std::optional<int> llvm::getSplatIndex(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Only G_SHUFFLE_VECTOR can have a splat index!");
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
+
+ // If all elements are undefined, this shuffle can be considered a splat.
+ // Return 0 to give callers a better chance to simplify.
+ if (FirstDefinedIdx == Mask.end())
+ return 0;
+
+ // Make sure all remaining elements are either undef or the same
+ // as the first non-undef value.
+ int SplatValue = *FirstDefinedIdx;
+ if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
+ [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
+ return std::nullopt;
+
+ return SplatValue;
+}
+
+static bool isBuildVectorOp(unsigned Opcode) {
+ return Opcode == TargetOpcode::G_BUILD_VECTOR ||
+ Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
+}
+
+namespace {
+
+std::optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
+ if (!MI)
+ return std::nullopt;
+
+ bool isConcatVectorsOp = MI->getOpcode() == TargetOpcode::G_CONCAT_VECTORS;
+ if (!isBuildVectorOp(MI->getOpcode()) && !isConcatVectorsOp)
+ return std::nullopt;
+
+ std::optional<ValueAndVReg> SplatValAndReg;
+ for (MachineOperand &Op : MI->uses()) {
+ Register Element = Op.getReg();
+ // If we have a G_CONCAT_VECTORS, we recursively look into the
+ // vectors that we're concatenating to see if they're splats.
+ auto ElementValAndReg =
+ isConcatVectorsOp
+ ? getAnyConstantSplat(Element, MRI, AllowUndef)
+ : getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+
+ // If AllowUndef, treat undef as a value that yields a constant splat.
+ if (!ElementValAndReg) {
+ if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
+ continue;
+ return std::nullopt;
+ }
+
+ // Record splat value
+ if (!SplatValAndReg)
+ SplatValAndReg = ElementValAndReg;
+
+ // Different constant than the one already recorded, not a constant splat.
+ if (SplatValAndReg->Value != ElementValAndReg->Value)
+ return std::nullopt;
+ }
+
+ return SplatValAndReg;
+}
+
+} // end anonymous namespace
+
+bool llvm::isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
+ return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
+ return false;
+}
+
+bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue,
+ AllowUndef);
+}
+
+std::optional<APInt>
+llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
+ std::optional<ValueAndVReg> ValAndVReg =
+ getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return ValAndVReg->Value;
+ }
+
+ return std::nullopt;
+}
+
+std::optional<APInt>
+llvm::getIConstantSplatVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI);
+}
+
+std::optional<int64_t>
+llvm::getIConstantSplatSExtVal(const Register Reg,
+ const MachineRegisterInfo &MRI) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false))
+ return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
+ return std::nullopt;
+}
+
+std::optional<int64_t>
+llvm::getIConstantSplatSExtVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI);
+}
+
+std::optional<FPValueAndVReg>
+llvm::getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef))
+ return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return std::nullopt;
+}
+
+bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, 0, AllowUndef);
+}
+
+bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef);
+}
+
+std::optional<RegOrConstant>
+llvm::getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) {
+ unsigned Opc = MI.getOpcode();
+ if (!isBuildVectorOp(Opc))
+ return std::nullopt;
+ if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
+ return RegOrConstant(*Splat);
+ auto Reg = MI.getOperand(1).getReg();
+ if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
+ [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
+ return std::nullopt;
+ return RegOrConstant(Reg);
+}
+
+static bool isConstantScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP = true,
+ bool AllowOpaqueConstants = true) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return true;
+ case TargetOpcode::G_FCONSTANT:
+ return AllowFP;
+ case TargetOpcode::G_GLOBAL_VALUE:
+ case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_BLOCK_ADDR:
+ case TargetOpcode::G_JUMP_TABLE:
+ return AllowOpaqueConstants;
+ default:
+ return false;
+ }
+}
+
+bool llvm::isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return true;
+ GBuildVector *BV = dyn_cast<GBuildVector>(&MI);
+ if (!BV)
+ return false;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) ||
+ getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+bool llvm::isConstantOrConstantVector(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP, bool AllowOpaqueConstants) {
+ if (isConstantScalar(MI, MRI, AllowFP, AllowOpaqueConstants))
+ return true;
+
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return false;
+
+ const unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I != NumOps; ++I) {
+ const MachineInstr *ElementDef = MRI.getVRegDef(MI.getOperand(I).getReg());
+ if (!isConstantScalar(*ElementDef, MRI, AllowFP, AllowOpaqueConstants))
+ return false;
+ }
+
+ return true;
+}
+
+std::optional<APInt>
+llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return C->Value;
+ auto MaybeCst = getIConstantSplatSExtVal(MI, MRI);
+ if (!MaybeCst)
+ return std::nullopt;
+ const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
+ return APInt(ScalarSize, *MaybeCst, true);
+}
+
+bool llvm::isNullOrNullSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, bool AllowUndefs) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return AllowUndefs;
+ case TargetOpcode::G_CONSTANT:
+ return MI.getOperand(1).getCImm()->isNullValue();
+ case TargetOpcode::G_FCONSTANT: {
+ const ConstantFP *FPImm = MI.getOperand(1).getFPImm();
+ return FPImm->isZero() && !FPImm->isNegative();
+ }
+ default:
+ if (!AllowUndefs) // TODO: isBuildVectorAllZeros assumes undef is OK already
+ return false;
+ return isBuildVectorAllZeros(MI, MRI);
+ }
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndefs) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return AllowUndefs;
+ case TargetOpcode::G_CONSTANT:
+ return MI.getOperand(1).getCImm()->isAllOnesValue();
+ default:
+ if (!AllowUndefs) // TODO: isBuildVectorAllOnes assumes undef is OK already
+ return false;
+ return isBuildVectorAllOnes(MI, MRI);
+ }
+}
+
+bool llvm::matchUnaryPredicate(
+ const MachineRegisterInfo &MRI, Register Reg,
+ std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
+
+ const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ return Match(nullptr);
+
+ // TODO: Also handle fconstant
+ if (Def->getOpcode() == TargetOpcode::G_CONSTANT)
+ return Match(Def->getOperand(1).getCImm());
+
+ if (Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
+ return false;
+
+ for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+ Register SrcElt = Def->getOperand(I).getReg();
+ const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI);
+ if (AllowUndefs && SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
+ if (SrcDef->getOpcode() != TargetOpcode::G_CONSTANT ||
+ !Match(SrcDef->getOperand(1).getCImm()))
+ return false;
+ }
+
+ return true;
+}
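+
+// An illustrative usage sketch (caller code is hypothetical): a combine can
+// check that every lane of a G_BUILD_VECTOR (or a plain G_CONSTANT) is a
+// non-zero constant, rejecting undef lanes:
+//
+//   bool AllNonZero = matchUnaryPredicate(
+//       MRI, Reg,
+//       [](const Constant *C) {
+//         auto *CI = dyn_cast_or_null<ConstantInt>(C);
+//         return CI && !CI->isZero();
+//       },
+//       /*AllowUndefs=*/false);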
+
+bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ return Val & 0x1;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return Val == 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Val == -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
+bool llvm::isConstFalseVal(const TargetLowering &TLI, int64_t Val,
+ bool IsVector, bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ return ~Val & 0x1;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Val == 0;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
+bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ const auto &F = MBB.getParent()->getFunction();
+ return F.hasOptSize() || F.hasMinSize() ||
+ llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
+}
+
+void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver,
+ SmallInstListTy &DeadInstChain) {
+ for (MachineOperand &Op : MI.uses()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DeadInstChain.insert(MRI.getVRegDef(Op.getReg()));
+ }
+ LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ DeadInstChain.remove(&MI);
+ MI.eraseFromParent();
+ if (LocObserver)
+ LocObserver->checkpoint(false);
+}
+
+void llvm::eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs,
+ MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ SmallInstListTy DeadInstChain;
+ for (MachineInstr *MI : DeadInstrs)
+ saveUsesAndErase(*MI, MRI, LocObserver, DeadInstChain);
+
+ while (!DeadInstChain.empty()) {
+ MachineInstr *Inst = DeadInstChain.pop_back_val();
+ if (!isTriviallyDead(*Inst, MRI))
+ continue;
+ saveUsesAndErase(*Inst, MRI, LocObserver, DeadInstChain);
+ }
+}
+
+void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ return eraseInstrs({&MI}, MRI, LocObserver);
+}
+
+void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
+ for (auto &Def : MI.defs()) {
+ assert(Def.isReg() && "Must be a reg");
+
+ SmallVector<MachineOperand *, 16> DbgUsers;
+ for (auto &MOUse : MRI.use_operands(Def.getReg())) {
+ MachineInstr *DbgValue = MOUse.getParent();
+ // Ignore partially formed DBG_VALUEs.
+ if (DbgValue->isNonListDebugValue() && DbgValue->getNumOperands() == 4) {
+ DbgUsers.push_back(&MOUse);
+ }
+ }
+
+ if (!DbgUsers.empty()) {
+ salvageDebugInfoForDbgValue(MRI, MI, DbgUsers);
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
new file mode 100644
index 000000000000..f259cbc1d788
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -0,0 +1,706 @@
+//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into the biggest one can be addressed using
+// offsets from the same base pointer (there is no need for a separate base
+// pointer for each of the globals). Such a transformation can significantly
+// reduce the register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of the 3 arrays have to be kept in registers, thus
+// this code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+//
+// However, merging globals can have tradeoffs:
+// - it confuses debuggers, tools, and users
+// - it makes linker optimizations less useful (order files, LOHs, ...)
+// - it forces usage of indexed addressing (which isn't necessarily "free")
+// - it can increase register pressure when the uses are disparate enough.
+//
+// We use heuristics to discover the best global grouping we can (cf cl::opts).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "global-merge"
+
+// FIXME: This is only useful as a last-resort way to disable the pass.
+static cl::opt<bool>
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"),
+ cl::init(true));
+
+static cl::opt<unsigned>
+GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
+ cl::desc("Set maximum offset for global merge pass"),
+ cl::init(0));
+
+static cl::opt<bool> GlobalMergeGroupByUse(
+ "global-merge-group-by-use", cl::Hidden,
+ cl::desc("Improve global merge pass to look at uses"), cl::init(true));
+
+static cl::opt<bool> GlobalMergeIgnoreSingleUse(
+ "global-merge-ignore-single-use", cl::Hidden,
+ cl::desc("Improve global merge pass to ignore globals only used alone"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
+// FIXME: This could be a transitional option; we probably need to remove it
+// once we are sure this optimization always benefits all targets.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+ cl::desc("Enable global merge pass on external linkage"));
+
+STATISTIC(NumMerged, "Number of globals merged");
+
+namespace {
+
+ class GlobalMerge : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions), see the code that passes in the offset in the ARM backend
+ // for more information.
+ unsigned MaxOffset;
+
+ /// Whether we should try to optimize for size only.
+ /// Currently, this applies a dead simple heuristic: only consider globals
+ /// used in minsize functions for merging.
+ /// FIXME: This could learn about optsize, and be used in the cost model.
+ bool OnlyOptimizeForSize = false;
+
+ /// Whether we should merge global variables that have external linkage.
+ bool MergeExternalGlobals = false;
+
+ bool IsMachO = false;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const;
+
+ /// Merge everything in \p Globals for which the corresponding bit
+ /// in \p GlobalSet is set.
+ bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
+ const BitVector &GlobalSet, Module &M, bool isConst,
+ unsigned AddrSpace) const;
+
+ /// Check whether the given variable has been identified as one that must
+ /// be kept.
+ /// \pre setMustKeepGlobalVariables must have been called on the Module
+ /// that contains GV.
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variable marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variable marked as "used".
+ void collectUsedGlobalVariables(Module &M, StringRef Name);
+
+ /// Keep track of the GlobalVariables that must not be merged away.
+ SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ explicit GlobalMerge()
+ : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
+ bool OnlyOptimizeForSize, bool MergeExternalGlobals)
+ : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
+ OnlyOptimizeForSize(OnlyOptimizeForSize),
+ MergeExternalGlobals(MergeExternalGlobals) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+
+ StringRef getPassName() const override { return "Merge internal globals"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+
+INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const {
+ auto &DL = M.getDataLayout();
+ // FIXME: Find better heuristics
+ llvm::stable_sort(
+ Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ // We don't support scalable global variables.
+ return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
+ DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
+ });
+
+ // If we want to just blindly group all globals together, do so.
+ if (!GlobalMergeGroupByUse) {
+ BitVector AllGlobals(Globals.size());
+ AllGlobals.set();
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // If we want to be smarter, look at all uses of each global, to try to
+ // discover all sets of globals used together, and how many times each of
+ // these sets occurred.
+ //
+ // Keep this reasonably efficient, by having an append-only list of all sets
+ // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
+ // code (currently, a Function) to the set of globals seen so far that are
+ // used together in that unit (GlobalUsesByFunction).
+ //
+ // When we look at the Nth global, we know that any new set is either:
+ // - the singleton set {N}, containing this global only, or
+ // - the union of {N} and a previously-discovered set, containing some
+ // combination of the previous N-1 globals.
+ // Using that knowledge, when looking at the Nth global, we can keep:
+ // - a reference to the singleton set {N} (CurGVOnlySetIdx)
+ // - a list mapping each previous set to its union with {N} (EncounteredUGS),
+ // if it actually occurs.
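+ //
+ // As a tiny illustrative example: with globals {a, b, c}, a function that
+ // uses only a and b ends up mapped to the set {a, b}, while one that uses
+ // all three ends up mapped to {a, b, c}. Each set's UsageCount is a rough
+ // measure of how often that exact combination was seen used together.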
+
+ // We keep track of the sets of globals used together "close enough".
+ struct UsedGlobalSet {
+ BitVector Globals;
+ unsigned UsageCount = 1;
+
+ UsedGlobalSet(size_t Size) : Globals(Size) {}
+ };
+
+ // Each set is unique in UsedGlobalSets.
+ std::vector<UsedGlobalSet> UsedGlobalSets;
+
+ // Avoid repeating the create-global-set pattern.
+ auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
+ UsedGlobalSets.emplace_back(Globals.size());
+ return UsedGlobalSets.back();
+ };
+
+ // The first set is the empty set.
+ CreateGlobalSet().UsageCount = 0;
+
+ // We define "close enough" to be "in the same function".
+ // FIXME: Grouping uses by function is way too aggressive, so we should have
+ // a better metric for distance between uses.
+ // The obvious alternative would be to group by BasicBlock, but that's in
+ // turn too conservative.
+ // Anything in between wouldn't be trivial to compute, so just stick with
+ // per-function grouping.
+
+ // The value type is an index into UsedGlobalSets.
+ // The default (0) conveniently points to the empty set.
+ DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
+
+ // Now, look at each merge-eligible global in turn.
+
+ // Keep track of the sets we already encountered to which we added the
+ // current global.
+ // Each element matches the same-index element in UsedGlobalSets.
+ // This lets us efficiently tell whether a set has already been expanded to
+ // include the current global.
+ std::vector<size_t> EncounteredUGS;
+
+ for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
+ GlobalVariable *GV = Globals[GI];
+
+ // Reset the encountered sets for this global...
+ std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
+ // ...and grow it in case we created new sets for the previous global.
+ EncounteredUGS.resize(UsedGlobalSets.size());
+
+ // We might need to create a set that only consists of the current global.
+ // Keep track of its index into UsedGlobalSets.
+ size_t CurGVOnlySetIdx = 0;
+
+ // For each global, look at all its Uses.
+ for (auto &U : GV->uses()) {
+ // This Use might be a ConstantExpr. We're interested in Instruction
+ // users, so look through ConstantExpr...
+ Use *UI, *UE;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+ if (CE->use_empty())
+ continue;
+ UI = &*CE->use_begin();
+ UE = nullptr;
+ } else if (isa<Instruction>(U.getUser())) {
+ UI = &U;
+ UE = UI->getNext();
+ } else {
+ continue;
+ }
+
+ // ...to iterate on all the instruction users of the global.
+ // Note that we iterate on Uses and not on Users to be able to getNext().
+ for (; UI != UE; UI = UI->getNext()) {
+ Instruction *I = dyn_cast<Instruction>(UI->getUser());
+ if (!I)
+ continue;
+
+ Function *ParentFn = I->getParent()->getParent();
+
+ // If we're only optimizing for size, ignore non-minsize functions.
+ if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
+ continue;
+
+ size_t UGSIdx = GlobalUsesByFunction[ParentFn];
+
+ // If this is the first global this function uses, map it to the set
+ // consisting of this global only.
+ if (!UGSIdx) {
+ // If that set doesn't exist yet, create it.
+ if (!CurGVOnlySetIdx) {
+ CurGVOnlySetIdx = UsedGlobalSets.size();
+ CreateGlobalSet().Globals.set(GI);
+ } else {
+ ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
+ }
+
+ GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
+ continue;
+ }
+
+ // If the function's current set already includes this global, just
+ // increment the counter.
+ if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
+ ++UsedGlobalSets[UGSIdx].UsageCount;
+ continue;
+ }
+
+ // If not, the previous set wasn't actually used in this function.
+ --UsedGlobalSets[UGSIdx].UsageCount;
+
+ // If we already expanded the previous set to include this global, just
+ // reuse that expanded set.
+ if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
+ ++UsedGlobalSets[ExpandedIdx].UsageCount;
+ GlobalUsesByFunction[ParentFn] = ExpandedIdx;
+ continue;
+ }
+
+ // If not, create a new set consisting of the union of the previous set
+ // and this global. Mark it as encountered, so we can reuse it later.
+ GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
+ UsedGlobalSets.size();
+
+ UsedGlobalSet &NewUGS = CreateGlobalSet();
+ NewUGS.Globals.set(GI);
+ NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
+ }
+ }
+ }
+
+ // Now we have found a bunch of sets of globals used together. We
+ // accumulated the number of times we encountered each set (i.e., how often
+ // that exact set of globals was seen used together within one function).
+ //
+ // Multiply that by the size of the set to give us a crude profitability
+ // metric.
+ llvm::stable_sort(UsedGlobalSets,
+ [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+ return UGS1.Globals.count() * UGS1.UsageCount <
+ UGS2.Globals.count() * UGS2.UsageCount;
+ });
+
+ // We can choose to merge all globals together, but ignore globals never used
+ // with another global. This catches the obviously non-profitable cases of
+ // having a single global, but is aggressive enough for any other case.
+ if (GlobalMergeIgnoreSingleUse) {
+ BitVector AllGlobals(Globals.size());
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
+ if (UGS.UsageCount == 0)
+ continue;
+ if (UGS.Globals.count() > 1)
+ AllGlobals |= UGS.Globals;
+ }
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // Starting from the sets with the best (=biggest) profitability, find a
+ // good combination.
+ // The ideal (and expensive) solution can only be found by trying all
+ // combinations, looking for the one with the best profitability.
+ // Don't be smart about it, and just pick the first compatible combination,
+ // starting with the sets with the best profitability.
+ BitVector PickedGlobals(Globals.size());
+ bool Changed = false;
+
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
+ if (UGS.UsageCount == 0)
+ continue;
+ if (PickedGlobals.anyCommon(UGS.Globals))
+ continue;
+ PickedGlobals |= UGS.Globals;
+ // If the set only contains one global, there's no point in merging.
+ // Ignore the global for inclusion in other sets though, so keep it in
+ // PickedGlobals.
+ if (UGS.Globals.count() < 2)
+ continue;
+ Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
+ }
+
+ return Changed;
+}
+
+bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
+ const BitVector &GlobalSet, Module &M, bool isConst,
+ unsigned AddrSpace) const {
+ assert(Globals.size() > 1);
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *Int8Ty = Type::getInt8Ty(M.getContext());
+ auto &DL = M.getDataLayout();
+
+ LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
+ << GlobalSet.find_first() << "\n");
+
+ bool Changed = false;
+ ssize_t i = GlobalSet.find_first();
+ while (i != -1) {
+ ssize_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ std::vector<unsigned> StructIdxs;
+
+ bool HasExternal = false;
+ StringRef FirstExternalName;
+ Align MaxAlign;
+ unsigned CurIdx = 0;
+ for (j = i; j != -1; j = GlobalSet.find_next(j)) {
+ Type *Ty = Globals[j]->getValueType();
+
+ // Make sure we use the same alignment AsmPrinter would use.
+ Align Alignment = DL.getPreferredAlign(Globals[j]);
+ unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
+ MergedSize += Padding;
+ MergedSize += DL.getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ if (Padding) {
+ Tys.push_back(ArrayType::get(Int8Ty, Padding));
+ Inits.push_back(ConstantAggregateZero::get(Tys.back()));
+ ++CurIdx;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ StructIdxs.push_back(CurIdx++);
+
+ MaxAlign = std::max(MaxAlign, Alignment);
+
+ if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+ HasExternal = true;
+ FirstExternalName = Globals[j]->getName();
+ }
+ }
+
+ // Exit early if there is only one global to merge.
+ if (Tys.size() < 2) {
+ i = j;
+ continue;
+ }
+
+ // If the merged variables don't have external linkage, we don't need to
+ // expose the symbol after merging.
+ GlobalValue::LinkageTypes Linkage = HasExternal
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+ // Use a packed struct so we can control alignment.
+ StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+ // On Darwin external linkage needs to be preserved, otherwise
+ // dsymutil cannot preserve the debug info for the merged
+ // variables. If they have external linkage, use the symbol name
+ // of the first variable merged as the suffix of the global symbol
+ // name. This avoids a link-time naming conflict for the
+ // _MergedGlobals symbols.
+ Twine MergedName =
+ (IsMachO && HasExternal)
+ ? "_MergedGlobals_" + FirstExternalName
+ : "_MergedGlobals";
+ auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
+ auto *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
+ GlobalVariable::NotThreadLocal, AddrSpace);
+
+ MergedGV->setAlignment(MaxAlign);
+ MergedGV->setSection(Globals[i]->getSection());
+
+ const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
+ for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
+ GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+ std::string Name(Globals[k]->getName());
+ GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
+ GlobalValue::DLLStorageClassTypes DLLStorage =
+ Globals[k]->getDLLStorageClass();
+
+ // Copy metadata while adjusting any debug info metadata by the original
+ // global's offset within the merged global.
+ MergedGV->copyMetadata(Globals[k],
+ MergedLayout->getElementOffset(StructIdxs[idx]));
+
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, StructIdxs[idx]),
+ };
+ Constant *GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+
+ // When the linkage is not internal we must emit an alias for the original
+ // variable name as it may be accessed from another object. On non-Mach-O
+ // we can also emit an alias for internal linkage as it's safe to do so.
+ // It's not safe on Mach-O as the alias (and thus the portion of the
+ // MergedGlobals variable) may be dead stripped at link time.
+ if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
+ GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
+ Linkage, Name, GEP, &M);
+ GA->setVisibility(Visibility);
+ GA->setDLLStorageClass(DLLStorage);
+ }
+
+ NumMerged++;
+ }
+ Changed = true;
+ i = j;
+ }
+
+ return Changed;
+}
+
+void GlobalMerge::collectUsedGlobalVariables(Module &M, StringRef Name) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable(Name);
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M, "llvm.used");
+ collectUsedGlobalVariables(M, "llvm.compiler.used");
+
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+ Instruction *Pad = BB.getFirstNonPHI();
+ if (!Pad->isEHPad())
+ continue;
+
+ // Keep globals used by landingpads and catchpads.
+ for (const Use &U : Pad->operands()) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(U->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
+ for (const Use &Elt : CA->operands()) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+ }
+ }
+ }
+}
+
+bool GlobalMerge::doInitialization(Module &M) {
+ if (!EnableGlobalMerge)
+ return false;
+
+ IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
+
+ auto &DL = M.getDataLayout();
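+  // Candidate globals are bucketed by (address space, section) so that only
+  // compatible globals end up merged together.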
+ DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>>
+ Globals, ConstGlobals, BSSGlobals;
+ bool Changed = false;
+ setMustKeepGlobalVariables(M);
+
+ LLVM_DEBUG({
+ dbgs() << "Number of GV that must be kept: " <<
+ MustKeepGlobalVariables.size() << "\n";
+ for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
+ dbgs() << "Kept: " << *KeptGV << "\n";
+ });
+  // Collect all candidate globals for merging.
+ for (auto &GV : M.globals()) {
+ // Merge is safe for "normal" internal or external globals only
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
+ continue;
+
+ // It's not safe to merge globals that may be preempted
+ if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
+ continue;
+
+ if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
+ !GV.hasInternalLinkage())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(GV.getType());
+ assert(PT && "Global variable is not a pointer!");
+
+ unsigned AddressSpace = PT->getAddressSpace();
+ StringRef Section = GV.getSection();
+
+ // Ignore all 'special' globals.
+ if (GV.getName().startswith("llvm.") ||
+ GV.getName().startswith(".llvm."))
+ continue;
+
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(&GV))
+ continue;
+
+ // Don't merge tagged globals, as each global should have its own unique
+ // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
+ // with compatible alignment and the same contents may be merged as long as
+ // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
+ // size_b / 16`).
+ if (GV.isTagged())
+ continue;
+
+ Type *Ty = GV.getValueType();
+ if (DL.getTypeAllocSize(Ty) < MaxOffset) {
+ if (TM &&
+ TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
+ BSSGlobals[{AddressSpace, Section}].push_back(&GV);
+ else if (GV.isConstant())
+ ConstGlobals[{AddressSpace, Section}].push_back(&GV);
+ else
+ Globals[{AddressSpace, Section}].push_back(&GV);
+ }
+ }
+
+ for (auto &P : Globals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first.first);
+
+ for (auto &P : BSSGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first.first);
+
+ if (EnableGlobalMergeOnConst)
+ for (auto &P : ConstGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, true, P.first.first);
+
+ return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
+ bool OnlyOptimizeForSize,
+ bool MergeExternalByDefault) {
+ bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
+ return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
new file mode 100644
index 000000000000..e7b14d700a44
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -0,0 +1,606 @@
+//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Insert hardware loop intrinsics into loops which are deemed profitable by
+/// the target, by querying TargetTransformInfo. A hardware loop comprises
+/// two intrinsics: one, outside the loop, to set the loop iteration count and
+/// another, in the exit block, to decrement the counter. The decremented value
+/// can either be carried through the loop via a phi or handled in some opaque
+/// way by the target.
+///
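+/// As a rough, illustrative sketch (not taken from any particular test), a
+/// converted counted loop ends up looking something like:
+///
+///   entry:
+///     call void @llvm.set.loop.iterations.i32(i32 %n)
+///     br label %loop
+///   loop:
+///     ...
+///     %cont = call i1 @llvm.loop.decrement.i32(i32 1)
+///     br i1 %cont, label %loop, label %exit
+///
+/// with the phi-carried llvm.start.loop.iterations / llvm.loop.decrement.reg
+/// forms used instead when the target wants the counter kept in a register.
+///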
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/HardwareLoops.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+#define DEBUG_TYPE "hardware-loops"
+
+#define HW_LOOPS_NAME "Hardware Loop Insertion"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
+                   cl::desc("Force hardware loop intrinsics to be inserted"));
+
+static cl::opt<bool>
+ForceHardwareLoopPHI(
+ "force-hardware-loop-phi", cl::Hidden, cl::init(false),
+ cl::desc("Force hardware loop counter to be updated through a phi"));
+
+static cl::opt<bool>
+ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
+ cl::desc("Force allowance of nested hardware loops"));
+
+static cl::opt<unsigned>
+LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
+ cl::desc("Set the loop decrement value"));
+
+static cl::opt<unsigned>
+CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
+ cl::desc("Set the loop counter bitwidth"));
+
+static cl::opt<bool>
+ForceGuardLoopEntry(
+ "force-hardware-loop-guard", cl::Hidden, cl::init(false),
+ cl::desc("Force generation of loop guard intrinsic"));
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+#ifndef NDEBUG
+static void debugHWLoopFailure(const StringRef DebugMsg,
+ Instruction *I) {
+ dbgs() << "HWLoops: " << DebugMsg;
+ if (I)
+ dbgs() << ' ' << *I;
+ else
+ dbgs() << '.';
+ dbgs() << '\n';
+}
+#endif
+
+static OptimizationRemarkAnalysis
+createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
+ Value *CodeRegion = L->getHeader();
+ DebugLoc DL = L->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+    // If there is no debug location attached to the instruction, fall back to
+    // using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
+ R << "hardware-loop not created: ";
+ return R;
+}
+
+namespace {
+
+ void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
+ OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
+ LLVM_DEBUG(debugHWLoopFailure(Msg, I));
+ ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
+ }
+
+ using TTI = TargetTransformInfo;
+
+ class HardwareLoopsLegacy : public FunctionPass {
+ public:
+ static char ID;
+
+ HardwareLoopsLegacy() : FunctionPass(ID) {
+ initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ }
+ };
+
+ class HardwareLoopsImpl {
+ public:
+ HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
+ DominatorTree &DT, const DataLayout &DL,
+ const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
+ AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts)
+ : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
+ TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
+
+ bool run(Function &F);
+
+ private:
+ // Try to convert the given Loop into a hardware loop.
+ bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
+
+ // Given that the target believes the loop to be profitable, try to
+ // convert it.
+ bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
+
+ ScalarEvolution &SE;
+ LoopInfo &LI;
+ bool PreserveLCSSA;
+ DominatorTree &DT;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+ TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache &AC;
+ OptimizationRemarkEmitter *ORE;
+ HardwareLoopOptions &Opts;
+ bool MadeChange = false;
+ };
+
+ class HardwareLoop {
+ // Expand the trip count scev into a value that we can use.
+ Value *InitLoopCount();
+
+ // Insert the set_loop_iteration intrinsic.
+ Value *InsertIterationSetup(Value *LoopCountInit);
+
+ // Insert the loop_decrement intrinsic.
+ void InsertLoopDec();
+
+ // Insert the loop_decrement_reg intrinsic.
+ Instruction *InsertLoopRegDec(Value *EltsRem);
+
+ // If the target requires the counter value to be updated in the loop,
+ // insert a phi to hold the value. The intended purpose is for use by
+ // loop_decrement_reg.
+ PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
+
+ // Create a new cmp, that checks the returned value of loop_decrement*,
+ // and update the exit branch to use it.
+ void UpdateBranch(Value *EltsRem);
+
+ public:
+ HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
+ const DataLayout &DL,
+ OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts) :
+ SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
+ ExitCount(Info.ExitCount),
+ CountType(Info.CountType),
+ ExitBranch(Info.ExitBranch),
+ LoopDecrement(Info.LoopDecrement),
+ UsePHICounter(Info.CounterInReg),
+ UseLoopGuard(Info.PerformEntryTest) { }
+
+ void Create();
+
+ private:
+ ScalarEvolution &SE;
+ const DataLayout &DL;
+ OptimizationRemarkEmitter *ORE = nullptr;
+ HardwareLoopOptions &Opts;
+ Loop *L = nullptr;
+ Module *M = nullptr;
+ const SCEV *ExitCount = nullptr;
+ Type *CountType = nullptr;
+ BranchInst *ExitBranch = nullptr;
+ Value *LoopDecrement = nullptr;
+ bool UsePHICounter = false;
+ bool UseLoopGuard = false;
+ BasicBlock *BeginBB = nullptr;
+ };
+}
+
+char HardwareLoopsLegacy::ID = 0;
+
+bool HardwareLoopsLegacy::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
+
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &DL = F.getParent()->getDataLayout();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ HardwareLoopOptions Opts;
+ if (ForceHardwareLoops.getNumOccurrences())
+ Opts.setForce(ForceHardwareLoops);
+ if (ForceHardwareLoopPHI.getNumOccurrences())
+ Opts.setForcePhi(ForceHardwareLoopPHI);
+ if (ForceNestedLoop.getNumOccurrences())
+ Opts.setForceNested(ForceNestedLoop);
+ if (ForceGuardLoopEntry.getNumOccurrences())
+ Opts.setForceGuard(ForceGuardLoopEntry);
+ if (LoopDecrement.getNumOccurrences())
+ Opts.setDecrement(LoopDecrement);
+ if (CounterBitWidth.getNumOccurrences())
+ Opts.setCounterBitwidth(CounterBitWidth);
+
+ HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
+ Opts);
+ return Impl.run(F);
+}
+
+PreservedAnalyses HardwareLoopsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &DL = F.getParent()->getDataLayout();
+
+ HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
+ bool Changed = Impl.run(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<BranchProbabilityAnalysis>();
+ return PA;
+}
+
+bool HardwareLoopsImpl::run(Function &F) {
+ LLVMContext &Ctx = F.getParent()->getContext();
+ for (Loop *L : LI)
+ if (L->isOutermost())
+ TryConvertLoop(L, Ctx);
+ return MadeChange;
+}
+
+// Return true if the search should stop, which will be when an inner loop is
+// converted and the parent loop doesn't support containing a hardware loop.
+bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
+ // Process nested loops first.
+ bool AnyChanged = false;
+ for (Loop *SL : *L)
+ AnyChanged |= TryConvertLoop(SL, Ctx);
+ if (AnyChanged) {
+ reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
+ ORE, L);
+ return true; // Stop search.
+ }
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
+
+ HardwareLoopInfo HWLoopInfo(L);
+ if (!HWLoopInfo.canAnalyze(LI)) {
+ reportHWLoopFailure("cannot analyze loop, irreducible control flow",
+ "HWLoopCannotAnalyze", ORE, L);
+ return false;
+ }
+
+ if (!Opts.Force &&
+ !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
+ reportHWLoopFailure("it's not profitable to create a hardware-loop",
+ "HWLoopNotProfitable", ORE, L);
+ return false;
+ }
+
+ // Allow overriding of the counter width and loop decrement value.
+ if (Opts.Bitwidth.has_value()) {
+ HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
+ }
+
+ if (Opts.Decrement.has_value())
+ HWLoopInfo.LoopDecrement =
+ ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
+
+ MadeChange |= TryConvertLoop(HWLoopInfo);
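+  // Stop the search (return true) if a hardware loop has been created and the
+  // target does not allow it to be nested inside another hardware loop.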
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
+}
+
+bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+
+ Loop *L = HWLoopInfo.L;
+ LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
+ Opts.getForcePhi())) {
+ // TODO: there can be many reasons a loop is not considered a
+ // candidate, so we should let isHardwareLoopCandidate fill in the
+ // reason and then report a better message here.
+ reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
+ return false;
+ }
+
+ assert(
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
+ "Hardware Loop must have set exit info.");
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // If we don't have a preheader, then insert one.
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
+ if (!Preheader)
+ return false;
+
+ HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
+ HWLoop.Create();
+ ++NumHWLoops;
+ return true;
+}
+
+void HardwareLoop::Create() {
+  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop...\n");
+
+ Value *LoopCountInit = InitLoopCount();
+ if (!LoopCountInit) {
+ reportHWLoopFailure("could not safely create a loop count expression",
+ "HWLoopNotSafe", ORE, L);
+ return;
+ }
+
+ Value *Setup = InsertIterationSetup(LoopCountInit);
+
+ if (UsePHICounter || Opts.ForcePhi) {
+ Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
+ Value *EltsRem = InsertPHICounter(Setup, LoopDec);
+ LoopDec->setOperand(0, EltsRem);
+ UpdateBranch(LoopDec);
+ } else
+ InsertLoopDec();
+
+ // Run through the basic blocks of the loop and see if any of them have dead
+ // PHIs that can be removed.
+ for (auto *I : L->blocks())
+ DeleteDeadPHIs(I);
+}
+
+static bool CanGenerateTest(Loop *L, Value *Count) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader->getSinglePredecessor())
+ return false;
+
+ BasicBlock *Pred = Preheader->getSinglePredecessor();
+ if (!isa<BranchInst>(Pred->getTerminator()))
+ return false;
+
+ auto *BI = cast<BranchInst>(Pred->getTerminator());
+ if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
+ return false;
+
+ // Check that the icmp is checking for equality of Count and zero and that
+ // a non-zero value results in entering the loop.
+ auto ICmp = cast<ICmpInst>(BI->getCondition());
+ LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
+ if (!ICmp->isEquality())
+ return false;
+
+ auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
+ if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
+ return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
+ return false;
+ };
+
+ // Check if Count is a zext.
+ Value *CountBefZext =
+ isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
+
+ if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
+ !IsCompareZero(ICmp, CountBefZext, 0) &&
+ !IsCompareZero(ICmp, CountBefZext, 1))
+ return false;
+
+ unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
+ if (BI->getSuccessor(SuccIdx) != Preheader)
+ return false;
+
+ return true;
+}
+
+Value *HardwareLoop::InitLoopCount() {
+ LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
+ // Can we replace a conditional branch with an intrinsic that sets the
+  // loop counter and tests that it is not zero?
+
+ SCEVExpander SCEVE(SE, DL, "loopcnt");
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
+
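+  // ExitCount is the number of times the backedge is taken, so add one to get
+  // the trip count that the hardware counter must run for.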
+ ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
+
+ // If we're trying to use the 'test and set' form of the intrinsic, we need
+ // to replace a conditional branch that is controlling entry to the loop. It
+ // is likely (guaranteed?) that the preheader has an unconditional branch to
+ // the loop header, so also check if it has a single predecessor.
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+ SE.getZero(ExitCount->getType()))) {
+ LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
+ if (Opts.ForceGuard)
+ UseLoopGuard = true;
+ } else
+ UseLoopGuard = false;
+
+ BasicBlock *BB = L->getLoopPreheader();
+ if (UseLoopGuard && BB->getSinglePredecessor() &&
+ cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
+ BasicBlock *Predecessor = BB->getSinglePredecessor();
+    // If it's not safe to create a while loop then don't force it and create
+    // a do-while loop instead.
+ if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
+ UseLoopGuard = false;
+ else
+ BB = Predecessor;
+ }
+
+ if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
+ << *ExitCount << "\n");
+ return nullptr;
+ }
+
+ Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
+ BB->getTerminator());
+
+  // FIXME: We've expanded Count where we hope to insert the counter setting
+  // intrinsic. But, in the case of the 'test and set' form, we may fall back
+  // to just the 'set' form, in which case the insertion block is most likely
+  // different. That means there will be instruction(s) in a block that
+  // possibly aren't needed. The isLoopEntryGuardedByCond check is trying to
+  // avoid this issue, but it doesn't appear to work in all cases.
+
+ UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
+ BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
+ LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
+ << " - Expanded Count in " << BB->getName() << "\n"
+ << " - Will insert set counter intrinsic into: "
+ << BeginBB->getName() << "\n");
+ return Count;
+}
+
+Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
+ IRBuilder<> Builder(BeginBB->getTerminator());
+ Type *Ty = LoopCountInit->getType();
+ bool UsePhi = UsePHICounter || Opts.ForcePhi;
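+  // Pick the 'test' variants when we are also guarding entry to the loop, and
+  // the 'start' variants when the counter value is carried through a phi.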
+ Intrinsic::ID ID = UseLoopGuard
+ ? (UsePhi ? Intrinsic::test_start_loop_iterations
+ : Intrinsic::test_set_loop_iterations)
+ : (UsePhi ? Intrinsic::start_loop_iterations
+ : Intrinsic::set_loop_iterations);
+ Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
+ Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
+
+ // Use the return value of the intrinsic to control the entry of the loop.
+ if (UseLoopGuard) {
+ assert((isa<BranchInst>(BeginBB->getTerminator()) &&
+ cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
+ "Expected conditional branch");
+
+ Value *SetCount =
+ UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
+ auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
+ LoopGuard->setCondition(SetCount);
+ if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
+ LoopGuard->swapSuccessors();
+ }
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
+ << "\n");
+ if (UsePhi && UseLoopGuard)
+ LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
+ return !UsePhi ? LoopCountInit : LoopSetup;
+}
+
+void HardwareLoop::InsertLoopDec() {
+ IRBuilder<> CondBuilder(ExitBranch);
+
+ Function *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
+ LoopDecrement->getType());
+ Value *Ops[] = { LoopDecrement };
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
+ Value *OldCond = ExitBranch->getCondition();
+ ExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(ExitBranch->getSuccessor(0)))
+ ExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
+}
+
+Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
+ IRBuilder<> CondBuilder(ExitBranch);
+
+ Function *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
+ { EltsRem->getType() });
+ Value *Ops[] = { EltsRem, LoopDecrement };
+ Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
+ return cast<Instruction>(Call);
+}
+
+PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = ExitBranch->getParent();
+ IRBuilder<> Builder(Header->getFirstNonPHI());
+ PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
+ Index->addIncoming(NumElts, Preheader);
+ Index->addIncoming(EltsRem, Latch);
+ LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
+ return Index;
+}
+
+void HardwareLoop::UpdateBranch(Value *EltsRem) {
+ IRBuilder<> CondBuilder(ExitBranch);
+ Value *NewCond =
+ CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
+ Value *OldCond = ExitBranch->getCondition();
+ ExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(ExitBranch->getSuccessor(0)))
+ ExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+}
+
+INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+
+FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 000000000000..2ad5820bd9fb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,2360 @@
+//===- IfConversion.cpp - Machine code if conversion pass -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass, which
+// tries to convert conditional branches into predicated instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "if-converter"
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
+
+namespace {
+
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+      ICSimple,        // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond, // BB is entry of a diamond sub-CFG.
+ ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
+ // common tail that can be shared.
+ };
+
+    /// One per MachineBasicBlock, this is used to cache the result of
+    /// if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// classification, and common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+    ///   IsBrAnalyzable  - True if analyzeBranch() returns false (success).
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Some instructions are slower when predicated
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See analyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool IsBrReversible : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize = 0;
+ unsigned ExtraCost = 0;
+ unsigned ExtraCost2 = 0;
+ MachineBasicBlock *BB = nullptr;
+ MachineBasicBlock *TrueBB = nullptr;
+ MachineBasicBlock *FalseBB = nullptr;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ IsBrReversible(false), HasFallThrough(false),
+ IsUnpredicable(false), CannotBeCopied(false),
+ ClobbersPred(false) {}
+ };
+
+ /// Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ unsigned NumDups;
+ unsigned NumDups2;
+ bool NeedSubsumption : 1;
+ bool TClobbersPred : 1;
+ bool FClobbersPred : 1;
+
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
+ bool tc = false, bool fc = false)
+ : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
+ TClobbersPred(tc), FClobbersPred(fc) {}
+ };
+
+ /// Results of if-conversion feasibility analysis indexed by basic block
+ /// number.
+ std::vector<BBInfo> BBAnalysis;
+ TargetSchedModel SchedModel;
+
+ const TargetLoweringBase *TLI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+
+ LivePhysRegs Redefs;
+
+ bool PreRegAlloc = true;
+ bool MadeChange = false;
+ int FnNum = -1;
+ std::function<bool(const MachineFunction &)> PredicateFtor;
+
+ public:
+ static char ID;
+
+ IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr)
+ : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ private:
+ bool reverseBranchCondition(BBInfo &BBI) const;
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ BranchProbability Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ BranchProbability Prediction) const;
+ bool CountDuplicatedInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ unsigned &Dups1, unsigned &Dups2,
+ MachineBasicBlock &TBB, MachineBasicBlock &FBB,
+ bool SkipUnconditionalBranches) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ void AnalyzeBranches(BBInfo &BBI);
+ void ScanInstructions(BBInfo &BBI,
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &End,
+ bool BranchUnpredicable = false) const;
+ bool RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const;
+ void AnalyzeBlock(MachineBasicBlock &MBB,
+ std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle = false, bool RevBranch = false,
+ bool hasCommonTail = false);
+ void AnalyzeBlocks(MachineFunction &MF,
+ std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
+ void InvalidatePreds(MachineBasicBlock &MBB);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveBranch, bool MergeAddEdges);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
+ bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<MCPhysReg, 4> *LaterRedefs = nullptr);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ BranchProbability Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
+ MachineBasicBlock &CommBB, unsigned Dups,
+ BranchProbability Prediction, bool Forked) const {
+ const MachineFunction &MF = *TBBInfo.BB->getParent();
+ if (MF.getFunction().hasMinSize()) {
+ MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
+ MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
+ MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
+ MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
+
+ unsigned Dups1 = 0, Dups2 = 0;
+ if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TBBInfo.BB, *FBBInfo.BB,
+ /*SkipUnconditionalBranches*/ true))
+ llvm_unreachable("should already have been checked by ValidDiamond");
+
+ unsigned BranchBytes = 0;
+ unsigned CommonBytes = 0;
+
+ // Count common instructions at the start of the true and false blocks.
+ for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+
+ // Count instructions at the end of the true and false blocks, after
+ // the ones we plan to predicate. Analyzable branches will be removed
+ // (unless this is a forked diamond), and all other instructions are
+ // common between the two blocks.
+ for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
+ if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
+ if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : CommBB.terminators()) {
+ if (I.isBranch()) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ }
+ }
+
+ // The common instructions in one branch will be eliminated, halving
+ // their code size.
+ CommonBytes /= 2;
+
+ // Count the instructions which we need to predicate.
+ unsigned NumPredicatedInstructions = 0;
+ for (auto &I : make_range(TIB, TIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+ for (auto &I : make_range(FIB, FIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+
+ // Even though we're optimising for size at the expense of performance,
+ // avoid creating really long predicated blocks.
+ if (NumPredicatedInstructions > 15)
+ return false;
+
+ // Some targets (e.g. Thumb2) need to insert extra instructions to
+ // start predicated blocks.
+ unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
+ MF, NumPredicatedInstructions);
+
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
+ << ", CommonBytes=" << CommonBytes
+ << ", NumPredicatedInstructions="
+ << NumPredicatedInstructions
+ << ", ExtraPredicateBytes=" << ExtraPredicateBytes
+ << ")\n");
+ return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
+ } else {
+ unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
+ unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
+ bool Res = TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(
+ *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
+ FCycle, FBBInfo.ExtraCost2, Prediction);
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
+ << ", FCycle=" << FCycle
+ << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
+ << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
+ return Res;
+ }
+ }
+
+ /// Returns true if Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
+ }
+
+ /// Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
+ const std::unique_ptr<IfcvtToken> &C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (!C1->NeedSubsumption && C2->NeedSubsumption)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+} // end anonymous namespace
+
+char IfConverter::ID = 0;
+
+char &llvm::IfConverterID = IfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
+ return false;
+
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TLI = ST.getTargetLowering();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(&ST);
+
+ if (!TII) return false;
+
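+  // The machine function is still in SSA form only before register
+  // allocation, so use that to detect whether we are running pre-RA.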
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+    // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false, MBFI, *MBPI, PSI);
+ BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo());
+ }
+
+ LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ LLVM_DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<std::unique_ptr<IfcvtToken>> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back());
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected!");
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ LLVM_DEBUG(dbgs() << "Ifcvt (Simple"
+ << (Kind == ICSimpleFalse ? " false" : "")
+ << "): " << printMBBReference(*BBI.BB) << " ("
+ << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber())
+ << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ LLVM_DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ LLVM_DEBUG(dbgs() << " false");
+ if (isRev)
+ LLVM_DEBUG(dbgs() << " rev");
+ LLVM_DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond:
+ if (DisableDiamond) break;
+ LLVM_DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ case ICForkedDiamond:
+ if (DisableForkedDiamond) break;
+ LLVM_DEBUG(dbgs() << "Ifcvt (Forked Diamond): "
+ << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumForkedDiamonds;
+ break;
+ }
+
+ if (RetVal && MRI->tracksLiveness())
+ recomputeLivenessFlags(*BBI.BB);
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false, MBFI, *MBPI, PSI);
+ BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// BB has a fallthrough. Find its 'false' successor given its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock *SuccBB : BB->successors()) {
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return nullptr;
+}
+
+/// Reverse the condition of the end of the block branch. Swap block's 'true'
+/// and 'false' successors.
+bool IfConverter::reverseBranchCondition(BBInfo &BBI) const {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->reverseBranchCondition(BBI.BrCond)) {
+ TII->removeBranch(*BBI.BB);
+ TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// Returns the next block in the function blocks ordering. If it is the end,
+/// returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock &MBB) {
+ MachineFunction::iterator I = MBB.getIterator();
+ MachineFunction::iterator E = MBB.getParent()->end();
+ if (++I == E)
+ return nullptr;
+ return &*I;
+}
+
+/// Returns true if the 'true' block (along with its predecessor) forms a valid
+/// simple shape for ifcvt. It also returns, in Dups, the number of
+/// instructions that the ifcvt would need to duplicate if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ BranchProbability Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// Returns true if the 'true' and 'false' blocks (along with their common
+/// predecessor) form a valid triangle shape for ifcvt. If 'FalseBranch' is
+/// true, it checks whether the 'true' block's false branch branches to the
+/// 'false' block rather than the other way around. It also returns, in 'Dups',
+/// the number of instructions that the ifcvt would need to duplicate if
+/// performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ BranchProbability Prediction) const {
+ Dups = 0;
+ if (TrueBBI.BB == FalseBBI.BB)
+ return false;
+
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB->getIterator();
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = &*I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// Count duplicated instructions and move the iterators to show where they
+/// are.
+/// @param TIB True Iterator Begin
+/// @param FIB False Iterator Begin
+/// These two iterators initially point to the first instruction of the two
+/// blocks, and finally point to the first non-shared instruction.
+/// @param TIE True Iterator End
+/// @param FIE False Iterator End
+/// These two iterators initially point to End() for the two blocks, and
+/// finally point to the first shared instruction in the tail.
+/// Upon return [TIB, TIE), and [FIB, FIE) mark the un-duplicated portions of
+/// two blocks.
+/// @param Dups1 count of duplicated instructions at the beginning of the 2
+/// blocks.
+/// @param Dups2 count of duplicated instructions at the end of the 2 blocks.
+/// @param SkipUnconditionalBranches if true, don't require the unconditional
+/// branches at the end of the blocks to be identical. True is passed when the
+/// blocks are analyzable, to allow fallthrough to be handled.
+/// @return false if the shared portion prevents if conversion.
+bool IfConverter::CountDuplicatedInstructions(
+ MachineBasicBlock::iterator &TIB,
+ MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE,
+ MachineBasicBlock::iterator &FIE,
+ unsigned &Dups1, unsigned &Dups2,
+ MachineBasicBlock &TBB, MachineBasicBlock &FBB,
+ bool SkipUnconditionalBranches) const {
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ TIB = skipDebugInstructionsForward(TIB, TIE, false);
+ FIB = skipDebugInstructionsForward(FIB, FIE, false);
+ if (TIB == TIE || FIB == FIE)
+ break;
+ if (!TIB->isIdenticalTo(*FIB))
+ break;
+ // A pred-clobbering instruction in the shared portion prevents
+ // if-conversion.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->ClobbersPredicate(*TIB, PredDefs, false))
+ return false;
+ // If we get all the way to the branch instructions, don't count them.
+ if (!TIB->isBranch())
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Check for already containing all of the block.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, switch to reverse_iterators. Note that getReverse() returns an
+ // iterator that points to the same instruction, unlike std::reverse_iterator.
+ // We have to do our own shifting so that we get the same range.
+ MachineBasicBlock::reverse_iterator RTIE = std::next(TIE.getReverse());
+ MachineBasicBlock::reverse_iterator RFIE = std::next(FIE.getReverse());
+ const MachineBasicBlock::reverse_iterator RTIB = std::next(TIB.getReverse());
+ const MachineBasicBlock::reverse_iterator RFIB = std::next(FIB.getReverse());
+
+ if (!TBB.succ_empty() || !FBB.succ_empty()) {
+ if (SkipUnconditionalBranches) {
+ while (RTIE != RTIB && RTIE->isUnconditionalBranch())
+ ++RTIE;
+ while (RFIE != RFIB && RFIE->isUnconditionalBranch())
+ ++RFIE;
+ }
+ }
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (RTIE != RTIB && RFIE != RFIB) {
+ // Skip dbg_value instructions. These do not count.
+ // Note that these are reverse iterators going forward.
+ RTIE = skipDebugInstructionsForward(RTIE, RTIB, false);
+ RFIE = skipDebugInstructionsForward(RFIE, RFIB, false);
+ if (RTIE == RTIB || RFIE == RFIB)
+ break;
+ if (!RTIE->isIdenticalTo(*RFIE))
+ break;
+ // We have to verify that any branch instructions are the same, and then we
+ // don't count them toward the # of duplicate instructions.
+ if (!RTIE->isBranch())
+ ++Dups2;
+ ++RTIE;
+ ++RFIE;
+ }
+ TIE = std::next(RTIE.getReverse());
+ FIE = std::next(RFIE.getReverse());
+ return true;
+}
+
+/// RescanInstructions - Run ScanInstructions on a pair of blocks.
+/// @param TIB - True Iterator Begin, points to first non-shared instruction
+/// @param FIB - False Iterator Begin, points to first non-shared instruction
+/// @param TIE - True Iterator End, points past last non-shared instruction
+/// @param FIE - False Iterator End, points past last non-shared instruction
+/// @param TrueBBI - BBInfo to update for the true block.
+/// @param FalseBBI - BBInfo to update for the false block.
+/// @returns - false if either block cannot be predicated or if both blocks end
+/// with a predicate-clobbering instruction.
+bool IfConverter::RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const {
+ bool BranchUnpredicable = true;
+ TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false;
+ ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable);
+ if (TrueBBI.IsUnpredicable)
+ return false;
+ ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable);
+ if (FalseBBI.IsUnpredicable)
+ return false;
+ if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
+ return false;
+ return true;
+}
+
+#ifndef NDEBUG
+static void verifySameBranchInstructions(
+ MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ const MachineBasicBlock::reverse_iterator B1 = MBB1->rend();
+ const MachineBasicBlock::reverse_iterator B2 = MBB2->rend();
+ MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin();
+ MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin();
+ while (E1 != B1 && E2 != B2) {
+ skipDebugInstructionsForward(E1, B1, false);
+ skipDebugInstructionsForward(E2, B2, false);
+ if (E1 == B1 && E2 == B2)
+ break;
+
+ if (E1 == B1) {
+ assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
+ break;
+ }
+ if (E2 == B2) {
+ assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
+ break;
+ }
+
+ if (E1->isBranch() || E2->isBranch())
+ assert(E1->isIdenticalTo(*E2) &&
+ "Branch mis-match, branch instructions don't match.");
+ else
+ break;
+ ++E1;
+ ++E2;
+ }
+}
+#endif
+
+/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a diamond if a common tail block is
+/// extracted.
+/// While not strictly a diamond, this pattern would form a diamond if
+/// tail-merging had merged the shared tails.
+/// EBB
+/// _/ \_
+/// | |
+/// TBB FBB
+/// / \ / \
+/// FalseBB TrueBB FalseBB
+/// Currently only handles analyzable branches.
+/// Specifically excludes actual diamonds to avoid overlap.
+bool IfConverter::ValidForkedDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
+ return false;
+ // Don't IfConvert blocks that can't be folded into their predecessor.
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // This function is specifically looking for conditional tails, as
+ // unconditional tails are already handled by the standard diamond case.
+ if (TrueBBI.BrCond.size() == 0 ||
+ FalseBBI.BrCond.size() == 0)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *TF = TrueBBI.FalseBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+ MachineBasicBlock *FF = FalseBBI.FalseBB;
+
+ if (!TT)
+ TT = getNextBlock(*TrueBBI.BB);
+ if (!TF)
+ TF = getNextBlock(*TrueBBI.BB);
+ if (!FT)
+ FT = getNextBlock(*FalseBBI.BB);
+ if (!FF)
+ FF = getNextBlock(*FalseBBI.BB);
+
+ if (!TT || !TF)
+ return false;
+
+ // Check successors. If they don't match, bail.
+ if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
+ return false;
+
+ bool FalseReversed = false;
+ if (TF == FT && TT == FF) {
+ // If the branches are opposing, but we can't reverse, don't do it.
+ if (!FalseBBI.IsBrReversible)
+ return false;
+ FalseReversed = true;
+ reverseBranchCondition(FalseBBI);
+ }
+ auto UnReverseOnExit = make_scope_exit([&]() {
+ if (FalseReversed)
+ reverseBranchCondition(FalseBBI);
+ });
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+  if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+                                   *TrueBBI.BB, *FalseBBI.BB,
+                                   /* SkipUnconditionalBranches */ true))
+ return false;
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
+ return true;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ // If the True and False BBs are equal we're dealing with a degenerate case
+ // that we don't treat as a diamond.
+ if (TrueBBI.BB == FalseBBI.BB)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(*TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(*FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB)
+ return false;
+
+ // Count duplicate instructions at the beginning and end of the true and
+ // false blocks.
+ // Skip unconditional branches only if we are considering an analyzable
+ // diamond. Otherwise the branches must be the same.
+ bool SkipUnconditionalBranches =
+ TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable;
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+  if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+                                   *TrueBBI.BB, *FalseBBI.BB,
+                                   SkipUnconditionalBranches))
+ return false;
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
+ return true;
+}
+
+/// AnalyzeBranches - Look at the branches at the end of a block to determine if
+/// the block is predicable.
+void IfConverter::AnalyzeBranches(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ BBI.TrueBB = BBI.FalseBB = nullptr;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ if (!BBI.IsBrAnalyzable) {
+ BBI.TrueBB = nullptr;
+ BBI.FalseBB = nullptr;
+ BBI.BrCond.clear();
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ BBI.IsBrReversible = (RevCond.size() == 0) ||
+ !TII->reverseBranchCondition(RevCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ }
+ }
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless that instruction is the last one
+/// in the block.
+void IfConverter::ScanInstructions(BBInfo &BBI,
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &End,
+ bool BranchUnpredicable) const {
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return;
+
+ bool AlreadyPredicated = !BBI.Predicate.empty();
+
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (MachineInstr &MI : make_range(Begin, End)) {
+ if (MI.isDebugInstr())
+ continue;
+
+ // It's unsafe to duplicate convergent instructions in this context, so set
+ // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the
+ // following CFG, which is subject to our "simple" transformation.
+ //
+ // BB0 // if (c1) goto BB1; else goto BB2;
+ // / \
+ // BB1 |
+ // | BB2 // if (c2) goto TBB; else goto FBB;
+ // | / |
+ // | / |
+ // TBB |
+ // | |
+ // | FBB
+ // |
+ // exit
+ //
+ // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd
+ // be unconditional, and in BB2, they'd be predicated upon c2), and suppose
+ // TBB contains a convergent instruction. This is safe iff doing so does
+ // not add a control-flow dependency to the convergent instruction -- i.e.,
+ // it's safe iff the set of control flows that leads us to the convergent
+ // instruction does not get smaller after the transformation.
+ //
+ // Originally we executed TBB if c1 || c2. After the transformation, there
+ // are two copies of TBB's instructions. We get to the first if c1, and we
+ // get to the second if !c1 && c2.
+ //
+ // There are clearly fewer ways to satisfy the condition "c1" than
+ // "c1 || c2". Since we've shrunk the set of control flows which lead to
+ // our convergent instruction, the transformation is unsafe.
+ if (MI.isNotDuplicable() || MI.isConvergent())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(MI);
+ bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
+
+ if (BranchUnpredicable && MI.isBranch()) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // A conditional branch is not predicable, but it may be eliminated.
+ if (isCondBr)
+ continue;
+
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = TII->getPredicationCost(MI);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // Predicate modification instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->ClobbersPredicate(MI, PredDefs, true))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(MI)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// Determine if the block is a suitable candidate to be predicated by the
+/// specified predicate.
+/// @param BBI BBInfo for the block to check
+/// @param Pred Predicate array for the branch that leads to BBI
+/// @param isTriangle true if the analysis is for a triangle
+/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
+/// case).
+/// @param hasCommonTail true if BBI shares a tail with a sibling block that
+/// contains any instruction that would make the block unpredicable.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch,
+ bool hasCommonTail) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ // Two blocks may share a common unpredicable tail, but this doesn't prevent
+ // them from being if-converted. The non-shared portion is assumed to have
+  // been checked.
+ if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))
+ return false;
+
+ // If it is already predicated but we couldn't analyze its terminator, the
+  // latter might fall through, but we can't determine where to.
+ // Conservatively avoid if-converting again.
+ if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+ return false;
+
+ // If it is already predicated, check if the new predicate subsumes
+ // its predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
+ return false;
+
+ if (!hasCommonTail && BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->reverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->reverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// Analyze the structure of the sub-CFG starting from the specified block.
+/// Record its successors and whether it looks like an if-conversion candidate.
+void IfConverter::AnalyzeBlock(
+ MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
+ struct BBState {
+ BBState(MachineBasicBlock &MBB) : MBB(&MBB) {}
+ MachineBasicBlock *MBB;
+
+ /// This flag is true if MBB's successors have been analyzed.
+ bool SuccsAnalyzed = false;
+ };
+
+ // Push MBB to the stack.
+ SmallVector<BBState, 16> BBStack(1, MBB);
+
+ while (!BBStack.empty()) {
+ BBState &State = BBStack.back();
+ MachineBasicBlock *BB = State.MBB;
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (!State.SuccsAnalyzed) {
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) {
+ BBStack.pop_back();
+ continue;
+ }
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ AnalyzeBranches(BBI);
+ MachineBasicBlock::iterator Begin = BBI.BB->begin();
+ MachineBasicBlock::iterator End = BBI.BB->end();
+ ScanInstructions(BBI, Begin, End);
+
+      // Unanalyzable, or ends with a fallthrough or an unconditional branch,
+      // or is no longer considered for ifcvt.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ // Push the False and True blocks to the stack.
+ State.SuccsAnalyzed = true;
+ BBStack.push_back(*BBI.FalseBB);
+ BBStack.push_back(*BBI.TrueBB);
+ continue;
+ }
+
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ SmallVector<MachineOperand, 4>
+ RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->reverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = !TrueBBI.Predicate.empty();
+ bool FNeedSub = !FalseBBI.Predicate.empty();
+ bool Enqueued = false;
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
+ if (CanRevCond) {
+ BBInfo TrueBBICalc, FalseBBICalc;
+ auto feasibleDiamond = [&](bool Forked) {
+ bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
+ Dups + Dups2, Prediction, Forked);
+ bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
+ /* IsTriangle */ false, /* RevCond */ false,
+ /* hasCommonTail */ true);
+ bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond,
+ /* IsTriangle */ false, /* RevCond */ false,
+ /* hasCommonTail */ true);
+ return MeetsSize && TrueFeasible && FalseFeasible;
+ };
+
+ if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond(false)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(std::make_unique<IfcvtToken>(
+ BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond(true)) {
+ // ForkedDiamond:
+ // if TBB and FBB have a common tail that includes their conditional
+ // branch instructions, then we can If Convert this pattern.
+ // EBB
+ // _/ \_
+ // | |
+ // TBB FBB
+ // / \ / \
+ // FalseBB TrueBB FalseBB
+ //
+ Tokens.push_back(std::make_unique<IfcvtToken>(
+ BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ }
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(
+ std::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(
+ std::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(
+ std::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(std::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
+ FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(
+ std::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(
+ std::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ }
+}
+
+/// Analyze all blocks and find entries for all if-conversion candidates.
+void IfConverter::AnalyzeBlocks(
+ MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
+ for (MachineBasicBlock &MBB : MF)
+ AnalyzeBlock(MBB, Tokens);
+
+ // Sort to favor more complex ifcvt scheme.
+ llvm::stable_sort(Tokens, IfcvtTokenCmp);
+}
+
+/// Returns true if MBB can fall through to ToMBB: any intervening blocks must
+/// be empty, and each block along the way (including MBB) must have the next
+/// one as a CFG successor.
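+/// For example, with the layout MBB, E1, E2, ToMBB this returns true when E1
+/// and E2 are empty and MBB->E1, E1->E2 and E2->ToMBB are all CFG edges.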
+static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) {
+ MachineFunction::iterator PI = MBB.getIterator();
+ MachineFunction::iterator I = std::next(PI);
+ MachineFunction::iterator TI = ToMBB.getIterator();
+ MachineFunction::iterator E = MBB.getParent()->end();
+ while (I != TI) {
+    // Check isSuccessor to avoid the case where the next block is empty, but
+    // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(&*I))
+ return false;
+ PI = I++;
+ }
+ // Finally see if the last I is indeed a successor to PI.
+ return PI->isSuccessor(&*I);
+}
+
+/// Invalidate predecessor BB info so it would be re-analyzed to determine if it
+/// can be if-converted. If predecessor is already enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *Predecessor : MBB.predecessors()) {
+ BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == &MBB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// Inserts an unconditional branch from \p MBB to \p ToMBB.
+static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl);
+}
+
+/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
+/// values defined in MI which are also live/used by MI.
+static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
+ const TargetRegisterInfo *TRI = MI.getMF()->getSubtarget().getRegisterInfo();
+
+ // Before stepping forward past MI, remember which regs were live
+ // before MI. This is needed to set the Undef flag only when reg is
+ // dead.
+ SparseSet<MCPhysReg, identity<MCPhysReg>> LiveBeforeMI;
+ LiveBeforeMI.setUniverse(TRI->getNumRegs());
+ for (unsigned Reg : Redefs)
+ LiveBeforeMI.insert(Reg);
+
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Clobbers;
+ Redefs.stepForward(MI, Clobbers);
+
+ // Now add the implicit uses for each of the clobbered values.
+ for (auto Clobber : Clobbers) {
+ // FIXME: Const cast here is nasty, but better than making StepForward
+ // take a mutable instruction instead of const.
+ unsigned Reg = Clobber.first;
+ MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second);
+ MachineInstr *OpMI = Op.getParent();
+ MachineInstrBuilder MIB(*OpMI->getMF(), OpMI);
+ if (Op.isRegMask()) {
+ // First handle regmasks. They clobber any entries in the mask which
+ // means that we need a def for those registers.
+ if (LiveBeforeMI.count(Reg))
+ MIB.addReg(Reg, RegState::Implicit);
+
+ // We also need to add an implicit def of this register for the later
+ // use to read from.
+ // For the register allocator to have allocated a register clobbered
+ // by the call which is used later, it must be the case that
+ // the call doesn't return.
+ MIB.addReg(Reg, RegState::Implicit | RegState::Define);
+ continue;
+ }
+ if (any_of(TRI->subregs_inclusive(Reg),
+ [&](MCPhysReg S) { return LiveBeforeMI.count(S); }))
+ MIB.addReg(Reg, RegState::Implicit);
+ }
+}
+
+/// If convert a simple (split, no rejoin) sub-CFG.
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ MachineBasicBlock &CvtMBB = *CvtBBI->BB;
+ MachineBasicBlock &NextMBB = *NextBBI->BB;
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (CvtMBB.hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
+ if (Kind == ICSimpleFalse)
+ if (TII->reverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ Redefs.init(*TRI);
+
+ if (MRI->tracksLiveness()) {
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.addLiveInsNoPristines(CvtMBB);
+ Redefs.addLiveInsNoPristines(NextMBB);
+ }
+
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
+ if (CvtMBB.pred_size() > 1) {
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+
+ // Keep the CFG updated.
+ BBI.BB->removeSuccessor(&CvtMBB, true);
+ } else {
+ // Predicate the instructions in the true block.
+ PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
+
+ // Merge converted block into entry block. The BB to Cvt edge is removed
+ // by MergeBlocks.
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(*BBI.BB, NextMBB)) {
+ InsertUncondBranch(*BBI.BB, NextMBB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, that will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(*BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// If convert a triangle sub-CFG.
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ MachineBasicBlock &CvtMBB = *CvtBBI->BB;
+ MachineBasicBlock &NextMBB = *NextBBI->BB;
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (CvtMBB.hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->reverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (reverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock *PBB : CvtMBB.predecessors()) {
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.init(*TRI);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveInsNoPristines(CvtMBB);
+ Redefs.addLiveInsNoPristines(NextMBB);
+ }
+
+ bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
+ BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
+
+ if (HasEarlyExit) {
+ // Get probabilities before modifying CvtMBB and BBI.BB.
+ CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB);
+ CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB);
+ BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB);
+ BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
+ }
+
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
+ if (CvtMBB.pred_size() > 1) {
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
+ PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
+
+ // Now merge the entry of the triangle with the true block.
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // Keep the CFG updated.
+ BBI.BB->removeSuccessor(&CvtMBB, true);
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->reverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ // Update the edge probability for both CvtBBI->FalseBB and NextBBI.
+ // NewNext = New_Prob(BBI.BB, NextMBB) =
+ // Prob(BBI.BB, NextMBB) +
+ // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB)
+ // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
+ // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB)
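+    // For example, if Prob(BBI.BB, NextMBB) = 0.3, Prob(BBI.BB, CvtMBB) = 0.7,
+    // Prob(CvtMBB, NextMBB) = 0.6 and Prob(CvtMBB, CvtBBI->FalseBB) = 0.4,
+    // then NewNext = 0.3 + 0.7 * 0.6 = 0.72 and NewFalse = 0.7 * 0.4 = 0.28,
+    // which still sum to one.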
+ auto NewTrueBB = getNextBlock(*BBI.BB);
+ auto NewNext = BBNext + BBCvt * CvtNext;
+ auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB);
+ if (NewTrueBBIter != BBI.BB->succ_end())
+ BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
+
+ auto NewFalse = BBCvt * CvtFalse;
+ TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
+ !NextMBB.hasAddressTaken()) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(*BBI.BB, NextMBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(*BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// Common code shared between diamond conversions.
+/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape.
+/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI
+/// and FalseBBI
+/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI
+/// and \p FalseBBI
+/// \p RemoveBranch - Remove the common branch of the two blocks before
+/// predicating. Only false for unanalyzable fallthrough
+/// cases. The caller will replace the branch if necessary.
+/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for
+/// unanalyzable fallthrough
+bool IfConverter::IfConvertDiamondCommon(
+ BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveBranch, bool MergeAddEdges) {
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
+ // Conservatively abort if-conversion if either BB has its address taken.
+ return false;
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->reverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TClobbersPred && !FClobbersPred)
+ DoSwap = true;
+ else if (!TClobbersPred && !FClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ } else if (TClobbersPred && FClobbersPred)
+ llvm_unreachable("Predicate info cannot be clobbered by both sides.");
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
+ MachineBasicBlock &MBB1 = *BBI1->BB;
+ MachineBasicBlock &MBB2 = *BBI2->BB;
+
+ // Initialize the Redefs:
+ // - BB2 live-in regs need implicit uses before being redefined by BB1
+ // instructions.
+ // - BB1 live-out regs need implicit uses before being redefined by BB2
+ // instructions. We start with BB1 live-ins so we have the live-out regs
+ // after tracking the BB1 instructions.
+ Redefs.init(*TRI);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveInsNoPristines(MBB1);
+ Redefs.addLiveInsNoPristines(MBB2);
+ }
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ // Skip dbg_value instructions.
+ MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(false);
+ MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(false);
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries. NumDups1 can include a "return"
+ // instruction, if it's not marked as "branch".
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (DI1 == MBB1.end())
+ break;
+ if (!DI1->isDebugInstr())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ // Since this instruction is going to be deleted, update call
+ // site info state if the instruction is call instruction.
+ if (DI2->shouldUpdateCallSiteInfo())
+ MBB2.getParent()->eraseCallSiteInfo(&*DI2);
+
+ ++DI2;
+ if (DI2 == MBB2.end())
+ break;
+ if (!DI2->isDebugInstr())
+ --NumDups1;
+ }
+
+ if (MRI->tracksLiveness()) {
+ for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Dummy;
+ Redefs.stepForward(MI, Dummy);
+ }
+ }
+
+ BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
+ MBB2.erase(MBB2.begin(), DI2);
+
+ // The branches have been checked to match, so it is safe to remove the
+ // branch in BB1 and rely on the copy in BB2. The complication is that
+ // the blocks may end with a return instruction, which may or may not
+ // be marked as "branch". If it's not, then it could be included in
+ // "dups1", leaving the blocks potentially empty after moving the common
+ // duplicates.
+#ifndef NDEBUG
+ // Unanalyzable branches must match exactly. Check that now.
+ if (!BBI1->IsBrAnalyzable)
+ verifySameBranchInstructions(&MBB1, &MBB2);
+#endif
+ // Remove duplicated instructions from the tail of MBB1: any branch
+ // instructions, and the common instructions counted by NumDups2.
+ DI1 = MBB1.end();
+ while (DI1 != MBB1.begin()) {
+ MachineBasicBlock::iterator Prev = std::prev(DI1);
+ if (!Prev->isBranch() && !Prev->isDebugInstr())
+ break;
+ DI1 = Prev;
+ }
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert(DI1 != MBB1.begin());
+
+ --DI1;
+
+ // Since this instruction is going to be deleted, update call
+ // site info state if the instruction is call instruction.
+ if (DI1->shouldUpdateCallSiteInfo())
+ MBB1.getParent()->eraseCallSiteInfo(&*DI1);
+
+ // skip dbg_value instructions
+ if (!DI1->isDebugInstr())
+ ++i;
+ }
+ MBB1.erase(DI1, MBB1.end());
+
+ DI2 = BBI2->BB->end();
+ // The branches have been checked to match. Skip over the branch in the false
+ // block so that we don't try to predicate it.
+ if (RemoveBranch)
+ BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB);
+ else {
+ // Make DI2 point to the end of the range where the common "tail"
+ // instructions could be found.
+ while (DI2 != MBB2.begin()) {
+ MachineBasicBlock::iterator Prev = std::prev(DI2);
+ if (!Prev->isBranch() && !Prev->isDebugInstr())
+ break;
+ DI2 = Prev;
+ }
+ }
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert(DI2 != MBB2.begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugInstr())
+ --NumDups2;
+ }
+
+ // Remember which registers would later be defined by the false block.
+ // This allows us not to predicate instructions in the true block that would
+ // later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<MCPhysReg, 4> RedefsByFalse;
+ SmallSet<MCPhysReg, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
+ for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
+ if (FI.isDebugInstr())
+ continue;
+ SmallVector<MCPhysReg, 4> Defs;
+ for (const MachineOperand &MO : FI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+          // These are defined before control flow reaches the 'false'
+          // instructions. They cannot be modified by the 'true' instructions.
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ ExtUses.insert(SubReg);
+ }
+ }
+
+ for (MCPhysReg Reg : Defs) {
+ if (!ExtUses.count(Reg)) {
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ RedefsByFalse.insert(SubReg);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse);
+
+ // After predicating BBI1, if there is a predicated terminator in BBI1 and
+ // a non-predicated in BBI2, then we don't want to predicate the one from
+ // BBI2. The reason is that if we merged these blocks, we would end up with
+ // two predicated terminators in the same block.
+ // Also, if the branches in MBB1 and MBB2 were non-analyzable, then don't
+ // predicate them either. They were checked to be identical, and so the
+ // same branch would happen regardless of which path was taken.
+ if (!MBB2.empty() && (DI2 == MBB2.end())) {
+ MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator();
+ MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator();
+ bool BB1Predicated = BBI1T != MBB1.end() && TII->isPredicated(*BBI1T);
+ bool BB2NonPredicated = BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T);
+ if (BB2NonPredicated && (BB1Predicated || !BBI2->IsBrAnalyzable))
+ --DI2;
+ }
+
+ // Predicate the 'false' block.
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, MergeAddEdges);
+ MergeBlocks(BBI, *BBI2, MergeAddEdges);
+ return true;
+}
+
+/// If convert an almost-diamond sub-CFG where the true
+/// and false blocks share a common tail.
+bool IfConverter::IfConvertForkedDiamond(
+ BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+
+ // Save the debug location for later.
+ DebugLoc dl;
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
+ if (TIE != TrueBBI.BB->end())
+ dl = TIE->getDebugLoc();
+ // Removing branches from both blocks is safe, because we have already
+ // determined that both blocks have the same branch instructions. The branch
+ // will be added back at the end, unpredicated.
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveBranch */ true, /* MergeAddEdges */ true))
+ return false;
+
+ // Add back the branch.
+ // Debug location saved above when removing the branch from BBI2
+ TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
+ TrueBBI.BrCond, dl);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(*BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// If convert a diamond sub-CFG.
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveBranch */ TrueBBI.IsBrAnalyzable,
+ /* MergeAddEdges */ TailBB == nullptr))
+ return false;
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+ if (TailBB) {
+ // We need to remove the edges to the true and false blocks manually since
+ // we didn't let IfConvertDiamondCommon update the CFG.
+ BBI.BB->removeSuccessor(TrueBBI.BB);
+ BBI.BB->removeSuccessor(FalseBBI.BB, true);
+
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough &&
+ !TailBBI.BB->hasAddressTaken();
+ // The if-converted block can still have a predicated terminator
+ // (e.g. a predicated return). If that is the case, we cannot merge
+ // it with the tail block.
+ MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();
+ if (TI != BBI.BB->end() && TII->isPredicated(*TI))
+ CanMergeTail = false;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
+ InsertUncondBranch(*BBI.BB, *TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(*BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+static bool MaySpeculate(const MachineInstr &MI,
+ SmallSet<MCPhysReg, 4> &LaterRedefs) {
+ bool SawStore = true;
+ if (!MI.isSafeToMove(nullptr, SawStore))
+ return false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
+/// Predicate instructions from the start of the block to the specified end with
+/// the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<MCPhysReg, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != nullptr;
+ for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
+ if (I.isDebugInstr() || TII->isPredicated(I))
+ continue;
+ // It may be possible not to predicate an instruction if it's the 'true'
+ // side of a diamond and the 'false' side may re-define the instruction's
+ // defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << I << "!\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(I, Redefs);
+ }
+
+ BBI.Predicate.append(Cond.begin(), Cond.end());
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
+}
+
+/// Copy and predicate instructions from source BB to the destination block.
+/// Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ MachineBasicBlock &FromMBB = *FromBBI.BB;
+ for (MachineInstr &I : FromMBB) {
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && I.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(&I);
+ // Make a copy of the call site info.
+ if (I.isCandidateForCallSiteEntry())
+ MF.copyCallSiteInfo(&I, MI);
+
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = TII->getPredicationCost(I);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugInstr()) {
+ if (!TII->PredicateInstruction(*MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << I << "!\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(*MI, Redefs);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(),
+ FromMBB.succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromMBB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+
+ for (MachineBasicBlock *Succ : Succs) {
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ ToBBI.Predicate.append(Cond.begin(), Cond.end());
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// Move all instructions from FromBB to the end of ToBB. This will leave
+/// FromBB as an empty block, so remove all of its successor edges and move it
+/// to the end of the function. If AddEdges is true, i.e., when FromBBI's
+/// branch is being moved, add those successor edges to ToBBI and remove the old
+/// edge from ToBBI to FromBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ MachineBasicBlock &FromMBB = *FromBBI.BB;
+ assert(!FromMBB.hasAddressTaken() &&
+ "Removing a BB whose address is taken!");
+
+ // If we're about to splice an INLINEASM_BR from FromBBI, we need to update
+ // ToBBI's successor list accordingly.
+ if (FromMBB.mayHaveInlineAsmBr())
+ for (MachineInstr &MI : FromMBB)
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ for (MachineOperand &MO : MI.operands())
+ if (MO.isMBB() && !ToBBI.BB->isSuccessor(MO.getMBB()))
+ ToBBI.BB->addSuccessor(MO.getMBB(), BranchProbability::getZero());
+
+ // In case FromMBB contains terminators (e.g. return instruction),
+ // first move the non-terminator instructions, then the terminators.
+ MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator();
+ MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
+ ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI);
+
+  // If FromBB has a non-predicated terminator we should copy it at the end.
+ if (FromTI != FromMBB.end() && !TII->isPredicated(*FromTI))
+ ToTI = ToBBI.BB->end();
+ ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end());
+
+ // Force normalizing the successors' probabilities of ToBBI.BB to convert all
+ // unknown probabilities into known ones.
+ // FIXME: This usage is too tricky and in the future we would like to
+ // eliminate all unknown probabilities in MBB.
+ if (ToBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
+
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.successors());
+ MachineBasicBlock *NBB = getNextBlock(FromMBB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+ // The edge probability from ToBBI.BB to FromMBB, which is only needed when
+ // AddEdges is true and FromMBB is a successor of ToBBI.BB.
+ auto To2FromProb = BranchProbability::getZero();
+ if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) {
+ // Remove the old edge but remember the edge probability so we can calculate
+ // the correct weights on the new edges being added further down.
+ To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB);
+ ToBBI.BB->removeSuccessor(&FromMBB);
+ }
+
+ for (MachineBasicBlock *Succ : FromSuccs) {
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough) {
+ FromMBB.removeSuccessor(Succ);
+ continue;
+ }
+
+ auto NewProb = BranchProbability::getZero();
+ if (AddEdges) {
+ // Calculate the edge probability for the edge from ToBBI.BB to Succ,
+ // which is a portion of the edge probability from FromMBB to Succ. The
+ // portion ratio is the edge probability from ToBBI.BB to FromMBB (if
+ // FromBBI is a successor of ToBBI.BB. See comment below for exception).
+ NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
+
+ // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
+ // only happens when if-converting a diamond CFG and FromMBB is the
+ // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we
+ // could just use the probabilities on FromMBB's out-edges when adding
+ // new successors.
+ if (!To2FromProb.isZero())
+ NewProb *= To2FromProb;
+ }
+
+ FromMBB.removeSuccessor(Succ);
+
+ if (AddEdges) {
+ // If the edge from ToBBI.BB to Succ already exists, update the
+ // probability of this edge by adding NewProb to it. An example is shown
+ // below, in which A is ToBBI.BB and B is FromMBB. In this case we
+ // don't have to set C as A's successor as it already is. We only need to
+ // update the edge probability on A->C. Note that B will not be
+ // immediately removed from A's successors. It is possible that B->D is
+ // not removed either if D is a fallthrough of B. Later the edge A->D
+ // (generated here) and B->D will be combined into one edge. To maintain
+ // correct edge probability of this combined edge, we need to set the edge
+ // probability of A->B to zero, which is already done above. The edge
+ // probability on A->D is calculated by scaling the original probability
+ // on A->B by the probability of B->D.
+ //
+ // Before ifcvt: After ifcvt (assume B->D is kept):
+ //
+ // A A
+ // /| /|\
+ // / B / B|
+ // | /| | ||
+ // |/ | | |/
+ // C D C D
+ //
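+      // For example, if Prob(A->B) = 0.4 and Prob(B->D) = 0.5, the new edge
+      // A->D gets probability 0.4 * 0.5 = 0.2, matching the original chance
+      // of reaching D through B.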
+ if (ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->setSuccProbability(
+ find(ToBBI.BB->successors(), Succ),
+ MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
+ else
+ ToBBI.BB->addSuccessor(Succ, NewProb);
+ }
+ }
+
+ // Move the now empty FromMBB out of the way to the end of the function so
+ // it doesn't interfere with fallthrough checks done by canFallThroughTo().
+ MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin();
+ if (Last != &FromMBB)
+ FromMBB.moveAfter(Last);
+
+ // Normalize the probabilities of ToBBI.BB's successors with all adjustment
+ // we've done above.
+ if (ToBBI.IsBrAnalyzable && FromBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
+
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
+
+FunctionPass *
+llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) {
+ return new IfConverter(std::move(Ftor));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
new file mode 100644
index 000000000000..b2a7aad73411
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -0,0 +1,818 @@
+//===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass turns explicit null checks of the form
+//
+// test %r10, %r10
+// je throw_npe
+// movl (%r10), %esi
+// ...
+//
+// to
+//
+// faulting_load_op("movl (%r10), %esi", throw_npe)
+// ...
+//
+// With the help of a runtime that understands the .fault_maps section,
+// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
+// a page fault.
+// Store and LoadStore are also supported.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/FaultMaps.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+
+using namespace llvm;
+
+static cl::opt<int> PageSize("imp-null-check-page-size",
+ cl::desc("The page size of the target in bytes"),
+ cl::init(4096), cl::Hidden);
+
+static cl::opt<unsigned> MaxInstsToConsider(
+ "imp-null-max-insts-to-consider",
+ cl::desc("The max number of instructions to consider hoisting loads over "
+ "(the algorithm is quadratic over this number)"),
+ cl::Hidden, cl::init(8));
+
+#define DEBUG_TYPE "implicit-null-checks"
+
+STATISTIC(NumImplicitNullChecks,
+ "Number of explicit null checks made implicit");
+
+namespace {
+
+class ImplicitNullChecks : public MachineFunctionPass {
+ /// Return true if \c computeDependence can process \p MI.
+ static bool canHandle(const MachineInstr *MI);
+
+ /// Helper function for \c computeDependence. Return true if \p A
+ /// and \p B do not have any dependences between them, and can be
+ /// re-ordered without changing program semantics.
+ bool canReorder(const MachineInstr *A, const MachineInstr *B);
+
+ /// A data type for representing the result computed by \c
+ /// computeDependence. States whether it is okay to reorder the
+ /// instruction passed to \c computeDependence with at most one
+ /// dependency.
+ struct DependenceResult {
+ /// Can we actually re-order \p MI with \p Insts (see \c
+ /// computeDependence).
+ bool CanReorder;
+
+ /// If not std::nullopt, an instruction in \p Insts that must also be
+ /// hoisted.
+ std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
+
+ /*implicit*/ DependenceResult(
+ bool CanReorder,
+ std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
+ : CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
+ assert((!PotentialDependence || CanReorder) &&
+ "!CanReorder && PotentialDependence.hasValue() not allowed!");
+ }
+ };
+
+ /// Compute a result for the following question: can \p MI be
+ /// re-ordered from after \p Insts to before them.
+ ///
+ /// \c canHandle should return true for all instructions in \p
+ /// Insts.
+ DependenceResult computeDependence(const MachineInstr *MI,
+ ArrayRef<MachineInstr *> Block);
+
+ /// Represents one null check that can be made implicit.
+ class NullCheck {
+ // The memory operation the null check can be folded into.
+ MachineInstr *MemOperation;
+
+ // The instruction actually doing the null check (Ptr != 0).
+ MachineInstr *CheckOperation;
+
+ // The block the check resides in.
+ MachineBasicBlock *CheckBlock;
+
+ // The block branched to if the pointer is non-null.
+ MachineBasicBlock *NotNullSucc;
+
+ // The block branched to if the pointer is null.
+ MachineBasicBlock *NullSucc;
+
+ // If this is non-null, then MemOperation has a dependency on this
+ // instruction, which needs to be hoisted to execute before MemOperation.
+ MachineInstr *OnlyDependency;
+
+ public:
+ explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
+ MachineBasicBlock *checkBlock,
+ MachineBasicBlock *notNullSucc,
+ MachineBasicBlock *nullSucc,
+ MachineInstr *onlyDependency)
+ : MemOperation(memOperation), CheckOperation(checkOperation),
+ CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc),
+ OnlyDependency(onlyDependency) {}
+
+ MachineInstr *getMemOperation() const { return MemOperation; }
+
+ MachineInstr *getCheckOperation() const { return CheckOperation; }
+
+ MachineBasicBlock *getCheckBlock() const { return CheckBlock; }
+
+ MachineBasicBlock *getNotNullSucc() const { return NotNullSucc; }
+
+ MachineBasicBlock *getNullSucc() const { return NullSucc; }
+
+ MachineInstr *getOnlyDependency() const { return OnlyDependency; }
+ };
+
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MachineFrameInfo *MFI = nullptr;
+
+ bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
+ SmallVectorImpl<NullCheck> &NullCheckList);
+ MachineInstr *insertFaultingInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB);
+ void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
+
+ enum AliasResult {
+ AR_NoAlias,
+ AR_MayAlias,
+ AR_WillAliasEverything
+ };
+
+ /// Returns AR_NoAlias if \p MI memory operation does not alias with
+ /// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
+ /// they may alias and any further memory operation may alias with \p PrevMI.
+ AliasResult areMemoryOpsAliased(const MachineInstr &MI,
+ const MachineInstr *PrevMI) const;
+
+ enum SuitabilityResult {
+ SR_Suitable,
+ SR_Unsuitable,
+ SR_Impossible
+ };
+
+ /// Return SR_Suitable if \p MI is a memory operation that can be used to
+ /// implicitly null check the value in \p PointerReg, SR_Unsuitable if
+ /// \p MI cannot be used for the null check, and SR_Impossible if there is
+ /// no point in continuing the search because no later instruction will be
+ /// usable either. \p PrevInsts is the set of instructions seen since
+ /// the explicit null check on \p PointerReg.
+ SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI,
+ unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts);
+
+ /// Returns true if \p DependenceMI would clobber any live-ins of the NullSucc
+ /// block if it were hoisted to the null-check block. This is used by the
+ /// caller, canHoistInst, to decide whether DependenceMI can be hoisted safely.
+ bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI,
+ MachineBasicBlock *NullSucc);
+
+ /// Return true if \p FaultingMI can be hoisted from after the
+ /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
+ /// non-null value if we also need to (and legally can) hoist a dependency.
+ bool canHoistInst(MachineInstr *FaultingMI,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+
+public:
+ static char ID;
+
+ ImplicitNullChecks() : MachineFunctionPass(ID) {
+ initializeImplicitNullChecksPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
+} // end anonymous namespace
+
+bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
+ if (MI->isCall() || MI->mayRaiseFPException() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+ auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
+ (void)IsRegMask;
+
+ assert(llvm::none_of(MI->operands(), IsRegMask) &&
+ "Calls were filtered out above!");
+
+ auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); };
+ return llvm::all_of(MI->memoperands(), IsUnordered);
+}
+
+ImplicitNullChecks::DependenceResult
+ImplicitNullChecks::computeDependence(const MachineInstr *MI,
+ ArrayRef<MachineInstr *> Block) {
+ assert(llvm::all_of(Block, canHandle) && "Check this first!");
+ assert(!is_contained(Block, MI) && "Block must be exclusive of MI!");
+
+ std::optional<ArrayRef<MachineInstr *>::iterator> Dep;
+
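+ // Scan the whole block: MI can be reordered past it only if it conflicts
+ // with at most one instruction, which is then recorded as the potential
+ // dependence.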
+ for (auto I = Block.begin(), E = Block.end(); I != E; ++I) {
+ if (canReorder(*I, MI))
+ continue;
+
+ if (Dep == std::nullopt) {
+ // Found one possible dependency, keep track of it.
+ Dep = I;
+ } else {
+ // We found two dependencies, so bail out.
+ return {false, std::nullopt};
+ }
+ }
+
+ return {true, Dep};
+}
+
+bool ImplicitNullChecks::canReorder(const MachineInstr *A,
+ const MachineInstr *B) {
+ assert(canHandle(A) && canHandle(B) && "Precondition!");
+
+ // canHandle makes sure that we _can_ correctly analyze the dependencies
+ // between A and B here -- for instance, we should not be dealing with heap
+ // load-store dependencies here.
+
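+ // Two instructions can be swapped as long as no register written by one
+ // overlaps a register read or written by the other.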
+ for (const auto &MOA : A->operands()) {
+ if (!(MOA.isReg() && MOA.getReg()))
+ continue;
+
+ Register RegA = MOA.getReg();
+ for (const auto &MOB : B->operands()) {
+ if (!(MOB.isReg() && MOB.getReg()))
+ continue;
+
+ Register RegB = MOB.getReg();
+
+ if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getRegInfo().getTargetRegisterInfo();
+ MFI = &MF.getFrameInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ SmallVector<NullCheck, 16> NullCheckList;
+
+ for (auto &MBB : MF)
+ analyzeBlockForNullChecks(MBB, NullCheckList);
+
+ if (!NullCheckList.empty())
+ rewriteNullChecks(NullCheckList);
+
+ return !NullCheckList.empty();
+}
+
+// Return true if any register aliasing \p Reg is live-in to \p MBB.
+static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
+ MachineBasicBlock *MBB, unsigned Reg) {
+ for (MCRegAliasIterator AR(Reg, TRI, /*IncludeSelf*/ true); AR.isValid();
+ ++AR)
+ if (MBB->isLiveIn(*AR))
+ return true;
+ return false;
+}
+
+ImplicitNullChecks::AliasResult
+ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
+ const MachineInstr *PrevMI) const {
+ // If it is not memory access, skip the check.
+ if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
+ return AR_NoAlias;
+ // Load-Load may alias
+ if (!(MI.mayStore() || PrevMI->mayStore()))
+ return AR_NoAlias;
+ // We lost the memory operand info, so conservatively assume aliasing. If the
+ // access was a store, then there is no point in continuing, because we won't
+ // be able to check against it further.
+ if (MI.memoperands_empty())
+ return MI.mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+ if (PrevMI->memoperands_empty())
+ return PrevMI->mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+
+ for (MachineMemOperand *MMO1 : MI.memoperands()) {
+ // MMO1 should have a value because it comes from the operation we'd like to
+ // use as the implicit null check.
+ assert(MMO1->getValue() && "MMO1 should have a Value!");
+ for (MachineMemOperand *MMO2 : PrevMI->memoperands()) {
+ if (const PseudoSourceValue *PSV = MMO2->getPseudoValue()) {
+ if (PSV->mayAlias(MFI))
+ return AR_MayAlias;
+ continue;
+ }
+ if (!AA->isNoAlias(
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())))
+ return AR_MayAlias;
+ }
+ }
+ return AR_NoAlias;
+}
+
+ImplicitNullChecks::SuitabilityResult
+ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
+ unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts) {
+ // Implementation restriction for faulting_op insertion
+ // TODO: This could be relaxed if we find a test case which warrants it.
+ if (MI.getDesc().getNumDefs() > 1)
+ return SR_Unsuitable;
+
+ if (!MI.mayLoadOrStore() || MI.isPredicable())
+ return SR_Unsuitable;
+ auto AM = TII->getAddrModeFromMemoryOp(MI, TRI);
+ if (!AM)
+ return SR_Unsuitable;
+ auto AddrMode = *AM;
+ const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg;
+ int64_t Displacement = AddrMode.Displacement;
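+ // From here on the address is reasoned about in the canonical form
+ // BaseReg + Scale * ScaledReg + Displacement.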
+
+ // We need the base (or scaled index) of the memory instruction to be the
+ // same as the register on which the null check is performed (i.e. PointerReg).
+ if (BaseReg != PointerReg && ScaledReg != PointerReg)
+ return SR_Unsuitable;
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ unsigned PointerRegSizeInBits = TRI->getRegSizeInBits(PointerReg, MRI);
+ // Bail out if the sizes of BaseReg, ScaledReg and PointerReg are not the
+ // same.
+ if ((BaseReg &&
+ TRI->getRegSizeInBits(BaseReg, MRI) != PointerRegSizeInBits) ||
+ (ScaledReg &&
+ TRI->getRegSizeInBits(ScaledReg, MRI) != PointerRegSizeInBits))
+ return SR_Unsuitable;
+
+ // Returns true if RegUsedInAddr holds a constant that can be folded into the
+ // displacement for this addressing mode; if so, Displacement is updated.
+ auto CalculateDisplacementFromAddrMode = [&](Register RegUsedInAddr,
+ int64_t Multiplier) {
+ // The register can be NoRegister, which is defined as zero for all targets.
+ // Consider instruction of interest as `movq 8(,%rdi,8), %rax`. Here the
+ // ScaledReg is %rdi, while there is no BaseReg.
+ if (!RegUsedInAddr)
+ return false;
+ assert(Multiplier && "expected to be non-zero!");
+ MachineInstr *ModifyingMI = nullptr;
+ for (auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MI));
+ It != MI.getParent()->rend(); It++) {
+ const MachineInstr *CurrMI = &*It;
+ if (CurrMI->modifiesRegister(RegUsedInAddr, TRI)) {
+ ModifyingMI = const_cast<MachineInstr *>(CurrMI);
+ break;
+ }
+ }
+ if (!ModifyingMI)
+ return false;
+ // Check for the constant value defined in the register by ModifyingMI. This
+ // means all previous values for that register have been invalidated.
+ int64_t ImmVal;
+ if (!TII->getConstValDefinedInReg(*ModifyingMI, RegUsedInAddr, ImmVal))
+ return false;
+ // Calculate the reg size in bits, since this is needed for bailing out in
+ // case of overflow.
+ int32_t RegSizeInBits = TRI->getRegSizeInBits(RegUsedInAddr, MRI);
+ APInt ImmValC(RegSizeInBits, ImmVal, true /*IsSigned*/);
+ APInt MultiplierC(RegSizeInBits, Multiplier);
+ assert(MultiplierC.isStrictlyPositive() &&
+ "expected to be a positive value!");
+ bool IsOverflow;
+ // Sign of the product depends on the sign of the ImmVal, since Multiplier
+ // is always positive.
+ APInt Product = ImmValC.smul_ov(MultiplierC, IsOverflow);
+ if (IsOverflow)
+ return false;
+ APInt DisplacementC(64, Displacement, true /*isSigned*/);
+ DisplacementC = Product.sadd_ov(DisplacementC, IsOverflow);
+ if (IsOverflow)
+ return false;
+
+ // We only handle displacements up to 64 bits wide.
+ if (DisplacementC.getActiveBits() > 64)
+ return false;
+ Displacement = DisplacementC.getSExtValue();
+ return true;
+ };
+
+ // If a register used in the address is constant, fold its effect into the
+ // displacement for ease of analysis.
+ bool BaseRegIsConstVal = false, ScaledRegIsConstVal = false;
+ if (CalculateDisplacementFromAddrMode(BaseReg, 1))
+ BaseRegIsConstVal = true;
+ if (CalculateDisplacementFromAddrMode(ScaledReg, AddrMode.Scale))
+ ScaledRegIsConstVal = true;
+
+ // Any register in the address other than the null-checked one must have been
+ // folded into the Displacement above; otherwise we do not know whether the
+ // displacement contains symbolic values.
+ // This matters because we do not want to incorrectly assume that the load
+ // falls in the zeroth faulting page in the "sane offset check" below.
+ if ((BaseReg && BaseReg != PointerReg && !BaseRegIsConstVal) ||
+ (ScaledReg && ScaledReg != PointerReg && !ScaledRegIsConstVal))
+ return SR_Unsuitable;
+
+ // We want the mem access to be issued at a sane offset from PointerReg,
+ // so that if PointerReg is null then the access reliably page faults.
+ if (!(-PageSize < Displacement && Displacement < PageSize))
+ return SR_Unsuitable;
+
+ // Finally, check whether the current memory access aliases with previous ones.
+ for (auto *PrevMI : PrevInsts) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ return SR_Unsuitable;
+ }
+ return SR_Suitable;
+}
+
+bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns(
+ MachineInstr *DependenceMI, MachineBasicBlock *NullSucc) {
+ for (const auto &DependenceMO : DependenceMI->operands()) {
+ if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+ continue;
+
+ // Make sure that we won't clobber any live ins to the sibling block by
+ // hoisting Dependency. For instance, we can't hoist INST to before the
+ // null check (even if it safe, and does not violate any dependencies in
+ // the non_null_block) if %rdx is live in to _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx = INST
+ // ...
+ //
+ // This restriction does not apply to the faulting load itself: if the
+ // pointer being loaded from is in the null page, the load does not
+ // semantically execute and so does not affect machine state. That is, if the
+ // load was loading into %rax and it faults, the value of %rax should stay the
+ // same as it would have been had the load not executed and we'd have
+ // branched to NullSucc directly.
+ if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+ return true;
+
+ }
+
+ // The dependence does not clobber live-ins in NullSucc block.
+ return false;
+}
+
+bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc,
+ MachineInstr *&Dependence) {
+ auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
+ if (!DepResult.CanReorder)
+ return false;
+
+ if (!DepResult.PotentialDependence) {
+ Dependence = nullptr;
+ return true;
+ }
+
+ auto DependenceItr = *DepResult.PotentialDependence;
+ auto *DependenceMI = *DependenceItr;
+
+ // We don't want to reason about speculating loads. Note -- at this point
+ // we should have already filtered out all of the other non-speculatable
+ // things, like calls and stores.
+ // We also do not want to hoist stores, because doing so could change memory
+ // even though the FaultingMI may still fault.
+ assert(canHandle(DependenceMI) && "Should never have reached here!");
+ if (DependenceMI->mayLoadOrStore())
+ return false;
+
+ if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc))
+ return false;
+
+ auto DepDepResult =
+ computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
+
+ if (!DepDepResult.CanReorder || DepDepResult.PotentialDependence)
+ return false;
+
+ Dependence = DependenceMI;
+ return true;
+}
+
+/// Analyze MBB to check if its terminating branch can be turned into an
+/// implicit null check. If yes, append a description of the said null check to
+/// NullCheckList and return true, else return false.
+bool ImplicitNullChecks::analyzeBlockForNullChecks(
+ MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
+ using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
+
+ MDNode *BranchMD = nullptr;
+ if (auto *BB = MBB.getBasicBlock())
+ BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit);
+
+ if (!BranchMD)
+ return false;
+
+ MachineBranchPredicate MBP;
+
+ if (TII->analyzeBranchPredicate(MBB, MBP, true))
+ return false;
+
+ // Is the predicate comparing an integer to zero?
+ if (!(MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+ (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+ MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
+ return false;
+
+ // If there is a separate condition-generating instruction, we choose not to
+ // transform unless we can remove both the condition and the consuming branch.
+ if (MBP.ConditionDef && !MBP.SingleUseCondition)
+ return false;
+
+ MachineBasicBlock *NotNullSucc, *NullSucc;
+
+ if (MBP.Predicate == MachineBranchPredicate::PRED_NE) {
+ NotNullSucc = MBP.TrueDest;
+ NullSucc = MBP.FalseDest;
+ } else {
+ NotNullSucc = MBP.FalseDest;
+ NullSucc = MBP.TrueDest;
+ }
+
+ // We handle the simplest case for now. We can potentially do better by using
+ // the machine dominator tree.
+ if (NotNullSucc->pred_size() != 1)
+ return false;
+
+ const Register PointerReg = MBP.LHS.getReg();
+
+ if (MBP.ConditionDef) {
+ // To prevent the invalid transformation of the following code:
+ //
+ // mov %rax, %rcx
+ // test %rax, %rax
+ // %rax = ...
+ // je throw_npe
+ // mov(%rcx), %r9
+ // mov(%rax), %r10
+ //
+ // into:
+ //
+ // mov %rax, %rcx
+ // %rax = ....
+ // faulting_load_op("movl (%rax), %r10", throw_npe)
+ // mov(%rcx), %r9
+ //
+ // we must ensure that there are no instructions between the 'test' and
+ // conditional jump that modify %rax.
+ assert(MBP.ConditionDef->getParent() == &MBB &&
+ "Should be in basic block");
+
+ for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
+ if (I->modifiesRegister(PointerReg, TRI))
+ return false;
+ }
+ // Starting with a code fragment like:
+ //
+ // test %rax, %rax
+ // jne LblNotNull
+ //
+ // LblNull:
+ // callq throw_NullPointerException
+ //
+ // LblNotNull:
+ // Inst0
+ // Inst1
+ // ...
+ // Def = Load (%rax + <offset>)
+ // ...
+ //
+ //
+ // we want to end up with
+ //
+ // Def = FaultingLoad (%rax + <offset>), LblNull
+ // jmp LblNotNull ;; explicit or fallthrough
+ //
+ // LblNotNull:
+ // Inst0
+ // Inst1
+ // ...
+ //
+ // LblNull:
+ // callq throw_NullPointerException
+ //
+ //
+ // To see why this is legal, consider the two possibilities:
+ //
+ // 1. %rax is null: since we constrain <offset> to be less than PageSize, the
+ // load instruction dereferences the null page, causing a segmentation
+ // fault.
+ //
+ // 2. %rax is not null: in this case we know that the load cannot fault, as
+ // otherwise the load would've faulted in the original program too and the
+ // original program would've been undefined.
+ //
+ // This reasoning cannot be extended to justify hoisting through arbitrary
+ // control flow. For instance, in the example below (in pseudo-C)
+ //
+ // if (ptr == null) { throw_npe(); unreachable; }
+ // if (some_cond) { return 42; }
+ // v = ptr->field; // LD
+ // ...
+ //
+ // we cannot (without code duplication) use the load marked "LD" to null check
+ // ptr -- clause (2) above does not apply in this case. In the above program
+ // the safety of ptr->field can be dependent on some_cond; and, for instance,
+ // ptr could be some non-null invalid reference that never gets loaded from
+ // because some_cond is always true.
+
+ SmallVector<MachineInstr *, 8> InstsSeenSoFar;
+
+ for (auto &MI : *NotNullSucc) {
+ if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider)
+ return false;
+
+ MachineInstr *Dependence;
+ SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar);
+ if (SR == SR_Impossible)
+ return false;
+ if (SR == SR_Suitable &&
+ canHoistInst(&MI, InstsSeenSoFar, NullSucc, Dependence)) {
+ NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc, Dependence);
+ return true;
+ }
+
+ // If MI re-defines the PointerReg in a way that changes the value of
+ // PointerReg if it was null, then we cannot move further.
+ if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI))
+ return false;
+ InstsSeenSoFar.push_back(&MI);
+ }
+
+ return false;
+}
+
+/// Wrap a machine instruction, MI, into a FAULTING machine instruction.
+/// The FAULTING instruction does the same load/store as MI
+/// (defining the same register), and branches to HandlerMBB if the mem access
+/// faults. The FAULTING instruction is inserted at the end of MBB.
+MachineInstr *ImplicitNullChecks::insertFaultingInstr(
+ MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *HandlerMBB) {
+ const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
+ // all targets.
+
+ DebugLoc DL;
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ assert(NumDefs <= 1 && "other cases unhandled!");
+
+ unsigned DefReg = NoRegister;
+ if (NumDefs != 0) {
+ DefReg = MI->getOperand(0).getReg();
+ assert(NumDefs == 1 && "expected exactly one def!");
+ }
+
+ FaultMaps::FaultKind FK;
+ if (MI->mayLoad())
+ FK =
+ MI->mayStore() ? FaultMaps::FaultingLoadStore : FaultMaps::FaultingLoad;
+ else
+ FK = FaultMaps::FaultingStore;
+
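+ // The FAULTING_OP pseudo carries the fault kind, the handler block and the
+ // original opcode as immediates; the original instruction's remaining
+ // operands (everything except its explicit def) are appended below with
+ // their kill and dead flags cleared.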
+ auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_OP), DefReg)
+ .addImm(FK)
+ .addMBB(HandlerMBB)
+ .addImm(MI->getOpcode());
+
+ for (auto &MO : MI->uses()) {
+ if (MO.isReg()) {
+ MachineOperand NewMO = MO;
+ if (MO.isUse()) {
+ NewMO.setIsKill(false);
+ } else {
+ assert(MO.isDef() && "Expected def or use");
+ NewMO.setIsDead(false);
+ }
+ MIB.add(NewMO);
+ } else {
+ MIB.add(MO);
+ }
+ }
+
+ MIB.setMemRefs(MI->memoperands());
+
+ return MIB;
+}
+
+/// Rewrite the null checks in NullCheckList into implicit null checks.
+void ImplicitNullChecks::rewriteNullChecks(
+ ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) {
+ DebugLoc DL;
+
+ for (const auto &NC : NullCheckList) {
+ // Remove the conditional branch dependent on the null check.
+ unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock());
+ (void)BranchesRemoved;
+ assert(BranchesRemoved > 0 && "expected at least one branch!");
+
+ if (auto *DepMI = NC.getOnlyDependency()) {
+ DepMI->removeFromParent();
+ NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
+ }
+
+ // Insert a faulting instruction where the conditional branch was
+ // originally. The check done earlier ensures that this bit of code motion
+ // is legal. We do not touch the successor list of any basic block
+ // since we haven't changed control flow; we've just made it implicit.
+ MachineInstr *FaultingInstr = insertFaultingInstr(
+ NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
+ // The values defined by MemOperation, if any, are now defined in the check
+ // block and are therefore live-in to MemOperation's original block. The
+ // original operation may define implicit-defs alongside the main value.
+ MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
+ for (const MachineOperand &MO : FaultingInstr->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg || MBB->isLiveIn(Reg))
+ continue;
+ MBB->addLiveIn(Reg);
+ }
+
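+ // If a dependency was hoisted into the check block, its defs are now live
+ // across the edge into the not-null successor, so record them as live-ins
+ // there.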
+ if (auto *DepMI = NC.getOnlyDependency()) {
+ for (auto &MO : DepMI->all_defs()) {
+ if (!MO.getReg() || MO.isDead())
+ continue;
+ if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
+ NC.getNotNullSucc()->addLiveIn(MO.getReg());
+ }
+ }
+
+ NC.getMemOperation()->eraseFromParent();
+ if (auto *CheckOp = NC.getCheckOperation())
+ CheckOp->eraseFromParent();
+
+ // Insert an *unconditional* branch to the not-null successor; we expect
+ // block placement to remove fallthroughs later.
+ TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
+ /*Cond=*/std::nullopt, DL);
+
+ NumImplicitNullChecks++;
+ }
+}
+
+char ImplicitNullChecks::ID = 0;
+
+char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
+
+INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE,
+ "Implicit null checks", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE,
+ "Implicit null checks", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
new file mode 100644
index 000000000000..012892166ae7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -0,0 +1,270 @@
+//===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Implements an expansion pass to turn `indirectbr` instructions in the IR
+/// into `switch` instructions. This works by enumerating the basic blocks in
+/// a dense range of integers, replacing each `blockaddr` constant with the
+/// corresponding integer constant, and then building a switch that maps from
+/// the integers to the actual blocks. All of the indirectbr instructions in the
+/// function are redirected to this common switch.
+///
+/// While this is generically useful if a target is unable to codegen
+/// `indirectbr` natively, it is primarily useful when there is some desire to
+/// get the builtin non-jump-table lowering of a switch even when the input
+/// source contained an explicit indirect branch construct.
+///
+/// Note that it doesn't make any sense to enable this pass unless a target also
+/// disables jump-table lowering of switches. Doing that is likely to pessimize
+/// the code.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <optional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "indirectbr-expand"
+
+namespace {
+
+class IndirectBrExpandPass : public FunctionPass {
+ const TargetLowering *TLI = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ IndirectBrExpandPass() : FunctionPass(ID) {
+ initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+} // end anonymous namespace
+
+char IndirectBrExpandPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(IndirectBrExpandPass, DEBUG_TYPE,
+ "Expand indirectbr instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(IndirectBrExpandPass, DEBUG_TYPE,
+ "Expand indirectbr instructions", false, false)
+
+FunctionPass *llvm::createIndirectBrExpandPass() {
+ return new IndirectBrExpandPass();
+}
+
+bool IndirectBrExpandPass::runOnFunction(Function &F) {
+ auto &DL = F.getParent()->getDataLayout();
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ auto &STI = *TM.getSubtargetImpl(F);
+ if (!STI.enableIndirectBrExpand())
+ return false;
+ TLI = STI.getTargetLowering();
+
+ std::optional<DomTreeUpdater> DTU;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
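+ // CFG edge insertions and deletions are queued through the lazy updater and
+ // applied in batches via applyUpdates once the new edges are known.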
+
+ SmallVector<IndirectBrInst *, 1> IndirectBrs;
+
+ // Set of all potential successors for indirectbr instructions.
+ SmallPtrSet<BasicBlock *, 4> IndirectBrSuccs;
+
+ // Build a list of indirectbrs that we want to rewrite.
+ for (BasicBlock &BB : F)
+ if (auto *IBr = dyn_cast<IndirectBrInst>(BB.getTerminator())) {
+ // Handle the degenerate case of an indirectbr with no successors by
+ // replacing it with unreachable.
+ if (IBr->getNumSuccessors() == 0) {
+ (void)new UnreachableInst(F.getContext(), IBr);
+ IBr->eraseFromParent();
+ continue;
+ }
+
+ IndirectBrs.push_back(IBr);
+ for (BasicBlock *SuccBB : IBr->successors())
+ IndirectBrSuccs.insert(SuccBB);
+ }
+
+ if (IndirectBrs.empty())
+ return false;
+
+ // If we need to replace any indirectbrs we need to establish integer
+ // constants that will correspond to each of the basic blocks in the function
+ // whose address escapes. We do that here and rewrite all the blockaddress
+ // constants to just be those integer constants cast to a pointer type.
+ SmallVector<BasicBlock *, 4> BBs;
+
+ for (BasicBlock &BB : F) {
+ // Skip blocks that aren't successors to an indirectbr we're going to
+ // rewrite.
+ if (!IndirectBrSuccs.count(&BB))
+ continue;
+
+ auto IsBlockAddressUse = [&](const Use &U) {
+ return isa<BlockAddress>(U.getUser());
+ };
+ auto BlockAddressUseIt = llvm::find_if(BB.uses(), IsBlockAddressUse);
+ if (BlockAddressUseIt == BB.use_end())
+ continue;
+
+ assert(std::find_if(std::next(BlockAddressUseIt), BB.use_end(),
+ IsBlockAddressUse) == BB.use_end() &&
+ "There should only ever be a single blockaddress use because it is "
+ "a constant and should be uniqued.");
+
+ auto *BA = cast<BlockAddress>(BlockAddressUseIt->getUser());
+
+ // Skip if the constant was formed but ended up not being used (due to DCE
+ // or whatever).
+ if (!BA->isConstantUsed())
+ continue;
+
+ // Compute the index we want to use for this basic block. We can't use zero
+ // because null can be compared with block addresses.
+ int BBIndex = BBs.size() + 1;
+ BBs.push_back(&BB);
+
+ auto *ITy = cast<IntegerType>(DL.getIntPtrType(BA->getType()));
+ ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex);
+
+ // Now rewrite the blockaddress to an integer constant based on the index.
+ // FIXME: This part doesn't properly recognize other uses of blockaddress
+ // expressions, for instance, where they are used to pass labels to
+ // asm-goto. This part of the pass needs a rework.
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType()));
+ }
+
+ if (BBs.empty()) {
+ // There are no blocks whose address is taken, so no indirectbr instruction
+ // can get a valid input and we can replace them all with unreachable.
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ if (DTU)
+ Updates.reserve(IndirectBrSuccs.size());
+ for (auto *IBr : IndirectBrs) {
+ if (DTU) {
+ for (BasicBlock *SuccBB : IBr->successors())
+ Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
+ }
+ (void)new UnreachableInst(F.getContext(), IBr);
+ IBr->eraseFromParent();
+ }
+ if (DTU) {
+ assert(Updates.size() == IndirectBrSuccs.size() &&
+ "Got unexpected update count.");
+ DTU->applyUpdates(Updates);
+ }
+ return true;
+ }
+
+ BasicBlock *SwitchBB;
+ Value *SwitchValue;
+
+ // Compute a common integer type across all the indirectbr instructions.
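+ // The widest such type is used so that none of the pointer casts created by
+ // GetSwitchValue below loses bits.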
+ IntegerType *CommonITy = nullptr;
+ for (auto *IBr : IndirectBrs) {
+ auto *ITy =
+ cast<IntegerType>(DL.getIntPtrType(IBr->getAddress()->getType()));
+ if (!CommonITy || ITy->getBitWidth() > CommonITy->getBitWidth())
+ CommonITy = ITy;
+ }
+
+ auto GetSwitchValue = [CommonITy](IndirectBrInst *IBr) {
+ return CastInst::CreatePointerCast(
+ IBr->getAddress(), CommonITy,
+ Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr);
+ };
+
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+
+ if (IndirectBrs.size() == 1) {
+ // If we only have one indirectbr, we can just directly replace it within
+ // its block.
+ IndirectBrInst *IBr = IndirectBrs[0];
+ SwitchBB = IBr->getParent();
+ SwitchValue = GetSwitchValue(IBr);
+ if (DTU) {
+ Updates.reserve(IndirectBrSuccs.size());
+ for (BasicBlock *SuccBB : IBr->successors())
+ Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
+ assert(Updates.size() == IndirectBrSuccs.size() &&
+ "Got unexpected update count.");
+ }
+ IBr->eraseFromParent();
+ } else {
+ // Otherwise we need to create a new block to hold the switch across BBs,
+ // jump to that block instead of each indirectbr, and phi together the
+ // values for the switch.
+ SwitchBB = BasicBlock::Create(F.getContext(), "switch_bb", &F);
+ auto *SwitchPN = PHINode::Create(CommonITy, IndirectBrs.size(),
+ "switch_value_phi", SwitchBB);
+ SwitchValue = SwitchPN;
+
+ // Now replace the indirectbr instructions with direct branches to the
+ // switch block and fill out the PHI operands.
+ if (DTU)
+ Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size());
+ for (auto *IBr : IndirectBrs) {
+ SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent());
+ BranchInst::Create(SwitchBB, IBr);
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB});
+ for (BasicBlock *SuccBB : IBr->successors())
+ Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
+ }
+ IBr->eraseFromParent();
+ }
+ }
+
+ // Now build the switch in the block. The block does not have a terminator
+ // yet.
+ auto *SI = SwitchInst::Create(SwitchValue, BBs[0], BBs.size(), SwitchBB);
+
+ // Add a case for each block.
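+ // BBs[0] is the switch's default destination and needs no explicit case;
+ // every other block BBs[i] was assigned the index i + 1 when its
+ // blockaddress was rewritten above.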
+ for (int i : llvm::seq<int>(1, BBs.size()))
+ SI->addCase(ConstantInt::get(CommonITy, i + 1), BBs[i]);
+
+ if (DTU) {
+ // If there were multiple indirectbr's, they may have common successors,
+ // but in the dominator tree, we only track unique edges.
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessors;
+ Updates.reserve(Updates.size() + BBs.size());
+ for (BasicBlock *BB : BBs) {
+ if (UniqueSuccessors.insert(BB).second)
+ Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ }
+ DTU->applyUpdates(Updates);
+ }
+
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 000000000000..277c6be418c5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,1691 @@
+//===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of spilled snippets");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumSpillsRemoved, "Number of spills removed");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumReloadsRemoved, "Number of reloads removed");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+
+static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
+ cl::desc("Disable inline spill hoisting"));
+static cl::opt<bool>
+RestrictStatepointRemat("restrict-statepoint-remat",
+ cl::init(false), cl::Hidden,
+ cl::desc("Restrict remat for statepoint operands"));
+
+namespace {
+
+class HoistSpillHelper : private LiveRangeEdit::Delegate {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ InsertPointAnalysis IPA;
+
+ // Map from StackSlot to the LiveInterval of the original register.
+ // Note the LiveInterval of the original register may have been deleted
+ // after it is spilled. We keep a copy here to track the range where
+ // spills can be moved.
+ DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI;
+
+ // Map from pair of (StackSlot and Original VNI) to a set of spills which
+ // have the same stackslot and have equal values defined by Original VNI.
+ // These spills are mergeable and are hoist candidates.
+ using MergeableSpillsMap =
+ MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>;
+ MergeableSpillsMap MergeableSpills;
+
+ /// This is the map from original register to a set containing all its
+ /// siblings. To hoist a spill to another BB, we need to find a live
+ /// sibling there and use it as the source of the new spill.
+ DenseMap<Register, SmallSetVector<Register, 16>> Virt2SiblingsMap;
+
+ bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
+ MachineBasicBlock &BB, Register &LiveReg);
+
+ void rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI,
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
+
+public:
+ HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ IPA(LIS, mf.getNumBlockIDs()) {}
+
+ void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+ unsigned Original);
+ bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot);
+ void hoistAllSpills();
+ void LRE_DidCloneVirtReg(Register, Register) override;
+};
+
+class InlineSpiller : public Spiller {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveRangeEdit *Edit = nullptr;
+ LiveInterval *StackInt = nullptr;
+ int StackSlot;
+ Register Original;
+
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<Register, 8> RegsToSpill;
+
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ // For bundled copies, this will only include the first header copy.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ // Object that records spill information and performs the hoisting.
+ HoistSpillHelper HSpiller;
+
+ // Live range weight calculator.
+ VirtRegAuxInfo &VRAI;
+
+ ~InlineSpiller() override = default;
+
+public:
+ InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI)
+ : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
+ LSS(Pass.getAnalysis<LiveStacks>()),
+ MDT(Pass.getAnalysis<MachineDominatorTree>()),
+ Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
+ MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
+ TRI(*MF.getSubtarget().getRegisterInfo()),
+ MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ HSpiller(Pass, MF, VRM), VRAI(VRAI) {}
+
+ void spill(LiveRangeEdit &) override;
+ void postOptimization() override;
+
+private:
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
+
+ bool isRegToSpill(Register Reg) { return is_contained(RegsToSpill, Reg); }
+
+ bool isSibling(Register Reg);
+ bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI);
+ bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
+ void reMaterializeAll();
+
+ bool coalesceStackAccess(MachineInstr *MI, Register Reg);
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
+ MachineInstr *LoadMI = nullptr);
+ void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI);
+ void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(Register Reg);
+ void spillAll();
+};
+
+} // end anonymous namespace
+
+Spiller::~Spiller() = default;
+
+void Spiller::anchor() {}
+
+Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
+ MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI) {
+ return new InlineSpiller(Pass, MF, VRM, VRAI);
+}
+
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance for
+// spill slots which can be important in tight loops.
+
+/// If MI is a COPY to or from Reg, return the other register, otherwise return
+/// 0.
+static Register isCopyOf(const MachineInstr &MI, Register Reg) {
+ assert(!MI.isBundled());
+ if (!MI.isCopy())
+ return Register();
+
+ const MachineOperand &DstOp = MI.getOperand(0);
+ const MachineOperand &SrcOp = MI.getOperand(1);
+
+ // TODO: Probably only worth allowing subreg copies with undef dests.
+ if (DstOp.getSubReg() != SrcOp.getSubReg())
+ return Register();
+ if (DstOp.getReg() == Reg)
+ return SrcOp.getReg();
+ if (SrcOp.getReg() == Reg)
+ return DstOp.getReg();
+ return Register();
+}
+
+/// Check for a copy bundle as formed by SplitKit.
+static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg) {
+ if (!FirstMI.isBundled())
+ return isCopyOf(FirstMI, Reg);
+
+ assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() &&
+ "expected to see first instruction in bundle");
+
+ Register SnipReg;
+ MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator();
+ while (I->isBundledWithSucc()) {
+ const MachineInstr &MI = *I;
+ if (!MI.isCopy())
+ return Register();
+
+ const MachineOperand &DstOp = MI.getOperand(0);
+ const MachineOperand &SrcOp = MI.getOperand(1);
+ if (DstOp.getReg() == Reg) {
+ if (!SnipReg)
+ SnipReg = SrcOp.getReg();
+ else if (SnipReg != SrcOp.getReg())
+ return Register();
+ } else if (SrcOp.getReg() == Reg) {
+ if (!SnipReg)
+ SnipReg = DstOp.getReg();
+ else if (SnipReg != DstOp.getReg())
+ return Register();
+ }
+
+ ++I;
+ }
+
+ return Register();
+}
+
+static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
+ for (const MachineOperand &MO : MI.all_defs())
+ if (MO.getReg().isVirtual())
+ LIS.getInterval(MO.getReg());
+}
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ Register Reg = Edit->getReg();
+
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills.
+ // An exception is made for statepoint instructions, which will fold fills
+ // into their operands.
+ // We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %snip = STATEPOINT %snip in var arg area
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (!LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ // The number of defs should not exceed 2, not counting defs coming from
+ // statepoint instructions.
+ unsigned NumValNums = SnipLI.getNumValNums();
+ for (auto *VNI : SnipLI.vnis()) {
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT)
+ --NumValNums;
+ }
+ if (NumValNums > 2)
+ return false;
+
+ MachineInstr *UseMI = nullptr;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_bundle_nodbg_iterator
+ RI = MRI.reg_bundle_nodbg_begin(SnipLI.reg()),
+ E = MRI.reg_bundle_nodbg_end();
+ RI != E;) {
+ MachineInstr &MI = *RI++;
+
+ // Allow copies to/from Reg.
+ if (isCopyOfBundle(MI, Reg))
+ continue;
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ if (StatepointOpers::isFoldableReg(&MI, SnipLI.reg()))
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && &MI != UseMI)
+ return false;
+ UseMI = &MI;
+ }
+ return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ Register Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
+ Register SnipReg = isCopyOfBundle(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(&MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+bool InlineSpiller::isSibling(Register Reg) {
+ return Reg.isVirtual() && VRM.getOriginal(Reg) == Original;
+}
+
+/// It is beneficial to spill to an earlier place in the same BB in cases such
+/// as the following:
+/// There is an alternative def earlier in the same MBB.
+/// Hoist the spill as far as possible in SpillMBB. This can ease
+/// register pressure:
+///
+/// x = def
+/// y = use x
+/// s = copy x
+///
+/// Hoisting the spill of s to immediately after the def removes the
+/// interference between x and y:
+///
+/// x = def
+/// spill x
+/// y = use killed x
+///
+/// This hoist only helps when the copy kills its source.
+///
+bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
+ MachineInstr &CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+#ifndef NDEBUG
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
+#endif
+
+ Register SrcReg = CopyMI.getOperand(1).getReg();
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
+ LiveQueryResult SrcQ = SrcLI.Query(Idx);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
+ if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
+ return false;
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // We are going to spill SrcVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SrcLI, SrcVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SrcVNI->isPHIDef())
+ MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
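+ // MIS brackets the instructions inserted by storeRegToStackSlot so they can
+ // be registered with LiveIntervals and scanned for new virtual register defs
+ // below.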
+ MachineInstrSpan MIS(MII, MBB);
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
+ MRI.getRegClass(SrcReg), &TRI, Register());
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
+ for (const MachineInstr &MI : make_range(MIS.begin(), MII))
+ getVDefInterval(MI, LIS);
+ --MII; // Point to store instruction.
+ LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+
+ // If only one store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, two instructions are required to store, so
+ // we disable the merge for this case.
+ if (MIS.begin() == MII)
+ HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
+ ++NumSpills;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ std::tie(LI, VNI) = WorkList.pop_back_val();
+ Register Reg = LI->reg();
+ LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@'
+ << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
+ if (!MI.isCopy() && !MI.mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (Register DstReg = isCopyOfBundle(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
+
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI.setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(&MI);
+ ++NumSpillsRemoved;
+ if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
+ --NumSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. Sees through snippet copies.
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ std::tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI).second)
+ continue;
+
+ if (VNI->isPHIDef()) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock *P : MBB->predecessors()) {
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P));
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
+ MachineInstr &MI) {
+ if (!RestrictStatepointRemat)
+ return true;
+ // Here's a quick explanation of the problem we're trying to handle here:
+ // * There are some pseudo instructions with more vreg uses than there are
+ // physical registers on the machine.
+ // * This is normally handled by spilling the vreg, and folding the reload
+ // into the user instruction. (Thus decreasing the number of used vregs
+ // until the remainder can be assigned to physregs.)
+ // * However, since we may try to spill vregs in any order, we can end up
+ // trying to spill each operand to the instruction, and then rematting it
+ // instead. When that happens, the new live intervals (for the remats) are
+ // expected to be trivially assignable (i.e. RS_Done). However, since we
+ // may have more remats than physregs, we're guaranteed to fail to assign
+ // one.
+ // At the moment, we only handle this for STATEPOINTs since they're the only
+ // pseudo op where we've seen this. If we start seeing other instructions
+ // with the same problem, we need to revisit this.
+ if (MI.getOpcode() != TargetOpcode::STATEPOINT)
+ return true;
+  // For STATEPOINTs we allow re-materialization for fixed arguments only,
+  // hoping that the number of physical registers is enough to cover all fixed
+  // arguments. If that is not the case, we need to revisit this.
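+  // Scan the variable (non-fixed) operands of the statepoint; if VReg is used
+  // there, rematerialization is not allowed.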
+ for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
+ EndIdx = MI.getNumOperands();
+ Idx < EndIdx; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (MO.isReg() && MO.getReg() == VReg)
+ return false;
+ }
+ return true;
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
+ // Analyze instruction
+ SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops);
+
+ if (!RI.Reads)
+ return false;
+
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
+
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (MachineOperand &MO : MI.all_uses())
+ if (MO.getReg() == VirtReg.reg())
+ MO.setIsUndef();
+ LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(&MI))
+ return false;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ LiveRangeEdit::Remat RM(ParentVNI);
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
+ return false;
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // If we can't guarantee that we'll be able to actually assign the new vreg,
+ // we can't remat.
+ if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+ return false;
+ }
+
+ // Allocate a new register for the remat.
+ Register NewVReg = Edit->createFrom(Original);
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx =
+ Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
+
+ // We take the DebugLoc from MI, since OrigMI may be attributed to a
+ // different source location.
+ auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
+ NewMI->setDebugLoc(MI.getDebugLoc());
+
+ (void)DefIdx;
+ LLVM_DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) {
+ MO.setReg(NewVReg);
+ MO.setIsKill();
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');
+
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ if (!Edit->anyRematerializable())
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (Register Reg : RegsToSpill) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
+ // Debug values are not allowed to affect codegen.
+ if (MI.isDebugValue())
+ continue;
+
+ assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
+ "instruction that isn't a DBG_VALUE");
+
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (Register Reg : RegsToSpill) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (VNInfo *VNI : LI.vnis()) {
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+
+  // LiveRangeEdit::eliminateDeadDef is used to remove dead defining
+  // instructions after rematerialization. To remove a VNI for a vreg from its
+  // LiveInterval, LiveIntervals::removeVRegDefAt is used. However, after all
+  // non-PHI VNIs are removed, PHI VNIs are still left in the LiveInterval.
+  // So to get rid of an unused register, we need to check whether it has any
+  // non-dbg references rather than whether it has a non-empty interval.
+ unsigned ResultPos = 0;
+ for (Register Reg : RegsToSpill) {
+ if (MRI.reg_nodbg_empty(Reg)) {
+ Edit->eraseVirtReg(Reg);
+ continue;
+ }
+
+ assert(LIS.hasInterval(Reg) &&
+ (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) &&
+ "Empty and not used live-range?!");
+
+ RegsToSpill[ResultPos++] = Reg;
+ }
+ RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
+ LLVM_DEBUG(dbgs() << RegsToSpill.size()
+ << " registers to spill after remat.\n");
+}
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// If MI is a load or store of StackSlot, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, Register Reg) {
+ int FI = 0;
+ Register InstrReg = TII.isLoadFromStackSlot(*MI, FI);
+ bool IsLoad = InstrReg;
+ if (!IsLoad)
+ InstrReg = TII.isStoreToStackSlot(*MI, FI);
+
+ // We have a stack access. Is it the right register and slot?
+ if (InstrReg != Reg || FI != StackSlot)
+ return false;
+
+ if (!IsLoad)
+ HSpiller.rmFromMergeableSpills(*MI, StackSlot);
+
+ LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+
+ if (IsLoad) {
+ ++NumReloadsRemoved;
+ --NumReloads;
+ } else {
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+
+ return true;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+// Dump the range of instructions from B to E with their slot indexes.
+static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
+ MachineBasicBlock::iterator E,
+ LiveIntervals const &LIS,
+ const char *const header,
+ Register VReg = Register()) {
+ char NextLine = '\n';
+ char SlotIndent = '\t';
+
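+  // When dumping a single instruction, keep the header and the instruction on
+  // one line.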
+ if (std::next(B) == E) {
+ NextLine = ' ';
+ SlotIndent = ' ';
+ }
+
+ dbgs() << '\t' << header << ": " << NextLine;
+
+ for (MachineBasicBlock::iterator I = B; I != E; ++I) {
+ SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot();
+
+ // If a register was passed in and this instruction has it as a
+ // destination that is marked as an early clobber, print the
+ // early-clobber slot index.
+ if (VReg) {
+ MachineOperand *MO = I->findRegisterDefOperand(VReg);
+ if (MO && MO->isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+
+ dbgs() << SlotIndent << Idx << '\t' << *I;
+ }
+}
+#endif
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from AnalyzeVirtRegInBundle().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
+ bool WasCopy = MI->isCopy();
+ Register ImpReg;
+
+ // TII::foldMemoryOperand will do what we need here for statepoint
+ // (fold load into use and remove corresponding def). We will replace
+ // uses of removed def with loads (spillAroundUses).
+ // For that to work we need to untie def and use to pass it through
+ // foldMemoryOperand and signal foldPatchpoint that it is allowed to
+ // fold them.
+ bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT;
+
+ // Spill subregs if the target allows it.
+ // We always want to spill subregs for stackmap/patchpoint pseudos.
+ bool SpillSubRegs = TII.isSubregFoldable() ||
+ MI->getOpcode() == TargetOpcode::STATEPOINT ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI->getOpcode() == TargetOpcode::STACKMAP;
+
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (const auto &OpPair : Ops) {
+ unsigned Idx = OpPair.second;
+ assert(MI == OpPair.first && "Instruction conflict during operand folding");
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ // No point restoring an undef read, and we'll produce an invalid live
+ // interval.
+ // TODO: Is this really the correct way to handle undef tied uses?
+ if (MO.isUse() && !MO.readsReg() && !MO.isTied())
+ continue;
+
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
+ continue;
+ }
+
+ if (!SpillSubRegs && MO.getSubReg())
+ return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (UntieRegs || !MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ // If we only have implicit uses, we won't be able to fold that.
+ // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try!
+ if (FoldOps.empty())
+ return false;
+
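+  // Track the span of instructions around MI so that any instructions created
+  // by the fold can be added to the slot index maps afterwards.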
+ MachineInstrSpan MIS(MI, MI->getParent());
+
+ SmallVector<std::pair<unsigned, unsigned> > TiedOps;
+ if (UntieRegs)
+ for (unsigned Idx : FoldOps) {
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (!MO.isTied())
+ continue;
+ unsigned Tied = MI->findTiedOperandIdx(Idx);
+ if (MO.isUse())
+ TiedOps.emplace_back(Tied, Idx);
+ else {
+ assert(MO.isDef() && "Tied to not use and def?");
+ TiedOps.emplace_back(Idx, Tied);
+ }
+ MI->untieRegOperand(Idx);
+ }
+
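+  // Try the actual fold: against the load instruction if one was provided,
+  // otherwise against the stack slot.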
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
+ : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
+ if (!FoldMI) {
+ // Re-tie operands.
+ for (auto Tied : TiedOps)
+ MI->tieOperands(Tied.first, Tied.second);
+ return false;
+ }
+
+ // Remove LIS for any dead defs in the original MI not in FoldMI.
+ for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ Register Reg = MO->getReg();
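+    // Only physical, non-reserved registers are of interest here.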
+ if (!Reg || Reg.isVirtual() || MRI.isReserved(Reg)) {
+ continue;
+ }
+ // Skip non-Defs, including undef uses and internal reads.
+ if (MO->isUse())
+ continue;
+ PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI);
+ if (RI.FullyDefined)
+ continue;
+ // FoldMI does not define this physreg. Remove the LI segment.
+ assert(MO->isDead() && "Cannot fold physreg def");
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+ LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
+ }
+
+ int FI;
+ if (TII.isStoreToStackSlot(*MI, FI) &&
+ HSpiller.rmFromMergeableSpills(*MI, FI))
+ --NumSpills;
+ LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
+ // Update the call site info.
+ if (MI->isCandidateForCallSiteEntry())
+ MI->getMF()->moveCallSiteInfo(MI, FoldMI);
+
+ // If we've folded a store into an instruction labelled with debug-info,
+ // record a substitution from the old operand to the memory operand. Handle
+ // the simple common case where operand 0 is the one being folded, plus when
+ // the destination operand is also a tied def. More values could be
+ // substituted / preserved with more analysis.
+ if (MI->peekDebugInstrNum() && Ops[0].second == 0) {
+ // Helper lambda.
+ auto MakeSubstitution = [this,FoldMI,MI,&Ops]() {
+      // Substitute old operand zero with the new instruction's memory operand.
+ unsigned OldOperandNum = Ops[0].second;
+ unsigned NewNum = FoldMI->getDebugInstrNum();
+ unsigned OldNum = MI->getDebugInstrNum();
+ MF.makeDebugValueSubstitution({OldNum, OldOperandNum},
+ {NewNum, MachineFunction::DebugOperandMemNumber});
+ };
+
+ const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
+ if (Ops.size() == 1 && Op0.isDef()) {
+ MakeSubstitution();
+ } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
+ Op0.getReg() == MI->getOperand(1).getReg()) {
+ MakeSubstitution();
+ }
+ } else if (MI->peekDebugInstrNum()) {
+ // This is a debug-labelled instruction, but the operand being folded isn't
+ // at operand zero. Most likely this means it's a load being folded in.
+ // Substitute any register defs from operand zero up to the one being
+ // folded -- past that point, we don't know what the new operand indexes
+ // will be.
+ MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second);
+ }
+
+ MI->eraseFromParent();
+
+ // Insert any new instructions other than FoldMI into the LIS maps.
+ assert(!MIS.empty() && "Unexpected empty span of instructions!");
+ for (MachineInstr &MI : MIS)
+ if (&MI != FoldMI)
+ LIS.InsertMachineInstrInMaps(MI);
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->removeOperand(i - 1);
+ }
+
+ LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
+ "folded"));
+
+ if (!WasCopy)
+ ++NumFolded;
+ else if (Ops.front().second == 0) {
+ ++NumSpills;
+    // If only one store instruction is required for the spill, add it to the
+    // mergeable list. On X86 AMX, two instructions are required to store, so
+    // we disable merging for that case.
+ if (std::distance(MIS.begin(), MIS.end()) <= 1)
+ HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
+ } else
+ ++NumReloads;
+ return true;
+}
+
+void InlineSpiller::insertReload(Register NewVReg,
+ SlotIndex Idx,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ MachineInstrSpan MIS(MI, &MBB);
+ TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI, Register());
+
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
+
+ LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
+ NewVReg));
+ ++NumReloads;
+}
+
+/// Check if \p Def fully defines a VReg with an undefined value.
+/// If that's the case, that means the value of VReg is actually
+/// not relevant.
+static bool isRealSpill(const MachineInstr &Def) {
+ if (!Def.isImplicitDef())
+ return true;
+ assert(Def.getNumOperands() == 1 &&
+ "Implicit def with more than one definition");
+ // We can say that the VReg defined by Def is undef, only if it is
+ // fully defined by Def. Otherwise, some of the lanes may not be
+ // undef and the value of the VReg matters.
+ return Def.getOperand(0).getSubReg();
+}
+
+/// insertSpill - Insert a spill of NewVReg after MI.
+void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
+ MachineBasicBlock::iterator MI) {
+  // Spills are not terminators, so inserting a spill after a terminator would
+  // violate invariants in the MachineVerifier.
+ assert(!MI->isTerminator() && "Inserting a spill after a terminator");
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ MachineInstrSpan MIS(MI, &MBB);
+ MachineBasicBlock::iterator SpillBefore = std::next(MI);
+ bool IsRealSpill = isRealSpill(*MI);
+
+ if (IsRealSpill)
+ TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI, Register());
+ else
+ // Don't spill undef value.
+ // Anything works for undef, in particular keeping the memory
+ // uninitialized is a viable option and it saves code size and
+ // run time.
+ BuildMI(MBB, SpillBefore, MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
+ .addReg(NewVReg, getKillRegState(isKill));
+
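+  // Register the newly inserted instruction(s) with the slot index maps and
+  // compute live intervals for any virtual registers they define.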
+ MachineBasicBlock::iterator Spill = std::next(MI);
+ LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end());
+ for (const MachineInstr &MI : make_range(Spill, MIS.end()))
+ getVDefInterval(MI, LIS);
+
+ LLVM_DEBUG(
+ dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill"));
+ ++NumSpills;
+  // If only one store instruction is required for the spill, add it to the
+  // mergeable list. On X86 AMX, two instructions are required to store, so
+  // we disable merging for that case.
+ if (IsRealSpill && std::distance(Spill, MIS.end()) <= 1)
+ HSpiller.addToMergeableSpills(*Spill, StackSlot, Original);
+}
+
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(Register Reg) {
+ LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
+
+ // Iterate over instructions using Reg.
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
+ // Debug values are not allowed to affect codegen.
+ if (MI.isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ MachineBasicBlock *MBB = MI.getParent();
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI);
+ buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);
+ MBB->erase(MI);
+ continue;
+ }
+
+ assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
+ "instruction that isn't a DBG_VALUE");
+
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(&MI))
+ continue;
+
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(&MI, Reg))
+ continue;
+
+ // Analyze instruction.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ Register SibReg = isCopyOfBundle(MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI);
+ SnippetCopies.insert(&MI);
+ continue;
+ }
+ if (RI.Writes) {
+ if (hoistSpillInsideBB(OldLI, MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI.getOperand(0).setIsDead();
+ DeadDefs.push_back(&MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(Ops))
+ continue;
+
+ // Create a new virtual register for spill/fill.
+ // FIXME: Infer regclass from instruction alone.
+ Register NewVReg = Edit->createFrom(Reg);
+
+ if (RI.Reads)
+ insertReload(NewVReg, Idx, &MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
+ MO.setReg(NewVReg);
+ if (MO.isUse()) {
+ if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n');
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (RI.Writes)
+ if (hasLiveDef)
+ insertSpill(NewVReg, true, &MI);
+ }
+}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
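+    // The new stack interval gets a single value number; the live ranges of
+    // all spilled registers are merged into that value below.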
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (Register Reg : RegsToSpill)
+ StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
+ StackInt->getValNumInfo(0));
+ LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (Register Reg : RegsToSpill)
+ spillAroundUses(Reg);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (Register Reg : RegsToSpill) {
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
+ assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ LIS.getSlotIndexes()->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (Register Reg : RegsToSpill)
+ Edit->eraseVirtReg(Reg);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!Register::isStackSlot(edit.getReg()) &&
+ "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = nullptr;
+
+ LLVM_DEBUG(dbgs() << "Inline spilling "
+ << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
+ << ':' << edit.getParent() << "\nFrom original "
+ << printReg(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, VRAI);
+}
+
+/// Optimizations after all the reg selections and spills are done.
+void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
+
+/// When a spill is inserted, add the spill to MergeableSpills map.
+void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+ unsigned Original) {
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+  // Save a copy of the LiveInterval in StackSlotToOrigLI because the original
+  // LiveInterval may be cleared after all its references are spilled.
+ if (!StackSlotToOrigLI.contains(StackSlot)) {
+ auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight());
+ LI->assign(OrigLI, Allocator);
+ StackSlotToOrigLI[StackSlot] = std::move(LI);
+ }
+ SlotIndex Idx = LIS.getInstructionIndex(Spill);
+ VNInfo *OrigVNI = StackSlotToOrigLI[StackSlot]->getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ MergeableSpills[MIdx].insert(&Spill);
+}
+
+/// When a spill is removed, remove the spill from MergeableSpills map.
+/// Return true if the spill is removed successfully.
+bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
+ int StackSlot) {
+ auto It = StackSlotToOrigLI.find(StackSlot);
+ if (It == StackSlotToOrigLI.end())
+ return false;
+ SlotIndex Idx = LIS.getInstructionIndex(Spill);
+ VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ return MergeableSpills[MIdx].erase(&Spill);
+}
+
/// Check BB to see if it is a possible target BB to place a hoisted spill,
/// i.e., there should be a live sibling of OrigReg at the insert point.
+bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
+ MachineBasicBlock &BB, Register &LiveReg) {
+ SlotIndex Idx = IPA.getLastInsertPoint(OrigLI, BB);
+  // The original def could be after the last insert point in the root block;
+  // in that case we can't hoist to here.
+ if (Idx < OrigVNI.def) {
+    // TODO: We could do better here. If LI is not live in the landing pad,
+    // we could hoist the spill after the LIP.
+ LLVM_DEBUG(dbgs() << "can't spill in root block - def after LIP\n");
+ return false;
+ }
+ Register OrigReg = OrigLI.reg();
+ SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg];
+ assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");
+
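+  // Any sibling of OrigReg that is live at the insert point can provide the
+  // value to be stored by the hoisted spill.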
+ for (const Register &SibReg : Siblings) {
+ LiveInterval &LI = LIS.getInterval(SibReg);
+ VNInfo *VNI = LI.getVNInfoAt(Idx);
+ if (VNI) {
+ LiveReg = SibReg;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Remove redundant spills in the same BB. Save those redundant spills in
+/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
+void HoistSpillHelper::rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+  // For each spill seen, check SpillBBToSpill[] to see if its BB already
+  // contains another spill. If a BB contains more than one spill, only keep
+  // the earlier spill, i.e. the one with the smaller SlotIndex.
+ for (auto *const CurrentSpill : Spills) {
+ MachineBasicBlock *Block = CurrentSpill->getParent();
+ MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
+ MachineInstr *PrevSpill = SpillBBToSpill[Node];
+ if (PrevSpill) {
+ SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
+ SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
+ MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
+ MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
+ SpillsToRm.push_back(SpillToRm);
+ SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep;
+ } else {
+ SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
+ }
+ }
+ for (auto *const SpillToRm : SpillsToRm)
+ Spills.erase(SpillToRm);
+}
+
+/// Starting from \p Root find a top-down traversal order of the dominator
+/// tree to visit all basic blocks containing the elements of \p Spills.
+/// Redundant spills will be found and put into \p SpillsToRm at the same
+/// time. \p SpillBBToSpill will be populated as part of the process and
+/// maps a basic block to the first store occurring in the basic block.
+/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
+void HoistSpillHelper::getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // The set contains all the possible BB nodes to which we may hoist
+ // original spills.
+ SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
+ // Save the BB nodes on the path from the first BB node containing
+ // non-redundant spill to the Root node.
+ SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
+  // All the spills to be hoisted must originate from a single def instruction
+  // of OrigReg, which means the def instruction dominates all the spills to be
+  // hoisted. We choose the BB where the def instruction is located as the
+  // Root.
+ MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
+  // For every dominator tree node containing a spill, walk up the dominator
+  // tree towards the Root node until it is reached. If another node containing
+  // a spill is found along the path, the spill seen earlier is redundant and
+  // the node containing it will be removed. All the nodes on the path from the
+  // first node with a non-redundant spill to the Root node are added to the
+  // WorkSet, which after the loop below contains all the possible locations
+  // where spills may be hoisted to.
+ for (auto *const Spill : Spills) {
+ MachineBasicBlock *Block = Spill->getParent();
+ MachineDomTreeNode *Node = MDT[Block];
+ MachineInstr *SpillToRm = nullptr;
+ while (Node != RootIDomNode) {
+ // If Node dominates Block, and it already contains a spill, the spill in
+ // Block will be redundant.
+ if (Node != MDT[Block] && SpillBBToSpill[Node]) {
+ SpillToRm = SpillBBToSpill[MDT[Block]];
+ break;
+        // If the Node is already in the WorkSet, the path from it to the Root
+        // node has already been traversed by another spill, so there is no
+        // need to repeat the walk.
+ } else if (WorkSet.count(Node)) {
+ break;
+ } else {
+ NodesOnPath.insert(Node);
+ }
+ Node = Node->getIDom();
+ }
+ if (SpillToRm) {
+ SpillsToRm.push_back(SpillToRm);
+ } else {
+      // Add the BB containing the original spill to SpillsToKeep, i.e., set
+      // the initial status before hoisting starts. The value for BBs
+      // containing original spills is set to 0, in order to distinguish them
+      // from BBs containing hoisted spills, which will be inserted into
+      // SpillsToKeep later during hoisting.
+ SpillsToKeep[MDT[Block]] = 0;
+ WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
+ }
+ NodesOnPath.clear();
+ }
+
+ // Sort the nodes in WorkSet in top-down order and save the nodes
+ // in Orders. Orders will be used for hoisting in runHoistSpills.
+ unsigned idx = 0;
+ Orders.push_back(MDT.getBase().getNode(Root));
+ do {
+ MachineDomTreeNode *Node = Orders[idx++];
+ for (MachineDomTreeNode *Child : Node->children()) {
+ if (WorkSet.count(Child))
+ Orders.push_back(Child);
+ }
+ } while (idx != Orders.size());
+  assert(Orders.size() == WorkSet.size() &&
+         "Orders and WorkSet have different sizes");
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++)
+ LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
+ LLVM_DEBUG(dbgs() << "\n");
+#endif
+}
+
+/// Try to hoist spills according to BB hotness. The spills to be removed will
+/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
+/// \p SpillsToIns.
+void HoistSpillHelper::runHoistSpills(
+ LiveInterval &OrigLI, VNInfo &OrigVNI,
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
+ // Visit order of dominator tree nodes.
+ SmallVector<MachineDomTreeNode *, 32> Orders;
+ // SpillsToKeep contains all the nodes where spills are to be inserted
+ // during hoisting. If the spill to be inserted is an original spill
+ // (not a hoisted one), the value of the map entry is 0. If the spill
+ // is a hoisted spill, the value of the map entry is the VReg to be used
+ // as the source of the spill.
+ DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
+ // Map from BB to the first spill inside of it.
+ DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
+
+ rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
+
+ MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
+ getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
+ SpillBBToSpill);
+
+  // SpillsInSubTreeMap maps a dom tree node to a pair of a node set and the
+  // total cost of all the spills inside those nodes. The node set holds the
+  // locations where spills are to be inserted within the subtree of the
+  // current node.
+ using NodesCostPair =
+ std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>;
+ DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+
+ // Iterate Orders set in reverse order, which will be a bottom-up order
+ // in the dominator tree. Once we visit a dom tree node, we know its
+ // children have already been visited and the spill locations in the
+ // subtrees of all the children have been determined.
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++) {
+ MachineBasicBlock *Block = (*RIt)->getBlock();
+
+ // If Block contains an original spill, simply continue.
+ if (SpillsToKeep.contains(*RIt) && !SpillsToKeep[*RIt]) {
+ SpillsInSubTreeMap[*RIt].first.insert(*RIt);
+ // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
+ SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
+ continue;
+ }
+
+ // Collect spills in subtree of current node (*RIt) to
+ // SpillsInSubTreeMap[*RIt].first.
+ for (MachineDomTreeNode *Child : (*RIt)->children()) {
+ if (!SpillsInSubTreeMap.contains(Child))
+ continue;
+      // The statement "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
+      // must come before the begin and end iterators of
+      // SpillsInSubTreeMap[Child].first are obtained; otherwise those
+      // iterators may be invalidated when SpillsInSubTreeMap[*RIt] is accessed
+      // for the first time, since the map may grow and move its original
+      // buckets.
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ SubTreeCost += SpillsInSubTreeMap[Child].second;
+ auto BI = SpillsInSubTreeMap[Child].first.begin();
+ auto EI = SpillsInSubTreeMap[Child].first.end();
+ SpillsInSubTree.insert(BI, EI);
+ SpillsInSubTreeMap.erase(Child);
+ }
+
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ // No spills in subtree, simply continue.
+ if (SpillsInSubTree.empty())
+ continue;
+
+ // Check whether Block is a possible candidate to insert spill.
+ Register LiveReg;
+ if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
+ continue;
+
+ // If there are multiple spills that could be merged, bias a little
+ // to hoist the spill.
+ BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
+ ? BranchProbability(9, 10)
+ : BranchProbability(1, 1);
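+    // Hoist only if the combined frequency of the spills in the subtree
+    // exceeds the (possibly discounted) frequency of a single spill in this
+    // block.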
+ if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
+ // Hoist: Move spills to current Block.
+ for (auto *const SpillBB : SpillsInSubTree) {
+        // If SpillBB is a BB containing an original spill, add that spill
+        // to SpillsToRm.
+ if (SpillsToKeep.contains(SpillBB) && !SpillsToKeep[SpillBB]) {
+ MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
+ SpillsToRm.push_back(SpillToRm);
+ }
+        // SpillBB no longer contains a spill; remove it from SpillsToKeep.
+ SpillsToKeep.erase(SpillBB);
+ }
+ // Current Block is the BB containing the new hoisted spill. Add it to
+ // SpillsToKeep. LiveReg is the source of the new spill.
+ SpillsToKeep[*RIt] = LiveReg;
+ LLVM_DEBUG({
+ dbgs() << "spills in BB: ";
+ for (const auto Rspill : SpillsInSubTree)
+ dbgs() << Rspill->getBlock()->getNumber() << " ";
+ dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
+ << "\n";
+ });
+ SpillsInSubTree.clear();
+ SpillsInSubTree.insert(*RIt);
+ SubTreeCost = MBFI.getBlockFreq(Block);
+ }
+ }
+ // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
+ // save them to SpillsToIns.
+ for (const auto &Ent : SpillsToKeep) {
+ if (Ent.second)
+ SpillsToIns[Ent.first->getBlock()] = Ent.second;
+ }
+}
+
+/// For spills with equal values, remove redundant spills and hoist the
+/// remaining ones to colder spots.
+///
+/// Spills with equal values are collected into the same set in MergeableSpills
+/// when each spill is inserted. These equal spills originate from the same
+/// defining instruction and are dominated by it. Before hoisting all the equal
+/// spills, redundant spills inside the same BB are first marked for deletion.
+/// Then, starting from the remaining spills, walk up the dominator tree
+/// towards the Root node where the defining instruction is located, mark the
+/// dominated spills for deletion along the way, and collect the BB nodes on
+/// the path from the non-dominated spills to the defining instruction into a
+/// WorkSet. The nodes in the WorkSet are the candidate places where we
+/// consider hoisting the spills. We iterate the WorkSet in bottom-up order and
+/// for each node decide whether to hoist the spills inside its subtree to that
+/// node. This way we can get a local benefit even when hoisting all the equal
+/// spills to one cold place is impossible.
+void HoistSpillHelper::hoistAllSpills() {
+ SmallVector<Register, 4> NewVRegs;
+ LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
+
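+  // Build the sibling map: virtual registers split from the same original
+  // register and still having definitions are grouped together, so sibling
+  // registers sharing a stack slot can be found quickly.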
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ Register Original = VRM.getPreSplitReg(Reg);
+ if (!MRI.def_empty(Reg))
+ Virt2SiblingsMap[Original].insert(Reg);
+ }
+
+ // Each entry in MergeableSpills contains a spill set with equal values.
+ for (auto &Ent : MergeableSpills) {
+ int Slot = Ent.first.first;
+ LiveInterval &OrigLI = *StackSlotToOrigLI[Slot];
+ VNInfo *OrigVNI = Ent.first.second;
+ SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
+ if (Ent.second.empty())
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
+ << "Equal spills in BB: ";
+ for (const auto spill : EqValSpills)
+ dbgs() << spill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // SpillsToRm is the spill set to be removed from EqValSpills.
+ SmallVector<MachineInstr *, 16> SpillsToRm;
+ // SpillsToIns is the spill set to be newly inserted after hoisting.
+ DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
+
+ runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+
+ LLVM_DEBUG({
+ dbgs() << "Finally inserted spills in BB: ";
+ for (const auto &Ispill : SpillsToIns)
+ dbgs() << Ispill.first->getNumber() << " ";
+ dbgs() << "\nFinally removed spills in BB: ";
+ for (const auto Rspill : SpillsToRm)
+ dbgs() << Rspill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // Stack live range update.
+ LiveInterval &StackIntvl = LSS.getInterval(Slot);
+ if (!SpillsToIns.empty() || !SpillsToRm.empty())
+ StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
+ StackIntvl.getValNumInfo(0));
+
+ // Insert hoisted spills.
+ for (auto const &Insert : SpillsToIns) {
+ MachineBasicBlock *BB = Insert.first;
+ Register LiveReg = Insert.second;
+ MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB);
+ MachineInstrSpan MIS(MII, BB);
+ TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot,
+ MRI.getRegClass(LiveReg), &TRI, Register());
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
+ for (const MachineInstr &MI : make_range(MIS.begin(), MII))
+ getVDefInterval(MI, LIS);
+ ++NumSpills;
+ }
+
+ // Remove redundant spills or change them to dead instructions.
+ NumSpills -= SpillsToRm.size();
+ for (auto *const RMEnt : SpillsToRm) {
+ RMEnt->setDesc(TII.get(TargetOpcode::KILL));
+ for (unsigned i = RMEnt->getNumOperands(); i; --i) {
+ MachineOperand &MO = RMEnt->getOperand(i - 1);
+ if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
+ RMEnt->removeOperand(i - 1);
+ }
+ }
+ Edit.eliminateDeadDefs(SpillsToRm, std::nullopt);
+ }
+}
+
+/// When a virtual register is cloned, the \p New register should get the same
+/// physreg or stack slot as the \p Old register.
+void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) {
+ if (VRM.hasPhys(Old))
+ VRM.assignVirt2Phys(New, VRM.getPhys(Old));
+ else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
+ VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
+ else
+ llvm_unreachable("VReg should be assigned either physreg or stackslot");
+ if (VRM.hasShape(Old))
+ VRM.assignVirt2Shape(New, VRM.getShape(Old));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 000000000000..ae197ee5553a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,258 @@
+//===- InterferenceCache.cpp - Caching per-block interference -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InterferenceCache.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Static member used for null interference cursors.
+const InterferenceCache::BlockInterference
+ InterferenceCache::Cursor::NoInterference;
+
+// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
+// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
+// reg files). Calloced memory is used for good form, and it quiets tools like
+// Valgrind too, but zero-initialized memory is not required by the algorithm:
+// this is because PhysRegEntries works like a SparseSet and its entries are
+// only valid when there is a corresponding CacheEntries assignment. There is
+// also support for when pass managers are reused for targets with different
+// numbers of PhysRegs: in this case PhysRegEntries is freed and reinitialized.
+void InterferenceCache::reinitPhysRegEntries() {
+ if (PhysRegEntriesCount == TRI->getNumRegs()) return;
+ free(PhysRegEntries);
+ PhysRegEntriesCount = TRI->getNumRegs();
+ PhysRegEntries = static_cast<unsigned char*>(
+ safe_calloc(PhysRegEntriesCount, sizeof(unsigned char)));
+}
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ reinitPhysRegEntries();
+ for (Entry &E : Entries)
+ E.clear(mf, indexes, lis);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) {
+ unsigned char E = PhysRegEntries[PhysReg.id()];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate(LIUArray, TRI);
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ unsigned i = 0;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ RegUnits[i++].VirtTag = LIUArray[Unit].getTag();
+}
+
+void InterferenceCache::Entry::reset(MCRegister physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+ // LIU's changed, invalidate cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ RegUnits.clear();
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ RegUnits.push_back(LIUArray[Unit]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(Unit);
+ }
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ unsigned i = 0, e = RegUnits.size();
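+  // The entry is valid only if it still covers exactly PhysReg's reg units and
+  // none of the underlying unions have changed since the tags were recorded.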
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (i == e)
+ return false;
+ if (LIUArray[Unit].changedSince(RegUnits[i].VirtTag))
+ return false;
+ ++i;
+ }
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Use advanceTo only when possible.
+ if (PrevPos != Start) {
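+    // Moving backwards (or starting fresh) requires a full find(); moving
+    // forwards can reuse the iterators via the cheaper advanceTo().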
+ if (!PrevPos.isValid() || Start < PrevPos) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.find(Start);
+ RUI.FixedI = RUI.Fixed->find(Start);
+ }
+ } else {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.advanceTo(Start);
+ if (RUI.FixedI != RUI.Fixed->end())
+ RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
+ }
+ }
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI =
+ MF->getBlockNumbered(MBBNum)->getIterator();
+ BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
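+  // Scan forward one block at a time until interference is found; blocks that
+  // turn out to be interference-free along the way are cached as well.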
+ while (true) {
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference from virtregs.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Same thing for fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::const_iterator I = RegUnits[i].FixedI;
+ LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ if (I == E)
+ continue;
+ SlotIndex StartI = I->start;
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid() || I.start() >= Stop)
+ continue;
+ I.advanceTo(Stop);
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::iterator &I = RegUnits[i].FixedI;
+ LiveRange *LR = RegUnits[i].Fixed;
+ if (I == LR->end() || I->start >= Stop)
+ continue;
+ I = LR->advanceTo(I, Stop);
+ bool Backup = I == LR->end() || I->start >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I->end;
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 000000000000..2a176b4f2cf7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,243 @@
+//===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference from LiveIntervalUnions,
+// fixed RegUnit interference, and register masks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+
+namespace llvm {
+
+class LiveIntervals;
+class MachineFunction;
+class TargetRegisterInfo;
+
+class LLVM_LIBRARY_VISIBILITY InterferenceCache {
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ unsigned Tag = 0;
+ SlotIndex First;
+ SlotIndex Last;
+
+ BlockInterference() = default;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ MCRegister PhysReg = 0;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag = 0;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount = 0;
+
+ /// MF - The current function.
+ MachineFunction *MF = nullptr;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes = nullptr;
+
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS = nullptr;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
+
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
+
+ /// Fixed interference in RegUnit.
+ LiveRange *Fixed = nullptr;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+    /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() = default;
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = MCRegister::NoRegister;
+ MF = mf;
+ Indexes = indexes;
+ LIS = lis;
+ }
+
+ MCRegister getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(MCRegister physReg, LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI, const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+  // We don't keep a cache entry for every physical register; that would use
+  // too much memory. Instead, a fixed number of cache entries are used in a
+  // round-robin manner.
+ enum { CacheEntries = 32 };
+
+ const TargetRegisterInfo *TRI = nullptr;
+ LiveIntervalUnion *LIUArray = nullptr;
+ MachineFunction *MF = nullptr;
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ unsigned char* PhysRegEntries = nullptr;
+ size_t PhysRegEntriesCount = 0;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin = 0;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(MCRegister PhysReg);
+
+public:
+ InterferenceCache() = default;
+ InterferenceCache &operator=(const InterferenceCache &other) = delete;
+ InterferenceCache(const InterferenceCache &other) = delete;
+ ~InterferenceCache() {
+ free(PhysRegEntries);
+ }
+
+ void reinitPhysRegEntries();
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction *mf, LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes, LiveIntervals *lis,
+ const TargetRegisterInfo *tri);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry = nullptr;
+ const BlockInterference *Current = nullptr;
+ static const BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = nullptr;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() = default;
+
+ Cursor(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ ~Cursor() { setEntry(nullptr); }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, MCRegister PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(nullptr);
+ if (PhysReg.isValid())
+ setEntry(Cache.get(PhysReg));
+ }
+
+    /// moveToBlock - Move the cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
new file mode 100644
index 000000000000..6b3848531569
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -0,0 +1,538 @@
+//===- InterleavedAccessPass.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Interleaved Access pass, which identifies
+// interleaved memory accesses and transforms them into target specific
+// intrinsics.
+//
+// An interleaved load reads data from memory into several vectors,
+// DE-interleaving the data by a factor. An interleaved store writes several
+// vectors to memory, RE-interleaving the data by a factor.
+//
+// As interleaved accesses are difficult to identified in CodeGen (mainly
+// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
+// IR), we identify and transform them to intrinsics in this pass so the
+// intrinsics can be easily matched into target specific instructions later in
+// CodeGen.
+//
+// E.g. An interleaved load (Factor = 2):
+// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
+// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
+//
+// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
+// intrinsic in ARM backend.
+//
+// In X86, this can be further optimized into a set of target
+// specific loads followed by an optimized sequence of shuffles.
+//
+// E.g. An interleaved store (Factor = 3):
+// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+// store <12 x i32> %i.vec, <12 x i32>* %ptr
+//
+// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
+// intrinsic in ARM backend.
+//
+// Similarly, a set of interleaved stores can be transformed into an optimized
+// sequence of shuffles followed by a set of target specific stores for X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-access"
+
+static cl::opt<bool> LowerInterleavedAccesses(
+ "lower-interleaved-accesses",
+ cl::desc("Enable lowering interleaved accesses to intrinsics"),
+ cl::init(true), cl::Hidden);
+
+namespace {
+
+class InterleavedAccess : public FunctionPass {
+public:
+ static char ID;
+
+ InterleavedAccess() : FunctionPass(ID) {
+ initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Interleaved Access Pass"; }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ DominatorTree *DT = nullptr;
+ const TargetLowering *TLI = nullptr;
+
+ /// The maximum supported interleave factor.
+ unsigned MaxFactor = 0u;
+
+ /// Transform an interleaved load into target specific intrinsics.
+ bool lowerInterleavedLoad(LoadInst *LI,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// Transform an interleaved store into target specific intrinsics.
+ bool lowerInterleavedStore(StoreInst *SI,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// Transform a load and a deinterleave intrinsic into target specific
+ /// instructions.
+ bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// Transform an interleave intrinsic and a store into target specific
+ /// instructions.
+ bool lowerInterleaveIntrinsic(IntrinsicInst *II,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// Returns true if the uses of an interleaved load by the
+ /// extractelement instructions in \p Extracts can be replaced by uses of the
+ /// shufflevector instructions in \p Shuffles instead. If so, the necessary
+ /// replacements are also performed.
+ bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
+ ArrayRef<ShuffleVectorInst *> Shuffles);
+
+ /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
+ /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
+ /// interleaving load. Any newly created shuffles that operate on \p LI will
+ /// be added to \p Shuffles. Returns true if any changes to the IR have been
+ /// made.
+ bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
+ LoadInst *LI);
+};
+
+} // end anonymous namespace.
+
+char InterleavedAccess::ID = 0;
+
+INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
+ "Lower interleaved memory accesses to target specific intrinsics", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
+ "Lower interleaved memory accesses to target specific intrinsics", false,
+ false)
+
+FunctionPass *llvm::createInterleavedAccessPass() {
+ return new InterleavedAccess();
+}
+
+/// Check if the mask is a DE-interleave mask of the given factor
+/// \p Factor like:
+/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
+static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
+ unsigned &Index) {
+ // Check all potential start indices from 0 to (Factor - 1).
+ for (Index = 0; Index < Factor; Index++) {
+ unsigned i = 0;
+
+ // Check that elements are in ascending order by Factor. Ignore undef
+ // elements.
+ for (; i < Mask.size(); i++)
+ if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
+ break;
+
+ if (i == Mask.size())
+ return true;
+ }
+
+ return false;
+}
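+
+// For illustration: with Factor = 2, the mask <1, 3, 5, 7> matches the pattern
+// <Index, Index+Factor, ...> for Index = 1, so the function returns true and
+// sets Index to 1, while a mask such as <0, 3, 4, 6> matches no start index
+// and is rejected.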
+
+/// Check if the mask is a DE-interleave mask for an interleaved load.
+///
+/// E.g. DE-interleave masks (Factor = 2) could be:
+/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
+/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
+static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
+ unsigned &Index, unsigned MaxFactor,
+ unsigned NumLoadElements) {
+ if (Mask.size() < 2)
+ return false;
+
+ // Check potential Factors.
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ // Make sure we don't produce a load wider than the input load.
+ if (Mask.size() * Factor > NumLoadElements)
+ return false;
+ if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
+ return true;
+ }
+
+ return false;
+}
+
+/// Check if the mask can be used in an interleaved store.
+///
+/// It checks for a more general pattern than the RE-interleave mask.
+/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
+/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
+///
+/// The particular case of an RE-interleave mask is:
+/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
+static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
+ unsigned MaxFactor) {
+ unsigned NumElts = SVI->getShuffleMask().size();
+ if (NumElts < 4)
+ return false;
+
+ // Check potential Factors.
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ if (SVI->isInterleave(Factor))
+ return true;
+ }
+
+ return false;
+}
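+
+// For illustration, the canonical RE-interleave mask for Factor = 2 and
+// LaneLen = 4 is <0, 4, 1, 5, 2, 6, 3, 7>; ShuffleVectorInst::isInterleave(2)
+// accepts it, and the loop above simply probes each factor from 2 up to the
+// target's MaxFactor until one matches.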
+
+bool InterleavedAccess::lowerInterleavedLoad(
+ LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
+ return false;
+
+ // Check if all users of this load are shufflevectors. If we encounter any
+ // users that are extractelement instructions or binary operators, we save
+ // them to later check if they can be modified to extract from one of the
+ // shufflevectors instead of the load.
+
+ SmallVector<ShuffleVectorInst *, 4> Shuffles;
+ SmallVector<ExtractElementInst *, 4> Extracts;
+ // BinOpShuffles need to be handled only once in case both operands of the
+ // binop are the same load.
+ SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
+
+ for (auto *User : LI->users()) {
+ auto *Extract = dyn_cast<ExtractElementInst>(User);
+ if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
+ Extracts.push_back(Extract);
+ continue;
+ }
+ if (auto *BI = dyn_cast<BinaryOperator>(User)) {
+ if (all_of(BI->users(), [](auto *U) {
+ auto *SVI = dyn_cast<ShuffleVectorInst>(U);
+ return SVI && isa<UndefValue>(SVI->getOperand(1));
+ })) {
+ for (auto *SVI : BI->users())
+ BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
+ continue;
+ }
+ }
+ auto *SVI = dyn_cast<ShuffleVectorInst>(User);
+ if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
+ return false;
+
+ Shuffles.push_back(SVI);
+ }
+
+ if (Shuffles.empty() && BinOpShuffles.empty())
+ return false;
+
+ unsigned Factor, Index;
+
+ unsigned NumLoadElements =
+ cast<FixedVectorType>(LI->getType())->getNumElements();
+ auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
+ // Check if the first shufflevector is a DE-interleave shuffle.
+ if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
+ NumLoadElements))
+ return false;
+
+ // Holds the corresponding index for each DE-interleave shuffle.
+ SmallVector<unsigned, 4> Indices;
+
+ Type *VecTy = FirstSVI->getType();
+
+ // Check if the other shufflevectors are also DE-interleave shuffles of the
+ // same type and factor as the first shufflevector.
+ for (auto *Shuffle : Shuffles) {
+ if (Shuffle->getType() != VecTy)
+ return false;
+ if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
+ Index))
+ return false;
+
+ assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
+ Indices.push_back(Index);
+ }
+ for (auto *Shuffle : BinOpShuffles) {
+ if (Shuffle->getType() != VecTy)
+ return false;
+ if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
+ Index))
+ return false;
+
+ assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
+
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
+ Indices.push_back(Index);
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
+ Indices.push_back(Index);
+ }
+
+ // Try and modify users of the load that are extractelement instructions to
+ // use the shufflevector instructions instead of the load.
+ if (!tryReplaceExtracts(Extracts, Shuffles))
+ return false;
+
+ bool BinOpShuffleChanged =
+ replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+
+ // Try to create target specific intrinsics to replace the load and shuffles.
+ if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
+ }
+
+ append_range(DeadInsts, Shuffles);
+
+ DeadInsts.push_back(LI);
+ return true;
+}
+
+bool InterleavedAccess::replaceBinOpShuffles(
+ ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+ for (auto *SVI : BinOpShuffles) {
+ BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
+ Type *BIOp0Ty = BI->getOperand(0)->getType();
+ ArrayRef<int> Mask = SVI->getShuffleMask();
+ assert(all_of(Mask, [&](int Idx) {
+ return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
+ }));
+
+ auto *NewSVI1 =
+ new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
+ Mask, SVI->getName(), SVI);
+ auto *NewSVI2 = new ShuffleVectorInst(
+ BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
+ SVI->getName(), SVI);
+ BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
+ BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
+ SVI->replaceAllUsesWith(NewBI);
+ LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
+ << "\n With : " << *NewSVI1 << "\n And : "
+ << *NewSVI2 << "\n And : " << *NewBI << "\n");
+ RecursivelyDeleteTriviallyDeadInstructions(SVI);
+ if (NewSVI1->getOperand(0) == LI)
+ Shuffles.push_back(NewSVI1);
+ if (NewSVI2->getOperand(0) == LI)
+ Shuffles.push_back(NewSVI2);
+ }
+
+ return !BinOpShuffles.empty();
+}
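+
+// A small IR sketch of the rewrite performed above (values %l, %x and the
+// types are made up for illustration):
+//
+//   %b  = add <8 x i32> %l, %x
+//   %s0 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+//
+// becomes
+//
+//   %l0 = shufflevector <8 x i32> %l, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+//   %x0 = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+//   %s0 = add <4 x i32> %l0, %x0
+//
+// so that %l is now used directly by DE-interleave shuffles, which the load
+// lowering above can match.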
+
+bool InterleavedAccess::tryReplaceExtracts(
+ ArrayRef<ExtractElementInst *> Extracts,
+ ArrayRef<ShuffleVectorInst *> Shuffles) {
+ // If there aren't any extractelement instructions to modify, there's nothing
+ // to do.
+ if (Extracts.empty())
+ return true;
+
+ // Maps extractelement instructions to vector-index pairs. The extractelement
+ // instructions will be modified to use the new vector and index operands.
+ DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
+
+ for (auto *Extract : Extracts) {
+ // The vector index that is extracted.
+ auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
+ auto Index = IndexOperand->getSExtValue();
+
+ // Look for a suitable shufflevector instruction. The goal is to modify the
+ // extractelement instruction (which uses an interleaved load) to use one
+ // of the shufflevector instructions instead of the load.
+ for (auto *Shuffle : Shuffles) {
+ // If the shufflevector instruction doesn't dominate the extract, we
+ // can't create a use of it.
+ if (!DT->dominates(Shuffle, Extract))
+ continue;
+
+ // Inspect the indices of the shufflevector instruction. If the shuffle
+ // selects the same index that is extracted, we can modify the
+ // extractelement instruction.
+ SmallVector<int, 4> Indices;
+ Shuffle->getShuffleMask(Indices);
+ for (unsigned I = 0; I < Indices.size(); ++I)
+ if (Indices[I] == Index) {
+ assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
+ "Vector operations do not match");
+ ReplacementMap[Extract] = std::make_pair(Shuffle, I);
+ break;
+ }
+
+ // If we found a suitable shufflevector instruction, stop looking.
+ if (ReplacementMap.count(Extract))
+ break;
+ }
+
+ // If we did not find a suitable shufflevector instruction, the
+ // extractelement instruction cannot be modified, so we must give up.
+ if (!ReplacementMap.count(Extract))
+ return false;
+ }
+
+ // Finally, perform the replacements.
+ IRBuilder<> Builder(Extracts[0]->getContext());
+ for (auto &Replacement : ReplacementMap) {
+ auto *Extract = Replacement.first;
+ auto *Vector = Replacement.second.first;
+ auto Index = Replacement.second.second;
+ Builder.SetInsertPoint(Extract);
+ Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
+ Extract->eraseFromParent();
+ }
+
+ return true;
+}
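+
+// A small IR sketch of the replacement above (names are illustrative):
+//
+//   %wide = load <8 x i32>, ptr %p
+//   %v0   = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+//   %e    = extractelement <8 x i32> %wide, i64 2
+//
+// Index 2 of %wide is selected by lane 1 of %v0's mask, so %e is rewritten to
+//
+//   %e    = extractelement <4 x i32> %v0, i64 1
+//
+// and the wide load no longer has extractelement users.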
+
+bool InterleavedAccess::lowerInterleavedStore(
+ StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!SI->isSimple())
+ return false;
+
+ auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
+ return false;
+
+ // Check if the shufflevector is a RE-interleave shuffle.
+ unsigned Factor;
+ if (!isReInterleaveMask(SVI, Factor, MaxFactor))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+
+ // Try to create target specific intrinsics to replace the store and shuffle.
+ if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
+ return false;
+
+ // Already have a new target specific interleaved store. Erase the old store.
+ DeadInsts.push_back(SI);
+ DeadInsts.push_back(SVI);
+ return true;
+}
+
+bool InterleavedAccess::lowerDeinterleaveIntrinsic(
+ IntrinsicInst *DI, SmallVector<Instruction *, 32> &DeadInsts) {
+ LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
+
+ if (!LI || !LI->hasOneUse() || !LI->isSimple())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
+
+ // Try and match this with target specific intrinsics.
+ if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
+ return false;
+
+ // We now have a target-specific load, so delete the old one.
+ DeadInsts.push_back(DI);
+ DeadInsts.push_back(LI);
+ return true;
+}
+
+bool InterleavedAccess::lowerInterleaveIntrinsic(
+ IntrinsicInst *II, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!II->hasOneUse())
+ return false;
+
+ StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
+
+ if (!SI || !SI->isSimple())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
+
+ // Try and match this with target specific intrinsics.
+ if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
+ return false;
+
+ // We now have a target-specific store, so delete the old one.
+ DeadInsts.push_back(SI);
+ DeadInsts.push_back(II);
+ return true;
+}
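+
+// The two intrinsic-based patterns handled above look roughly like this in IR
+// (types and names are illustrative):
+//
+//   %l  = load <8 x i32>, ptr %p
+//   %dv = call { <4 x i32>, <4 x i32> }
+//            @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32> %l)
+//
+//   %iv = call <8 x i32>
+//            @llvm.experimental.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
+//   store <8 x i32> %iv, ptr %p
+//
+// When the target hooks succeed, the load/store plus intrinsic pair is replaced
+// by a target structured-memory instruction (e.g. ld2/st2 on AArch64).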
+
+bool InterleavedAccess::runOnFunction(Function &F) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC || !LowerInterleavedAccesses)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &TM = TPC->getTM<TargetMachine>();
+ TLI = TM.getSubtargetImpl(F)->getTargetLowering();
+ MaxFactor = TLI->getMaxSupportedInterleaveFactor();
+
+ // Holds dead instructions that will be erased later.
+ SmallVector<Instruction *, 32> DeadInsts;
+ bool Changed = false;
+
+ for (auto &I : instructions(F)) {
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ Changed |= lowerInterleavedLoad(LI, DeadInsts);
+
+ if (auto *SI = dyn_cast<StoreInst>(&I))
+ Changed |= lowerInterleavedStore(SI, DeadInsts);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ // At present, we only have intrinsics to represent (de)interleaving
+ // with a factor of 2.
+ if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
+ Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
+ if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+ Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
+ }
+ }
+
+ for (auto *I : DeadInsts)
+ I->eraseFromParent();
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
new file mode 100644
index 000000000000..d0ad6e45b4d3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -0,0 +1,1363 @@
+//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+//
+// This file defines the interleaved-load-combine pass. The pass searches for
+// ShuffleVectorInst instructions that perform interleaving loads. If a matching
+// pattern is found, it adds a combined load and further instructions in a
+// pattern that is detectable by InterleavedAccessPass. The old instructions are
+// left dead to be removed later. The pass is specifically designed to be
+// executed just before InterleavedAccessPass to find any left-over instances
+// that were not detected by earlier passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+#include <cassert>
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-load-combine"
+
+namespace {
+
+/// Statistic counter
+STATISTIC(NumInterleavedLoadCombine, "Number of combined loads");
+
+/// Option to disable the pass
+static cl::opt<bool> DisableInterleavedLoadCombine(
+ "disable-" DEBUG_TYPE, cl::init(false), cl::Hidden,
+ cl::desc("Disable combining of interleaved loads"));
+
+struct VectorInfo;
+
+struct InterleavedLoadCombineImpl {
+public:
+ InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
+ TargetMachine &TM)
+ : F(F), DT(DT), MSSA(MSSA),
+ TLI(*TM.getSubtargetImpl(F)->getTargetLowering()),
+ TTI(TM.getTargetTransformInfo(F)) {}
+
+ /// Scan the function for interleaved load candidates and execute the
+ /// replacement if applicable.
+ bool run();
+
+private:
+ /// Function this pass is working on
+ Function &F;
+
+ /// Dominator Tree Analysis
+ DominatorTree &DT;
+
+ /// Memory Alias Analyses
+ MemorySSA &MSSA;
+
+ /// Target Lowering Information
+ const TargetLowering &TLI;
+
+ /// Target Transform Information
+ const TargetTransformInfo TTI;
+
+ /// Find the instruction in set LIs that dominates all others; return nullptr
+ /// if there is none.
+ LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs);
+
+ /// Replace interleaved load candidates. It does additional
+ /// analyses if this makes sense. Returns true on success and false
+ /// if nothing has been changed.
+ bool combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE);
+
+ /// Given a set of VectorInfo containing candidates for a given interleave
+ /// factor, find a set that represents a 'factor' interleaved load.
+ bool findPattern(std::list<VectorInfo> &Candidates,
+ std::list<VectorInfo> &InterleavedLoad, unsigned Factor,
+ const DataLayout &DL);
+}; // InterleavedLoadCombine
+
+/// First Order Polynomial on an n-Bit Integer Value
+///
+/// Polynomial(Value) = Value * B + A + E*2^(n-e)
+///
+/// A and B are the coefficients. E*2^(n-e) is an error within the 'e' most
+/// significant bits. It is introduced if an exact computation cannot be proven
+/// (e.g. division by 2).
+///
+/// As part of this optimization multiple loads will be combined. It is
+/// necessary to prove that loads are at a known relative offset to each other.
+/// This class is used to prove relative offsets of values loaded from memory.
+///
+/// Representing an integer in this form is sound since addition in two's
+/// complement is associative (trivial) and multiplication distributes over the
+/// addition (see Proof(1) in Polynomial::mul). Further, both operations
+/// commute.
+//
+// Example:
+// declare @fn(i64 %IDX, <4 x float>* %PTR) {
+// %Pa1 = add i64 %IDX, 2
+// %Pa2 = lshr i64 %Pa1, 1
+// %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2
+// %Va = load <4 x float>, <4 x float>* %Pa3
+//
+// %Pb1 = add i64 %IDX, 4
+// %Pb2 = lshr i64 %Pb1, 1
+// %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2
+// %Vb = load <4 x float>, <4 x float>* %Pb3
+// ... }
+//
+// The goal is to prove that two loads load consecutive addresses.
+//
+// In this case the polynomials are constructed by the following
+// steps.
+//
+// The number tag #e specifies the error bits.
+//
+// Pa_0 = %IDX #0
+// Pa_1 = %IDX + 2 #0 | add 2
+// Pa_2 = %IDX/2 + 1 #1 | lshr 1
+// Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64
+// Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(4) for floats
+// Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components
+//
+// Pb_0 = %IDX #0
+// Pb_1 = %IDX + 4 #0 | add 4
+// Pb_2 = %IDX/2 + 2 #1 | lshr 1
+// Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64
+// Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(4) for floats
+// Pb_5 = (%IDX/2)*16 + 32 #0 | GEP, add offset of leading components
+//
+// Pb_5 - Pa_5 = 16 #0 | subtract to get the offset
+//
+// Remark: %PTR is not maintained within this class. So in this instance the
+// offset of 16 can only be assumed if the pointers are equal.
+//
+class Polynomial {
+ /// Operations on B
+ enum BOps {
+ LShr,
+ Mul,
+ SExt,
+ Trunc,
+ };
+
+ /// Number of Error Bits e
+ unsigned ErrorMSBs = (unsigned)-1;
+
+ /// Value
+ Value *V = nullptr;
+
+ /// Coefficient B
+ SmallVector<std::pair<BOps, APInt>, 4> B;
+
+ /// Coefficient A
+ APInt A;
+
+public:
+ Polynomial(Value *V) : V(V) {
+ IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
+ if (Ty) {
+ ErrorMSBs = 0;
+ this->V = V;
+ A = APInt(Ty->getBitWidth(), 0);
+ }
+ }
+
+ Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), A(A) {}
+
+ Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {}
+
+ Polynomial() = default;
+
+ /// Increment and clamp the number of undefined bits.
+ void incErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ ErrorMSBs += amt;
+ if (ErrorMSBs > A.getBitWidth())
+ ErrorMSBs = A.getBitWidth();
+ }
+
+ /// Decrement and clamp the number of undefined bits.
+ void decErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ if (ErrorMSBs > amt)
+ ErrorMSBs -= amt;
+ else
+ ErrorMSBs = 0;
+ }
+
+ /// Apply an add on the polynomial
+ Polynomial &add(const APInt &C) {
+ // Note: Addition is associative in two's complement even in case of
+ // signed overflow.
+ //
+ // Error bits can only propagate into higher significant bits. As these are
+ // already regarded as undefined, there is no change.
+ //
+ // Theorem: Adding a constant to a polynomial does not change the error
+ // term.
+ //
+ // Proof:
+ //
+ // Since the addition is associative and commutes:
+ //
+ // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e)
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ A += C;
+ return *this;
+ }
+
+ /// Apply a multiplication onto the polynomial.
+ Polynomial &mul(const APInt &C) {
+ // Note: Multiplication distributes over the addition
+ //
+ // Theorem: Multiplication distributes over the addition
+ //
+ // Proof(1):
+ //
+ // (B+A)*C =
+ // = (B + A) + (B + A) + .. {C Times}
+ // addition is associative and commutes, hence
+ // = B + B + .. {C Times} .. + A + A + .. {C times}
+ // = B*C + A*C
+ // (see (function add) for signed values and overflows)
+ // [qed]
+ //
+ // Theorem: If C has c trailing zeros, error bits in A or B are shifted out
+ // to the left.
+ //
+ // Proof(2):
+ //
+ // Let B' and A' be the n-Bit inputs with some unknown errors EA,
+ // EB at e leading bits. B' and A' can be written down as:
+ //
+ // B' = B + 2^(n-e)*EB
+ // A' = A + 2^(n-e)*EA
+ //
+ // Let C' be an input with c trailing zero bits. C' can be written as
+ //
+ // C' = C*2^c
+ //
+ // Therefore we can compute the result by using distributivity and
+ // commutativity.
+ //
+ // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = (B'+A') * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c =
+ // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c =
+ //
+ // Let EC be the final error with EC = C*(EB + EA)
+ //
+ // = (B + A)*C' + EC*2^(n-e)*2^c =
+ // = (B + A)*C' + EC*2^(n-(e-c))
+ //
+ // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c
+ // less error bits than the input. c bits are shifted out to the left.
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ // Multiplying by one is a no-op.
+ if (C.isOne()) {
+ return *this;
+ }
+
+ // Multiplying by zero removes the coefficient B and defines all bits.
+ if (C.isZero()) {
+ ErrorMSBs = 0;
+ deleteB();
+ }
+
+ // See Proof(2): Trailing zero bits indicate a left shift. This removes
+ // leading bits from the result even if they are undefined.
+ decErrorMSBs(C.countr_zero());
+
+ A *= C;
+ pushBOperation(Mul, C);
+ return *this;
+ }
+
+ /// Apply a logical shift right on the polynomial
+ Polynomial &lshr(const APInt &C) {
+ // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e')
+ // where
+ // e' = e + 1,
+ // E is a e-bit number,
+ // E' is a e'-bit number,
+ // holds under the following precondition:
+ // pre(1): A % 2 = 0
+ // pre(2): e < n, (see Theorem(2) for the trivial case with e=n)
+ // where >> expresses a logical shift to the right, with adding zeros.
+ //
+ // We need to show that for every E there is an E'
+ //
+ // B = b_h * 2^(n-1) + b_m * 2 + b_l
+ // A = a_h * 2^(n-1) + a_m * 2 (pre(1))
+ //
+ // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers
+ //
+ // Let X = (B + A + E*2^(n-e)) >> 1
+ // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1
+ //
+ // X = [B + A + E*2^(n-e)] >> 1 =
+ // = [ b_h * 2^(n-1) + b_m * 2 + b_l +
+ // + a_h * 2^(n-1) + a_m * 2 +
+ // + E * 2^(n-e) ] >> 1 =
+ //
+ // The sum is built by putting the overflow of [a_m + b_m] into the term
+ // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within
+ // this bit is discarded. This is expressed by % 2.
+ //
+ // The bit in position 0 cannot overflow into the term (b_m + a_m).
+ //
+ // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) +
+ // + ((b_m + a_m) % 2^(n-2)) * 2 +
+ // + b_l + E * 2^(n-e) ] >> 1 =
+ //
+ // The shift is computed by dividing the terms by 2 and by cutting off
+ // b_l.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // by the definition in the Theorem e+1 = e'
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Compute Y by applying distributivity first
+ //
+ // Y = (B >> 1) + (A >> 1) + E*2^(n-e') =
+ // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 +
+ // + (a_h * 2^(n-1) + a_m * 2) >> 1 +
+ // + E * 2^(n-e) >> 1 =
+ //
+ // Again, the shift is computed by dividing the terms by 2 and by cutting
+ // off b_l.
+ //
+ // = b_h * 2^(n-2) + b_m +
+ // + a_h * 2^(n-2) + a_m +
+ // + E * 2^(n-(e+1)) =
+ //
+ // Again, the sum is built by putting the overflow of [a_m + b_m] into
+ // the term 2^(n-1). But this time there is room for a second bit in the
+ // term 2^(n-2) we add this bit to a new term and denote it o_h in a
+ // second step.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1
+ // Further replace e+1 by e'.
+ //
+ // = o_h * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Move o_h into the error term and construct E'. To ensure that there is
+ // no 2^x with negative x, this step requires pre(2) (e < n).
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1)
+ // | out of the old exponent
+ // + E * 2^(n-e') =
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of
+ // | the old exponent
+ //
+ // Let E' = o_h * 2^(e'-1) + E
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E' * 2^(n-e')
+ //
+ // Because X and Y are distinct only in their error terms and E' can be
+ // constructed as shown, the theorem holds.
+ // [qed]
+ //
+ // For completeness, in the case e=n it is also required to show that
+ // distributivity can be applied.
+ //
+ // In this case Theorem(1) transforms to (the pre-condition on A can also be
+ // dropped)
+ //
+ // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E'
+ // where
+ // A, B, E, E' are two's complement numbers with the same bit
+ // width
+ //
+ // Let A + B + E = X
+ // Let (B >> 1) + (A >> 1) = Y
+ //
+ // Therefore we need to show that for every X and Y there is an E' which
+ // makes the equation
+ //
+ // X = Y + E'
+ //
+ // hold. This is trivially the case for E' = X - Y.
+ //
+ // [qed]
+ //
+ // Remark: Distributing lshr by an arbitrary shift amount n can be expressed
+ // as ((((B + A) lshr 1) lshr 1) ... ) {n times}.
+ // This construction induces n additional error bits at the left.
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ if (C.isZero())
+ return *this;
+
+ // Test if the result will be zero
+ unsigned shiftAmt = C.getZExtValue();
+ if (shiftAmt >= C.getBitWidth())
+ return mul(APInt(C.getBitWidth(), 0));
+
+ // The proof that shiftAmt LSBs are zero for at least one summand is only
+ // possible for the constant summand A.
+ //
+ // If this can be proven add shiftAmt to the error counter
+ // `ErrorMSBs`. Otherwise set all bits as undefined.
+ if (A.countr_zero() < shiftAmt)
+ ErrorMSBs = A.getBitWidth();
+ else
+ incErrorMSBs(shiftAmt);
+
+ // Apply the operation.
+ pushBOperation(LShr, C);
+ A = A.lshr(shiftAmt);
+
+ return *this;
+ }
+
+ /// Apply a sign-extend or truncate operation on the polynomial.
+ Polynomial &sextOrTrunc(unsigned n) {
+ if (n < A.getBitWidth()) {
+ // Truncate: Clearly undefined Bits on the MSB side are removed
+ // if there are any.
+ decErrorMSBs(A.getBitWidth() - n);
+ A = A.trunc(n);
+ pushBOperation(Trunc, APInt(sizeof(n) * 8, n));
+ }
+ if (n > A.getBitWidth()) {
+ // Extend: Clearly extending first and adding later is different
+ // to adding first and extending later in all extended bits.
+ incErrorMSBs(n - A.getBitWidth());
+ A = A.sext(n);
+ pushBOperation(SExt, APInt(sizeof(n) * 8, n));
+ }
+
+ return *this;
+ }
+
+ /// Test if there is a coefficient B.
+ bool isFirstOrder() const { return V != nullptr; }
+
+ /// Test whether the coefficients B of two Polynomials are equal.
+ bool isCompatibleTo(const Polynomial &o) const {
+ // The polynomials use different bit widths.
+ if (A.getBitWidth() != o.A.getBitWidth())
+ return false;
+
+ // If neither Polynomial has the Coefficient B.
+ if (!isFirstOrder() && !o.isFirstOrder())
+ return true;
+
+ // The index variable is different.
+ if (V != o.V)
+ return false;
+
+ // Check the operations.
+ if (B.size() != o.B.size())
+ return false;
+
+ auto *ob = o.B.begin();
+ for (const auto &b : B) {
+ if (b != *ob)
+ return false;
+ ob++;
+ }
+
+ return true;
+ }
+
+ /// Subtract two polynomials, return an undefined polynomial if
+ /// subtraction is not possible.
+ Polynomial operator-(const Polynomial &o) const {
+ // Return an undefined polynomial if incompatible.
+ if (!isCompatibleTo(o))
+ return Polynomial();
+
+ // If the polynomials are compatible (meaning they have the same
+ // coefficient on B), B is eliminated. Thus a polynomial solely
+ // containing A is returned
+ return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs));
+ }
+
+ /// Subtract a constant from a polynomial.
+ Polynomial operator-(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A -= C;
+ return Result;
+ }
+
+ /// Add a constant to a polynomial.
+ Polynomial operator+(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A += C;
+ return Result;
+ }
+
+ /// Returns true if it can be proven that two Polynomials are equal.
+ bool isProvenEqualTo(const Polynomial &o) {
+ // Subtract both polynomials and test if it is fully defined and zero.
+ Polynomial r = *this - o;
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());
+ }
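+
+ // Worked example (using the Pa/Pb polynomials from the class comment above):
+ // Pa_5 = (%IDX/2)*16 + 16 and Pb_5 = (%IDX/2)*16 + 32 share the same
+ // coefficient B, so (Pb_5 - Pa_5) folds to the constant 16 with zero error
+ // bits, and Pb_5.isProvenEqualTo(Pa_5 + 16) returns true.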
+
+ /// Print the polynomial into a stream.
+ void print(raw_ostream &OS) const {
+ OS << "[{#ErrBits:" << ErrorMSBs << "} ";
+
+ if (V) {
+ for (auto b : B)
+ OS << "(";
+ OS << "(" << *V << ") ";
+
+ for (auto b : B) {
+ switch (b.first) {
+ case LShr:
+ OS << "LShr ";
+ break;
+ case Mul:
+ OS << "Mul ";
+ break;
+ case SExt:
+ OS << "SExt ";
+ break;
+ case Trunc:
+ OS << "Trunc ";
+ break;
+ }
+
+ OS << b.second << ") ";
+ }
+ }
+
+ OS << "+ " << A << "]";
+ }
+
+private:
+ void deleteB() {
+ V = nullptr;
+ B.clear();
+ }
+
+ void pushBOperation(const BOps Op, const APInt &C) {
+ if (isFirstOrder()) {
+ B.push_back(std::make_pair(Op, C));
+ return;
+ }
+ }
+};
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) {
+ S.print(OS);
+ return OS;
+}
+#endif
+
+/// VectorInfo abstracts and stores the following information for each vector
+/// element:
+///
+/// 1) the memory address loaded into the element as a Polynomial,
+/// 2) a set of load instructions necessary to construct the vector,
+/// 3) a set of all other instructions that are necessary to create the vector and
+/// 4) a pointer value that can be used as a relative base for all elements.
+struct VectorInfo {
+private:
+ VectorInfo(const VectorInfo &c) : VTy(c.VTy) {
+ llvm_unreachable(
+ "Copying VectorInfo is neither implemented nor necessary,");
+ }
+
+public:
+ /// Information of a Vector Element
+ struct ElementInfo {
+ /// Offset Polynomial.
+ Polynomial Ofs;
+
+ /// The Load Instruction used to load the entry. LI is null if the pointer
+ /// of the load instruction does not point to the entry.
+ LoadInst *LI;
+
+ ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr)
+ : Ofs(Offset), LI(LI) {}
+ };
+
+ /// Basic-block the load instructions are within
+ BasicBlock *BB = nullptr;
+
+ /// Pointer value of all participating load instructions
+ Value *PV = nullptr;
+
+ /// Participating load instructions
+ std::set<LoadInst *> LIs;
+
+ /// Participating instructions
+ std::set<Instruction *> Is;
+
+ /// Final shuffle-vector instruction
+ ShuffleVectorInst *SVI = nullptr;
+
+ /// Information of the offset for each vector element
+ ElementInfo *EI;
+
+ /// Vector Type
+ FixedVectorType *const VTy;
+
+ VectorInfo(FixedVectorType *VTy) : VTy(VTy) {
+ EI = new ElementInfo[VTy->getNumElements()];
+ }
+
+ VectorInfo &operator=(const VectorInfo &other) = delete;
+
+ virtual ~VectorInfo() { delete[] EI; }
+
+ unsigned getDimension() const { return VTy->getNumElements(); }
+
+ /// Test if the VectorInfo can be part of an interleaved load with the
+ /// specified factor.
+ ///
+ /// \param Factor of the interleave
+ /// \param DL The target's DataLayout
+ ///
+ /// \returns true if this is possible and false if not
+ bool isInterleaved(unsigned Factor, const DataLayout &DL) const {
+ unsigned Size = DL.getTypeAllocSize(VTy->getElementType());
+ for (unsigned i = 1; i < getDimension(); i++) {
+ if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) {
+ return false;
+ }
+ }
+ return true;
+ }
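+
+ // For illustration: with float elements (Size = 4) and Factor = 3,
+ // isInterleaved returns true only if EI[1] is proven to sit 12 bytes after
+ // EI[0], EI[2] 24 bytes after EI[0], and EI[3] 36 bytes after EI[0], i.e.
+ // the vector gathers every third float starting at EI[0].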
+
+ /// Recursively computes the vector information stored in V.
+ ///
+ /// This function delegates the work to specialized implementations
+ ///
+ /// \param V Value to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) {
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
+ if (SVI)
+ return computeFromSVI(SVI, Result, DL);
+ LoadInst *LI = dyn_cast<LoadInst>(V);
+ if (LI)
+ return computeFromLI(LI, Result, DL);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ return computeFromBCI(BCI, Result, DL);
+ return false;
+ }
+
+ /// BitCastInst specialization to compute the vector information.
+ ///
+ /// \param BCI BitCastInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0));
+
+ if (!Op)
+ return false;
+
+ FixedVectorType *VTy = dyn_cast<FixedVectorType>(Op->getType());
+ if (!VTy)
+ return false;
+
+ // We can only handle casts from vectors with fewer, larger elements to
+ // vectors with more, smaller elements.
+ if (Result.VTy->getNumElements() % VTy->getNumElements())
+ return false;
+
+ unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements();
+ unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType());
+ unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType());
+
+ if (NewSize * Factor != OldSize)
+ return false;
+
+ VectorInfo Old(VTy);
+ if (!compute(Op, Old, DL))
+ return false;
+
+ for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) {
+ for (unsigned j = 0; j < Factor; j++) {
+ Result.EI[i + j] =
+ ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize,
+ j == 0 ? Old.EI[i / Factor].LI : nullptr);
+ }
+ }
+
+ Result.BB = Old.BB;
+ Result.PV = Old.PV;
+ Result.LIs.insert(Old.LIs.begin(), Old.LIs.end());
+ Result.Is.insert(Old.Is.begin(), Old.Is.end());
+ Result.Is.insert(BCI);
+ Result.SVI = nullptr;
+
+ return true;
+ }
+
+ /// ShuffleVectorInst specialization to compute vector information.
+ ///
+ /// \param SVI ShuffleVectorInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// Compute the left and the right side vector information and merge them by
+ /// applying the shuffle operation. This function also ensures that the left
+ /// and right side have compatible loads. This means that all loads are
+ /// within the same basic block and are based on the same pointer.
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
+ const DataLayout &DL) {
+ FixedVectorType *ArgTy =
+ cast<FixedVectorType>(SVI->getOperand(0)->getType());
+
+ // Compute the left hand vector information.
+ VectorInfo LHS(ArgTy);
+ if (!compute(SVI->getOperand(0), LHS, DL))
+ LHS.BB = nullptr;
+
+ // Compute the right hand vector information.
+ VectorInfo RHS(ArgTy);
+ if (!compute(SVI->getOperand(1), RHS, DL))
+ RHS.BB = nullptr;
+
+ // Neither operand produced sensible results?
+ if (!LHS.BB && !RHS.BB)
+ return false;
+ // Only RHS produced sensible results?
+ else if (!LHS.BB) {
+ Result.BB = RHS.BB;
+ Result.PV = RHS.PV;
+ }
+ // Only LHS produced sensible results?
+ else if (!RHS.BB) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results?
+ else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results but they are incompatible.
+ else {
+ return false;
+ }
+
+ // Merge and apply the operation on the offset information.
+ if (LHS.BB) {
+ Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
+ Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
+ }
+ if (RHS.BB) {
+ Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
+ Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
+ }
+ Result.Is.insert(SVI);
+ Result.SVI = SVI;
+
+ int j = 0;
+ for (int i : SVI->getShuffleMask()) {
+ assert((i < 2 * (signed)ArgTy->getNumElements()) &&
+ "Invalid ShuffleVectorInst (index out of bounds)");
+
+ if (i < 0)
+ Result.EI[j] = ElementInfo();
+ else if (i < (signed)ArgTy->getNumElements()) {
+ if (LHS.BB)
+ Result.EI[j] = LHS.EI[i];
+ else
+ Result.EI[j] = ElementInfo();
+ } else {
+ if (RHS.BB)
+ Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()];
+ else
+ Result.EI[j] = ElementInfo();
+ }
+ j++;
+ }
+
+ return true;
+ }
+
+ /// LoadInst specialization to compute vector information.
+ ///
+ /// This function also acts as abort condition to the recursion.
+ ///
+ /// \param LI LoadInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromLI(LoadInst *LI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Value *BasePtr;
+ Polynomial Offset;
+
+ if (LI->isVolatile())
+ return false;
+
+ if (LI->isAtomic())
+ return false;
+
+ // Get the base polynomial
+ computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL);
+
+ Result.BB = LI->getParent();
+ Result.PV = BasePtr;
+ Result.LIs.insert(LI);
+ Result.Is.insert(LI);
+
+ for (unsigned i = 0; i < Result.getDimension(); i++) {
+ Value *Idx[2] = {
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
+ };
+ int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, ArrayRef(Idx, 2));
+ Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
+ }
+
+ return true;
+ }
+
+ /// Recursively compute polynomial of a value.
+ ///
+ /// \param BO Input binary operation
+ /// \param Result Result polynomial
+ static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) {
+ Value *LHS = BO.getOperand(0);
+ Value *RHS = BO.getOperand(1);
+
+ // Find the RHS Constant if any
+ ConstantInt *C = dyn_cast<ConstantInt>(RHS);
+ if ((!C) && BO.isCommutative()) {
+ C = dyn_cast<ConstantInt>(LHS);
+ if (C)
+ std::swap(LHS, RHS);
+ }
+
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.add(C->getValue());
+ return;
+
+ case Instruction::LShr:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.lshr(C->getValue());
+ return;
+
+ default:
+ break;
+ }
+
+ Result = Polynomial(&BO);
+ }
+
+ /// Recursively compute polynomial of a value
+ ///
+ /// \param V input value
+ /// \param Result result polynomial
+ static void computePolynomial(Value &V, Polynomial &Result) {
+ if (auto *BO = dyn_cast<BinaryOperator>(&V))
+ computePolynomialBinOp(*BO, Result);
+ else
+ Result = Polynomial(&V);
+ }
+
+ /// Compute the Polynomial representation of a Pointer type.
+ ///
+ /// \param Ptr input pointer value
+ /// \param Result result polynomial
+ /// \param BasePtr pointer the polynomial is based on
+ /// \param DL Datalayout of the target machine
+ static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result,
+ Value *&BasePtr,
+ const DataLayout &DL) {
+ // Not a pointer type? Return an undefined polynomial
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType());
+ if (!PtrTy) {
+ Result = Polynomial();
+ BasePtr = nullptr;
+ return;
+ }
+ unsigned PointerBits =
+ DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
+
+ // Look through bitcasts; for any other cast, use Ptr itself with a zero polynomial.
+ if (isa<CastInst>(&Ptr)) {
+ CastInst &CI = *cast<CastInst>(&Ptr);
+ switch (CI.getOpcode()) {
+ case Instruction::BitCast:
+ computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL);
+ break;
+ default:
+ BasePtr = &Ptr;
+ Result = Polynomial(PointerBits, 0);
+ break;
+ }
+ }
+ // Resolve GetElementPtrInst.
+ else if (isa<GetElementPtrInst>(&Ptr)) {
+ GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr);
+
+ APInt BaseOffset(PointerBits, 0);
+
+ // Check if we can compute the Offset with accumulateConstantOffset
+ if (GEP.accumulateConstantOffset(DL, BaseOffset)) {
+ Result = Polynomial(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ return;
+ } else {
+ // Otherwise we allow that the last index operand of the GEP is
+ // non-constant.
+ unsigned idxOperand, e;
+ SmallVector<Value *, 4> Indices;
+ for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e;
+ idxOperand++) {
+ ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand));
+ if (!IDX)
+ break;
+ Indices.push_back(IDX);
+ }
+
+ // It must also be the last operand.
+ if (idxOperand + 1 != e) {
+ Result = Polynomial();
+ BasePtr = nullptr;
+ return;
+ }
+
+ // Compute the polynomial of the index operand.
+ computePolynomial(*GEP.getOperand(idxOperand), Result);
+
+ // Compute base offset from zero based index, excluding the last
+ // variable operand.
+ BaseOffset =
+ DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices);
+
+ // Apply the operations of GEP to the polynomial.
+ unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType());
+ Result.sextOrTrunc(PointerBits);
+ Result.mul(APInt(PointerBits, ResultSize));
+ Result.add(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ }
+ }
+ // All other instructions are handled by using the value as base pointer and
+ // a zero polynomial.
+ else {
+ BasePtr = &Ptr;
+ Result = Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0);
+ }
+ }
+
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const {
+ if (PV)
+ OS << *PV;
+ else
+ OS << "(none)";
+ OS << " + ";
+ for (unsigned i = 0; i < getDimension(); i++)
+ OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs;
+ OS << "]";
+ }
+#endif
+};
+
+} // anonymous namespace
+
+bool InterleavedLoadCombineImpl::findPattern(
+ std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad,
+ unsigned Factor, const DataLayout &DL) {
+ for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
+ unsigned i;
+ // Try to find an interleaved load using the current candidate C0 as its first line.
+ unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());
+
+ // List containing iterators pointing to the VectorInfos of the candidates
+ std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end());
+
+ for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
+ if (C->VTy != C0->VTy)
+ continue;
+ if (C->BB != C0->BB)
+ continue;
+ if (C->PV != C0->PV)
+ continue;
+
+ // Check whether the current candidate matches any of the Factor - 1 remaining lines.
+ for (i = 1; i < Factor; i++) {
+ if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
+ Res[i] = C;
+ }
+ }
+
+ for (i = 1; i < Factor; i++) {
+ if (Res[i] == Candidates.end())
+ break;
+ }
+ if (i == Factor) {
+ Res[0] = C0;
+ break;
+ }
+ }
+
+ if (Res[0] != Candidates.end()) {
+ // Move the result into the output
+ for (unsigned i = 0; i < Factor; i++) {
+ InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
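+
+// For illustration, with Factor = 3 and 4-byte elements (Size = 4) the loop
+// above looks for two further candidates whose first-element offsets are
+// proven to be C0->EI[0].Ofs + 4 and C0->EI[0].Ofs + 8; together with C0 they
+// form the three "lines" of one interleaved load and are spliced into
+// InterleavedLoad in offset order (Res[0], Res[1], Res[2]).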
+
+LoadInst *
+InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
+ assert(!LIs.empty() && "No load instructions given.");
+
+ // All LIs are within the same BB. Select the first for a reference.
+ BasicBlock *BB = (*LIs.begin())->getParent();
+ BasicBlock::iterator FLI = llvm::find_if(
+ *BB, [&LIs](Instruction &I) -> bool { return is_contained(LIs, &I); });
+ assert(FLI != BB->end());
+
+ return cast<LoadInst>(FLI);
+}
+
+bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE) {
+ LLVM_DEBUG(dbgs() << "Checking interleaved load\n");
+
+ // The insertion point is the LoadInst which loads the first values. The
+ // following tests are used to prove that the combined load can be inserted
+ // just before InsertionPoint.
+ LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;
+
+ // Test if the offset is computed
+ if (!InsertionPoint)
+ return false;
+
+ std::set<LoadInst *> LIs;
+ std::set<Instruction *> Is;
+ std::set<Instruction *> SVIs;
+
+ InstructionCost InterleavedCost;
+ InstructionCost InstructionCost = 0;
+ const TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency;
+
+ // Get the interleave factor
+ unsigned Factor = InterleavedLoad.size();
+
+ // Merge all input sets used in analysis
+ for (auto &VI : InterleavedLoad) {
+ // Generate a set of all load instructions to be combined
+ LIs.insert(VI.LIs.begin(), VI.LIs.end());
+
+ // Generate a set of all instructions taking part in the interleaved
+ // load. This set excludes the instructions necessary for the
+ // polynomial construction.
+ Is.insert(VI.Is.begin(), VI.Is.end());
+
+ // Generate the set of the final ShuffleVectorInst.
+ SVIs.insert(VI.SVI);
+ }
+
+ // There is nothing to combine.
+ if (LIs.size() < 2)
+ return false;
+
+ // Test if all participating instructions will be dead after the
+ // transformation. If intermediate results are used, no performance gain can
+ // be expected. Also sum the cost of the instructions being left dead.
+ for (const auto &I : Is) {
+ // Compute the old cost
+ InstructionCost += TTI.getInstructionCost(I, CostKind);
+
+ // The final SVIs are allowed not to be dead, all uses will be replaced
+ if (SVIs.find(I) != SVIs.end())
+ continue;
+
+ // If there are users outside the set to be eliminated, we abort the
+ // transformation. No gain can be expected.
+ for (auto *U : I->users()) {
+ if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
+ return false;
+ }
+ }
+
+ // We need to have a valid cost in order to proceed.
+ if (!InstructionCost.isValid())
+ return false;
+
+ // We know that all LoadInst are within the same BB. This guarantees that
+ // either everything or nothing is loaded.
+ LoadInst *First = findFirstLoad(LIs);
+
+ // To be sure that the loads can be combined, iterate over all loads and test
+ // that the corresponding defining access dominates the first LI. This
+ // guarantees that there are no aliasing stores in between the loads.
+ auto FMA = MSSA.getMemoryAccess(First);
+ for (auto *LI : LIs) {
+ auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
+ if (!MSSA.dominates(MADef, FMA))
+ return false;
+ }
+ assert(!LIs.empty() && "There are no LoadInst to combine");
+
+ // It is necessary that insertion point dominates all final ShuffleVectorInst.
+ for (auto &VI : InterleavedLoad) {
+ if (!DT.dominates(InsertionPoint, VI.SVI))
+ return false;
+ }
+
+ // All checks are done. Add instructions detectable by InterleavedAccessPass.
+ // The old instructions are left dead.
+ IRBuilder<> Builder(InsertionPoint);
+ Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
+ unsigned ElementsPerSVI =
+ cast<FixedVectorType>(InterleavedLoad.front().SVI->getType())
+ ->getNumElements();
+ FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI);
+
+ auto Indices = llvm::to_vector<4>(llvm::seq<unsigned>(0, Factor));
+ InterleavedCost = TTI.getInterleavedMemoryOpCost(
+ Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
+ InsertionPoint->getPointerAddressSpace(), CostKind);
+
+ if (InterleavedCost >= InstructionCost) {
+ return false;
+ }
+
+ // Create a pointer cast for the wide load.
+ auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
+ ILTy->getPointerTo(),
+ "interleaved.wide.ptrcast");
+
+ // Create the wide load and update the MemorySSA.
+ auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlign(),
+ "interleaved.wide.load");
+ auto MSSAU = MemorySSAUpdater(&MSSA);
+ MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
+ LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
+ MSSAU.insertUse(MSSALoad, /*RenameUses=*/ true);
+
+ // Create the final SVIs and replace all uses.
+ int i = 0;
+ for (auto &VI : InterleavedLoad) {
+ SmallVector<int, 4> Mask;
+ for (unsigned j = 0; j < ElementsPerSVI; j++)
+ Mask.push_back(i + j * Factor);
+
+ Builder.SetInsertPoint(VI.SVI);
+ auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle");
+ VI.SVI->replaceAllUsesWith(SVI);
+ i++;
+ }
+
+ NumInterleavedLoadCombine++;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI)
+ << "Load interleaved combined with factor "
+ << ore::NV("Factor", Factor);
+ });
+
+ return true;
+}
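+
+// A rough sketch of the IR emitted above for Factor = 2 with <4 x i32> results
+// (the pointer %base, the element type and the alignment are illustrative):
+//
+//   %interleaved.wide.load = load <8 x i32>, ptr %base, align 4
+//   %interleaved.shuffle   = shufflevector <8 x i32> %interleaved.wide.load,
+//                              <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+//   %interleaved.shuffle1  = shufflevector <8 x i32> %interleaved.wide.load,
+//                              <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+//
+// which is exactly the DE-interleave pattern that InterleavedAccessPass
+// recognizes and lowers to a target intrinsic.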
+
+bool InterleavedLoadCombineImpl::run() {
+ OptimizationRemarkEmitter ORE(&F);
+ bool changed = false;
+ unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor();
+
+ auto &DL = F.getParent()->getDataLayout();
+
+ // Start with the highest factor to avoid combining and recombining.
+ for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {
+ std::list<VectorInfo> Candidates;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) {
+ // We don't support scalable vectors in this pass.
+ if (isa<ScalableVectorType>(SVI->getType()))
+ continue;
+
+ Candidates.emplace_back(cast<FixedVectorType>(SVI->getType()));
+
+ if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {
+ Candidates.pop_back();
+ continue;
+ }
+
+ if (!Candidates.back().isInterleaved(Factor, DL)) {
+ Candidates.pop_back();
+ }
+ }
+ }
+ }
+
+ std::list<VectorInfo> InterleavedLoad;
+ while (findPattern(Candidates, InterleavedLoad, Factor, DL)) {
+ if (combine(InterleavedLoad, ORE)) {
+ changed = true;
+ } else {
+ // Remove the first element of the Interleaved Load but put the others
+ // back on the list and continue searching
+ Candidates.splice(Candidates.begin(), InterleavedLoad,
+ std::next(InterleavedLoad.begin()),
+ InterleavedLoad.end());
+ }
+ InterleavedLoad.clear();
+ }
+ }
+
+ return changed;
+}
+
+namespace {
+/// This pass combines interleaved loads into a pattern detectable by
+/// InterleavedAccessPass.
+struct InterleavedLoadCombine : public FunctionPass {
+ static char ID;
+
+ InterleavedLoadCombine() : FunctionPass(ID) {
+ initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Interleaved Load Combine Pass";
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (DisableInterleavedLoadCombine)
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()
+ << "\n");
+
+ return InterleavedLoadCombineImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ TPC->getTM<TargetMachine>())
+ .run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+};
+} // anonymous namespace
+
+char InterleavedLoadCombine::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+
+FunctionPass *llvm::createInterleavedLoadCombinePass() {
+  return new InterleavedLoadCombine();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 000000000000..61920a0e04ab
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,474 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// This function is used when we want to lower an intrinsic call to a call of
+/// an external function. This handles hard cases such as when there was already
+/// a prototype for the external function, but that prototype doesn't match the
+/// arguments we expect to pass in.
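+/// The new call takes over the original call's name and all of its uses; the
+/// caller is still responsible for erasing the original instruction.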
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+  Module *M = CI->getModule();
+  // Check whether the program already contains a function with this name and
+  // get or insert the declaration now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ FunctionCallee FCache =
+ M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+/// Emit the code to lower bswap of V before the specified instruction IP.
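+/// For example, bswap of the i32 value 0x11223344 yields 0x44332211; the
+/// expansion below builds the result from shift, mask, and or operations.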
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getScalarSizeInBits();
+
+ IRBuilder<> Builder(IP);
+
+  switch (BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+    Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+                                     "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(V->getType(), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(V->getType(), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(V->getType(),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(V->getType(),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(V->getType(),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(V->getType(),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(V->getType(),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(V->getType(),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// Emit the code to lower ctpop of V before the specified instruction IP.
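+/// The expansion uses the classic parallel (SWAR) popcount: adjacent bit pairs
+/// are summed under the 0x55... mask, then nibbles under 0x33..., and so on,
+/// handling at most 64 bits per iteration of the outer loop below.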
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+      Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// Emit the code to lower ctlz of V before the specified instruction IP.
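+/// The shifts below smear the highest set bit into every lower position, so
+/// the bitwise-not has ones exactly in the leading-zero positions and its
+/// population count equals the ctlz; e.g. ctlz(i32 0x00F00000) == 8.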
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
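+/// Replace a floating-point intrinsic call with a call to the corresponding
+/// libm function, selected by the type of the first operand; e.g. an f32
+/// llvm.sqrt call is lowered to a call to sqrtf.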
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CI->arg_begin(), CI->arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CI->arg_begin(), CI->arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CI->arg_begin(), CI->arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
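+    // For example, for X == 0b01001000, ~X & (X - 1) == 0b00000111, whose
+    // population count is 3 == cttz(X).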
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::get_dynamic_area_offset:
+ errs() << "WARNING: this target does not support the custom llvm.get."
+ "dynamic.area.offset. It is being lowered to a constant 0\n";
+ // Just lower it to a constant 0 because for most targets
+ // @llvm.get.dynamic.area.offset is lowered to zero.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
+ break;
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(CI->getType())));
+ break;
+ case Intrinsic::addressofreturnaddress:
+ errs() << "WARNING: this target does not support the "
+ "llvm.addressofreturnaddress intrinsic.\n";
+ CI->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_label:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Just drop the annotation, but forward the value
+ CI->replaceAllUsesWith(CI->getOperand(0));
+ break;
+
+ case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::var_annotation:
+ break; // Strip out these intrinsics
+
+ case Intrinsic::memcpy: {
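+    // Lower to a plain libc memcpy call; the length operand is canonicalized
+    // to the target's pointer-sized integer type and the volatile flag is
+    // dropped.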
+ Type *IntPtr = DL.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ Type *IntPtr = DL.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ Value *Op0 = CI->getArgOperand(0);
+ Type *IntPtr = DL.getIntPtrType(Op0->getType());
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = Op0;
+    // Extend the i8 value operand to i32 to match libc memset's int parameter.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::sin: {
+ ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl");
+ break;
+ }
+ case Intrinsic::cos: {
+ ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl");
+ break;
+ }
+ case Intrinsic::floor: {
+ ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl");
+ break;
+ }
+ case Intrinsic::ceil: {
+ ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill");
+ break;
+ }
+ case Intrinsic::trunc: {
+ ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl");
+ break;
+ }
+ case Intrinsic::round: {
+ ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl");
+ break;
+ }
+ case Intrinsic::roundeven: {
+ ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl");
+ break;
+ }
+ case Intrinsic::copysign: {
+ ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
+ break;
+ }
+ case Intrinsic::get_rounding:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->arg_size() != 1 || CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ Module *M = CI->getModule();
+ Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
new file mode 100644
index 000000000000..f1953c363b59
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
@@ -0,0 +1,233 @@
+//===- JMCInstrumenter.cpp - JMC Instrumentation --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// JMCInstrumenter pass:
+// - instrument each function with a call to __CheckForDebuggerJustMyCode. The
+//   sole argument is a per-file flag defined in the .msvcjmc section; each
+//   flag is 1 byte, initialized to 1.
+// - create the dummy COMDAT function __JustMyCode_Default to prevent a link
+//   error if __CheckForDebuggerJustMyCode is not available.
+// - For MSVC:
+// add "/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default" to
+// "llvm.linker.options"
+// For ELF:
+// Rename __JustMyCode_Default to __CheckForDebuggerJustMyCode and mark it as
+// weak symbol.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "jmc-instrument"
+
+namespace {
+struct JMCInstrumenter : public ModulePass {
+ static char ID;
+ JMCInstrumenter() : ModulePass(ID) {
+ initializeJMCInstrumenterPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+};
+char JMCInstrumenter::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(
+ JMCInstrumenter, DEBUG_TYPE,
+ "Instrument function entry with call to __CheckForDebuggerJustMyCode",
+ false, false)
+
+ModulePass *llvm::createJMCInstrumenterPass() { return new JMCInstrumenter(); }
+
+namespace {
+const char CheckFunctionName[] = "__CheckForDebuggerJustMyCode";
+
+std::string getFlagName(DISubprogram &SP, bool UseX86FastCall) {
+ // absolute windows path: windows_backslash
+ // relative windows backslash path: windows_backslash
+ // relative windows slash path: posix
+ // absolute posix path: posix
+ // relative posix path: posix
+ sys::path::Style PathStyle =
+ has_root_name(SP.getDirectory(), sys::path::Style::windows_backslash) ||
+ SP.getDirectory().contains("\\") ||
+ SP.getFilename().contains("\\")
+ ? sys::path::Style::windows_backslash
+ : sys::path::Style::posix;
+  // Best effort path normalization. This is to guarantee a unique flag symbol
+ // is produced for the same directory. Some builds may want to use relative
+ // paths, or paths with a specific prefix (see the -fdebug-compilation-dir
+ // flag), so only hash paths in debuginfo. Don't expand them to absolute
+ // paths.
+ SmallString<256> FilePath(SP.getDirectory());
+ sys::path::append(FilePath, PathStyle, SP.getFilename());
+ sys::path::native(FilePath, PathStyle);
+ sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, PathStyle);
+
+  // The naming convention for the flag name is __<hash>_<file name>, with '.'
+  // in <file name> replaced by '@'. For example, C:\file.any.c would get the
+  // flag __D032E919_file@any@c. The naming convention matches MSVC's format,
+  // but the match is not required for JMC to work. The hashing function used
+  // here is different from MSVC's.
+
+ std::string Suffix;
+ for (auto C : sys::path::filename(FilePath, PathStyle))
+ Suffix.push_back(C == '.' ? '@' : C);
+
+ sys::path::remove_filename(FilePath, PathStyle);
+ return (UseX86FastCall ? "_" : "__") +
+ utohexstr(djbHash(FilePath), /*LowerCase=*/false,
+ /*Width=*/8) +
+ "_" + Suffix;
+}
+
+void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) {
+ Module &M = *GV.getParent();
+ DICompileUnit *CU = SP.getUnit();
+ assert(CU);
+ DIBuilder DB(M, false, CU);
+
+ auto *DType =
+ DB.createBasicType("unsigned char", 8, dwarf::DW_ATE_unsigned_char,
+ llvm::DINode::FlagArtificial);
+
+ auto *DGVE = DB.createGlobalVariableExpression(
+ CU, GV.getName(), /*LinkageName=*/StringRef(), SP.getFile(),
+ /*LineNo=*/0, DType, /*IsLocalToUnit=*/true, /*IsDefined=*/true);
+ GV.addMetadata(LLVMContext::MD_dbg, *DGVE);
+ DB.finalize();
+}
+
+FunctionType *getCheckFunctionType(LLVMContext &Ctx) {
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx);
+ return FunctionType::get(VoidTy, VoidPtrTy, false);
+}
+
+Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) {
+ LLVMContext &Ctx = M.getContext();
+ const char *DefaultCheckFunctionName =
+ UseX86FastCall ? "_JustMyCode_Default" : "__JustMyCode_Default";
+ // Create the function.
+ Function *DefaultCheckFunc =
+ Function::Create(getCheckFunctionType(Ctx), GlobalValue::ExternalLinkage,
+ DefaultCheckFunctionName, &M);
+ DefaultCheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ DefaultCheckFunc->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall)
+ DefaultCheckFunc->addParamAttr(0, Attribute::InReg);
+
+ BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", DefaultCheckFunc);
+ ReturnInst::Create(Ctx, EntryBB);
+ return DefaultCheckFunc;
+}
+} // namespace
+
+bool JMCInstrumenter::runOnModule(Module &M) {
+ bool Changed = false;
+ LLVMContext &Ctx = M.getContext();
+ Triple ModuleTriple(M.getTargetTriple());
+ bool IsMSVC = ModuleTriple.isKnownWindowsMSVCEnvironment();
+ bool IsELF = ModuleTriple.isOSBinFormatELF();
+ assert((IsELF || IsMSVC) && "Unsupported triple for JMC");
+ bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86;
+ const char *const FlagSymbolSection = IsELF ? ".data.just.my.code" : ".msvcjmc";
+
+ GlobalValue *CheckFunction = nullptr;
+ DenseMap<DISubprogram *, Constant *> SavedFlags(8);
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ auto *SP = F.getSubprogram();
+ if (!SP)
+ continue;
+
+ Constant *&Flag = SavedFlags[SP];
+ if (!Flag) {
+ std::string FlagName = getFlagName(*SP, UseX86FastCall);
+ IntegerType *FlagTy = Type::getInt8Ty(Ctx);
+ Flag = M.getOrInsertGlobal(FlagName, FlagTy, [&] {
+ // FIXME: Put the GV in comdat and have linkonce_odr linkage to save
+ // .msvcjmc section space? maybe not worth it.
+ GlobalVariable *GV = new GlobalVariable(
+ M, FlagTy, /*isConstant=*/false, GlobalValue::InternalLinkage,
+ ConstantInt::get(FlagTy, 1), FlagName);
+ GV->setSection(FlagSymbolSection);
+ GV->setAlignment(Align(1));
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ attachDebugInfo(*GV, *SP);
+ return GV;
+ });
+ }
+
+ if (!CheckFunction) {
+ Function *DefaultCheckFunc =
+ createDefaultCheckFunction(M, UseX86FastCall);
+ if (IsELF) {
+ DefaultCheckFunc->setName(CheckFunctionName);
+ DefaultCheckFunc->setLinkage(GlobalValue::WeakAnyLinkage);
+ CheckFunction = DefaultCheckFunc;
+ } else {
+ assert(!M.getFunction(CheckFunctionName) &&
+ "JMC instrument more than once?");
+ auto *CheckFunc = cast<Function>(
+ M.getOrInsertFunction(CheckFunctionName, getCheckFunctionType(Ctx))
+ .getCallee());
+ CheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ CheckFunc->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall) {
+ CheckFunc->setCallingConv(CallingConv::X86_FastCall);
+ CheckFunc->addParamAttr(0, Attribute::InReg);
+ }
+ CheckFunction = CheckFunc;
+
+ StringRef DefaultCheckFunctionName = DefaultCheckFunc->getName();
+ appendToUsed(M, {DefaultCheckFunc});
+ Comdat *C = M.getOrInsertComdat(DefaultCheckFunctionName);
+ C->setSelectionKind(Comdat::Any);
+ DefaultCheckFunc->setComdat(C);
+ // Add a linker option /alternatename to set the default implementation
+ // for the check function.
+ // https://devblogs.microsoft.com/oldnewthing/20200731-00/?p=104024
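+        // For example, for x86-64 MSVC this appends the operand
+        //   !{!"/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default"}
+        // to the !llvm.linker.options named metadata.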
+ std::string AltOption = std::string("/alternatename:") +
+ CheckFunctionName + "=" +
+ DefaultCheckFunctionName.str();
+ llvm::Metadata *Ops[] = {llvm::MDString::get(Ctx, AltOption)};
+ MDTuple *N = MDNode::get(Ctx, Ops);
+ M.getOrInsertNamedMetadata("llvm.linker.options")->addOperand(N);
+ }
+ }
+    // FIXME: it would be nice to make CI a scheduling boundary, although in
+ // practice it does not matter much.
+ auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction,
+ {Flag}, "", &*F.begin()->getFirstInsertionPt());
+ CI->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall) {
+ CI->setCallingConv(CallingConv::X86_FastCall);
+ CI->addParamAttr(0, Attribute::InReg);
+ }
+
+ Changed = true;
+ }
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp b/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp
new file mode 100644
index 000000000000..bffa02ca8afd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp
@@ -0,0 +1,111 @@
+//===---- KCFI.cpp - Implements Kernel Control-Flow Integrity (KCFI) ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements Kernel Control-Flow Integrity (KCFI) indirect call
+// check lowering. For each call instruction with a cfi-type attribute, it
+// emits an arch-specific check before the call, and bundles the check and
+// the call to prevent unintentional modifications.
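+// The emitted check loads the type hash stored in front of the call target's
+// entry point, compares it against the expected hash recorded on the call
+// site, and traps on a mismatch.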
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "kcfi"
+#define KCFI_PASS_NAME "Insert KCFI indirect call checks"
+
+STATISTIC(NumKCFIChecksAdded, "Number of indirect call checks added");
+
+namespace {
+class KCFI : public MachineFunctionPass {
+public:
+ static char ID;
+
+ KCFI() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return KCFI_PASS_NAME; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// Machine instruction info used throughout the class.
+ const TargetInstrInfo *TII = nullptr;
+
+ /// Target lowering for arch-specific parts.
+ const TargetLowering *TLI = nullptr;
+
+ /// Emits a KCFI check before an indirect call.
+ /// \returns true if the check was added and false otherwise.
+ bool emitCheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator I) const;
+};
+
+char KCFI::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS(KCFI, DEBUG_TYPE, KCFI_PASS_NAME, false, false)
+
+FunctionPass *llvm::createKCFIPass() { return new KCFI(); }
+
+bool KCFI::emitCheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator MBBI) const {
+ assert(TII && "Target instruction info was not initialized");
+ assert(TLI && "Target lowering was not initialized");
+
+ // If the call instruction is bundled, we can only emit a check safely if
+ // it's the first instruction in the bundle.
+ if (MBBI->isBundled() && !std::prev(MBBI)->isBundle())
+ report_fatal_error("Cannot emit a KCFI check for a bundled call");
+
+ // Emit a KCFI check for the call instruction at MBBI. The implementation
+ // must unfold memory operands if applicable.
+ MachineInstr *Check = TLI->EmitKCFICheck(MBB, MBBI, TII);
+
+ // Clear the original call's CFI type.
+ assert(MBBI->isCall() && "Unexpected instruction type");
+ MBBI->setCFIType(*MBB.getParent(), 0);
+
+ // If not already bundled, bundle the check and the call to prevent
+ // further changes.
+ if (!MBBI->isBundled())
+ finalizeBundle(MBB, Check->getIterator(), std::next(MBBI->getIterator()));
+
+ ++NumKCFIChecksAdded;
+ return true;
+}
+
+bool KCFI::runOnMachineFunction(MachineFunction &MF) {
+ const Module *M = MF.getMMI().getModule();
+ if (!M->getModuleFlag("kcfi"))
+ return false;
+
+ const auto &SubTarget = MF.getSubtarget();
+ TII = SubTarget.getInstrInfo();
+ TLI = SubTarget.getTargetLowering();
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ // Use instr_iterator because we don't want to skip bundles.
+ for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+ MIE = MBB.instr_end();
+ MII != MIE; ++MII) {
+ if (MII->isCall() && MII->getCFIType())
+ Changed |= emitCheck(MBB, MII);
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 000000000000..d02ec1db1165
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,301 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ EnableTrapUnreachable("trap-unreachable", cl::Hidden,
+ cl::desc("Enable generating trap for unreachable"));
+
+void LLVMTargetMachine::initAsmInfo() {
+ MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ assert(MRI && "Unable to create reg info");
+ MII.reset(TheTarget.createMCInstrInfo());
+ assert(MII && "Unable to create instruction info");
+ // FIXME: Having an MCSubtargetInfo on the target machine is a hack due
+ // to some backends having subtarget feature dependent module level
+ // code generation. This is similar to the hack in the AsmPrinter for
+ // module level assembly etc.
+ STI.reset(TheTarget.createMCSubtargetInfo(
+ getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
+ assert(STI && "Unable to create subtarget info");
+
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
+ *MRI, getTargetTriple().str(), Options.MCOptions);
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(TmpAsmInfo && "MCAsmInfo not initialized. "
+ "Make sure you include the correct TargetSelect.h"
+ "and that InitializeAllTargetMCs() is being invoked!");
+
+ if (Options.BinutilsVersion.first > 0)
+ TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion);
+
+ if (Options.DisableIntegratedAS) {
+ TmpAsmInfo->setUseIntegratedAssembler(false);
+    // If the integrated assembler is explicitly disabled, we can't use it
+    // for inline asm either.
+ TmpAsmInfo->setParseInlineAsmUsingAsmParser(false);
+ }
+
+ TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
+
+ TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections);
+
+ TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
+
+ if (Options.ExceptionModel != ExceptionHandling::None)
+ TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
+
+ AsmInfo.reset(TmpAsmInfo);
+}
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ StringRef DataLayoutString,
+ const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
+ this->RM = RM;
+ this->CMModel = CM;
+ this->OptLevel = OL;
+
+ if (EnableTrapUnreachable)
+ this->Options.TrapUnreachable = true;
+}
+
+TargetTransformInfo
+LLVMTargetMachine::getTargetTransformInfo(const Function &F) const {
+ return TargetTransformInfo(BasicTTIImpl(this, F));
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static TargetPassConfig *
+addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
+ bool DisableVerify,
+ MachineModuleInfoWrapperPass &MMIWP) {
+ // Targets may override createPassConfig to provide a target-specific
+ // subclass.
+ TargetPassConfig *PassConfig = TM.createPassConfig(PM);
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+ PM.add(PassConfig);
+ PM.add(&MMIWP);
+
+ if (PassConfig->addISelPasses())
+ return nullptr;
+ PassConfig->addMachinePasses();
+ PassConfig->setInitialized();
+ return PassConfig;
+}
+
+bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
+ raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType,
+ MCContext &Context) {
+ Expected<std::unique_ptr<MCStreamer>> MCStreamerOrErr =
+ createMCStreamer(Out, DwoOut, FileType, Context);
+ if (auto Err = MCStreamerOrErr.takeError())
+ return true;
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(*MCStreamerOrErr));
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+ return false;
+}
+
+Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+ MCContext &Context) {
+ if (Options.MCOptions.MCSaveTempLabels)
+ Context.setAllowTemporaryLabels(false);
+
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ const MCInstrInfo &MII = *getMCInstrInfo();
+
+ std::unique_ptr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
+ getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI);
+
+ // Create a code emitter if asked to show the encoding.
+ std::unique_ptr<MCCodeEmitter> MCE;
+ if (Options.MCOptions.ShowMCEncoding)
+ MCE.reset(getTarget().createMCCodeEmitter(MII, Context));
+
+ bool UseDwarfDirectory = false;
+ switch (Options.MCOptions.MCUseDwarfDirectory) {
+ case MCTargetOptions::DisableDwarfDirectory:
+ UseDwarfDirectory = false;
+ break;
+ case MCTargetOptions::EnableDwarfDirectory:
+ UseDwarfDirectory = true;
+ break;
+ case MCTargetOptions::DefaultDwarfDirectory:
+ UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault();
+ break;
+ }
+
+ std::unique_ptr<MCAsmBackend> MAB(
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
+ auto FOut = std::make_unique<formatted_raw_ostream>(Out);
+ MCStreamer *S = getTarget().createAsmStreamer(
+ Context, std::move(FOut), Options.MCOptions.AsmVerbose,
+ UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB),
+ Options.MCOptions.ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, Context);
+ if (!MCE)
+ return make_error<StringError>("createMCCodeEmitter failed",
+ inconvertibleErrorCode());
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
+ if (!MAB)
+ return make_error<StringError>("createMCAsmBackend failed",
+ inconvertibleErrorCode());
+
+ Triple T(getTargetTriple().str());
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(
+ T, Context, std::unique_ptr<MCAsmBackend>(MAB),
+ DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut)
+ : MAB->createObjectWriter(Out),
+ std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ true));
+ break;
+ }
+ case CGFT_Null:
+    // The Null output is intended for performance analysis and testing, not
+    // for real users.
+ AsmStreamer.reset(getTarget().createNullStreamer(Context));
+ break;
+ }
+
+ return std::move(AsmStreamer);
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType, bool DisableVerify,
+ MachineModuleInfoWrapperPass *MMIWP) {
+ // Add common CodeGen passes.
+ if (!MMIWP)
+ MMIWP = new MachineModuleInfoWrapperPass(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
+ if (!PassConfig)
+ return true;
+
+ if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+ if (addAsmPrinter(PM, Out, DwoOut, FileType, MMIWP->getMMI().getContext()))
+ return true;
+ } else {
+ // MIR printing is redundant with -filetype=null.
+ if (FileType != CGFT_Null)
+ PM.add(createPrintMIRPass(Out));
+ }
+
+ PM.add(createFreeMachineFunctionPass());
+ return false;
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer, which can be
+/// used to build a custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
+ raw_pwrite_stream &Out,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
+ if (!PassConfig)
+ return true;
+ assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
+ "Cannot emit MC with limited codegen pipeline");
+
+ Ctx = &MMIWP->getMMI().getContext();
+ // libunwind is unable to load compact unwind dynamically, so we must generate
+ // DWARF unwind info for the JIT.
+ Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always;
+ if (Options.MCOptions.MCSaveTempLabels)
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ std::unique_ptr<MCCodeEmitter> MCE(
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx));
+ std::unique_ptr<MCAsmBackend> MAB(
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
+ if (!MCE || !MAB)
+ return true;
+
+ const Triple &T = getTargetTriple();
+ std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
+ T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE),
+ STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ true));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+ PM.add(createFreeMachineFunctionPass());
+
+ return false; // success!
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 000000000000..fab6b8d10a33
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,147 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "scheduler"
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return RHSNum < LHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = nullptr;
+ for (const SDep &P : SU->Preds) {
+ SUnit &Pred = *P.getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return nullptr;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
+ ++NumNodesBlocking;
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push_back(SU);
+}
+
+
+// scheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
+ for (const SDep &Succ : SU->Succs)
+ AdjustPriorityOfUnscheduledPreds(Succ.getSUnit());
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return nullptr;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != std::prev(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
+ assert(I != Queue.end() && "Queue doesn't contain the SU being removed!");
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ dbgs() << "Latency Priority Queue\n";
+ dbgs() << " Number of Queue Entries: " << Queue.size() << "\n";
+ for (const SUnit *SU : Queue) {
+ dbgs() << " ";
+ DAG->dumpNode(*SU);
+ }
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
new file mode 100644
index 000000000000..39b44b917d9e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -0,0 +1,98 @@
+///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===//
+///
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// This is an alternative analysis pass to MachineBlockFrequencyInfo. The
+/// difference is that with this pass the block frequencies are not computed
+/// when the analysis pass is executed but rather when the BFI result is
+/// explicitly requested by the analysis client.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-machine-block-freq"
+
+INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+
+char LazyMachineBlockFrequencyInfoPass::ID = 0;
+
+LazyMachineBlockFrequencyInfoPass::LazyMachineBlockFrequencyInfoPass()
+ : MachineFunctionPass(ID) {
+ initializeLazyMachineBlockFrequencyInfoPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS,
+ const Module *M) const {
+ getBFI().print(OS, M);
+}
+
+void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LazyMachineBlockFrequencyInfoPass::releaseMemory() {
+ OwnedMBFI.reset();
+ OwnedMLI.reset();
+ OwnedMDT.reset();
+}
+
+MachineBlockFrequencyInfo &
+LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
+ auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (MBFI) {
+ LLVM_DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
+ return *MBFI;
+ }
+
+ auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ LLVM_DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
+ LLVM_DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
+
+ if (!MLI) {
+ LLVM_DEBUG(dbgs() << "Building LoopInfo on the fly\n");
+ // First create a dominator tree.
+ LLVM_DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");
+
+ if (!MDT) {
+ LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n");
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(*MF);
+ MDT = OwnedMDT.get();
+ }
+
+ // Generate LoopInfo from it.
+ OwnedMLI = std::make_unique<MachineLoopInfo>();
+ OwnedMLI->getBase().analyze(MDT->getBase());
+ MLI = OwnedMLI.get();
+ }
+
+ OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>();
+ OwnedMBFI->calculate(*MF, MBPI, *MLI);
+ return *OwnedMBFI;
+}
+
+bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction(
+ MachineFunction &F) {
+ MF = &F;
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 000000000000..47c19c3d8ec4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,347 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lexicalscopes"
+
+/// reset - Reset the instance so that it's prepared for another function.
+void LexicalScopes::reset() {
+ MF = nullptr;
+ CurrentFnLexicalScope = nullptr;
+ LexicalScopeMap.clear();
+ AbstractScopeMap.clear();
+ InlinedLexicalScopeMap.clear();
+ AbstractScopesList.clear();
+ DominatedBlocks.clear();
+}
+
+/// initialize - Scan machine function and construct lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+ reset();
+ // Don't attempt any lexical scope creation for a NoDebug compile unit.
+ if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return;
+ MF = &Fn;
+ SmallVector<InsnRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+ extractLexicalScopes(MIRanges, MI2ScopeMap);
+ if (CurrentFnLexicalScope) {
+ constructScopeNest(CurrentFnLexicalScope);
+ assignInstructionRanges(MIRanges, MI2ScopeMap);
+ }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scope
+/// for the given machine function.
+void LexicalScopes::extractLexicalScopes(
+ SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+ // Scan each instruction and create scopes. First build working set of scopes.
+ for (const auto &MBB : *MF) {
+ const MachineInstr *RangeBeginMI = nullptr;
+ const MachineInstr *PrevMI = nullptr;
+ const DILocation *PrevDL = nullptr;
+ for (const auto &MInsn : MBB) {
+      // Ignore DBG_VALUE and similar instructions that do not contribute to any
+ // instruction in the output.
+ if (MInsn.isMetaInstruction())
+ continue;
+
+ // Check if instruction has valid location information.
+ const DILocation *MIDL = MInsn.getDebugLoc();
+ if (!MIDL) {
+ PrevMI = &MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = &MInsn;
+ continue;
+ }
+
+ if (RangeBeginMI) {
+        // We have already seen the beginning of an instruction range, and the
+        // current instruction's scope does not match the scope of the first
+        // instruction in that range, so close off the range here before
+        // starting a new one.
+ InsnRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = &MInsn;
+
+ // Reset previous markers.
+ PrevMI = &MInsn;
+ PrevDL = MIDL;
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && PrevDL) {
+ InsnRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ }
+ }
+}
+
+/// findLexicalScope - Find lexical scope, either regular or inlined, for the
+/// given DebugLoc. Return NULL if not found.
+LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
+ DILocalScope *Scope = DL->getScope();
+ if (!Scope)
+ return nullptr;
+
+ // The scope that we were created with could have an extra file - which
+ // isn't what we care about in this case.
+ Scope = Scope->getNonLexicalBlockFileScope();
+
+ if (auto *IA = DL->getInlinedAt()) {
+ auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
+ return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
+ }
+ return findLexicalScope(Scope);
+}
+
+/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+/// not available then create new lexical scope.
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
+ const DILocation *IA) {
+ if (IA) {
+ // Skip scopes inlined from a NoDebug compile unit.
+ if (Scope->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return getOrCreateLexicalScope(IA);
+ // Create an abstract scope for inlined function.
+ getOrCreateAbstractScope(Scope);
+ // Create an inlined scope for inlined function.
+ return getOrCreateInlinedScope(Scope, IA);
+ }
+
+ return getOrCreateRegularScope(Scope);
+}
+
+/// getOrCreateRegularScope - Find or create a regular lexical scope.
+LexicalScope *
+LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
+
+ auto I = LexicalScopeMap.find(Scope);
+ if (I != LexicalScopeMap.end())
+ return &I->second;
+
+ // FIXME: Should the following dyn_cast be DILexicalBlock?
+ LexicalScope *Parent = nullptr;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateLexicalScope(Block->getScope());
+ I = LexicalScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, Scope, nullptr,
+ false)).first;
+
+ if (!Parent) {
+ assert(cast<DISubprogram>(Scope)->describes(&MF->getFunction()));
+ assert(!CurrentFnLexicalScope);
+ CurrentFnLexicalScope = &I->second;
+ }
+
+ return &I->second;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *
+LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
+ const DILocation *InlinedAt) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
+ std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt);
+ auto I = InlinedLexicalScopeMap.find(P);
+ if (I != InlinedLexicalScopeMap.end())
+ return &I->second;
+
+ LexicalScope *Parent;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt);
+ else
+ Parent = getOrCreateLexicalScope(InlinedAt);
+
+ I = InlinedLexicalScopeMap
+ .emplace(std::piecewise_construct, std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope, InlinedAt, false))
+ .first;
+ return &I->second;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *
+LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
+ auto I = AbstractScopeMap.find(Scope);
+ if (I != AbstractScopeMap.end())
+ return &I->second;
+
+ // FIXME: Should the following dyn_cast be DILexicalBlock?
+ LexicalScope *Parent = nullptr;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateAbstractScope(Block->getScope());
+
+ I = AbstractScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, Scope,
+ nullptr, true)).first;
+ if (isa<DISubprogram>(Scope))
+ AbstractScopesList.push_back(&I->second);
+ return &I->second;
+}
+
+/// constructScopeNest - Traverse the Scope tree depth-first, storing
+/// traversal state in WorkStack and recording the depth-first
+/// numbering (setDFSIn, setDFSOut) for edge classification.
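+/// For example (illustrative only): for a function scope R with child scopes
+/// A and B, where C is a child of A, the numbering assigned here is
+/// A: [1, 4], C: [2, 3], B: [5, 6]; one scope dominates another when its
+/// interval encloses the other's (see LexicalScope::dominates).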
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+ assert(Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<std::pair<LexicalScope *, size_t>, 4> WorkStack;
+ WorkStack.push_back(std::make_pair(Scope, 0));
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ auto &ScopePosition = WorkStack.back();
+ LexicalScope *WS = ScopePosition.first;
+ size_t ChildNum = ScopePosition.second++;
+ const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+ if (ChildNum < Children.size()) {
+ auto &ChildScope = Children[ChildNum];
+ WorkStack.push_back(std::make_pair(ChildScope, 0));
+ ChildScope->setDFSIn(++Counter);
+ } else {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes::assignInstructionRanges(
+ SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+ LexicalScope *PrevLexicalScope = nullptr;
+ for (const auto &R : MIRanges) {
+ LexicalScope *S = MI2ScopeMap.lookup(R.first);
+ assert(S && "Lost LexicalScope for a machine instruction!");
+ if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
+ PrevLexicalScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevLexicalScope = S;
+ }
+
+ if (PrevLexicalScope)
+ PrevLexicalScope->closeInsnRange();
+}
+
+/// getMachineBasicBlocks - Populate the given set with the machine basic
+/// blocks that contain machine instructions belonging to the lexical scope
+/// identified by DebugLoc.
+void LexicalScopes::getMachineBasicBlocks(
+ const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
+ assert(MF && "Method called on a uninitialized LexicalScopes object!");
+ MBBs.clear();
+
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return;
+
+ if (Scope == CurrentFnLexicalScope) {
+ for (const auto &MBB : *MF)
+ MBBs.insert(&MBB);
+ return;
+ }
+
+ // The scope ranges can cover multiple basic blocks in each span. Iterate over
+ // all blocks (in the order they are in the function) until we reach the one
+ // containing the end of the span.
+ SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges();
+ for (auto &R : InsnRanges)
+ for (auto CurMBBIt = R.first->getParent()->getIterator(),
+ EndBBIt = std::next(R.second->getParent()->getIterator());
+ CurMBBIt != EndBBIt; CurMBBIt++)
+ MBBs.insert(&*CurMBBIt);
+}
+
+bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
+ assert(MF && "Unexpected uninitialized LexicalScopes object!");
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return false;
+
+ // Current function scope covers all basic blocks in the function.
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
+ return true;
+
+ // Fetch all the blocks in DL's scope. Because the range / block list also
+ // contains any subscopes, any instruction that DL dominates can be found in
+ // the block set.
+ //
+ // Cache the set of fetched blocks to avoid repeatedly recomputing the set in
+ // the LiveDebugValues pass.
+ std::unique_ptr<BlockSetT> &Set = DominatedBlocks[DL];
+ if (!Set) {
+ Set = std::make_unique<BlockSetT>();
+ getMachineBasicBlocks(DL, *Set);
+ }
+ return Set->contains(MBB);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LexicalScope::dump(unsigned Indent) const {
+ raw_ostream &err = dbgs();
+ err.indent(Indent);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ err.indent(Indent);
+ N->dump();
+ if (AbstractScope)
+ err << std::string(Indent, ' ') << "Abstract Scope\n";
+
+ if (!Children.empty())
+ err << std::string(Indent + 2, ' ') << "Children ...\n";
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ if (Children[i] != this)
+ Children[i]->dump(Indent + 2);
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
new file mode 100644
index 000000000000..57df9b67fd02
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -0,0 +1,4230 @@
+//===- InstrRefBasedImpl.cpp - Tracking Debug Value MIs -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file InstrRefBasedImpl.cpp
+///
+/// This is a separate implementation of LiveDebugValues, see
+/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
+///
+/// This pass propagates variable locations between basic blocks, resolving
+/// control flow conflicts between them. The problem is SSA construction, where
+/// each debug instruction assigns the *value* that a variable has, and every
+/// instruction where the variable is in scope uses that variable. The resulting
+/// map of instruction-to-value is then translated into a register (or spill)
+/// location for each variable over each instruction.
+///
+/// The primary difference from normal SSA construction is that we cannot
+/// _create_ PHI values that contain variable values. CodeGen has already
+/// completed, and we can't alter it just to make debug-info complete. Thus:
+/// we can identify function positions where we would like a PHI value for a
+/// variable, but must search the MachineFunction to see whether such a PHI is
+/// available. If no such PHI exists, the variable location must be dropped.
+///
+/// To achieve this, we perform two kinds of analysis. First, we identify
+/// every value defined by every instruction (ignoring those that only move
+/// another value), then re-compute an SSA-form representation of the
+/// MachineFunction, using value propagation to eliminate any unnecessary
+/// PHI values. This gives us a map of every value computed in the function,
+/// and its location within the register file / stack.
+///
+/// Secondly, for each variable we perform the same analysis, where each debug
+/// instruction is considered a def and every instruction where the variable is
+/// in lexical scope is considered a use. Value propagation is used again to
+/// eliminate any unnecessary PHIs. This gives us a map of each variable to the
+/// value it should have in a block.
+///
+/// Once both are complete, we have two maps for each block:
+/// * Variables to the values they should have,
+/// * Values to the register / spill slot they are located in.
+/// After which we can marry-up variable values with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value. The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
+///
+/// This pass is kept efficient because the size of the first SSA problem
+/// is proportional to the working-set size of the function, which the compiler
+/// tries to keep small. (It's also proportional to the number of blocks).
+/// Additionally, we repeatedly perform the second SSA problem analysis with
+/// only the variables and blocks in a single lexical scope, exploiting their
+/// locality.
+///
+/// ### Terminology
+///
+/// A machine location is a register or spill slot, a value is something that's
+/// defined by an instruction or PHI node, while a variable value is the value
+/// assigned to a variable. A variable location is a machine location that must
+/// contain the appropriate variable value. A value that is a PHI node is
+/// occasionally called an mphi.
+///
+/// The first SSA problem is the "machine value location" problem,
+/// because we're determining which machine locations contain which values.
+/// The "locations" are constant: what's unknown is what value they contain.
+///
+/// The second SSA problem (the one for variables) is the "variable value
+/// problem", because it's determining what values a variable has, rather than
+/// what location those values are placed in.
+///
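+/// For example (purely illustrative, with made-up register and opcode names):
+/// given
+///
+///   bb.0:
+///     $r0 = SOME_DEF            ; defines a value V
+///     DBG_VALUE $r0, "x"        ; variable "x" is assigned value V
+///     $r1 = COPY $r0            ; V now also resides in $r1
+///     $r0 = SOME_OTHER_DEF      ; $r0 clobbered; V survives only in $r1
+///
+/// the machine value location analysis records that V resides in $r1 at the
+/// end of the block, the variable value analysis records that "x" should
+/// still have value V, and marrying the two lets us emit a DBG_VALUE of $r1
+/// for "x" after the clobber of $r0.
+///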
+/// TODO:
+/// Overlapping fragments
+/// Entry values
+/// Add back DEBUG statements for debugging this
+/// Collect statistics
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <functional>
+#include <queue>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "InstrRefBasedImpl.h"
+#include "LiveDebugValues.h"
+#include <optional>
+
+using namespace llvm;
+using namespace LiveDebugValues;
+
+// SSAUpdaterImpl sets DEBUG_TYPE; change it back.
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "livedebugvalues"
+
+// Act more like the VarLoc implementation, by propagating some locations too
+// far and ignoring some transfers.
+static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
+ cl::desc("Act like old LiveDebugValues did"),
+ cl::init(false));
+
+// Limit for the maximum number of stack slots we should track, past which we
+// will ignore any spills. InstrRefBasedLDV gathers detailed information on all
+// stack slots which leads to high memory consumption, and in some scenarios
+// (such as asan with very many locals) the working set of the function can be
+// very large, causing many spills. In these scenarios, it is very unlikely that
+// the developer has hundreds of variables live at the same time that they're
+// carefully thinking about -- instead, they probably autogenerated the code.
+// When this happens, gracefully stop tracking excess spill slots, rather than
+// consuming all the developer's memory.
+static cl::opt<unsigned>
+ StackWorkingSetLimit("livedebugvalues-max-stack-slots", cl::Hidden,
+ cl::desc("livedebugvalues-stack-ws-limit"),
+ cl::init(250));
+
+DbgOpID DbgOpID::UndefID = DbgOpID(0xffffffff);
+
+/// Tracker for converting machine value locations and variable values into
+/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
+/// specifying block live-in locations and transfers within blocks.
+///
+/// Operating on a per-block basis, this class takes a (pre-loaded) MLocTracker
+/// and must be initialized with the set of variable values that are live-in to
+/// the block. The caller then repeatedly calls process(). TransferTracker picks
+/// out variable locations for the live-in variable values (if there _is_ a
+/// location) and creates the corresponding DBG_VALUEs. Then, as the block is
+/// stepped through, transfers of values between machine locations are
+/// identified and if profitable, a DBG_VALUE created.
+///
+/// This is where debug use-before-defs would be resolved: a variable with an
+/// unavailable value could materialize in the middle of a block, when the
+/// value becomes available. Or, we could detect clobbers and re-specify the
+/// variable in a backup location. (XXX these are unimplemented).
+class TransferTracker {
+public:
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+ /// This machine location tracker is assumed to always contain the up-to-date
+ /// value mapping for all machine locations. TransferTracker only reads
+ /// information from it. (XXX make it const?)
+ MLocTracker *MTracker;
+ MachineFunction &MF;
+ bool ShouldEmitDebugEntryValues;
+
+ /// Record of all changes in variable locations at a block position. Awkwardly
+ /// we allow inserting either before or after the point: MBB != nullptr
+ /// indicates it's before, otherwise after.
+ struct Transfer {
+ MachineBasicBlock::instr_iterator Pos; /// Position to insert DBG_VALUEs
+ MachineBasicBlock *MBB; /// non-null if we should insert before Pos, not after.
+ SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert.
+ };
+
+ /// Stores the resolved operands (machine locations and constants) and
+ /// qualifying meta-information needed to construct a concrete DBG_VALUE-like
+ /// instruction.
+ struct ResolvedDbgValue {
+ SmallVector<ResolvedDbgOp> Ops;
+ DbgValueProperties Properties;
+
+ ResolvedDbgValue(SmallVectorImpl<ResolvedDbgOp> &Ops,
+ DbgValueProperties Properties)
+ : Ops(Ops.begin(), Ops.end()), Properties(Properties) {}
+
+ /// Returns all the LocIdx values used in this struct, in the order in which
+ /// they appear as operands in the debug value; may contain duplicates.
+ auto loc_indices() const {
+ return map_range(
+ make_filter_range(
+ Ops, [](const ResolvedDbgOp &Op) { return !Op.IsConst; }),
+ [](const ResolvedDbgOp &Op) { return Op.Loc; });
+ }
+ };
+
+ /// Collection of transfers (DBG_VALUEs) to be inserted.
+ SmallVector<Transfer, 32> Transfers;
+
+ /// Local cache of what-value-is-in-what-LocIdx. Used to identify differences
+ /// between TransferTracker's view of variable locations and MLocTracker's. For
+ /// example, MLocTracker observes all clobbers, but TransferTracker lazily
+ /// does not.
+ SmallVector<ValueIDNum, 32> VarLocs;
+
+ /// Map from LocIdxes to the DebugVariables based in that location.
+ /// Maintained while stepping through the block. Not accurate if
+ /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
+ DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+
+ /// Map from DebugVariable to its current location and qualifying meta
+ /// information. To be used in conjunction with ActiveMLocs to construct
+ /// enough information for the DBG_VALUEs for a particular LocIdx.
+ DenseMap<DebugVariable, ResolvedDbgValue> ActiveVLocs;
+
+ /// Temporary cache of DBG_VALUEs to be entered into the Transfers collection.
+ SmallVector<MachineInstr *, 4> PendingDbgValues;
+
+ /// Record of a use-before-def: created when a value that's live-in to the
+ /// current block isn't available in any machine location, but it will be
+ /// defined in this block.
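+ /// For example (illustrative only): if the live-in DbgValue for a variable
+ /// names a value defined by instruction 5 of this block, no DBG_VALUE is
+ /// emitted at the block entry; instead, checkInstForNewValues creates one
+ /// immediately after instruction 5, unless the variable has been
+ /// re-specified before that point.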
+ struct UseBeforeDef {
+ /// Value of this variable, def'd in block.
+ SmallVector<DbgOp> Values;
+ /// Identity of this variable.
+ DebugVariable Var;
+ /// Additional variable properties.
+ DbgValueProperties Properties;
+ UseBeforeDef(ArrayRef<DbgOp> Values, const DebugVariable &Var,
+ const DbgValueProperties &Properties)
+ : Values(Values.begin(), Values.end()), Var(Var),
+ Properties(Properties) {}
+ };
+
+ /// Map from instruction index (within the block) to the set of UseBeforeDefs
+ /// that become defined at that instruction.
+ DenseMap<unsigned, SmallVector<UseBeforeDef, 1>> UseBeforeDefs;
+
+ /// The set of variables that are in UseBeforeDefs and can become a location
+ /// once the relevant value is defined. An element being erased from this
+ /// collection prevents the use-before-def materializing.
+ DenseSet<DebugVariable> UseBeforeDefVariables;
+
+ const TargetRegisterInfo &TRI;
+ const BitVector &CalleeSavedRegs;
+
+ TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker,
+ MachineFunction &MF, const TargetRegisterInfo &TRI,
+ const BitVector &CalleeSavedRegs, const TargetPassConfig &TPC)
+ : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI),
+ CalleeSavedRegs(CalleeSavedRegs) {
+ TLI = MF.getSubtarget().getTargetLowering();
+ auto &TM = TPC.getTM<TargetMachine>();
+ ShouldEmitDebugEntryValues = TM.Options.ShouldEmitDebugEntryValues();
+ }
+
+ bool isCalleeSaved(LocIdx L) const {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ if (Reg >= MTracker->NumRegs)
+ return false;
+ for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ // An estimate of the expected lifespan of values at a machine location, with
+ // a greater value corresponding to a longer expected lifespan, i.e. spill
+ // slots generally live longer than callee-saved registers which generally
+ // live longer than non-callee-saved registers. The minimum value of 0
+ // corresponds to an illegal location that cannot have a "lifespan" at all.
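+ // For example (illustrative only): if a sought value is resident in both a
+ // volatile register and a spill slot on entry to a block, loadInlocs picks
+ // the spill slot; a callee-saved register would in turn be preferred over a
+ // volatile one.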
+ enum class LocationQuality : unsigned char {
+ Illegal = 0,
+ Register,
+ CalleeSavedRegister,
+ SpillSlot,
+ Best = SpillSlot
+ };
+
+ class LocationAndQuality {
+ unsigned Location : 24;
+ unsigned Quality : 8;
+
+ public:
+ LocationAndQuality() : Location(0), Quality(0) {}
+ LocationAndQuality(LocIdx L, LocationQuality Q)
+ : Location(L.asU64()), Quality(static_cast<unsigned>(Q)) {}
+ LocIdx getLoc() const {
+ if (!Quality)
+ return LocIdx::MakeIllegalLoc();
+ return LocIdx(Location);
+ }
+ LocationQuality getQuality() const { return LocationQuality(Quality); }
+ bool isIllegal() const { return !Quality; }
+ bool isBest() const { return getQuality() == LocationQuality::Best; }
+ };
+
+ // Returns the LocationQuality for the location L iff the quality of L is
+ // strictly greater than the provided minimum quality.
+ std::optional<LocationQuality>
+ getLocQualityIfBetter(LocIdx L, LocationQuality Min) const {
+ if (L.isIllegal())
+ return std::nullopt;
+ if (Min >= LocationQuality::SpillSlot)
+ return std::nullopt;
+ if (MTracker->isSpill(L))
+ return LocationQuality::SpillSlot;
+ if (Min >= LocationQuality::CalleeSavedRegister)
+ return std::nullopt;
+ if (isCalleeSaved(L))
+ return LocationQuality::CalleeSavedRegister;
+ if (Min >= LocationQuality::Register)
+ return std::nullopt;
+ return LocationQuality::Register;
+ }
+
+ /// For a variable \p Var with the live-in value \p Value, attempts to resolve
+ /// the DbgValue to a concrete DBG_VALUE, emitting that value and loading the
+ /// tracking information to track Var throughout the block.
+ /// \p ValueToLoc is a map containing the best known location for every
+ /// ValueIDNum that Value may use.
+ /// \p MBB is the basic block that we are loading the live-in value for.
+ /// \p DbgOpStore is the map containing the DbgOpID->DbgOp mapping needed to
+ /// determine the values used by Value.
+ void loadVarInloc(MachineBasicBlock &MBB, DbgOpIDMap &DbgOpStore,
+ const DenseMap<ValueIDNum, LocationAndQuality> &ValueToLoc,
+ DebugVariable Var, DbgValue Value) {
+ SmallVector<DbgOp> DbgOps;
+ SmallVector<ResolvedDbgOp> ResolvedDbgOps;
+ bool IsValueValid = true;
+ unsigned LastUseBeforeDef = 0;
+
+ // If every value used by the incoming DbgValue is available at block
+ // entry, ResolvedDbgOps will contain the machine locations/constants for
+ // those values and will be used to emit a debug location.
+ // If one or more values are not yet available, but will all be defined in
+ // this block, then LastUseBeforeDef will track the instruction index in
+ // this BB at which the last of those values is defined, and DbgOps will
+ // contain the values that we will emit when we reach that instruction.
+ // If one or more values are undef or not available throughout this block,
+ // and we can't recover as an entry value, we set IsValueValid=false and
+ // skip this variable.
+ for (DbgOpID ID : Value.getDbgOpIDs()) {
+ DbgOp Op = DbgOpStore.find(ID);
+ DbgOps.push_back(Op);
+ if (ID.isUndef()) {
+ IsValueValid = false;
+ break;
+ }
+ if (ID.isConst()) {
+ ResolvedDbgOps.push_back(Op.MO);
+ continue;
+ }
+
+ // If the value has no location, we can't make a variable location.
+ const ValueIDNum &Num = Op.ID;
+ auto ValuesPreferredLoc = ValueToLoc.find(Num);
+ if (ValuesPreferredLoc->second.isIllegal()) {
+ // If it's a def that occurs in this block, register it as a
+ // use-before-def to be resolved as we step through the block.
+ // Continue processing values so that we add any other UseBeforeDef
+ // entries needed for later.
+ if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) {
+ LastUseBeforeDef = std::max(LastUseBeforeDef,
+ static_cast<unsigned>(Num.getInst()));
+ continue;
+ }
+ recoverAsEntryValue(Var, Value.Properties, Num);
+ IsValueValid = false;
+ break;
+ }
+
+ // Defer modifying ActiveVLocs until after we've confirmed we have a
+ // live range.
+ LocIdx M = ValuesPreferredLoc->second.getLoc();
+ ResolvedDbgOps.push_back(M);
+ }
+
+ // If we cannot produce a valid value for the LiveIn value within this
+ // block, skip this variable.
+ if (!IsValueValid)
+ return;
+
+ // Add UseBeforeDef entry for the last value to be defined in this block.
+ if (LastUseBeforeDef) {
+ addUseBeforeDef(Var, Value.Properties, DbgOps,
+ LastUseBeforeDef);
+ return;
+ }
+
+ // The LiveIn value is available at block entry; begin tracking and record
+ // the transfer.
+ for (const ResolvedDbgOp &Op : ResolvedDbgOps)
+ if (!Op.IsConst)
+ ActiveMLocs[Op.Loc].insert(Var);
+ auto NewValue = ResolvedDbgValue{ResolvedDbgOps, Value.Properties};
+ auto Result = ActiveVLocs.insert(std::make_pair(Var, NewValue));
+ if (!Result.second)
+ Result.first->second = NewValue;
+ PendingDbgValues.push_back(
+ MTracker->emitLoc(ResolvedDbgOps, Var, Value.Properties));
+ }
+
+ /// Load object with live-in variable values. \p MLocs contains the live-in
+ /// values in each machine location, while \p VLocs contains the live-in variable
+ /// values. This method picks variable locations for the live-in variables,
+ /// creates DBG_VALUEs and puts them in #Transfers, then prepares the other
+ /// object fields to track variable locations as we step through the block.
+ /// FIXME: could just examine mloctracker instead of passing in \p mlocs?
+ void
+ loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, DbgOpIDMap &DbgOpStore,
+ const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
+ unsigned NumLocs) {
+ ActiveMLocs.clear();
+ ActiveVLocs.clear();
+ VarLocs.clear();
+ VarLocs.reserve(NumLocs);
+ UseBeforeDefs.clear();
+ UseBeforeDefVariables.clear();
+
+ // Map of the preferred location for each value.
+ DenseMap<ValueIDNum, LocationAndQuality> ValueToLoc;
+
+ // Initialize the preferred-location map with illegal locations, to be
+ // filled in later.
+ for (const auto &VLoc : VLocs)
+ if (VLoc.second.Kind == DbgValue::Def)
+ for (DbgOpID OpID : VLoc.second.getDbgOpIDs())
+ if (!OpID.ID.IsConst)
+ ValueToLoc.insert({DbgOpStore.find(OpID).ID, LocationAndQuality()});
+
+ ActiveMLocs.reserve(VLocs.size());
+ ActiveVLocs.reserve(VLocs.size());
+
+ // Produce a map of value numbers to the current machine locs they live
+ // in. When emulating VarLocBasedImpl, there should only be one
+ // location; when not, we get to pick.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &VNum = MLocs[Idx.asU64()];
+ if (VNum == ValueIDNum::EmptyValue)
+ continue;
+ VarLocs.push_back(VNum);
+
+ // Is there a variable that wants a location for this value? If not, skip.
+ auto VIt = ValueToLoc.find(VNum);
+ if (VIt == ValueToLoc.end())
+ continue;
+
+ auto &Previous = VIt->second;
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ std::optional<LocationQuality> ReplacementQuality =
+ getLocQualityIfBetter(Idx, Previous.getQuality());
+ if (ReplacementQuality)
+ Previous = LocationAndQuality(Idx, *ReplacementQuality);
+ }
+
+ // Now map variables to their picked LocIdxes.
+ for (const auto &Var : VLocs) {
+ loadVarInloc(MBB, DbgOpStore, ValueToLoc, Var.first, Var.second);
+ }
+ flushDbgValues(MBB.begin(), &MBB);
+ }
+
+ /// Record that \p Var has value \p ID, a value that becomes available
+ /// later in the function.
+ void addUseBeforeDef(const DebugVariable &Var,
+ const DbgValueProperties &Properties,
+ const SmallVectorImpl<DbgOp> &DbgOps, unsigned Inst) {
+ UseBeforeDefs[Inst].emplace_back(DbgOps, Var, Properties);
+ UseBeforeDefVariables.insert(Var);
+ }
+
+ /// After the instruction at index \p Inst and position \p pos has been
+ /// processed, check whether it defines a variable value in a use-before-def.
+ /// If so, and the variable value hasn't changed since the start of the
+ /// block, create a DBG_VALUE.
+ void checkInstForNewValues(unsigned Inst, MachineBasicBlock::iterator pos) {
+ auto MIt = UseBeforeDefs.find(Inst);
+ if (MIt == UseBeforeDefs.end())
+ return;
+
+ // Map of values to the locations that store them for every value used by
+ // the variables that may have become available.
+ SmallDenseMap<ValueIDNum, LocationAndQuality> ValueToLoc;
+
+ // Populate ValueToLoc with illegal default mappings for every value used by
+ // any UseBeforeDef variables for this instruction.
+ for (auto &Use : MIt->second) {
+ if (!UseBeforeDefVariables.count(Use.Var))
+ continue;
+
+ for (DbgOp &Op : Use.Values) {
+ assert(!Op.isUndef() && "UseBeforeDef erroneously created for a "
+ "DbgValue with undef values.");
+ if (Op.IsConst)
+ continue;
+
+ ValueToLoc.insert({Op.ID, LocationAndQuality()});
+ }
+ }
+
+ // Exit early if we have no DbgValues to produce.
+ if (ValueToLoc.empty())
+ return;
+
+ // Determine the best location for each desired value.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &LocValueID = Location.Value;
+
+ // Is there a variable that wants a location for this value? If not, skip.
+ auto VIt = ValueToLoc.find(LocValueID);
+ if (VIt == ValueToLoc.end())
+ continue;
+
+ auto &Previous = VIt->second;
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ std::optional<LocationQuality> ReplacementQuality =
+ getLocQualityIfBetter(Idx, Previous.getQuality());
+ if (ReplacementQuality)
+ Previous = LocationAndQuality(Idx, *ReplacementQuality);
+ }
+
+ // Using the map of values to locations, produce a final set of values for
+ // this variable.
+ for (auto &Use : MIt->second) {
+ if (!UseBeforeDefVariables.count(Use.Var))
+ continue;
+
+ SmallVector<ResolvedDbgOp> DbgOps;
+
+ for (DbgOp &Op : Use.Values) {
+ if (Op.IsConst) {
+ DbgOps.push_back(Op.MO);
+ continue;
+ }
+ LocIdx NewLoc = ValueToLoc.find(Op.ID)->second.getLoc();
+ if (NewLoc.isIllegal())
+ break;
+ DbgOps.push_back(NewLoc);
+ }
+
+ // If at least one value used by this debug value is no longer available,
+ // i.e. one of the values was killed before we finished defining all of
+ // the values used by this variable, discard.
+ if (DbgOps.size() != Use.Values.size())
+ continue;
+
+ // Otherwise, we're good to go.
+ PendingDbgValues.push_back(
+ MTracker->emitLoc(DbgOps, Use.Var, Use.Properties));
+ }
+ flushDbgValues(pos, nullptr);
+ }
+
+ /// Helper to move created DBG_VALUEs into Transfers collection.
+ void flushDbgValues(MachineBasicBlock::iterator Pos, MachineBasicBlock *MBB) {
+ if (PendingDbgValues.size() == 0)
+ return;
+
+ // Pick out the instruction start position.
+ MachineBasicBlock::instr_iterator BundleStart;
+ if (MBB && Pos == MBB->begin())
+ BundleStart = MBB->instr_begin();
+ else
+ BundleStart = getBundleStart(Pos->getIterator());
+
+ Transfers.push_back({BundleStart, MBB, PendingDbgValues});
+ PendingDbgValues.clear();
+ }
+
+ bool isEntryValueVariable(const DebugVariable &Var,
+ const DIExpression *Expr) const {
+ if (!Var.getVariable()->isParameter())
+ return false;
+
+ if (Var.getInlinedAt())
+ return false;
+
+ if (Expr->getNumElements() > 0 && !Expr->isDeref())
+ return false;
+
+ return true;
+ }
+
+ bool isEntryValueValue(const ValueIDNum &Val) const {
+ // Must be in entry block (block number zero), and be a PHI / live-in value.
+ if (Val.getBlock() || !Val.isPHI())
+ return false;
+
+ // Entry values must enter in a register.
+ if (MTracker->isSpill(Val.getLoc()))
+ return false;
+
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI.getFrameRegister(MF);
+ Register Reg = MTracker->LocIdxToLocID[Val.getLoc()];
+ return Reg != SP && Reg != FP;
+ }
+
+ bool recoverAsEntryValue(const DebugVariable &Var,
+ const DbgValueProperties &Prop,
+ const ValueIDNum &Num) {
+ // Is this variable location a candidate to be an entry value? First,
+ // should we be trying this at all?
+ if (!ShouldEmitDebugEntryValues)
+ return false;
+
+ const DIExpression *DIExpr = Prop.DIExpr;
+
+ // We don't currently emit entry values for DBG_VALUE_LISTs.
+ if (Prop.IsVariadic) {
+ // If this debug value can be converted to be non-variadic, then do so;
+ // otherwise give up.
+ auto NonVariadicExpression =
+ DIExpression::convertToNonVariadicExpression(DIExpr);
+ if (!NonVariadicExpression)
+ return false;
+ DIExpr = *NonVariadicExpression;
+ }
+
+ // Is the variable appropriate for entry values (i.e., is it a parameter)?
+ if (!isEntryValueVariable(Var, DIExpr))
+ return false;
+
+ // Is the value assigned to this variable still the entry value?
+ if (!isEntryValueValue(Num))
+ return false;
+
+ // Emit a variable location using an entry value expression.
+ DIExpression *NewExpr =
+ DIExpression::prepend(DIExpr, DIExpression::EntryValue);
+ Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
+ MachineOperand MO = MachineOperand::CreateReg(Reg, false);
+
+ PendingDbgValues.push_back(
+ emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false}));
+ return true;
+ }
+
+ /// Change a variable value after encountering a DBG_VALUE inside a block.
+ void redefVar(const MachineInstr &MI) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+
+ // Ignore non-register locations; we don't transfer those.
+ if (MI.isUndefDebugValue() ||
+ all_of(MI.debug_operands(),
+ [](const MachineOperand &MO) { return !MO.isReg(); })) {
+ auto It = ActiveVLocs.find(Var);
+ if (It != ActiveVLocs.end()) {
+ for (LocIdx Loc : It->second.loc_indices())
+ ActiveMLocs[Loc].erase(Var);
+ ActiveVLocs.erase(It);
+ }
+ // Any use-before-defs no longer apply.
+ UseBeforeDefVariables.erase(Var);
+ return;
+ }
+
+ SmallVector<ResolvedDbgOp> NewLocs;
+ for (const MachineOperand &MO : MI.debug_operands()) {
+ if (MO.isReg()) {
+ // Any undef regs have already been filtered out above.
+ Register Reg = MO.getReg();
+ LocIdx NewLoc = MTracker->getRegMLoc(Reg);
+ NewLocs.push_back(NewLoc);
+ } else {
+ NewLocs.push_back(MO);
+ }
+ }
+
+ redefVar(MI, Properties, NewLocs);
+ }
+
+ /// Handle a change in variable location within a block. Terminate the
+ /// variable's current location, and record the value it now refers to, so
+ /// that we can detect location transfers later on.
+ void redefVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ SmallVectorImpl<ResolvedDbgOp> &NewLocs) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ // Any use-before-defs no longer apply.
+ UseBeforeDefVariables.erase(Var);
+
+ // Erase any previous location.
+ auto It = ActiveVLocs.find(Var);
+ if (It != ActiveVLocs.end()) {
+ for (LocIdx Loc : It->second.loc_indices())
+ ActiveMLocs[Loc].erase(Var);
+ }
+
+ // If there _is_ no new location, all we had to do was erase.
+ if (NewLocs.empty()) {
+ if (It != ActiveVLocs.end())
+ ActiveVLocs.erase(It);
+ return;
+ }
+
+ SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs;
+ for (ResolvedDbgOp &Op : NewLocs) {
+ if (Op.IsConst)
+ continue;
+
+ LocIdx NewLoc = Op.Loc;
+
+ // Check whether our local copy of values-by-location in #VarLocs is out
+ // of date. Wipe old tracking data for the location if it's been clobbered
+ // in the meantime.
+ if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ for (const auto &P : ActiveMLocs[NewLoc]) {
+ auto LostVLocIt = ActiveVLocs.find(P);
+ if (LostVLocIt != ActiveVLocs.end()) {
+ for (LocIdx Loc : LostVLocIt->second.loc_indices()) {
+ // Every active variable mapping for NewLoc will be cleared, no
+ // need to track individual variables.
+ if (Loc == NewLoc)
+ continue;
+ LostMLocs.emplace_back(Loc, P);
+ }
+ }
+ ActiveVLocs.erase(P);
+ }
+ for (const auto &LostMLoc : LostMLocs)
+ ActiveMLocs[LostMLoc.first].erase(LostMLoc.second);
+ LostMLocs.clear();
+ It = ActiveVLocs.find(Var);
+ ActiveMLocs[NewLoc.asU64()].clear();
+ VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
+ }
+
+ ActiveMLocs[NewLoc].insert(Var);
+ }
+
+ if (It == ActiveVLocs.end()) {
+ ActiveVLocs.insert(
+ std::make_pair(Var, ResolvedDbgValue(NewLocs, Properties)));
+ } else {
+ It->second.Ops.assign(NewLocs);
+ It->second.Properties = Properties;
+ }
+ }
+
+ /// Account for a location \p MLoc being clobbered. Examine the variable
+ /// locations that will be terminated, and try to recover them by using
+ /// another location. Optionally, given \p MakeUndef, emit a DBG_VALUE to
+ /// explicitly terminate a location if it can't be recovered.
+ void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos,
+ bool MakeUndef = true) {
+ auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+ if (ActiveMLocIt == ActiveMLocs.end())
+ return;
+
+ // What value did this location previously contain?
+ ValueIDNum OldValue = VarLocs[MLoc.asU64()];
+ clobberMloc(MLoc, OldValue, Pos, MakeUndef);
+ }
+ /// Overload that takes an explicit value \p OldValue for when the value in
+ /// \p MLoc has changed and the TransferTracker's locations have not been
+ /// updated yet.
+ void clobberMloc(LocIdx MLoc, ValueIDNum OldValue,
+ MachineBasicBlock::iterator Pos, bool MakeUndef = true) {
+ auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+ if (ActiveMLocIt == ActiveMLocs.end())
+ return;
+
+ VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
+
+ // Examine the remaining variable locations: if we can find the same value
+ // again, we can recover the location.
+ std::optional<LocIdx> NewLoc;
+ for (auto Loc : MTracker->locations())
+ if (Loc.Value == OldValue)
+ NewLoc = Loc.Idx;
+
+ // If there is no location, and we weren't asked to make the variable
+ // explicitly undef, then stop here.
+ if (!NewLoc && !MakeUndef) {
+ // Try and recover a few more locations with entry values.
+ for (const auto &Var : ActiveMLocIt->second) {
+ auto &Prop = ActiveVLocs.find(Var)->second.Properties;
+ recoverAsEntryValue(Var, Prop, OldValue);
+ }
+ flushDbgValues(Pos, nullptr);
+ return;
+ }
+
+ // Examine all the variables based on this location.
+ DenseSet<DebugVariable> NewMLocs;
+ // If no new location has been found, every variable that depends on this
+ // MLoc is dead, so end their existing MLoc->Var mappings as well.
+ SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs;
+ for (const auto &Var : ActiveMLocIt->second) {
+ auto ActiveVLocIt = ActiveVLocs.find(Var);
+ // Re-state the variable location: if there's no replacement then NewLoc
+ // is std::nullopt and a $noreg DBG_VALUE will be created. Otherwise, a
+ // DBG_VALUE identifying the alternative location will be emitted.
+ const DbgValueProperties &Properties = ActiveVLocIt->second.Properties;
+
+ // Produce the new list of debug ops - an empty list if no new location
+ // was found, or the existing list with the substitution MLoc -> NewLoc
+ // otherwise.
+ SmallVector<ResolvedDbgOp> DbgOps;
+ if (NewLoc) {
+ ResolvedDbgOp OldOp(MLoc);
+ ResolvedDbgOp NewOp(*NewLoc);
+ // Insert illegal ops to overwrite afterwards.
+ DbgOps.insert(DbgOps.begin(), ActiveVLocIt->second.Ops.size(),
+ ResolvedDbgOp(LocIdx::MakeIllegalLoc()));
+ replace_copy(ActiveVLocIt->second.Ops, DbgOps.begin(), OldOp, NewOp);
+ }
+
+ PendingDbgValues.push_back(MTracker->emitLoc(DbgOps, Var, Properties));
+
+ // Update machine locations <=> variable locations maps. Defer updating
+ // ActiveMLocs to avoid invalidating the ActiveMLocIt iterator.
+ if (!NewLoc) {
+ for (LocIdx Loc : ActiveVLocIt->second.loc_indices()) {
+ if (Loc != MLoc)
+ LostMLocs.emplace_back(Loc, Var);
+ }
+ ActiveVLocs.erase(ActiveVLocIt);
+ } else {
+ ActiveVLocIt->second.Ops = DbgOps;
+ NewMLocs.insert(Var);
+ }
+ }
+
+ // Remove variables from ActiveMLocs if they no longer use any other MLocs
+ // due to being killed by this clobber.
+ for (auto &LocVarIt : LostMLocs) {
+ auto LostMLocIt = ActiveMLocs.find(LocVarIt.first);
+ assert(LostMLocIt != ActiveMLocs.end() &&
+ "Variable was using this MLoc, but ActiveMLocs[MLoc] has no "
+ "entries?");
+ LostMLocIt->second.erase(LocVarIt.second);
+ }
+
+ // We lazily track what locations have which values; if we've found a new
+ // location for the clobbered value, remember it.
+ if (NewLoc)
+ VarLocs[NewLoc->asU64()] = OldValue;
+
+ flushDbgValues(Pos, nullptr);
+
+ // Commit ActiveMLoc changes.
+ ActiveMLocIt->second.clear();
+ if (!NewMLocs.empty())
+ for (auto &Var : NewMLocs)
+ ActiveMLocs[*NewLoc].insert(Var);
+ }
+
+ /// Transfer variables based on \p Src to be based on \p Dst. This handles
+ /// both register copies as well as spills and restores. Creates DBG_VALUEs
+ /// describing the movement.
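+ /// For example (illustrative only): when a register holding the values of
+ /// some variables is spilt, those variables become based on the stack slot
+ /// instead, and a DBG_VALUE referring to the slot is created for each of
+ /// them at the transfer point.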
+ void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {
+ // Does Src still contain the value num we expect? If not, it's been
+ // clobbered in the meantime, and our variable locations are stale.
+ if (VarLocs[Src.asU64()] != MTracker->readMLoc(Src))
+ return;
+
+ // assert(ActiveMLocs[Dst].size() == 0);
+ //^^^ Legitimate scenario on account of un-clobbered slot being assigned to?
+
+ // Move set of active variables from one location to another.
+ auto MovingVars = ActiveMLocs[Src];
+ ActiveMLocs[Dst].insert(MovingVars.begin(), MovingVars.end());
+ VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
+
+ // For each variable based on Src; create a location at Dst.
+ ResolvedDbgOp SrcOp(Src);
+ ResolvedDbgOp DstOp(Dst);
+ for (const auto &Var : MovingVars) {
+ auto ActiveVLocIt = ActiveVLocs.find(Var);
+ assert(ActiveVLocIt != ActiveVLocs.end());
+
+ // Update all instances of Src in the variable's tracked values to Dst.
+ std::replace(ActiveVLocIt->second.Ops.begin(),
+ ActiveVLocIt->second.Ops.end(), SrcOp, DstOp);
+
+ MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var,
+ ActiveVLocIt->second.Properties);
+ PendingDbgValues.push_back(MI);
+ }
+ ActiveMLocs[Src].clear();
+ flushDbgValues(Pos, nullptr);
+
+ // XXX XXX XXX "pretend to be old LDV" means dropping all tracking data
+ // about the old location.
+ if (EmulateOldLDV)
+ VarLocs[Src.asU64()] = ValueIDNum::EmptyValue;
+ }
+
+ MachineInstrBuilder emitMOLoc(const MachineOperand &MO,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.add(MO);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0);
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Properties.DIExpr);
+ return MIB;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1};
+
+#ifndef NDEBUG
+void ResolvedDbgOp::dump(const MLocTracker *MTrack) const {
+ if (IsConst) {
+ dbgs() << MO;
+ } else {
+ dbgs() << MTrack->LocIdxToName(Loc);
+ }
+}
+void DbgOp::dump(const MLocTracker *MTrack) const {
+ if (IsConst) {
+ dbgs() << MO;
+ } else if (!isUndef()) {
+ dbgs() << MTrack->IDAsString(ID);
+ }
+}
+void DbgOpID::dump(const MLocTracker *MTrack, const DbgOpIDMap *OpStore) const {
+ if (!OpStore) {
+ dbgs() << "ID(" << asU32() << ")";
+ } else {
+ OpStore->find(*this).dump(MTrack);
+ }
+}
+void DbgValue::dump(const MLocTracker *MTrack,
+ const DbgOpIDMap *OpStore) const {
+ if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == VPHI || Kind == Def) {
+ if (Kind == VPHI)
+ dbgs() << "VPHI(" << BlockNo << ",";
+ else
+ dbgs() << "Def(";
+ for (unsigned Idx = 0; Idx < getDbgOpIDs().size(); ++Idx) {
+ // Print a separator before every operand after the first.
+ if (Idx != 0)
+ dbgs() << ",";
+ getDbgOpID(Idx).dump(MTrack, OpStore);
+ }
+ dbgs() << ")";
+ }
+ if (Properties.Indirect)
+ dbgs() << " indir";
+ if (Properties.DIExpr)
+ dbgs() << " " << *Properties.DIExpr;
+}
+#endif
+
+MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const TargetLowering &TLI)
+ : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+ LocIdxToIDNum(ValueIDNum::EmptyValue), LocIdxToLocID(0) {
+ NumRegs = TRI.getNumRegs();
+ reset();
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+ // Always track SP. This prevents the implicit clobbering caused by regmasks
+ // from affecting its values. (LiveDebugValues disbelieves calls and
+ // regmasks that claim to clobber SP).
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (SP) {
+ unsigned ID = getLocID(SP);
+ (void)lookupOrTrackRegister(ID);
+
+ for (MCRegAliasIterator RAI(SP, &TRI, true); RAI.isValid(); ++RAI)
+ SPAliases.insert(*RAI);
+ }
+
+ // Build some common stack positions -- full registers being spilt to the
+ // stack.
+ StackSlotIdxes.insert({{8, 0}, 0});
+ StackSlotIdxes.insert({{16, 0}, 1});
+ StackSlotIdxes.insert({{32, 0}, 2});
+ StackSlotIdxes.insert({{64, 0}, 3});
+ StackSlotIdxes.insert({{128, 0}, 4});
+ StackSlotIdxes.insert({{256, 0}, 5});
+ StackSlotIdxes.insert({{512, 0}, 6});
+
+ // Traverse all the subregister idxes, and ensure there's an index for them.
+ // Duplicates are no problem: we're interested in their position in the
+ // stack slot; we don't want to type the slot.
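+ // For example (illustrative only): on a target with a 16-bit subregister at
+ // offset 16 within its super-register, the pair {16, 16} receives its own
+ // index here, so a spill or restore of just that portion of a register can
+ // later be tracked as a distinct location within the slot.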
+ for (unsigned int I = 1; I < TRI.getNumSubRegIndices(); ++I) {
+ unsigned Size = TRI.getSubRegIdxSize(I);
+ unsigned Offs = TRI.getSubRegIdxOffset(I);
+ unsigned Idx = StackSlotIdxes.size();
+
+ // Some subregs have -1, -2 and so forth fed into their fields, to mean
+ // special backend things. Ignore those.
+ if (Size > 60000 || Offs > 60000)
+ continue;
+
+ StackSlotIdxes.insert({{Size, Offs}, Idx});
+ }
+
+ // There may also be strange register class sizes (think x86 fp80s).
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ unsigned Size = TRI.getRegSizeInBits(*RC);
+
+ // We might see special reserved values as sizes, and classes for other
+ // stuff the machine tries to model. If it's more than 512 bits, then it
+ // is very unlikely to be a register that can be spilt.
+ if (Size > 512)
+ continue;
+
+ unsigned Idx = StackSlotIdxes.size();
+ StackSlotIdxes.insert({{Size, 0}, Idx});
+ }
+
+ for (auto &Idx : StackSlotIdxes)
+ StackIdxesToPos[Idx.second] = Idx.first;
+
+ NumSlotIdxes = StackSlotIdxes.size();
+}
+
+LocIdx MLocTracker::trackRegister(unsigned ID) {
+ assert(ID != 0);
+ LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+ LocIdxToIDNum.grow(NewIdx);
+ LocIdxToLocID.grow(NewIdx);
+
+ // Default: it's an mphi.
+ ValueIDNum ValNum = {CurBB, 0, NewIdx};
+ // Was this reg ever touched by a regmask?
+ for (const auto &MaskPair : reverse(Masks)) {
+ if (MaskPair.first->clobbersPhysReg(ID)) {
+ // There was an earlier def we skipped.
+ ValNum = {CurBB, MaskPair.second, NewIdx};
+ break;
+ }
+ }
+
+ LocIdxToIDNum[NewIdx] = ValNum;
+ LocIdxToLocID[NewIdx] = ID;
+ return NewIdx;
+}
+
+void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
+ unsigned InstID) {
+ // Def any register we track that isn't preserved. The regmask
+ // terminates the liveness of a register, meaning its value can't be
+ // relied upon -- we represent this by giving it a new value.
+ for (auto Location : locations()) {
+ unsigned ID = LocIdxToLocID[Location.Idx];
+ // Don't clobber SP, even if the mask says it's clobbered.
+ if (ID < NumRegs && !SPAliases.count(ID) && MO->clobbersPhysReg(ID))
+ defReg(ID, CurBB, InstID);
+ }
+ Masks.push_back(std::make_pair(MO, InstID));
+}
+
+std::optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
+ SpillLocationNo SpillID(SpillLocs.idFor(L));
+
+ if (SpillID.id() == 0) {
+ // If there is no location, and we have reached the limit of how many stack
+ // slots to track, then don't track this one.
+ if (SpillLocs.size() >= StackWorkingSetLimit)
+ return std::nullopt;
+
+ // Spill location is untracked: create a record for this one, and all
+ // subregister slots too.
+ SpillID = SpillLocationNo(SpillLocs.insert(L));
+ for (unsigned StackIdx = 0; StackIdx < NumSlotIdxes; ++StackIdx) {
+ unsigned L = getSpillIDWithIdx(SpillID, StackIdx);
+ LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
+ LocIdxToIDNum.grow(Idx);
+ LocIdxToLocID.grow(Idx);
+ LocIDToLocIdx.push_back(Idx);
+ LocIdxToLocID[Idx] = L;
+ // Initialize to PHI value; corresponds to the location's live-in value
+ // during transfer function construction.
+ LocIdxToIDNum[Idx] = ValueIDNum(CurBB, 0, Idx);
+ }
+ }
+ return SpillID;
+}
+
+std::string MLocTracker::LocIdxToName(LocIdx Idx) const {
+ unsigned ID = LocIdxToLocID[Idx];
+ if (ID >= NumRegs) {
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ ID -= NumRegs;
+ unsigned Slot = ID / NumSlotIdxes;
+ return Twine("slot ")
+ .concat(Twine(Slot).concat(Twine(" sz ").concat(Twine(Pos.first)
+ .concat(Twine(" offs ").concat(Twine(Pos.second))))))
+ .str();
+ } else {
+ return TRI.getRegAsmName(ID).str();
+ }
+}
+
+std::string MLocTracker::IDAsString(const ValueIDNum &Num) const {
+ std::string DefName = LocIdxToName(Num.getLoc());
+ return Num.asString(DefName);
+}
+
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void MLocTracker::dump() {
+ for (auto Location : locations()) {
+ std::string MLocName = LocIdxToName(Location.Value.getLoc());
+ std::string DefName = Location.Value.asString(MLocName);
+ dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
+ }
+}
+
+LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() {
+ for (auto Location : locations()) {
+ std::string foo = LocIdxToName(Location.Idx);
+ dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
+ }
+}
+#endif
+
+MachineInstrBuilder
+MLocTracker::emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+
+ const MCInstrDesc &Desc = Properties.IsVariadic
+ ? TII.get(TargetOpcode::DBG_VALUE_LIST)
+ : TII.get(TargetOpcode::DBG_VALUE);
+
+#ifdef EXPENSIVE_CHECKS
+ assert(all_of(DbgOps,
+ [](const ResolvedDbgOp &Op) {
+ return Op.IsConst || !Op.Loc.isIllegal();
+ }) &&
+ "Did not expect illegal ops in DbgOps.");
+ assert((DbgOps.size() == 0 ||
+ DbgOps.size() == Properties.getLocationOpCount()) &&
+ "Expected to have either one DbgOp per MI LocationOp, or none.");
+#endif
+
+ auto GetRegOp = [](unsigned Reg) -> MachineOperand {
+ return MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
+ };
+
+ SmallVector<MachineOperand> MOs;
+
+ auto EmitUndef = [&]() {
+ MOs.clear();
+ MOs.assign(Properties.getLocationOpCount(), GetRegOp(0));
+ return BuildMI(MF, DL, Desc, false, MOs, Var.getVariable(),
+ Properties.DIExpr);
+ };
+
+ // Don't bother passing any real operands to BuildMI if any of them would be
+ // $noreg.
+ if (DbgOps.empty())
+ return EmitUndef();
+
+ bool Indirect = Properties.Indirect;
+
+ const DIExpression *Expr = Properties.DIExpr;
+
+ assert(DbgOps.size() == Properties.getLocationOpCount());
+
+ // If all locations are valid, accumulate them into our list of
+ // MachineOperands. For any spilled locations, either update the indirectness
+ // register or apply the appropriate transformations in the DIExpression.
+ for (size_t Idx = 0; Idx < Properties.getLocationOpCount(); ++Idx) {
+ const ResolvedDbgOp &Op = DbgOps[Idx];
+
+ if (Op.IsConst) {
+ MOs.push_back(Op.MO);
+ continue;
+ }
+
+ LocIdx MLoc = Op.Loc;
+ unsigned LocID = LocIdxToLocID[MLoc];
+ if (LocID >= NumRegs) {
+ SpillLocationNo SpillID = locIDToSpill(LocID);
+ StackSlotPos StackIdx = locIDToSpillIdx(LocID);
+ unsigned short Offset = StackIdx.second;
+
+ // TODO: support variables that are located in spill slots, with non-zero
+ // offsets from the start of the spill slot. It would require some more
+ // complex DIExpression calculations. This doesn't seem to be produced by
+ // LLVM right now, so don't try and support it.
+ // Accept no-subregister slots and subregisters where the offset is zero.
+ // The consumer should already have type information to work out how large
+ // the variable is.
+ if (Offset == 0) {
+ const SpillLoc &Spill = SpillLocs[SpillID.id()];
+ unsigned Base = Spill.SpillBase;
+
+ // There are several ways we can dereference things, and several inputs
+ // to consider:
+ // * NRVO variables will appear with IsIndirect set, but should have
+ // nothing else in their DIExpressions,
+ // * Variables with DW_OP_stack_value in their expr already need an
+ // explicit dereference of the stack location,
+ // * Values that don't match the variable size need DW_OP_deref_size,
+ // * Everything else can just become a simple location expression.
+
+ // We need to use deref_size whenever there's a mismatch between the
+ // size of value and the size of variable portion being read.
+ // Additionally, we should use it whenever dealing with stack_value
+ // fragments, to avoid the consumer having to determine the deref size
+ // from DW_OP_piece.
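+ // For example (illustrative only): a 32-bit value spilt to the stack but
+ // describing a 64-bit variable is emitted with DW_OP_deref_size 4 and
+ // DW_OP_stack_value appended, rather than as a simple indirect location.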
+ bool UseDerefSize = false;
+ unsigned ValueSizeInBits = getLocSizeInBits(MLoc);
+ unsigned DerefSizeInBytes = ValueSizeInBits / 8;
+ if (auto Fragment = Var.getFragment()) {
+ unsigned VariableSizeInBits = Fragment->SizeInBits;
+ if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex())
+ UseDerefSize = true;
+ } else if (auto Size = Var.getVariable()->getSizeInBits()) {
+ if (*Size != ValueSizeInBits) {
+ UseDerefSize = true;
+ }
+ }
+
+ SmallVector<uint64_t, 5> OffsetOps;
+ TRI.getOffsetOpcodes(Spill.SpillOffset, OffsetOps);
+ bool StackValue = false;
+
+ if (Properties.Indirect) {
+ // This is something like an NRVO variable, where the pointer has been
+ // spilt to the stack. It should end up being a memory location, with
+ // the pointer to the variable loaded off the stack with a deref:
+ assert(!Expr->isImplicit());
+ OffsetOps.push_back(dwarf::DW_OP_deref);
+ } else if (UseDerefSize && Expr->isSingleLocationExpression()) {
+ // TODO: Figure out how to handle deref size issues for variadic
+ // values.
+ // We're loading a value off the stack that's not the same size as the
+ // variable. Add / subtract stack offset, explicitly deref with a
+ // size, and add DW_OP_stack_value if not already present.
+ OffsetOps.push_back(dwarf::DW_OP_deref_size);
+ OffsetOps.push_back(DerefSizeInBytes);
+ StackValue = true;
+ } else if (Expr->isComplex() || Properties.IsVariadic) {
+ // A variable with no size ambiguity, but with extra elements in its
+ // expression. Manually dereference the stack location.
+ OffsetOps.push_back(dwarf::DW_OP_deref);
+ } else {
+ // A plain value that has been spilt to the stack, with no further
+ // context. Request a location expression, marking the DBG_VALUE as
+ // IsIndirect.
+ Indirect = true;
+ }
+
+ Expr = DIExpression::appendOpsToArg(Expr, OffsetOps, Idx, StackValue);
+ MOs.push_back(GetRegOp(Base));
+ } else {
+ // This is a stack location with a weird subregister offset: emit an
+ // undef DBG_VALUE instead.
+ return EmitUndef();
+ }
+ } else {
+ // Non-empty, non-stack slot, must be a plain register.
+ MOs.push_back(GetRegOp(LocID));
+ }
+ }
+
+ return BuildMI(MF, DL, Desc, Indirect, MOs, Var.getVariable(), Expr);
+}
+
+/// Default construct and initialize the pass.
+InstrRefBasedLDV::InstrRefBasedLDV() = default;
+
+bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ return isCalleeSavedReg(Reg);
+}
+bool InstrRefBasedLDV::isCalleeSavedReg(Register R) const {
+ for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// Something to restore in the future.
+// void InstrRefBasedLDV::printVarLocInMBB(..)
+#endif
+
+std::optional<SpillLocationNo>
+InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ Register Reg;
+ StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ return MTracker->getOrTrackSpillLoc({Reg, Offset});
+}
+
+std::optional<LocIdx>
+InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+ std::optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI);
+ if (!SpillLoc)
+ return std::nullopt;
+
+ // Where in the stack slot is this value defined -- i.e., what size of value
+ // is this? An important question, because it could be loaded into a register
+ // from the stack at some point. Happily the memory operand will tell us
+ // the size written to the stack.
+ auto *MemOperand = *MI.memoperands_begin();
+ unsigned SizeInBits = MemOperand->getSizeInBits();
+
+ // Find that position in the stack indexes we're tracking.
+ auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+ if (IdxIt == MTracker->StackSlotIdxes.end())
+    // That index is not tracked. This is surprising, and unlikely to ever
+ // occur, but the safe action is to indicate the variable is optimised out.
+ return std::nullopt;
+
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second);
+ return MTracker->getSpillMLoc(SpillID);
+}
+
+/// End all previous ranges related to @MI and start a new range from @MI
+/// if it is a DBG_VALUE instr.
+bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
+ if (!MI.isDebugValue())
+ return false;
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+ DbgValueProperties Properties(MI);
+
+  // If there are no instructions in this lexical scope, do no location
+  // tracking at all: this variable shouldn't get a legitimate location range.
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+    return true; // Handled by doing nothing: this variable is never in scope.
+
+ // MLocTracker needs to know that this register is read, even if it's only
+ // read by a debug inst.
+ for (const MachineOperand &MO : MI.debug_operands())
+ if (MO.isReg() && MO.getReg() != 0)
+ (void)MTracker->readReg(MO.getReg());
+
+  // If we're preparing for the second analysis (variables), the machine value
+  // locations are already solved, and we report this DBG_VALUE and the value
+  // it refers to, to VLocTracker.
+ if (VTracker) {
+ SmallVector<DbgOpID> DebugOps;
+ // Feed defVar the new variable location, or if this is a DBG_VALUE $noreg,
+ // feed defVar None.
+ if (!MI.isUndefDebugValue()) {
+ for (const MachineOperand &MO : MI.debug_operands()) {
+ // There should be no undef registers here, as we've screened for undef
+ // debug values.
+ if (MO.isReg()) {
+ DebugOps.push_back(DbgOpStore.insert(MTracker->readReg(MO.getReg())));
+ } else if (MO.isImm() || MO.isFPImm() || MO.isCImm()) {
+ DebugOps.push_back(DbgOpStore.insert(MO));
+ } else {
+ llvm_unreachable("Unexpected debug operand type.");
+ }
+ }
+ }
+ VTracker->defVar(MI, Properties, DebugOps);
+ }
+
+ // If performing final tracking of transfers, report this variable definition
+ // to the TransferTracker too.
+ if (TTracker)
+ TTracker->redefVar(MI);
+ return true;
+}
+
+std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
+ unsigned InstNo, unsigned OpNo, MachineInstr &MI,
+ const ValueTable *MLiveOuts, const ValueTable *MLiveIns) {
+ // Various optimizations may have happened to the value during codegen,
+ // recorded in the value substitution table. Apply any substitutions to
+ // the instruction / operand number in this DBG_INSTR_REF, and collect
+ // any subregister extractions performed during optimization.
+ const MachineFunction &MF = *MI.getParent()->getParent();
+
+ // Create dummy substitution with Src set, for lookup.
+ auto SoughtSub =
+ MachineFunction::DebugSubstitution({InstNo, OpNo}, {0, 0}, 0);
+
+ SmallVector<unsigned, 4> SeenSubregs;
+ auto LowerBoundIt = llvm::lower_bound(MF.DebugValueSubstitutions, SoughtSub);
+ while (LowerBoundIt != MF.DebugValueSubstitutions.end() &&
+ LowerBoundIt->Src == SoughtSub.Src) {
+ std::tie(InstNo, OpNo) = LowerBoundIt->Dest;
+ SoughtSub.Src = LowerBoundIt->Dest;
+ if (unsigned Subreg = LowerBoundIt->Subreg)
+ SeenSubregs.push_back(Subreg);
+ LowerBoundIt = llvm::lower_bound(MF.DebugValueSubstitutions, SoughtSub);
+ }
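+  // As a hypothetical illustration: a substitution table containing
+  // {(4, 0) -> (2, 0), sub_32bit} and {(2, 0) -> (1, 0), 0} rewrites a
+  // reference to instruction 4, operand 0 into instruction 1, operand 0,
+  // with sub_32bit collected in SeenSubregs for the extraction step below.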
+
+ // Default machine value number is <None> -- if no instruction defines
+ // the corresponding value, it must have been optimized out.
+ std::optional<ValueIDNum> NewID;
+
+ // Try to lookup the instruction number, and find the machine value number
+ // that it defines. It could be an instruction, or a PHI.
+ auto InstrIt = DebugInstrNumToInstr.find(InstNo);
+ auto PHIIt = llvm::lower_bound(DebugPHINumToValue, InstNo);
+ if (InstrIt != DebugInstrNumToInstr.end()) {
+ const MachineInstr &TargetInstr = *InstrIt->second.first;
+ uint64_t BlockNo = TargetInstr.getParent()->getNumber();
+
+ // Pick out the designated operand. It might be a memory reference, if
+ // a register def was folded into a stack store.
+ if (OpNo == MachineFunction::DebugOperandMemNumber &&
+ TargetInstr.hasOneMemOperand()) {
+ std::optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
+ if (L)
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
+ } else if (OpNo != MachineFunction::DebugOperandMemNumber) {
+      // Permit the debug-info to be completely wrong: identifying a
+      // nonexistent operand, or one that is not a register definition, means
+      // something unexpected happened during optimisation. Broken debug-info,
+      // however, shouldn't crash the compiler -- instead leave the variable
+      // value as None, which will make it appear "optimised out".
+ if (OpNo < TargetInstr.getNumOperands()) {
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ unsigned LocID = MTracker->getLocID(MO.getReg());
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+ }
+
+ if (!NewID) {
+ LLVM_DEBUG(
+ { dbgs() << "Seen instruction reference to illegal operand\n"; });
+ }
+ }
+ // else: NewID is left as None.
+ } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
+ // It's actually a PHI value. Which value it is might not be obvious, use
+ // the resolver helper to find out.
+ NewID = resolveDbgPHIs(*MI.getParent()->getParent(), MLiveOuts, MLiveIns,
+ MI, InstNo);
+ }
+
+ // Apply any subregister extractions, in reverse. We might have seen code
+ // like this:
+ // CALL64 @foo, implicit-def $rax
+ // %0:gr64 = COPY $rax
+ // %1:gr32 = COPY %0.sub_32bit
+ // %2:gr16 = COPY %1.sub_16bit
+ // %3:gr8 = COPY %2.sub_8bit
+ // In which case each copy would have been recorded as a substitution with
+ // a subregister qualifier. Apply those qualifiers now.
+ if (NewID && !SeenSubregs.empty()) {
+ unsigned Offset = 0;
+ unsigned Size = 0;
+
+ // Look at each subregister that we passed through, and progressively
+ // narrow in, accumulating any offsets that occur. Substitutions should
+ // only ever be the same or narrower width than what they read from;
+ // iterate in reverse order so that we go from wide to small.
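+    // For example (hypothetical x86 chain): narrowing through sub_16bit
+    // (offset 0, size 16 bits) and then sub_8bit_hi (offset 8, size 8)
+    // accumulates Offset = 8 and Size = 8, i.e. bits [8, 16) of the
+    // register that the instruction defined.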
+ for (unsigned Subreg : reverse(SeenSubregs)) {
+ unsigned ThisSize = TRI->getSubRegIdxSize(Subreg);
+ unsigned ThisOffset = TRI->getSubRegIdxOffset(Subreg);
+ Offset += ThisOffset;
+ Size = (Size == 0) ? ThisSize : std::min(Size, ThisSize);
+ }
+
+    // If that worked, look for an appropriate subregister within the register
+    // where the define happens. Don't look at values that were defined during
+ // a stack write: we can't currently express register locations within
+ // spills.
+ LocIdx L = NewID->getLoc();
+ if (NewID && !MTracker->isSpill(L)) {
+ // Find the register class for the register where this def happened.
+ // FIXME: no index for this?
+ Register Reg = MTracker->LocIdxToLocID[L];
+ const TargetRegisterClass *TRC = nullptr;
+ for (const auto *TRCI : TRI->regclasses())
+ if (TRCI->contains(Reg))
+ TRC = TRCI;
+ assert(TRC && "Couldn't find target register class?");
+
+      // If the register we have isn't the right size or in the right place,
+      // try to find a subregister inside it.
+ unsigned MainRegSize = TRI->getRegSizeInBits(*TRC);
+ if (Size != MainRegSize || Offset) {
+ // Enumerate all subregisters, searching.
+ Register NewReg = 0;
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, SR);
+ unsigned SubregSize = TRI->getSubRegIdxSize(Subreg);
+ unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg);
+ if (SubregSize == Size && SubregOffset == Offset) {
+ NewReg = SR;
+ break;
+ }
+ }
+
+ // If we didn't find anything: there's no way to express our value.
+ if (!NewReg) {
+ NewID = std::nullopt;
+ } else {
+ // Re-state the value as being defined within the subregister
+ // that we found.
+ LocIdx NewLoc = MTracker->lookupOrTrackRegister(NewReg);
+ NewID = ValueIDNum(NewID->getBlock(), NewID->getInst(), NewLoc);
+ }
+ }
+ } else {
+ // If we can't handle subregisters, unset the new value.
+ NewID = std::nullopt;
+ }
+ }
+
+ return NewID;
+}
+
+bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
+ if (!MI.isDebugRef())
+ return false;
+
+ // Only handle this instruction when we are building the variable value
+ // transfer function.
+ if (!VTracker && !TTracker)
+ return false;
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+ return true; // Handled by doing nothing. This variable is never in scope.
+
+ SmallVector<DbgOpID> DbgOpIDs;
+ for (const MachineOperand &MO : MI.debug_operands()) {
+ if (!MO.isDbgInstrRef()) {
+ assert(!MO.isReg() && "DBG_INSTR_REF should not contain registers");
+ DbgOpID ConstOpID = DbgOpStore.insert(DbgOp(MO));
+ DbgOpIDs.push_back(ConstOpID);
+ continue;
+ }
+
+ unsigned InstNo = MO.getInstrRefInstrIndex();
+ unsigned OpNo = MO.getInstrRefOpIndex();
+
+ // Default machine value number is <None> -- if no instruction defines
+ // the corresponding value, it must have been optimized out.
+ std::optional<ValueIDNum> NewID =
+ getValueForInstrRef(InstNo, OpNo, MI, MLiveOuts, MLiveIns);
+ // We have a value number or std::nullopt. If the latter, then kill the
+ // entire debug value.
+ if (NewID) {
+ DbgOpIDs.push_back(DbgOpStore.insert(*NewID));
+ } else {
+ DbgOpIDs.clear();
+ break;
+ }
+ }
+
+ // We have a DbgOpID for every value or for none. Tell the variable value
+ // tracker about it. The rest of this LiveDebugValues implementation acts
+ // exactly the same for DBG_INSTR_REFs as DBG_VALUEs (just, the former can
+ // refer to values that aren't immediately available).
+ DbgValueProperties Properties(Expr, false, true);
+ if (VTracker)
+ VTracker->defVar(MI, Properties, DbgOpIDs);
+
+ // If we're on the final pass through the function, decompose this INSTR_REF
+ // into a plain DBG_VALUE.
+ if (!TTracker)
+ return true;
+
+ // Fetch the concrete DbgOps now, as we will need them later.
+ SmallVector<DbgOp> DbgOps;
+ for (DbgOpID OpID : DbgOpIDs) {
+ DbgOps.push_back(DbgOpStore.find(OpID));
+ }
+
+ // Pick a location for the machine value number, if such a location exists.
+ // (This information could be stored in TransferTracker to make it faster).
+ SmallDenseMap<ValueIDNum, TransferTracker::LocationAndQuality> FoundLocs;
+ SmallVector<ValueIDNum> ValuesToFind;
+  // Initialize the preferred-location map with illegal locations, to be
+  // filled in later.
+ for (const DbgOp &Op : DbgOps) {
+ if (!Op.IsConst)
+ if (FoundLocs.insert({Op.ID, TransferTracker::LocationAndQuality()})
+ .second)
+ ValuesToFind.push_back(Op.ID);
+ }
+
+ for (auto Location : MTracker->locations()) {
+ LocIdx CurL = Location.Idx;
+ ValueIDNum ID = MTracker->readMLoc(CurL);
+ auto ValueToFindIt = find(ValuesToFind, ID);
+ if (ValueToFindIt == ValuesToFind.end())
+ continue;
+ auto &Previous = FoundLocs.find(ID)->second;
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ std::optional<TransferTracker::LocationQuality> ReplacementQuality =
+ TTracker->getLocQualityIfBetter(CurL, Previous.getQuality());
+ if (ReplacementQuality) {
+ Previous = TransferTracker::LocationAndQuality(CurL, *ReplacementQuality);
+ if (Previous.isBest()) {
+ ValuesToFind.erase(ValueToFindIt);
+ if (ValuesToFind.empty())
+ break;
+ }
+ }
+ }
+
+ SmallVector<ResolvedDbgOp> NewLocs;
+ for (const DbgOp &DbgOp : DbgOps) {
+ if (DbgOp.IsConst) {
+ NewLocs.push_back(DbgOp.MO);
+ continue;
+ }
+ LocIdx FoundLoc = FoundLocs.find(DbgOp.ID)->second.getLoc();
+ if (FoundLoc.isIllegal()) {
+ NewLocs.clear();
+ break;
+ }
+ NewLocs.push_back(FoundLoc);
+ }
+ // Tell transfer tracker that the variable value has changed.
+ TTracker->redefVar(MI, Properties, NewLocs);
+
+ // If there were values with no location, but all such values are defined in
+ // later instructions in this block, this is a block-local use-before-def.
+ if (!DbgOps.empty() && NewLocs.empty()) {
+ bool IsValidUseBeforeDef = true;
+ uint64_t LastUseBeforeDef = 0;
+ for (auto ValueLoc : FoundLocs) {
+ ValueIDNum NewID = ValueLoc.first;
+ LocIdx FoundLoc = ValueLoc.second.getLoc();
+ if (!FoundLoc.isIllegal())
+ continue;
+      // If a value with no location is not defined later in this block, then
+      // it has no location here at all, leaving this value undefined.
+ if (NewID.getBlock() != CurBB || NewID.getInst() <= CurInst) {
+ IsValidUseBeforeDef = false;
+ break;
+ }
+ LastUseBeforeDef = std::max(LastUseBeforeDef, NewID.getInst());
+ }
+ if (IsValidUseBeforeDef) {
+ TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false, true},
+ DbgOps, LastUseBeforeDef);
+ }
+ }
+
+ // Produce a DBG_VALUE representing what this DBG_INSTR_REF meant.
+ // This DBG_VALUE is potentially a $noreg / undefined location, if
+ // FoundLoc is illegal.
+ // (XXX -- could morph the DBG_INSTR_REF in the future).
+ MachineInstr *DbgMI = MTracker->emitLoc(NewLocs, V, Properties);
+
+ TTracker->PendingDbgValues.push_back(DbgMI);
+ TTracker->flushDbgValues(MI.getIterator(), nullptr);
+ return true;
+}
+
+bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
+ if (!MI.isDebugPHI())
+ return false;
+
+ // Analyse these only when solving the machine value location problem.
+ if (VTracker || TTracker)
+ return true;
+
+ // First operand is the value location, either a stack slot or register.
+ // Second is the debug instruction number of the original PHI.
+ const MachineOperand &MO = MI.getOperand(0);
+ unsigned InstrNum = MI.getOperand(1).getImm();
+
+ auto EmitBadPHI = [this, &MI, InstrNum]() -> bool {
+ // Helper lambda to do any accounting when we fail to find a location for
+ // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a
+ // dead stack slot, for example.
+ // Record a DebugPHIRecord with an empty value + location.
+ DebugPHINumToValue.push_back(
+ {InstrNum, MI.getParent(), std::nullopt, std::nullopt});
+ return true;
+ };
+
+ if (MO.isReg() && MO.getReg()) {
+ // The value is whatever's currently in the register. Read and record it,
+ // to be analysed later.
+ Register Reg = MO.getReg();
+ ValueIDNum Num = MTracker->readReg(Reg);
+ auto PHIRec = DebugPHIRecord(
+ {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
+ DebugPHINumToValue.push_back(PHIRec);
+
+ // Ensure this register is tracked.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ MTracker->lookupOrTrackRegister(*RAI);
+ } else if (MO.isFI()) {
+ // The value is whatever's in this stack slot.
+ unsigned FI = MO.getIndex();
+
+ // If the stack slot is dead, then this was optimized away.
+ // FIXME: stack slot colouring should account for slots that get merged.
+ if (MFI->isDeadObjectIndex(FI))
+ return EmitBadPHI();
+
+ // Identify this spill slot, ensure it's tracked.
+ Register Base;
+ StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
+ SpillLoc SL = {Base, Offs};
+ std::optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL);
+
+ // We might be able to find a value, but have chosen not to, to avoid
+ // tracking too much stack information.
+ if (!SpillNo)
+ return EmitBadPHI();
+
+    // Any stack location DBG_PHI should have an associated bit-size.
+ assert(MI.getNumOperands() == 3 && "Stack DBG_PHI with no size?");
+ unsigned slotBitSize = MI.getOperand(2).getImm();
+
+ unsigned SpillID = MTracker->getLocID(*SpillNo, {slotBitSize, 0});
+ LocIdx SpillLoc = MTracker->getSpillMLoc(SpillID);
+ ValueIDNum Result = MTracker->readMLoc(SpillLoc);
+
+ // Record this DBG_PHI for later analysis.
+ auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), Result, SpillLoc});
+ DebugPHINumToValue.push_back(DbgPHI);
+ } else {
+    // Else: if the operand is neither a legal register nor a stack slot, then
+ // we're being fed illegal debug-info. Record an empty PHI, so that any
+ // debug users trying to read this number will be put off trying to
+ // interpret the value.
+ LLVM_DEBUG(
+ { dbgs() << "Seen DBG_PHI with unrecognised operand format\n"; });
+ return EmitBadPHI();
+ }
+
+ return true;
+}
+
+void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
+ // Meta Instructions do not affect the debug liveness of any register they
+ // define.
+ if (MI.isImplicitDef()) {
+    // Except when there's an implicit def, and the location it's defining has
+    // no value number. The whole point of an implicit def is to announce that
+    // the register is live, without being specific about its value. So define
+    // a value if there isn't one already.
+ ValueIDNum Num = MTracker->readReg(MI.getOperand(0).getReg());
+ // Has a legitimate value -> ignore the implicit def.
+ if (Num.getLoc() != 0)
+ return;
+ // Otherwise, def it here.
+ } else if (MI.isMetaInstruction())
+ return;
+
+ // We always ignore SP defines on call instructions, they don't actually
+ // change the value of the stack pointer... except for win32's _chkstk. This
+ // is rare: filter quickly for the common case (no stack adjustments, not a
+ // call, etc). If it is a call that modifies SP, recognise the SP register
+ // defs.
+ bool CallChangesSP = false;
+ if (AdjustsStackInCalls && MI.isCall() && MI.getOperand(0).isSymbol() &&
+ !strcmp(MI.getOperand(0).getSymbolName(), StackProbeSymbolName.data()))
+ CallChangesSP = true;
+
+ // Test whether we should ignore a def of this register due to it being part
+ // of the stack pointer.
+ auto IgnoreSPAlias = [this, &MI, CallChangesSP](Register R) -> bool {
+ if (CallChangesSP)
+ return false;
+ return MI.isCall() && MTracker->SPAliases.count(R);
+ };
+
+ // Find the regs killed by MI, and find regmasks of preserved regs.
+ // Max out the number of statically allocated elements in `DeadRegs`, as this
+ // prevents fallback to std::set::count() operations.
+ SmallSet<uint32_t, 32> DeadRegs;
+ SmallVector<const uint32_t *, 4> RegMasks;
+ SmallVector<const MachineOperand *, 4> RegMaskPtrs;
+ for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def.
+ if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical() &&
+ !IgnoreSPAlias(MO.getReg())) {
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ // FIXME: Can we break out of this loop early if no insertion occurs?
+ DeadRegs.insert(*RAI);
+ } else if (MO.isRegMask()) {
+ RegMasks.push_back(MO.getRegMask());
+ RegMaskPtrs.push_back(&MO);
+ }
+ }
+
+ // Tell MLocTracker about all definitions, of regmasks and otherwise.
+ for (uint32_t DeadReg : DeadRegs)
+ MTracker->defReg(DeadReg, CurBB, CurInst);
+
+ for (const auto *MO : RegMaskPtrs)
+ MTracker->writeRegMask(MO, CurBB, CurInst);
+
+ // If this instruction writes to a spill slot, def that slot.
+ if (hasFoldedStackStore(MI)) {
+ if (std::optional<SpillLocationNo> SpillNo =
+ extractSpillBaseRegAndOffset(MI)) {
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+ }
+ }
+ }
+
+ if (!TTracker)
+ return;
+
+ // When committing variable values to locations: tell transfer tracker that
+ // we've clobbered things. It may be able to recover the variable from a
+ // different location.
+
+ // Inform TTracker about any direct clobbers.
+ for (uint32_t DeadReg : DeadRegs) {
+ LocIdx Loc = MTracker->lookupOrTrackRegister(DeadReg);
+ TTracker->clobberMloc(Loc, MI.getIterator(), false);
+ }
+
+ // Look for any clobbers performed by a register mask. Only test locations
+ // that are actually being tracked.
+ if (!RegMaskPtrs.empty()) {
+ for (auto L : MTracker->locations()) {
+ // Stack locations can't be clobbered by regmasks.
+ if (MTracker->isSpill(L.Idx))
+ continue;
+
+ Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ if (IgnoreSPAlias(Reg))
+ continue;
+
+ for (const auto *MO : RegMaskPtrs)
+ if (MO->clobbersPhysReg(Reg))
+ TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+ }
+ }
+
+ // Tell TTracker about any folded stack store.
+ if (hasFoldedStackStore(MI)) {
+ if (std::optional<SpillLocationNo> SpillNo =
+ extractSpillBaseRegAndOffset(MI)) {
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ TTracker->clobberMloc(L, MI.getIterator(), true);
+ }
+ }
+ }
+}
+
+void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
+ // In all circumstances, re-def all aliases. It's definitely a new value now.
+ for (MCRegAliasIterator RAI(DstRegNum, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+ MTracker->setReg(DstRegNum, SrcValue);
+
+ // Copy subregisters from one location to another.
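+  // As an illustration (x86 registers, purely as an example): a copy of $rax
+  // into $rbx also transfers the values tracked in $eax, $ax and $al into
+  // $ebx, $bx and $bl respectively.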
+ for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
+ unsigned SrcSubReg = SRI.getSubReg();
+ unsigned SubRegIdx = SRI.getSubRegIndex();
+ unsigned DstSubReg = TRI->getSubReg(DstRegNum, SubRegIdx);
+ if (!DstSubReg)
+ continue;
+
+    // Do the copy. There are two matching subregisters; the source value
+    // should have been def'd when the super-reg was, though the latter might
+    // not be tracked yet.
+ // This will force SrcSubReg to be tracked, if it isn't yet. Will read
+ // mphi values if it wasn't tracked.
+ LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg);
+ LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg);
+ (void)SrcL;
+ (void)DstL;
+ ValueIDNum CpyValue = MTracker->readReg(SrcSubReg);
+
+ MTracker->setReg(DstSubReg, CpyValue);
+ }
+}
+
+std::optional<SpillLocationNo>
+InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF) {
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return std::nullopt;
+
+ // Reject any memory operand that's aliased -- we can't guarantee its value.
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ if (PVal->isAliased(MFI))
+ return std::nullopt;
+
+ if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+ return std::nullopt; // This is not a spill instruction, since no valid size
+ // was returned from either function.
+
+ return extractSpillBaseRegAndOffset(MI);
+}
+
+bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!isSpillInstruction(MI, MF))
+ return false;
+
+ int FI;
+ Reg = TII->isStoreToStackSlotPostFE(MI, FI);
+ return Reg != 0;
+}
+
+std::optional<SpillLocationNo>
+InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!MI.hasOneMemOperand())
+ return std::nullopt;
+
+ // FIXME: Handle folded restore instructions with more than one memory
+ // operand.
+ if (MI.getRestoreSize(TII)) {
+ Reg = MI.getOperand(0).getReg();
+ return extractSpillBaseRegAndOffset(MI);
+ }
+ return std::nullopt;
+}
+
+bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
+ // XXX -- it's too difficult to implement VarLocBasedImpl's stack location
+ // limitations under the new model. Therefore, when comparing them, compare
+ // versions that don't attempt spills or restores at all.
+ if (EmulateOldLDV)
+ return false;
+
+ // Strictly limit ourselves to plain loads and stores, not all instructions
+ // that can access the stack.
+ int DummyFI = -1;
+ if (!TII->isStoreToStackSlotPostFE(MI, DummyFI) &&
+ !TII->isLoadFromStackSlotPostFE(MI, DummyFI))
+ return false;
+
+ MachineFunction *MF = MI.getMF();
+ unsigned Reg;
+
+ LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+ // First, if there are any DBG_VALUEs pointing at a spill slot that is
+ // written to, terminate that variable location. The value in memory
+ // will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
+ if (std::optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) {
+ // Un-set this location and clobber, so that earlier locations don't
+ // continue past this store.
+ for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*Loc, SlotIdx);
+ std::optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+ if (!MLoc)
+ continue;
+
+ // We need to over-write the stack slot with something (here, a def at
+ // this instruction) to ensure no values are preserved in this stack slot
+ // after the spill. It also prevents TTracker from trying to recover the
+ // location and re-installing it in the same place.
+ ValueIDNum Def(CurBB, CurInst, *MLoc);
+ MTracker->setMLoc(*MLoc, Def);
+ if (TTracker)
+ TTracker->clobberMloc(*MLoc, MI.getIterator());
+ }
+ }
+
+ // Try to recognise spill and restore instructions that may transfer a value.
+ if (isLocationSpill(MI, MF, Reg)) {
+ // isLocationSpill returning true should guarantee we can extract a
+ // location.
+ SpillLocationNo Loc = *extractSpillBaseRegAndOffset(MI);
+
+ auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
+ auto ReadValue = MTracker->readReg(SrcReg);
+ LocIdx DstLoc = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(DstLoc, ReadValue);
+
+ if (TTracker) {
+ LocIdx SrcLoc = MTracker->getRegMLoc(SrcReg);
+ TTracker->transferMlocs(SrcLoc, DstLoc, MI.getIterator());
+ }
+ };
+
+ // Then, transfer subreg bits.
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+      // Ensure this reg is tracked.
+ (void)MTracker->lookupOrTrackRegister(SR);
+ unsigned SubregIdx = TRI->getSubRegIndex(Reg, SR);
+ unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
+ DoTransfer(SR, SpillID);
+ }
+
+    // Directly look up the size of the main source reg, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
+ } else {
+ std::optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg);
+ if (!Loc)
+ return false;
+
+ // Assumption: we're reading from the base of the stack slot, not some
+ // offset into it. It seems very unlikely LLVM would ever generate
+ // restores where this wasn't true. This then becomes a question of what
+ // subregisters in the destination register line up with positions in the
+ // stack slot.
+
+ // Def all registers that alias the destination.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Now find subregisters within the destination register, and load values
+ // from stack slot positions.
+ auto DoTransfer = [&](Register DestReg, unsigned SpillID) {
+ LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
+ auto ReadValue = MTracker->readMLoc(SrcIdx);
+ MTracker->setReg(DestReg, ReadValue);
+ };
+
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, SR);
+ unsigned SpillID = MTracker->getLocID(*Loc, Subreg);
+ DoTransfer(SR, SpillID);
+ }
+
+    // Directly look up this register's slot idx by size, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(*Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
+ }
+ return true;
+}
+
+bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return false;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+
+ Register SrcReg = SrcRegOp->getReg();
+ Register DestReg = DestRegOp->getReg();
+
+ // Ignore identity copies. Yep, these make it as far as LiveDebugValues.
+ if (SrcReg == DestReg)
+ return true;
+
+  // For emulating VarLocBasedImpl:
+  // We want to recognize instructions where the destination register is a
+  // callee-saved register. If a register that could be clobbered by the call
+  // were chosen instead, there's a good chance it would be clobbered soon.
+  // The callee-saved destination is more likely to stay unclobbered for
+  // longer, even if the source register is killed.
+  //
+  // For InstrRefBasedImpl, we can track multiple locations per value, so
+  // ignore this condition.
+ if (EmulateOldLDV && !isCalleeSavedReg(DestReg))
+ return false;
+
+  // VarLocBasedImpl only followed killing copies; emulate that here.
+ if (EmulateOldLDV && !SrcRegOp->isKill())
+ return false;
+
+ // Before we update MTracker, remember which values were present in each of
+ // the locations about to be overwritten, so that we can recover any
+ // potentially clobbered variables.
+ DenseMap<LocIdx, ValueIDNum> ClobberedLocs;
+ if (TTracker) {
+ for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
+ LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
+ auto MLocIt = TTracker->ActiveMLocs.find(ClobberedLoc);
+ // If ActiveMLocs isn't tracking this location or there are no variables
+ // using it, don't bother remembering.
+ if (MLocIt == TTracker->ActiveMLocs.end() || MLocIt->second.empty())
+ continue;
+ ValueIDNum Value = MTracker->readReg(*RAI);
+ ClobberedLocs[ClobberedLoc] = Value;
+ }
+ }
+
+ // Copy MTracker info, including subregs if available.
+ InstrRefBasedLDV::performCopy(SrcReg, DestReg);
+
+ // The copy might have clobbered variables based on the destination register.
+ // Tell TTracker about it, passing the old ValueIDNum to search for
+ // alternative locations (or else terminating those variables).
+ if (TTracker) {
+ for (auto LocVal : ClobberedLocs) {
+      TTracker->clobberMloc(LocVal.first, LocVal.second, MI.getIterator(),
+                            false);
+ }
+ }
+
+ // Only produce a transfer of DBG_VALUE within a block where old LDV
+ // would have. We might make use of the additional value tracking in some
+ // other way, later.
+ if (TTracker && isCalleeSavedReg(DestReg) && SrcRegOp->isKill())
+ TTracker->transferMlocs(MTracker->getRegMLoc(SrcReg),
+ MTracker->getRegMLoc(DestReg), MI.getIterator());
+
+ // VarLocBasedImpl would quit tracking the old location after copying.
+ if (EmulateOldLDV && SrcReg != DestReg)
+ MTracker->defReg(SrcReg, CurBB, CurInst);
+
+ return true;
+}
+
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed debug instruction to analyze for
+/// fragment usage.
+void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
+ assert(MI.isDebugValueLike());
+ DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
+
+ // If this is the first sighting of this variable, then we are guaranteed
+ // there are currently no overlapping fragments either. Initialize the set
+ // of seen fragments, record no overlaps for the current one, and return.
+ auto SeenIt = SeenFragments.find(MIVar.getVariable());
+ if (SeenIt == SeenFragments.end()) {
+ SmallSet<FragmentInfo, 4> OneFragment;
+ OneFragment.insert(ThisFragment);
+ SeenFragments.insert({MIVar.getVariable(), OneFragment});
+
+ OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ return;
+ }
+
+ // If this particular Variable/Fragment pair already exists in the overlap
+ // map, it has already been accounted for.
+ auto IsInOLapMap =
+ OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ if (!IsInOLapMap.second)
+ return;
+
+ auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+ auto &AllSeenFragments = SeenIt->second;
+
+ // Otherwise, examine all other seen fragments for this variable, with "this"
+ // fragment being a previously unseen fragment. Record any pair of
+ // overlapping fragments.
+ for (const auto &ASeenFragment : AllSeenFragments) {
+ // Does this previously seen fragment overlap?
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+ // Yes: Mark the current fragment as being overlapped.
+ ThisFragmentsOverlaps.push_back(ASeenFragment);
+ // Mark the previously seen fragment as being overlapped by the current
+ // one.
+ auto ASeenFragmentsOverlaps =
+ OverlapFragments.find({MIVar.getVariable(), ASeenFragment});
+ assert(ASeenFragmentsOverlaps != OverlapFragments.end() &&
+ "Previously seen var fragment has no vector of overlaps");
+ ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+ }
+ }
+
+ AllSeenFragments.insert(ThisFragment);
+}
+
+void InstrRefBasedLDV::process(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
+  // Try to interpret an MI as a debug or transfer instruction. Only if it's
+  // none of these should we interpret its register defs as new value
+  // definitions.
+ if (transferDebugValue(MI))
+ return;
+ if (transferDebugInstrRef(MI, MLiveOuts, MLiveIns))
+ return;
+ if (transferDebugPHI(MI))
+ return;
+ if (transferRegisterCopy(MI))
+ return;
+ if (transferSpillOrRestoreInst(MI))
+ return;
+ transferRegisterDef(MI);
+}
+
+void InstrRefBasedLDV::produceMLocTransferFunction(
+ MachineFunction &MF, SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks) {
+  // Because we try to optimize around register mask operands by ignoring regs
+  // that aren't currently tracked, we set up something ugly for later: RegMask
+  // operands that are seen earlier than the first use of a register still
+  // need to clobber that register in the transfer function. But this
+  // information isn't actively recorded. Instead, we track each RegMask used
+  // in each block, and accumulate the clobbered but untracked registers in
+  // each block into the following bitvector. Later, if new values are
+  // tracked, we can add appropriate clobbers.
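+  // (Hypothetical scenario, for illustration: a call in block 3 clobbers
+  // $xmm0 via its regmask before any instruction has caused $xmm0 to be
+  // tracked. The bit kept in BlockMasks[3] lets us add that clobber to the
+  // transfer function later, if $xmm0 ever becomes tracked.)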
+ SmallVector<BitVector, 32> BlockMasks;
+ BlockMasks.resize(MaxNumBlocks);
+
+ // Reserve one bit per register for the masks described above.
+ unsigned BVWords = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ for (auto &BV : BlockMasks)
+ BV.resize(TRI->getNumRegs(), true);
+
+ // Step through all instructions and inhale the transfer function.
+ for (auto &MBB : MF) {
+ // Object fields that are read by trackers to know where we are in the
+ // function.
+ CurBB = MBB.getNumber();
+ CurInst = 1;
+
+ // Set all machine locations to a PHI value. For transfer function
+ // production only, this signifies the live-in value to the block.
+ MTracker->reset();
+ MTracker->setMPhis(CurBB);
+
+ // Step through each instruction in this block.
+ for (auto &MI : MBB) {
+      // Pass null value tables in when accumulating the machine transfer
+      // function; they aren't needed during this first pass.
+ process(MI, nullptr, nullptr);
+
+ // Also accumulate fragment map.
+ if (MI.isDebugValueLike())
+ accumulateFragmentMap(MI);
+
+ // Create a map from the instruction number (if present) to the
+ // MachineInstr and its position.
+ if (uint64_t InstrNo = MI.peekDebugInstrNum()) {
+ auto InstrAndPos = std::make_pair(&MI, CurInst);
+ auto InsertResult =
+ DebugInstrNumToInstr.insert(std::make_pair(InstrNo, InstrAndPos));
+
+ // There should never be duplicate instruction numbers.
+ assert(InsertResult.second);
+ (void)InsertResult;
+ }
+
+ ++CurInst;
+ }
+
+ // Produce the transfer function, a map of machine location to new value. If
+ // any machine location has the live-in phi value from the start of the
+ // block, it's live-through and doesn't need recording in the transfer
+ // function.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &P = Location.Value;
+ if (P.isPHI() && P.getLoc() == Idx.asU64())
+ continue;
+
+ // Insert-or-update.
+ auto &TransferMap = MLocTransfer[CurBB];
+ auto Result = TransferMap.insert(std::make_pair(Idx.asU64(), P));
+ if (!Result.second)
+ Result.first->second = P;
+ }
+
+ // Accumulate any bitmask operands into the clobbered reg mask for this
+ // block.
+ for (auto &P : MTracker->Masks) {
+ BlockMasks[CurBB].clearBitsNotInMask(P.first->getRegMask(), BVWords);
+ }
+ }
+
+  // Compute a bitvector of all the registers that are tracked across the
+  // whole function.
+ BitVector UsedRegs(TRI->getNumRegs());
+ for (auto Location : MTracker->locations()) {
+ unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
+ // Ignore stack slots, and aliases of the stack pointer.
+ if (ID >= TRI->getNumRegs() || MTracker->SPAliases.count(ID))
+ continue;
+ UsedRegs.set(ID);
+ }
+
+  // Check that any regmask-clobber of a register that gets tracked is not
+  // live-through in the transfer function. It needs to be clobbered at the
+  // very least.
+ for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
+ BitVector &BV = BlockMasks[I];
+ BV.flip();
+ BV &= UsedRegs;
+ // This produces all the bits that we clobber, but also use. Check that
+ // they're all clobbered or at least set in the designated transfer
+ // elem.
+ for (unsigned Bit : BV.set_bits()) {
+ unsigned ID = MTracker->getLocID(Bit);
+ LocIdx Idx = MTracker->LocIDToLocIdx[ID];
+ auto &TransferMap = MLocTransfer[I];
+
+ // Install a value representing the fact that this location is effectively
+ // written to in this block. As there's no reserved value, instead use
+ // a value number that is never generated. Pick the value number for the
+ // first instruction in the block, def'ing this location, which we know
+ // this block never used anyway.
+ ValueIDNum NotGeneratedNum = ValueIDNum(I, 1, Idx);
+ auto Result =
+ TransferMap.insert(std::make_pair(Idx.asU64(), NotGeneratedNum));
+ if (!Result.second) {
+ ValueIDNum &ValueID = Result.first->second;
+ if (ValueID.getBlock() == I && ValueID.isPHI())
+ // It was left as live-through. Set it to clobbered.
+ ValueID = NotGeneratedNum;
+ }
+ }
+ }
+}
+
+bool InstrRefBasedLDV::mlocJoin(
+ MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ FuncValueTable &OutLocs, ValueTable &InLocs) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+ bool Changed = false;
+
+ // Handle value-propagation when control flow merges on entry to a block. For
+ // any location without a PHI already placed, the location has the same value
+ // as its predecessors. If a PHI is placed, test to see whether it's now a
+ // redundant PHI that we can eliminate.
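+  // For example (hypothetical numbering): if every predecessor's live-out for
+  // a location is the value defined by instruction 4 of block 1, then a PHI
+  // placed here is redundant, and the live-in becomes that value directly.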
+
+ SmallVector<const MachineBasicBlock *, 8> BlockOrders;
+ for (auto *Pred : MBB.predecessors())
+ BlockOrders.push_back(Pred);
+
+ // Visit predecessors in RPOT order.
+ auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ return BBToOrder.find(A)->second < BBToOrder.find(B)->second;
+ };
+ llvm::sort(BlockOrders, Cmp);
+
+ // Skip entry block.
+ if (BlockOrders.size() == 0)
+ return false;
+
+ // Step through all machine locations, look at each predecessor and test
+ // whether we can eliminate redundant PHIs.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+
+    // Pick out the first predecessor's live-out value for this location. It's
+ // guaranteed to not be a backedge, as we order by RPO.
+ ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+    // If we've already eliminated a PHI here, do no further checking; just
+    // propagate the first predecessor's live-out value into this block.
+ if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) {
+ if (InLocs[Idx.asU64()] != FirstVal) {
+ InLocs[Idx.asU64()] = FirstVal;
+ Changed |= true;
+ }
+ continue;
+ }
+
+    // We're now examining a PHI to see whether it's unnecessary. Loop around
+ // the other live-in values and test whether they're all the same.
+ bool Disagree = false;
+ for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
+ const MachineBasicBlock *PredMBB = BlockOrders[I];
+ const ValueIDNum &PredLiveOut =
+ OutLocs[PredMBB->getNumber()][Idx.asU64()];
+
+ // Incoming values agree, continue trying to eliminate this PHI.
+ if (FirstVal == PredLiveOut)
+ continue;
+
+ // We can also accept a PHI value that feeds back into itself.
+ if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx))
+ continue;
+
+ // Live-out of a predecessor disagrees with the first predecessor.
+ Disagree = true;
+ }
+
+ // No disagreement? No PHI. Otherwise, leave the PHI in live-ins.
+ if (!Disagree) {
+ InLocs[Idx.asU64()] = FirstVal;
+ Changed |= true;
+ }
+ }
+
+ // TODO: Reimplement NumInserted and NumRemoved.
+ return Changed;
+}
+
+void InstrRefBasedLDV::findStackIndexInterference(
+ SmallVectorImpl<unsigned> &Slots) {
+ // We could spend a bit of time finding the exact, minimal, set of stack
+ // indexes that interfere with each other, much like reg units. Or, we can
+ // rely on the fact that:
+ // * The smallest / lowest index will interfere with everything at zero
+ // offset, which will be the largest set of registers,
+ // * Most indexes with non-zero offset will end up being interference units
+ // anyway.
+ // So just pick those out and return them.
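+  // As a hypothetical example: the {8, 0} index below interferes with every
+  // index at offset zero, while a {32, 32} index (32 bits at a 32-bit offset)
+  // is returned as its own interference unit by the loop that follows.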
+
+ // We can rely on a single-byte stack index existing already, because we
+ // initialize them in MLocTracker.
+ auto It = MTracker->StackSlotIdxes.find({8, 0});
+ assert(It != MTracker->StackSlotIdxes.end());
+ Slots.push_back(It->second);
+
+ // Find anything that has a non-zero offset and add that too.
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ // Is offset zero? If so, ignore.
+ if (!Pair.first.second)
+ continue;
+ Slots.push_back(Pair.second);
+ }
+}
+
+void InstrRefBasedLDV::placeMLocPHIs(
+ MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ FuncValueTable &MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ SmallVector<unsigned, 4> StackUnits;
+ findStackIndexInterference(StackUnits);
+
+ // To avoid repeatedly running the PHI placement algorithm, leverage the
+  // fact that a def of a register MUST also def its register units. Find the
+ // units for registers, place PHIs for them, and then replicate them for
+ // aliasing registers. Some inputs that are never def'd (DBG_PHIs of
+ // arguments) don't lead to register units being tracked, just place PHIs for
+ // those registers directly. Stack slots have their own form of "unit",
+ // store them to one side.
+ SmallSet<Register, 32> RegUnitsToPHIUp;
+ SmallSet<LocIdx, 32> NormalLocsToPHI;
+ SmallSet<SpillLocationNo, 32> StackSlots;
+ for (auto Location : MTracker->locations()) {
+ LocIdx L = Location.Idx;
+ if (MTracker->isSpill(L)) {
+ StackSlots.insert(MTracker->locIDToSpill(MTracker->LocIdxToLocID[L]));
+ continue;
+ }
+
+ Register R = MTracker->LocIdxToLocID[L];
+ SmallSet<Register, 8> FoundRegUnits;
+ bool AnyIllegal = false;
+ for (MCRegUnit Unit : TRI->regunits(R.asMCReg())) {
+ for (MCRegUnitRootIterator URoot(Unit, TRI); URoot.isValid(); ++URoot) {
+ if (!MTracker->isRegisterTracked(*URoot)) {
+          // Not all roots were loaded into the tracking map: this register
+          // isn't actually def'd anywhere; we only read from it. Generate
+          // PHIs for this reg, but don't iterate units.
+ AnyIllegal = true;
+ } else {
+ FoundRegUnits.insert(*URoot);
+ }
+ }
+ }
+
+ if (AnyIllegal) {
+ NormalLocsToPHI.insert(L);
+ continue;
+ }
+
+ RegUnitsToPHIUp.insert(FoundRegUnits.begin(), FoundRegUnits.end());
+ }
+
+ // Lambda to fetch PHIs for a given location, and write into the PHIBlocks
+ // collection.
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
+ auto CollectPHIsForLoc = [&](LocIdx L) {
+ // Collect the set of defs.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ MachineBasicBlock *MBB = OrderToBB[I];
+ const auto &TransferFunc = MLocTransfer[MBB->getNumber()];
+ if (TransferFunc.find(L) != TransferFunc.end())
+ DefBlocks.insert(MBB);
+ }
+
+ // The entry block defs the location too: it's the live-in / argument value.
+ // Only insert if there are other defs though; everything is trivially live
+ // through otherwise.
+ if (!DefBlocks.empty())
+ DefBlocks.insert(&*MF.begin());
+
+ // Ask the SSA construction algorithm where we should put PHIs. Clear
+ // anything that might have been hanging around from earlier.
+ PHIBlocks.clear();
+ BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks);
+ };
+
+ auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) {
+ for (const MachineBasicBlock *MBB : PHIBlocks)
+ MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L);
+ };
+
+ // For locations with no reg units, just place PHIs.
+ for (LocIdx L : NormalLocsToPHI) {
+ CollectPHIsForLoc(L);
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+ }
+
+ // For stack slots, calculate PHIs for the equivalent of the units, then
+ // install for each index.
+ for (SpillLocationNo Slot : StackSlots) {
+ for (unsigned Idx : StackUnits) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Slot, Idx);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ CollectPHIsForLoc(L);
+ InstallPHIsAtLoc(L);
+
+ // Find anything that aliases this stack index, install PHIs for it too.
+ unsigned Size, Offset;
+ std::tie(Size, Offset) = MTracker->StackIdxesToPos[Idx];
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ unsigned ThisSize, ThisOffset;
+ std::tie(ThisSize, ThisOffset) = Pair.first;
+ if (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset)
+ continue;
+
+ unsigned ThisID = MTracker->getSpillIDWithIdx(Slot, Pair.second);
+ LocIdx ThisL = MTracker->getSpillMLoc(ThisID);
+ InstallPHIsAtLoc(ThisL);
+ }
+ }
+ }
+
+ // For reg units, place PHIs, and then place them for any aliasing registers.
+ for (Register R : RegUnitsToPHIUp) {
+ LocIdx L = MTracker->lookupOrTrackRegister(R);
+ CollectPHIsForLoc(L);
+
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+
+ // Now find aliases and install PHIs for those.
+ for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) {
+ // Super-registers that are "above" the largest register read/written by
+ // the function will alias, but will not be tracked.
+ if (!MTracker->isRegisterTracked(*RAI))
+ continue;
+
+ LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI);
+ InstallPHIsAtLoc(AliasLoc);
+ }
+ }
+}
+
+void InstrRefBasedLDV::buildMLocValueMap(
+ MachineFunction &MF, FuncValueTable &MInLocs, FuncValueTable &MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist, Pending;
+
+ // We track what is on the current and pending worklist to avoid inserting
+ // the same thing twice. We could avoid this with a custom priority queue,
+ // but this is probably not worth it.
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
+
+ // Initialize worklist with every block to be visited. Also produce list of
+ // all blocks.
+ SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
+ for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
+ Worklist.push(I);
+ OnWorklist.insert(OrderToBB[I]);
+ AllBlocks.insert(OrderToBB[I]);
+ }
+
+ // Initialize entry block to PHIs. These represent arguments.
+ for (auto Location : MTracker->locations())
+ MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+
+ MTracker->reset();
+
+  // Start by placing PHIs, using the usual SSA construction algorithm.
+  // Consider any machine-location that isn't live-through a block to be def'd
+  // in that block.
+ placeMLocPHIs(MF, AllBlocks, MInLocs, MLocTransfer);
+
+ // Propagate values to eliminate redundant PHIs. At the same time, this
+ // produces the table of Block x Location => Value for the entry to each
+ // block.
+ // The kind of PHIs we can eliminate are, for example, where one path in a
+ // conditional spills and restores a register, and the register still has
+  // the same value once control flow joins, unbeknownst to the PHI placement
+  // code. Propagating values allows us to identify such unnecessary PHIs and
+ // remove them.
+ SmallPtrSet<const MachineBasicBlock *, 16> Visited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ // Vector for storing the evaluated block transfer function.
+ SmallVector<std::pair<LocIdx, ValueIDNum>, 32> ToRemap;
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ // Join the values in all predecessor blocks.
+ bool InLocsChanged;
+ InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ InLocsChanged |= Visited.insert(MBB).second;
+
+      // Don't examine the transfer function if we've visited this block at
+      // least once, and the live-ins haven't changed.
+ if (!InLocsChanged)
+ continue;
+
+ // Load the current set of live-ins into MLocTracker.
+ MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+
+ // Each element of the transfer function can be a new def, or a read of
+ // a live-in value. Evaluate each element, and store to "ToRemap".
+ ToRemap.clear();
+ for (auto &P : MLocTransfer[CurBB]) {
+ if (P.second.getBlock() == CurBB && P.second.isPHI()) {
+ // This is a movement of whatever was live in. Read it.
+ ValueIDNum NewID = MTracker->readMLoc(P.second.getLoc());
+ ToRemap.push_back(std::make_pair(P.first, NewID));
+ } else {
+ // It's a def. Just set it.
+ assert(P.second.getBlock() == CurBB);
+ ToRemap.push_back(std::make_pair(P.first, P.second));
+ }
+ }
+
+ // Commit the transfer function changes into mloc tracker, which
+ // transforms the contents of the MLocTracker into the live-outs.
+ for (auto &P : ToRemap)
+ MTracker->setMLoc(P.first, P.second);
+
+ // Now copy out-locs from mloc tracker into out-loc vector, checking
+ // whether changes have occurred. These changes can have come from both
+ // the transfer function, and mlocJoin.
+ bool OLChanged = false;
+ for (auto Location : MTracker->locations()) {
+ OLChanged |= MOutLocs[CurBB][Location.Idx.asU64()] != Location.Value;
+ MOutLocs[CurBB][Location.Idx.asU64()] = Location.Value;
+ }
+
+ MTracker->reset();
+
+ // No need to examine successors again if out-locs didn't change.
+ if (!OLChanged)
+ continue;
+
+ // All successors should be visited: put any back-edges on the pending
+ // list for the next pass-through, and any other successors to be
+ // visited this pass, if they're not going to be already.
+ for (auto *s : MBB->successors()) {
+ // Does branching to this successor represent a back-edge?
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ // No: visit it during this dataflow iteration.
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else {
+ // Yes: visit it on the next iteration.
+ if (OnPending.insert(s).second)
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+
+ Worklist.swap(Pending);
+ std::swap(OnPending, OnWorklist);
+ OnPending.clear();
+    // At this point, pending must be empty, since it was just the empty
+    // worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+  // Once the live-ins no longer change on a pass of mlocJoin(), we've
+  // eliminated all redundant PHIs.
+}
+
+void InstrRefBasedLDV::BlockPHIPlacement(
+ const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) {
+ // Apply IDF calculator to the designated set of location defs, storing
+ // required PHIs into PHIBlocks. Uses the dominator tree stored in the
+ // InstrRefBasedLDV object.
+ IDFCalculatorBase<MachineBasicBlock, false> IDF(DomTree->getBase());
+
+ IDF.setLiveInBlocks(AllBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ IDF.calculate(PHIBlocks);
+}
+
+bool InstrRefBasedLDV::pickVPHILoc(
+ SmallVectorImpl<DbgOpID> &OutValues, const MachineBasicBlock &MBB,
+ const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
+
+ // No predecessors means no PHIs.
+ if (BlockOrders.empty())
+ return false;
+
+  // All the location operands that do not already agree need to be joined;
+  // track the indices of each such location operand here.
+ SmallDenseSet<unsigned> LocOpsToJoin;
+
+ auto FirstValueIt = LiveOuts.find(BlockOrders[0]);
+ if (FirstValueIt == LiveOuts.end())
+ return false;
+ const DbgValue &FirstValue = *FirstValueIt->second;
+
+ for (const auto p : BlockOrders) {
+ auto OutValIt = LiveOuts.find(p);
+ if (OutValIt == LiveOuts.end())
+ // If we have a predecessor not in scope, we'll never find a PHI position.
+ return false;
+ const DbgValue &OutVal = *OutValIt->second;
+
+ // No-values cannot have locations we can join on.
+ if (OutVal.Kind == DbgValue::NoVal)
+ return false;
+
+ // For unjoined VPHIs where we don't know the location, we definitely
+ // can't find a join loc unless the VPHI is a backedge.
+ if (OutVal.isUnjoinedPHI() && OutVal.BlockNo != MBB.getNumber())
+ return false;
+
+ if (!FirstValue.Properties.isJoinable(OutVal.Properties))
+ return false;
+
+ for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) {
+ // An unjoined PHI has no defined locations, and so a shared location must
+ // be found for every operand.
+ if (OutVal.isUnjoinedPHI()) {
+ LocOpsToJoin.insert(Idx);
+ continue;
+ }
+ DbgOpID FirstValOp = FirstValue.getDbgOpID(Idx);
+ DbgOpID OutValOp = OutVal.getDbgOpID(Idx);
+ if (FirstValOp != OutValOp) {
+ // We can never join constant ops - the ops must either both be equal
+ // constant ops or non-const ops.
+ if (FirstValOp.isConst() || OutValOp.isConst())
+ return false;
+ else
+ LocOpsToJoin.insert(Idx);
+ }
+ }
+ }
+
+ SmallVector<DbgOpID> NewDbgOps;
+
+ for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) {
+ // If this op doesn't need to be joined because the values agree, use that
+ // already-agreed value.
+ if (!LocOpsToJoin.contains(Idx)) {
+ NewDbgOps.push_back(FirstValue.getDbgOpID(Idx));
+ continue;
+ }
+
+ std::optional<ValueIDNum> JoinedOpLoc =
+ pickOperandPHILoc(Idx, MBB, LiveOuts, MOutLocs, BlockOrders);
+
+ if (!JoinedOpLoc)
+ return false;
+
+ NewDbgOps.push_back(DbgOpStore.insert(*JoinedOpLoc));
+ }
+
+ OutValues.append(NewDbgOps);
+ return true;
+}
+
+std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc(
+ unsigned DbgOpIdx, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts,
+ FuncValueTable &MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
+
+  // Collect, for each predecessor, the set of locations where its live-out
+  // value can be found.
+ SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ unsigned NumLocs = MTracker->getNumLocs();
+
+ for (const auto p : BlockOrders) {
+ unsigned ThisBBNum = p->getNumber();
+ auto OutValIt = LiveOuts.find(p);
+ assert(OutValIt != LiveOuts.end());
+ const DbgValue &OutVal = *OutValIt->second;
+ DbgOpID OutValOpID = OutVal.getDbgOpID(DbgOpIdx);
+ DbgOp OutValOp = DbgOpStore.find(OutValOpID);
+ assert(!OutValOp.IsConst);
+
+ // Create new empty vector of locations.
+ Locs.resize(Locs.size() + 1);
+
+ // If the live-in value is a def, find the locations where that value is
+ // present. Do the same for VPHIs where we know the VPHI value.
+ if (OutVal.Kind == DbgValue::Def ||
+ (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() &&
+ !OutValOp.isUndef())) {
+ ValueIDNum ValToLookFor = OutValOp.ID;
+ // Search the live-outs of the predecessor for the specified value.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ Locs.back().push_back(LocIdx(I));
+ }
+ } else {
+ assert(OutVal.Kind == DbgValue::VPHI);
+ // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e.
+ // a value that's live-through the whole loop. (It has to be a backedge:
+ // the only way this block's own VPHI can appear as a predecessor's
+ // live-out is if the value flowed around the loop back to that
+ // predecessor.) We can accept as a PHI location any location where the
+ // other predecessors agree, _and_ the machine locations feed back into
+ // themselves. Therefore, add all self-looping machine-value PHI locations.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I));
+ if (MOutLocs[ThisBBNum][I] == MPHI)
+ Locs.back().push_back(LocIdx(I));
+ }
+ }
+ }
+ // We should have found locations for all predecessors, or returned.
+ assert(Locs.size() == BlockOrders.size());
+
+ // Starting with the first set of locations, take the intersection with
+ // subsequent sets.
+ SmallVector<LocIdx, 4> CandidateLocs = Locs[0];
+ for (unsigned int I = 1; I < Locs.size(); ++I) {
+ auto &LocVec = Locs[I];
+ SmallVector<LocIdx, 4> NewCandidates;
+ std::set_intersection(CandidateLocs.begin(), CandidateLocs.end(),
+ LocVec.begin(), LocVec.end(), std::inserter(NewCandidates, NewCandidates.begin()));
+ CandidateLocs = NewCandidates;
+ }
+ if (CandidateLocs.empty())
+ return std::nullopt;
+
+ // We now have a set of LocIdxes that contain the right output value in
+ // each of the predecessors. Pick the lowest; if there's a register loc,
+ // that'll be it.
+ LocIdx L = *CandidateLocs.begin();
+
+ // Return a PHI-value-number for the found location.
+ ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
+ return PHIVal;
+}
+
+bool InstrRefBasedLDV::vlocJoin(
+ MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DbgValue &LiveIn) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+ bool Changed = false;
+
+ // Order predecessors by RPOT order, for exploring them in that order.
+ SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors());
+
+ auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return BBToOrder[A] < BBToOrder[B];
+ };
+
+ llvm::sort(BlockOrders, Cmp);
+
+ unsigned CurBlockRPONum = BBToOrder[&MBB];
+
+ // Collect all the incoming DbgValues for this variable, from predecessor
+ // live-out values.
+ SmallVector<InValueT, 8> Values;
+ bool Bail = false;
+ int BackEdgesStart = 0;
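+ // Predecessors with an RPO number below ours reach us via forward edges;
+ // because BlockOrders is sorted by RPO, they occupy the front of Values,
+ // so BackEdgesStart ends up indexing the first backedge predecessor.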
+ for (auto *p : BlockOrders) {
+ // If the predecessor isn't in scope / to be explored, we'll never be
+ // able to join any locations.
+ if (!BlocksToExplore.contains(p)) {
+ Bail = true;
+ break;
+ }
+
+ // All Live-outs will have been initialized.
+ DbgValue &OutLoc = *VLOCOutLocs.find(p)->second;
+
+ // Keep track of where back-edges begin in the Values vector. Relies on
+ // BlockOrders being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[p];
+ if (ThisBBRPONum < CurBlockRPONum)
+ ++BackEdgesStart;
+
+ Values.push_back(std::make_pair(p, &OutLoc));
+ }
+
+ // If there were no values, or one of the predecessors couldn't have a
+ // value, then give up immediately. It's not safe to produce a live-in
+ // value. Leave it as whatever it was before.
+ if (Bail || Values.size() == 0)
+ return false;
+
+ // All (non-entry) blocks have at least one non-backedge predecessor.
+ // Pick the variable value from the first of these, to compare against
+ // all others.
+ const DbgValue &FirstVal = *Values[0].second;
+
+ // If the old live-in value is not a PHI then either a) no PHI is needed
+ // here, or b) we eliminated the PHI that was here. Either way, we can just
+ // propagate in the first parent's incoming value.
+ if (LiveIn.Kind != DbgValue::VPHI || LiveIn.BlockNo != MBB.getNumber()) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ }
+
+ // Scan for variable values that can never be resolved: if they have
+ // different DIExpressions, different indirectness, or are mixed constants /
+ // non-constants.
+ for (const auto &V : Values) {
+ if (!V.second->Properties.isJoinable(FirstVal.Properties))
+ return false;
+ if (V.second->Kind == DbgValue::NoVal)
+ return false;
+ if (!V.second->hasJoinableLocOps(FirstVal))
+ return false;
+ }
+
+ // Try to eliminate this PHI. Do the incoming values all agree?
+ bool Disagree = false;
+ for (auto &V : Values) {
+ if (*V.second == FirstVal)
+ continue; // No disagreement.
+
+ // If the two values are not equal but have equal non-empty IDs then they refer
+ // to the same value from different sources (e.g. one is VPHI and the other
+ // is Def), which does not cause disagreement.
+ if (V.second->hasIdenticalValidLocOps(FirstVal))
+ continue;
+
+ // Eliminate if a backedge feeds a VPHI back into itself.
+ if (V.second->Kind == DbgValue::VPHI &&
+ V.second->BlockNo == MBB.getNumber() &&
+ // Is this a backedge?
+ std::distance(Values.begin(), &V) >= BackEdgesStart)
+ continue;
+
+ Disagree = true;
+ }
+
+ // No disagreement -> live-through value.
+ if (!Disagree) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ } else {
+ // Otherwise use a VPHI.
+ DbgValue VPHI(MBB.getNumber(), FirstVal.Properties, DbgValue::VPHI);
+ Changed = LiveIn != VPHI;
+ if (Changed)
+ LiveIn = VPHI;
+ return Changed;
+ }
+}
+
+void InstrRefBasedLDV::getBlocksForScope(
+ const DILocation *DILoc,
+ SmallPtrSetImpl<const MachineBasicBlock *> &BlocksToExplore,
+ const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks) {
+ // Get the set of "normal" in-lexical-scope blocks.
+ LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
+
+ // VarLoc LiveDebugValues tracks variable locations that are defined in
+ // blocks not in scope. This is something we could legitimately ignore, but
+ // let's allow it for now for the sake of coverage.
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+
+ // Storage for artificial blocks we intend to add to BlocksToExplore.
+ DenseSet<const MachineBasicBlock *> ToAdd;
+
+ // To avoid needlessly dropping large volumes of variable locations, propagate
+ // variables through artificial blocks, i.e. those that don't have any
+ // instructions in scope at all. To accurately replicate VarLoc
+ // LiveDebugValues, this means exploring all artificial successors too.
+ // Perform a depth-first-search to enumerate those blocks.
+ for (const auto *MBB : BlocksToExplore) {
+ // Depth-first-search state: each node is a block and which successor
+ // we're currently exploring.
+ SmallVector<std::pair<const MachineBasicBlock *,
+ MachineBasicBlock::const_succ_iterator>,
+ 8>
+ DFS;
+
+ // Find any artificial successors not already tracked.
+ for (auto *succ : MBB->successors()) {
+ if (BlocksToExplore.count(succ))
+ continue;
+ if (!ArtificialBlocks.count(succ))
+ continue;
+ ToAdd.insert(succ);
+ DFS.push_back({succ, succ->succ_begin()});
+ }
+
+ // Search all those blocks, depth first.
+ while (!DFS.empty()) {
+ const MachineBasicBlock *CurBB = DFS.back().first;
+ MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
+ // Walk back if we've explored this block's successors to the end.
+ if (CurSucc == CurBB->succ_end()) {
+ DFS.pop_back();
+ continue;
+ }
+
+ // If the current successor is artificial and unexplored, descend into
+ // it.
+ if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
+ ToAdd.insert(*CurSucc);
+ DFS.push_back({*CurSucc, (*CurSucc)->succ_begin()});
+ continue;
+ }
+
+ ++CurSucc;
+ }
+ }
+
+ BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
+}
+
+void InstrRefBasedLDV::buildVLocValueMap(
+ const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
+ FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs) {
+ // This method is much like buildMLocValueMap, but focuses on a single
+ // LexicalScope at a time. Pick out a set of blocks and variables that are
+ // to have their value assignments solved, then run our dataflow algorithm
+ // until a fixed point is reached.
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist, Pending;
+ SmallPtrSet<MachineBasicBlock *, 16> OnWorklist, OnPending;
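+ // Worklist holds the RPO numbers of blocks to visit during the current
+ // dataflow iteration; successors reached along backedges are parked on
+ // Pending and swapped in once the current Worklist drains.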
+
+ // The set of blocks we'll be examining.
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+
+ // The order in which to examine them (RPO).
+ SmallVector<MachineBasicBlock *, 8> BlockOrders;
+
+ // RPO ordering function.
+ auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return BBToOrder[A] < BBToOrder[B];
+ };
+
+ getBlocksForScope(DILoc, BlocksToExplore, AssignBlocks);
+
+ // Single block scope: not interesting! No propagation at all. Note that
+ // this could probably go above ArtificialBlocks without damage, but
+ // that then produces output differences from original-live-debug-values,
+ // which propagates from a single block into many artificial ones.
+ if (BlocksToExplore.size() == 1)
+ return;
+
+ // Convert a const set to a non-const set. LexicalScopes
+ // getMachineBasicBlocks returns const MBB pointers, IDF wants mutable ones.
+ // (Neither of them mutates anything.)
+ SmallPtrSet<MachineBasicBlock *, 8> MutBlocksToExplore;
+ for (const auto *MBB : BlocksToExplore)
+ MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
+
+ // Pick out the relevant blocks and sort them into RPO order.
+ for (const auto *MBB : BlocksToExplore)
+ BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
+
+ llvm::sort(BlockOrders, Cmp);
+ unsigned NumBlocks = BlockOrders.size();
+
+ // Allocate some vectors for storing the live-ins and live-outs; they can be large.
+ SmallVector<DbgValue, 32> LiveIns, LiveOuts;
+ LiveIns.reserve(NumBlocks);
+ LiveOuts.reserve(NumBlocks);
+
+ // Initialize all values to start as NoVals. This signifies "it's live
+ // through, but we don't know what it is".
+ DbgValueProperties EmptyProperties(EmptyExpr, false, false);
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns.push_back(EmptyDbgValue);
+ LiveOuts.push_back(EmptyDbgValue);
+ }
+
+ // Produce by-MBB indexes of live-in/live-outs, to ease lookup within
+ // vlocJoin.
+ LiveIdxT LiveOutIdx, LiveInIdx;
+ LiveOutIdx.reserve(NumBlocks);
+ LiveInIdx.reserve(NumBlocks);
+ for (unsigned I = 0; I < NumBlocks; ++I) {
+ LiveOutIdx[BlockOrders[I]] = &LiveOuts[I];
+ LiveInIdx[BlockOrders[I]] = &LiveIns[I];
+ }
+
+ // Loop over each variable and place PHIs for it, then propagate values
+ // between blocks. This keeps the locality of working on one lexical scope at
+ // a time, but avoids re-processing variable values because some other
+ // variable has been assigned.
+ for (const auto &Var : VarsWeCareAbout) {
+ // Re-initialize live-ins and live-outs, to clear the remains of previous
+ // variables' live-ins / live-outs.
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns[I] = EmptyDbgValue;
+ LiveOuts[I] = EmptyDbgValue;
+ }
+
+ // Place PHIs for variable values, using the LLVM IDF calculator.
+ // Collect the set of blocks where this variable is def'd.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
+ auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
+ if (TransferFunc.contains(Var))
+ DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
+ }
+
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
+
+ // Request the set of PHIs we should insert for this variable. If there's
+ // only one value definition, things are very simple.
+ if (DefBlocks.size() == 1) {
+ placePHIsForSingleVarDefinition(MutBlocksToExplore, *DefBlocks.begin(),
+ AllTheVLocs, Var, Output);
+ continue;
+ }
+
+ // Otherwise: we need to place PHIs through SSA and propagate values.
+ BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
+
+ // Insert PHIs into the per-block live-in tables for this variable.
+ for (MachineBasicBlock *PHIMBB : PHIBlocks) {
+ unsigned BlockNo = PHIMBB->getNumber();
+ DbgValue *LiveIn = LiveInIdx[PHIMBB];
+ *LiveIn = DbgValue(BlockNo, EmptyProperties, DbgValue::VPHI);
+ }
+
+ for (auto *MBB : BlockOrders) {
+ Worklist.push(BBToOrder[MBB]);
+ OnWorklist.insert(MBB);
+ }
+
+ // Iterate over all the blocks we selected, propagating the variable's value.
+ // This loop does two things:
+ // * Eliminates unnecessary VPHIs in vlocJoin,
+ // * Evaluates the block's transfer function (i.e. variable assignments) and
+ // stores the result to the block's live-outs.
+ // Always evaluate the transfer function on the first iteration, and when
+ // the live-ins change thereafter.
+ bool FirstTrip = true;
+ while (!Worklist.empty() || !Pending.empty()) {
+ while (!Worklist.empty()) {
+ auto *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ auto LiveInsIt = LiveInIdx.find(MBB);
+ assert(LiveInsIt != LiveInIdx.end());
+ DbgValue *LiveIn = LiveInsIt->second;
+
+ // Join values from predecessors: the joined live-in value is written
+ // back into *LiveIn, and the return value says whether it changed.
+ bool InLocsChanged =
+ vlocJoin(*MBB, LiveOutIdx, BlocksToExplore, *LiveIn);
+
+ SmallVector<const MachineBasicBlock *, 8> Preds;
+ for (const auto *Pred : MBB->predecessors())
+ Preds.push_back(Pred);
+
+ // If this block's live-in value is a VPHI, try to pick a machine-value
+ // for it. This makes the machine-value available and propagated
+ // through all blocks by the time value propagation finishes. We can't
+ // do this any earlier as it needs to read the block live-outs.
+ if (LiveIn->Kind == DbgValue::VPHI && LiveIn->BlockNo == (int)CurBB) {
+ // There's a small possibility that on a preceding path, a VPHI is
+ // eliminated and transitions from VPHI-with-location to
+ // live-through-value. As a result, the selected location of any VPHI
+ // might change, so we need to re-compute it on each iteration.
+ SmallVector<DbgOpID> JoinedOps;
+
+ if (pickVPHILoc(JoinedOps, *MBB, LiveOutIdx, MOutLocs, Preds)) {
+ bool NewLocPicked = !equal(LiveIn->getDbgOpIDs(), JoinedOps);
+ InLocsChanged |= NewLocPicked;
+ if (NewLocPicked)
+ LiveIn->setDbgOpIDs(JoinedOps);
+ }
+ }
+
+ if (!InLocsChanged && !FirstTrip)
+ continue;
+
+ DbgValue *LiveOut = LiveOutIdx[MBB];
+ bool OLChanged = false;
+
+ // Do transfer function.
+ auto &VTracker = AllTheVLocs[MBB->getNumber()];
+ auto TransferIt = VTracker.Vars.find(Var);
+ if (TransferIt != VTracker.Vars.end()) {
+ // Erase on empty transfer (DBG_VALUE $noreg).
+ if (TransferIt->second.Kind == DbgValue::Undef) {
+ DbgValue NewVal(MBB->getNumber(), EmptyProperties, DbgValue::NoVal);
+ if (*LiveOut != NewVal) {
+ *LiveOut = NewVal;
+ OLChanged = true;
+ }
+ } else {
+ // Insert new variable value; or overwrite.
+ if (*LiveOut != TransferIt->second) {
+ *LiveOut = TransferIt->second;
+ OLChanged = true;
+ }
+ }
+ } else {
+ // Just copy live-ins to live-outs, for anything not transferred.
+ if (*LiveOut != *LiveIn) {
+ *LiveOut = *LiveIn;
+ OLChanged = true;
+ }
+ }
+
+ // If no live-out value changed, there's no need to explore further.
+ if (!OLChanged)
+ continue;
+
+ // We should visit all successors. Ensure we'll visit any non-backedge
+ // successors during this dataflow iteration; book backedge successors
+ // to be visited next time around.
+ for (auto *s : MBB->successors()) {
+ // Ignore out of scope / not-to-be-explored successors.
+ if (!LiveInIdx.contains(s))
+ continue;
+
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+ Worklist.swap(Pending);
+ std::swap(OnWorklist, OnPending);
+ OnPending.clear();
+ assert(Pending.empty());
+ FirstTrip = false;
+ }
+
+ // Save live-ins to output vector. Ignore any that are still marked as being
+ // VPHIs with no location -- those are variables that we know the value of,
+ // but are not actually available in the register file.
+ for (auto *MBB : BlockOrders) {
+ DbgValue *BlockLiveIn = LiveInIdx[MBB];
+ if (BlockLiveIn->Kind == DbgValue::NoVal)
+ continue;
+ if (BlockLiveIn->isUnjoinedPHI())
+ continue;
+ if (BlockLiveIn->Kind == DbgValue::VPHI)
+ BlockLiveIn->Kind = DbgValue::Def;
+ assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() ==
+ Var.getFragment() && "Fragment info missing during value prop");
+ Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
+ }
+ } // Per-variable loop.
+
+ BlockOrders.clear();
+ BlocksToExplore.clear();
+}
+
+void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *AssignMBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ const DebugVariable &Var, LiveInsT &Output) {
+ // If there is a single definition of the variable, then working out its
+ // value everywhere is very simple: the value holds in every block dominated
+ // by the definition. At the dominance frontier, the usual algorithm would:
+ // * Place PHIs,
+ // * Propagate values into them,
+ // * Find there's no incoming variable value from the other incoming branches
+ // of the dominance frontier,
+ // * Specify there's no variable value in blocks past the frontier.
+ // This is a common case, hence it's worth special-casing it.
+
+ // Pick out the variable's value from the block transfer function.
+ VLocTracker &VLocs = AllTheVLocs[AssignMBB->getNumber()];
+ auto ValueIt = VLocs.Vars.find(Var);
+ const DbgValue &Value = ValueIt->second;
+
+ // If it's an explicit assignment of "undef", that means there is no location
+ // anyway, anywhere.
+ if (Value.Kind == DbgValue::Undef)
+ return;
+
+ // Assign the variable value on entry to each dominated block that's in scope.
+ // Skip the definition block -- it's assigned the variable value in the middle
+ // of the block somewhere.
+ for (auto *ScopeBlock : InScopeBlocks) {
+ if (!DomTree->properlyDominates(AssignMBB, ScopeBlock))
+ continue;
+
+ Output[ScopeBlock->getNumber()].push_back({Var, Value});
+ }
+
+ // All blocks that aren't dominated have no live-in value, thus no variable
+ // value will be given to them.
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void InstrRefBasedLDV::dump_mloc_transfer(
+ const MLocTransferMap &mloc_transfer) const {
+ for (const auto &P : mloc_transfer) {
+ std::string LocName = MTracker->LocIdxToName(P.first);
+ std::string ValueName = MTracker->IDAsString(P.second);
+ dbgs() << "Loc " << LocName << " --> " << ValueName << "\n";
+ }
+}
+#endif
+
+void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
+ // Build some useful data structures.
+
+ LLVMContext &Context = MF.getFunction().getContext();
+ EmptyExpr = DIExpression::get(Context, {});
+
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ // Collect a set of all the artificial blocks.
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
+ // Compute mappings of block <=> RPO order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ auto processMBB = [&](MachineBasicBlock *MBB) {
+ OrderToBB[RPONumber] = MBB;
+ BBToOrder[MBB] = RPONumber;
+ BBNumToRPO[MBB->getNumber()] = RPONumber;
+ ++RPONumber;
+ };
+ for (MachineBasicBlock *MBB : RPOT)
+ processMBB(MBB);
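+ // Blocks that the RPO traversal didn't reach are unreachable from the
+ // entry block; give them ordering numbers after all reachable blocks so
+ // that every block has an entry in these maps.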
+ for (MachineBasicBlock &MBB : MF)
+ if (!BBToOrder.contains(&MBB))
+ processMBB(&MBB);
+
+ // Order value substitutions by their "source" operand pair, for quick lookup.
+ llvm::sort(MF.DebugValueSubstitutions);
+
+#ifdef EXPENSIVE_CHECKS
+ // As an expensive check, test whether there are any duplicate substitution
+ // sources in the collection.
+ if (MF.DebugValueSubstitutions.size() > 2) {
+ for (auto It = MF.DebugValueSubstitutions.begin();
+ It != std::prev(MF.DebugValueSubstitutions.end()); ++It) {
+ assert(It->Src != std::next(It)->Src && "Duplicate variable location "
+ "substitution seen");
+ }
+ }
+#endif
+}
+
+// Produce an "ejection map" for blocks, i.e., what's the highest-numbered
+// lexical scope it's used in. When exploring in DFS order and we pass that
+// scope, the block can be processed and any tracking information freed.
+void InstrRefBasedLDV::makeDepthFirstEjectionMap(
+ SmallVectorImpl<unsigned> &EjectionMap,
+ const ScopeToDILocT &ScopeToDILocation,
+ ScopeToAssignBlocksT &ScopeToAssignBlocks) {
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+ SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack;
+ auto *TopScope = LS.getCurrentFunctionScope();
+
+ // Unlike lexical scope explorers, we explore in reverse order, to find the
+ // "last" lexical scope used for each block early.
+ WorkStack.push_back({TopScope, TopScope->getChildren().size() - 1});
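+ // Each stack entry pairs a scope with the index of the next child to
+ // visit; the index counts down, so children are visited last-to-first, and
+ // a negative index means the scope itself is ready to be processed.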
+
+ while (!WorkStack.empty()) {
+ auto &ScopePosition = WorkStack.back();
+ LexicalScope *WS = ScopePosition.first;
+ ssize_t ChildNum = ScopePosition.second--;
+
+ const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+ if (ChildNum >= 0) {
+ // If ChildNum is non-negative, there are remaining children to explore.
+ // Push the child and its children-count onto the stack.
+ auto &ChildScope = Children[ChildNum];
+ WorkStack.push_back(
+ std::make_pair(ChildScope, ChildScope->getChildren().size() - 1));
+ } else {
+ WorkStack.pop_back();
+
+ // We've explored all children and any later blocks: examine all blocks
+ // in our scope. If they haven't yet had an ejection number set, then
+ // this scope will be the last to use that block.
+ auto DILocationIt = ScopeToDILocation.find(WS);
+ if (DILocationIt != ScopeToDILocation.end()) {
+ getBlocksForScope(DILocationIt->second, BlocksToExplore,
+ ScopeToAssignBlocks.find(WS)->second);
+ for (const auto *MBB : BlocksToExplore) {
+ unsigned BBNum = MBB->getNumber();
+ if (EjectionMap[BBNum] == 0)
+ EjectionMap[BBNum] = WS->getDFSOut();
+ }
+
+ BlocksToExplore.clear();
+ }
+ }
+ }
+}
+
+bool InstrRefBasedLDV::depthFirstVLocAndEmit(
+ unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
+ const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC) {
+ TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC);
+ unsigned NumLocs = MTracker->getNumLocs();
+ VTracker = nullptr;
+
+ // No scopes? No variable locations.
+ if (!LS.getCurrentFunctionScope())
+ return false;
+
+ // Build map from block number to the last scope that uses the block.
+ SmallVector<unsigned, 16> EjectionMap;
+ EjectionMap.resize(MaxNumBlocks, 0);
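+ // A zero entry in the ejection map means no scope was found to use that
+ // block; the scope walk below only ejects blocks whose recorded scope it
+ // is leaving, and any leftover artificial blocks are swept up at the end
+ // of this function.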
+ makeDepthFirstEjectionMap(EjectionMap, ScopeToDILocation,
+ ScopeToAssignBlocks);
+
+ // Helper lambda for ejecting a block -- if nothing is going to use the block,
+ // we can translate the variable location information into DBG_VALUEs and then
+ // free all of InstrRefBasedLDV's data structures.
+ auto EjectBlock = [&](MachineBasicBlock &MBB) -> void {
+ unsigned BBNum = MBB.getNumber();
+ AllTheVLocs[BBNum].clear();
+
+ // Prime the transfer-tracker, and then step through all the block
+ // instructions, installing transfers.
+ MTracker->reset();
+ MTracker->loadFromArray(MInLocs[BBNum], BBNum);
+ TTracker->loadInlocs(MBB, MInLocs[BBNum], DbgOpStore, Output[BBNum],
+ NumLocs);
+
+ CurBB = BBNum;
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI, MOutLocs.get(), MInLocs.get());
+ TTracker->checkInstForNewValues(CurInst, MI.getIterator());
+ ++CurInst;
+ }
+
+ // Free machine-location tables for this block.
+ MInLocs[BBNum].reset();
+ MOutLocs[BBNum].reset();
+ // We don't need live-in variable values for this block either.
+ Output[BBNum].clear();
+ AllTheVLocs[BBNum].clear();
+ };
+
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+ SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack;
+ WorkStack.push_back({LS.getCurrentFunctionScope(), 0});
+ unsigned HighestDFSIn = 0;
+
+ // Proceed to explore in depth first order.
+ while (!WorkStack.empty()) {
+ auto &ScopePosition = WorkStack.back();
+ LexicalScope *WS = ScopePosition.first;
+ ssize_t ChildNum = ScopePosition.second++;
+
+ // We observe scopes with children twice here, once descending in, once
+ // ascending out of the scope nest. Use HighestDFSIn as a ratchet to ensure
+ // we don't process a scope twice. Additionally, ignore scopes that don't
+ // have a DILocation -- by proxy, this means we never tracked any variable
+ // assignments in that scope.
+ auto DILocIt = ScopeToDILocation.find(WS);
+ if (HighestDFSIn <= WS->getDFSIn() && DILocIt != ScopeToDILocation.end()) {
+ const DILocation *DILoc = DILocIt->second;
+ auto &VarsWeCareAbout = ScopeToVars.find(WS)->second;
+ auto &BlocksInScope = ScopeToAssignBlocks.find(WS)->second;
+
+ buildVLocValueMap(DILoc, VarsWeCareAbout, BlocksInScope, Output, MOutLocs,
+ MInLocs, AllTheVLocs);
+ }
+
+ HighestDFSIn = std::max(HighestDFSIn, WS->getDFSIn());
+
+ // Descend into any scope nests.
+ const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+ if (ChildNum < (ssize_t)Children.size()) {
+ // There are children to explore -- push onto stack and continue.
+ auto &ChildScope = Children[ChildNum];
+ WorkStack.push_back(std::make_pair(ChildScope, 0));
+ } else {
+ WorkStack.pop_back();
+
+ // We've explored a leaf, or have explored all the children of a scope.
+ // Try to eject any blocks where this is the last scope it's relevant to.
+ auto DILocationIt = ScopeToDILocation.find(WS);
+ if (DILocationIt == ScopeToDILocation.end())
+ continue;
+
+ getBlocksForScope(DILocationIt->second, BlocksToExplore,
+ ScopeToAssignBlocks.find(WS)->second);
+ for (const auto *MBB : BlocksToExplore)
+ if (WS->getDFSOut() == EjectionMap[MBB->getNumber()])
+ EjectBlock(const_cast<MachineBasicBlock &>(*MBB));
+
+ BlocksToExplore.clear();
+ }
+ }
+
+ // Some artificial blocks may not have been ejected, meaning they're not
+ // connected to an actual legitimate scope. This can technically happen
+ // with things like the entry block. In theory, we shouldn't need to do
+ // anything for such out-of-scope blocks, but for the sake of being similar
+ // to VarLocBasedLDV, eject these too.
+ for (auto *MBB : ArtificialBlocks)
+ if (MOutLocs[MBB->getNumber()])
+ EjectBlock(*MBB);
+
+ return emitTransfers(AllVarsNumbering);
+}
+
+bool InstrRefBasedLDV::emitTransfers(
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+ // Go through all the transfers recorded in the TransferTracker -- this is
+ // both the live-ins to a block, and any movements of values that happen
+ // in the middle.
+ for (const auto &P : TTracker->Transfers) {
+ // We have to insert DBG_VALUEs in a consistent order; otherwise they
+ // appear in DWARF in different orders. Use the order that they appear
+ // when walking through each block / each instruction, stored in
+ // AllVarsNumbering.
+ SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
+ for (MachineInstr *MI : P.Insts) {
+ DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
+ MI->getDebugLoc()->getInlinedAt());
+ Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
+ }
+ llvm::sort(Insts, llvm::less_first());
+
+ // Insert either before or after the designated point...
+ if (P.MBB) {
+ MachineBasicBlock &MBB = *P.MBB;
+ for (const auto &Pair : Insts)
+ MBB.insert(P.Pos, Pair.second);
+ } else {
+ // Terminators, like tail calls, can clobber things. Don't try to place
+ // transfers after them.
+ if (P.Pos->isTerminator())
+ continue;
+
+ MachineBasicBlock &MBB = *P.Pos->getParent();
+ for (const auto &Pair : Insts)
+ MBB.insertAfterBundle(P.Pos, Pair.second);
+ }
+ }
+
+ return TTracker->Transfers.size() != 0;
+}
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC,
+ unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
+ // No subprogram means this function contains no debuginfo.
+ if (!MF.getFunction().getSubprogram())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+ this->TPC = TPC;
+
+ this->DomTree = DomTree;
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ MFI = &MF.getFrameInfo();
+ LS.initialize(MF);
+
+ const auto &STI = MF.getSubtarget();
+ AdjustsStackInCalls = MFI->adjustsStack() &&
+ STI.getFrameLowering()->stackProbeFunctionModifiesSP();
+ if (AdjustsStackInCalls)
+ StackProbeSymbolName = STI.getTargetLowering()->getStackProbeSymbolName(MF);
+
+ MTracker =
+ new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering());
+ VTracker = nullptr;
+ TTracker = nullptr;
+
+ SmallVector<MLocTransferMap, 32> MLocTransfer;
+ SmallVector<VLocTracker, 8> vlocs;
+ LiveInsT SavedLiveIns;
+
+ int MaxNumBlocks = -1;
+ for (auto &MBB : MF)
+ MaxNumBlocks = std::max(MBB.getNumber(), MaxNumBlocks);
+ assert(MaxNumBlocks >= 0);
+ ++MaxNumBlocks;
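+ // MaxNumBlocks is now one past the highest block number, i.e. a size
+ // suitable for the per-block tables allocated below.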
+
+ initialSetup(MF);
+
+ MLocTransfer.resize(MaxNumBlocks);
+ vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
+ SavedLiveIns.resize(MaxNumBlocks);
+
+ produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
+
+ // Allocate and initialize two array-of-arrays for the live-in and live-out
+ // machine values. The outer dimension is the block number, while the inner
+ // dimension is a LocIdx from MLocTracker.
+ FuncValueTable MOutLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
+ FuncValueTable MInLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
+ unsigned NumLocs = MTracker->getNumLocs();
+ for (int i = 0; i < MaxNumBlocks; ++i) {
+ // These all auto-initialize to ValueIDNum::EmptyValue
+ MOutLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
+ MInLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
+ }
+
+ // Solve the machine value dataflow problem using the MLocTransfer function,
+ // storing the computed live-ins / live-outs into the array-of-arrays. We use
+ // both live-ins and live-outs for decision making in the variable value
+ // dataflow problem.
+ buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
+
+ // Patch up debug phi numbers, turning unknown block-live-in values into
+ // either live-through machine values, or PHIs.
+ for (auto &DBG_PHI : DebugPHINumToValue) {
+ // Identify unresolved block-live-ins.
+ if (!DBG_PHI.ValueRead)
+ continue;
+
+ ValueIDNum &Num = *DBG_PHI.ValueRead;
+ if (!Num.isPHI())
+ continue;
+
+ unsigned BlockNo = Num.getBlock();
+ LocIdx LocNo = Num.getLoc();
+ ValueIDNum ResolvedValue = MInLocs[BlockNo][LocNo.asU64()];
+ // If there is no resolved value for this live-in then it is not directly
+ // reachable from the entry block -- model it as a PHI on entry to this
+ // block, which means we leave the ValueIDNum unchanged.
+ if (ResolvedValue != ValueIDNum::EmptyValue)
+ Num = ResolvedValue;
+ }
+ // Later, we'll be looking up ranges of instruction numbers.
+ llvm::sort(DebugPHINumToValue);
+
+ // Walk back through each block / instruction, collecting DBG_VALUE
+ // instructions and recording what machine value their operands refer to.
+ for (auto &OrderPair : OrderToBB) {
+ MachineBasicBlock &MBB = *OrderPair.second;
+ CurBB = MBB.getNumber();
+ VTracker = &vlocs[CurBB];
+ VTracker->MBB = &MBB;
+ MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI, MOutLocs.get(), MInLocs.get());
+ ++CurInst;
+ }
+ MTracker->reset();
+ }
+
+ // Number all variables in the order that they appear, to be used as a stable
+ // insertion order later.
+ DenseMap<DebugVariable, unsigned> AllVarsNumbering;
+
+ // Map from one LexicalScope to all the variables in that scope.
+ ScopeToVarsT ScopeToVars;
+
+ // Map from one lexical scope to all blocks where assignments happen for
+ // that scope.
+ ScopeToAssignBlocksT ScopeToAssignBlocks;
+
+ // Store a map from each scope to the DILocation that describes it.
+ ScopeToDILocT ScopeToDILocation;
+
+ // To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
+ // the order is unimportant, it just has to be stable.
+ unsigned VarAssignCount = 0;
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ auto *MBB = OrderToBB[I];
+ auto *VTracker = &vlocs[MBB->getNumber()];
+ // Collect each variable with a DBG_VALUE in this block.
+ for (auto &idx : VTracker->Vars) {
+ const auto &Var = idx.first;
+ const DILocation *ScopeLoc = VTracker->Scopes[Var];
+ assert(ScopeLoc != nullptr);
+ auto *Scope = LS.findLexicalScope(ScopeLoc);
+
+ // No insts in scope -> shouldn't have been recorded.
+ assert(Scope != nullptr);
+
+ AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size()));
+ ScopeToVars[Scope].insert(Var);
+ ScopeToAssignBlocks[Scope].insert(VTracker->MBB);
+ ScopeToDILocation[Scope] = ScopeLoc;
+ ++VarAssignCount;
+ }
+ }
+
+ bool Changed = false;
+
+ // If we have an extremely large number of variable assignments and blocks,
+ // bail out at this point. We've already burnt some time doing analysis,
+ // but we should cut our losses.
+ if ((unsigned)MaxNumBlocks > InputBBLimit &&
+ VarAssignCount > InputDbgValLimit) {
+ LLVM_DEBUG(dbgs() << "Disabling InstrRefBasedLDV: " << MF.getName()
+ << " has " << MaxNumBlocks << " basic blocks and "
+ << VarAssignCount
+ << " variable assignments, exceeding limits.\n");
+ } else {
+ // Solve the variable value problem and emit locations to blocks, using a
+ // depth-first search over the lexical scope tree.
+ Changed = depthFirstVLocAndEmit(
+ MaxNumBlocks, ScopeToDILocation, ScopeToVars, ScopeToAssignBlocks,
+ SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC);
+ }
+
+ delete MTracker;
+ delete TTracker;
+ MTracker = nullptr;
+ VTracker = nullptr;
+ TTracker = nullptr;
+
+ ArtificialBlocks.clear();
+ OrderToBB.clear();
+ BBToOrder.clear();
+ BBNumToRPO.clear();
+ DebugInstrNumToInstr.clear();
+ DebugPHINumToValue.clear();
+ OverlapFragments.clear();
+ SeenFragments.clear();
+ SeenDbgPHIs.clear();
+ DbgOpStore.clear();
+
+ return Changed;
+}
+
+LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() {
+ return new InstrRefBasedLDV();
+}
+
+namespace {
+class LDVSSABlock;
+class LDVSSAUpdater;
+
+// Pick a type to identify incoming block values as we construct SSA. We
+// can't use anything more robust than an integer unfortunately, as SSAUpdater
+// expects to zero-initialize the type.
+typedef uint64_t BlockValueNum;
+
+/// Represents an SSA PHI node for the SSA updater class. Contains the block
+/// this PHI is in, the value number it would have, and the expected incoming
+/// values from parent blocks.
+class LDVSSAPhi {
+public:
+ SmallVector<std::pair<LDVSSABlock *, BlockValueNum>, 4> IncomingValues;
+ LDVSSABlock *ParentBlock;
+ BlockValueNum PHIValNum;
+ LDVSSAPhi(BlockValueNum PHIValNum, LDVSSABlock *ParentBlock)
+ : ParentBlock(ParentBlock), PHIValNum(PHIValNum) {}
+
+ LDVSSABlock *getParent() { return ParentBlock; }
+};
+
+/// Thin wrapper around a block predecessor iterator. The only difference from
+/// a normal block iterator is that it dereferences to an LDVSSABlock.
+class LDVSSABlockIterator {
+public:
+ MachineBasicBlock::pred_iterator PredIt;
+ LDVSSAUpdater &Updater;
+
+ LDVSSABlockIterator(MachineBasicBlock::pred_iterator PredIt,
+ LDVSSAUpdater &Updater)
+ : PredIt(PredIt), Updater(Updater) {}
+
+ bool operator!=(const LDVSSABlockIterator &OtherIt) const {
+ return OtherIt.PredIt != PredIt;
+ }
+
+ LDVSSABlockIterator &operator++() {
+ ++PredIt;
+ return *this;
+ }
+
+ LDVSSABlock *operator*();
+};
+
+/// Thin wrapper around a block for the SSA Updater interface. Necessary
+/// because we need to track the PHI value(s) that the updater may have
+/// required in this block.
+class LDVSSABlock {
+public:
+ MachineBasicBlock &BB;
+ LDVSSAUpdater &Updater;
+ using PHIListT = SmallVector<LDVSSAPhi, 1>;
+ /// List of PHIs in this block. There should only ever be one.
+ PHIListT PHIList;
+
+ LDVSSABlock(MachineBasicBlock &BB, LDVSSAUpdater &Updater)
+ : BB(BB), Updater(Updater) {}
+
+ LDVSSABlockIterator succ_begin() {
+ return LDVSSABlockIterator(BB.succ_begin(), Updater);
+ }
+
+ LDVSSABlockIterator succ_end() {
+ return LDVSSABlockIterator(BB.succ_end(), Updater);
+ }
+
+ /// SSAUpdater has requested a PHI: create that within this block record.
+ LDVSSAPhi *newPHI(BlockValueNum Value) {
+ PHIList.emplace_back(Value, this);
+ return &PHIList.back();
+ }
+
+ /// SSAUpdater wishes to know what PHIs already exist in this block.
+ PHIListT &phis() { return PHIList; }
+};
+
+/// Utility class for the SSAUpdater interface: tracks blocks, PHIs and values
+/// while SSAUpdater is exploring the CFG. It's passed as a handle / baton to
+/// SSAUpdaterTraits<LDVSSAUpdater>.
+class LDVSSAUpdater {
+public:
+ /// Map of value numbers to PHI records.
+ DenseMap<BlockValueNum, LDVSSAPhi *> PHIs;
+ /// Map of which blocks generate Undef values -- blocks that are not
+ /// dominated by any Def.
+ DenseMap<MachineBasicBlock *, BlockValueNum> UndefMap;
+ /// Map of machine blocks to our own records of them.
+ DenseMap<MachineBasicBlock *, LDVSSABlock *> BlockMap;
+ /// Machine location where any PHI must occur.
+ LocIdx Loc;
+ /// Table of live-in machine value numbers for blocks / locations.
+ const ValueTable *MLiveIns;
+
+ LDVSSAUpdater(LocIdx L, const ValueTable *MLiveIns)
+ : Loc(L), MLiveIns(MLiveIns) {}
+
+ void reset() {
+ for (auto &Block : BlockMap)
+ delete Block.second;
+
+ PHIs.clear();
+ UndefMap.clear();
+ BlockMap.clear();
+ }
+
+ ~LDVSSAUpdater() { reset(); }
+
+ /// For a given MBB, create a wrapper block for it. Stores it in the
+ /// LDVSSAUpdater block map.
+ LDVSSABlock *getSSALDVBlock(MachineBasicBlock *BB) {
+ auto it = BlockMap.find(BB);
+ if (it == BlockMap.end()) {
+ BlockMap[BB] = new LDVSSABlock(*BB, *this);
+ it = BlockMap.find(BB);
+ }
+ return it->second;
+ }
+
+ /// Find the live-in value number for the given block. Looks up the value at
+ /// the PHI location on entry.
+ BlockValueNum getValue(LDVSSABlock *LDVBB) {
+ return MLiveIns[LDVBB->BB.getNumber()][Loc.asU64()].asU64();
+ }
+};
+
+LDVSSABlock *LDVSSABlockIterator::operator*() {
+ return Updater.getSSALDVBlock(*PredIt);
+}
+
+#ifndef NDEBUG
+
+raw_ostream &operator<<(raw_ostream &out, const LDVSSAPhi &PHI) {
+ out << "SSALDVPHI " << PHI.PHIValNum;
+ return out;
+}
+
+#endif
+
+} // namespace
+
+namespace llvm {
+
+/// Template specialization to give SSAUpdater access to CFG and value
+/// information. SSAUpdater calls methods in these traits, passing in the
+/// LDVSSAUpdater object, to learn about blocks and the values they define.
+/// It also provides methods to create PHI nodes and track them.
+template <> class SSAUpdaterTraits<LDVSSAUpdater> {
+public:
+ using BlkT = LDVSSABlock;
+ using ValT = BlockValueNum;
+ using PhiT = LDVSSAPhi;
+ using BlkSucc_iterator = LDVSSABlockIterator;
+
+ // Methods to access block successors -- dereferencing to our wrapper class.
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ LDVSSAPhi *PHI;
+ unsigned Idx;
+
+ public:
+ explicit PHI_iterator(LDVSSAPhi *P) // begin iterator
+ : PHI(P), Idx(0) {}
+ PHI_iterator(LDVSSAPhi *P, bool) // end iterator
+ : PHI(P), Idx(PHI->IncomingValues.size()) {}
+
+ PHI_iterator &operator++() {
+ Idx++;
+ return *this;
+ }
+ bool operator==(const PHI_iterator &X) const { return Idx == X.Idx; }
+ bool operator!=(const PHI_iterator &X) const { return !operator==(X); }
+
+ BlockValueNum getIncomingValue() { return PHI->IncomingValues[Idx].second; }
+
+ LDVSSABlock *getIncomingBlock() { return PHI->IncomingValues[Idx].first; }
+ };
+
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(LDVSSABlock *BB,
+ SmallVectorImpl<LDVSSABlock *> *Preds) {
+ for (MachineBasicBlock *Pred : BB->BB.predecessors())
+ Preds->push_back(BB->Updater.getSSALDVBlock(Pred));
+ }
+
+ /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new
+ /// register. For LiveDebugValues, represents a block identified as not having
+ /// any DBG_PHI predecessors.
+ static BlockValueNum GetUndefVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) {
+ // Create a value number for this block -- it needs to be unique and in the
+ // "undef" collection, so that we know it's not real. Use a number
+ // representing a PHI into this block.
+ BlockValueNum Num = ValueIDNum(BB->BB.getNumber(), 0, Updater->Loc).asU64();
+ Updater->UndefMap[&BB->BB] = Num;
+ return Num;
+ }
+
+ /// CreateEmptyPHI - Create a (representation of a) PHI in the given block.
+ /// SSAUpdater will populate it with information about incoming values. The
+ /// value number of this PHI is whatever the machine value number problem
+ /// solution determined it to be. This includes non-phi values if SSAUpdater
+ /// tries to create a PHI where the incoming values are identical.
+ static BlockValueNum CreateEmptyPHI(LDVSSABlock *BB, unsigned NumPreds,
+ LDVSSAUpdater *Updater) {
+ BlockValueNum PHIValNum = Updater->getValue(BB);
+ LDVSSAPhi *PHI = BB->newPHI(PHIValNum);
+ Updater->PHIs[PHIValNum] = PHI;
+ return PHIValNum;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(LDVSSAPhi *PHI, BlockValueNum Val, LDVSSABlock *Pred) {
+ PHI->IncomingValues.push_back(std::make_pair(Pred, Val));
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified value
+ /// is a PHI instruction.
+ static LDVSSAPhi *ValueIsPHI(BlockValueNum Val, LDVSSAUpdater *Updater) {
+ return Updater->PHIs.lookup(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static LDVSSAPhi *ValueIsNewPHI(BlockValueNum Val, LDVSSAUpdater *Updater) {
+ LDVSSAPhi *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->IncomingValues.size() == 0)
+ return PHI;
+ return nullptr;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static BlockValueNum GetPHIValue(LDVSSAPhi *PHI) { return PHI->PHIValNum; }
+};
+
+} // end namespace llvm
+
+std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
+ MachineFunction &MF, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
+ assert(MLiveOuts && MLiveIns &&
+ "Tried to resolve DBG_PHI before location "
+ "tables allocated?");
+
+ // This function will be called twice per DBG_INSTR_REF, and might end up
+ // computing lots of SSA information: memoize it.
+ auto SeenDbgPHIIt = SeenDbgPHIs.find(std::make_pair(&Here, InstrNum));
+ if (SeenDbgPHIIt != SeenDbgPHIs.end())
+ return SeenDbgPHIIt->second;
+
+ std::optional<ValueIDNum> Result =
+ resolveDbgPHIsImpl(MF, MLiveOuts, MLiveIns, Here, InstrNum);
+ SeenDbgPHIs.insert({std::make_pair(&Here, InstrNum), Result});
+ return Result;
+}
+
+std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
+ MachineFunction &MF, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
+ // Pick out records of DBG_PHI instructions that have been observed. If there
+ // are none, then we cannot compute a value number.
+ auto RangePair = std::equal_range(DebugPHINumToValue.begin(),
+ DebugPHINumToValue.end(), InstrNum);
+ auto LowerIt = RangePair.first;
+ auto UpperIt = RangePair.second;
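+ // DebugPHINumToValue was sorted by instruction number back in ExtendRanges,
+ // so this range covers every DBG_PHI record observed for InstrNum.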
+
+ // No DBG_PHI means there can be no location.
+ if (LowerIt == UpperIt)
+ return std::nullopt;
+
+ // If any DBG_PHIs referred to a location we didn't understand, don't try to
+ // compute a value. There might be scenarios where we could recover a value
+ // for some range of DBG_INSTR_REFs, but at this point we can have high
+ // confidence that we've seen a bug.
+ auto DBGPHIRange = make_range(LowerIt, UpperIt);
+ for (const DebugPHIRecord &DBG_PHI : DBGPHIRange)
+ if (!DBG_PHI.ValueRead)
+ return std::nullopt;
+
+ // If there's only one DBG_PHI, then that is our value number.
+ if (std::distance(LowerIt, UpperIt) == 1)
+ return *LowerIt->ValueRead;
+
+ // Pick out the location (physreg, slot) where any PHIs must occur. It's
+ // technically possible for us to merge values in different registers in each
+ // block, but highly unlikely that LLVM will generate such code after register
+ // allocation.
+ LocIdx Loc = *LowerIt->ReadLoc;
+
+ // We have several DBG_PHIs, and a use position (the Here inst). All a
+ // DBG_PHI does is identify a value at a program position. We can treat each
+ // DBG_PHI like it's a Def of a value, and the use position is a Use of a
+ // value, just like SSA. We use the standard LLVM SSA updater class to
+ // determine which Def is used at the Use, and any PHIs that happen along
+ // the way.
+ // Adapted LLVM SSA Updater:
+ LDVSSAUpdater Updater(Loc, MLiveIns);
+ // Map of which Def or PHI is the current value in each block.
+ DenseMap<LDVSSABlock *, BlockValueNum> AvailableValues;
+ // Set of PHIs that we have created along the way.
+ SmallVector<LDVSSAPhi *, 8> CreatedPHIs;
+
+ // Each existing DBG_PHI is a Def'd value under this model. Record these Defs
+ // for the SSAUpdater.
+ for (const auto &DBG_PHI : DBGPHIRange) {
+ LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
+ const ValueIDNum &Num = *DBG_PHI.ValueRead;
+ AvailableValues.insert(std::make_pair(Block, Num.asU64()));
+ }
+
+ LDVSSABlock *HereBlock = Updater.getSSALDVBlock(Here.getParent());
+ const auto &AvailIt = AvailableValues.find(HereBlock);
+ if (AvailIt != AvailableValues.end()) {
+ // Actually, we already know what the value is -- the Use is in the same
+ // block as the Def.
+ return ValueIDNum::fromU64(AvailIt->second);
+ }
+
+ // Otherwise, we must use the SSA Updater. It will identify the value number
+ // that we are to use, and the PHIs that must happen along the way.
+ SSAUpdaterImpl<LDVSSAUpdater> Impl(&Updater, &AvailableValues, &CreatedPHIs);
+ BlockValueNum ResultInt = Impl.GetValue(Updater.getSSALDVBlock(Here.getParent()));
+ ValueIDNum Result = ValueIDNum::fromU64(ResultInt);
+
+ // We have the number for a PHI, or possibly live-through value, to be used
+ // at this Use. There are a number of things we have to check about it though:
+ // * Does any PHI use an 'Undef' (like an IMPLICIT_DEF) value? If so, this
+ // Use was not completely dominated by DBG_PHIs and we should abort.
+ // * Are the Defs or PHIs clobbered in a block? SSAUpdater isn't aware that
+ // we've left SSA form. Validate that the inputs to each PHI are the
+ // expected values.
+ // * Is a PHI we've created actually a merging of values, or are all the
+ // predecessor values the same, leading to a non-PHI machine value number?
+ // (SSAUpdater doesn't know that either). Remap validated PHIs into the
+ // ValidatedValues collection below to sort this out.
+ DenseMap<LDVSSABlock *, ValueIDNum> ValidatedValues;
+
+ // Define all the input DBG_PHI values in ValidatedValues.
+ for (const auto &DBG_PHI : DBGPHIRange) {
+ LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
+ const ValueIDNum &Num = *DBG_PHI.ValueRead;
+ ValidatedValues.insert(std::make_pair(Block, Num));
+ }
+
+ // Sort PHIs to validate into RPO-order.
+ SmallVector<LDVSSAPhi *, 8> SortedPHIs;
+ for (auto &PHI : CreatedPHIs)
+ SortedPHIs.push_back(PHI);
+
+ llvm::sort(SortedPHIs, [&](LDVSSAPhi *A, LDVSSAPhi *B) {
+ return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB];
+ });
+
+ for (auto &PHI : SortedPHIs) {
+ ValueIDNum ThisBlockValueNum =
+ MLiveIns[PHI->ParentBlock->BB.getNumber()][Loc.asU64()];
+
+ // Are all these things actually defined?
+ for (auto &PHIIt : PHI->IncomingValues) {
+ // Any undef input means DBG_PHIs didn't dominate the use point.
+ if (Updater.UndefMap.contains(&PHIIt.first->BB))
+ return std::nullopt;
+
+ ValueIDNum ValueToCheck;
+ const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
+
+ auto VVal = ValidatedValues.find(PHIIt.first);
+ if (VVal == ValidatedValues.end()) {
+ // We cross a loop, and this is a backedge. LLVM's tail duplication
+ // happens so late that DBG_PHI instructions should not be able to
+ // migrate into loops -- meaning we can only be live-through this
+ // loop.
+ ValueToCheck = ThisBlockValueNum;
+ } else {
+ // Does the block have as a live-out, in the location we're examining,
+ // the value that we expect? If not, it's been moved or clobbered.
+ ValueToCheck = VVal->second;
+ }
+
+ if (BlockLiveOuts[Loc.asU64()] != ValueToCheck)
+ return std::nullopt;
+ }
+
+ // Record this value as validated.
+ ValidatedValues.insert({PHI->ParentBlock, ThisBlockValueNum});
+ }
+
+ // All the PHIs are valid: we can return what the SSAUpdater said our value
+ // number was.
+ return Result;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
new file mode 100644
index 000000000000..30de18e53c4f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -0,0 +1,1441 @@
+//===- InstrRefBasedImpl.h - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include <optional>
+
+#include "LiveDebugValues.h"
+
+class TransferTracker;
+
+// Forward declaration of the unit test class, so that we can peer into the LDV object.
+class InstrRefLDVTest;
+
+namespace LiveDebugValues {
+
+class MLocTracker;
+class DbgOpIDMap;
+
+using namespace llvm;
+
+/// Handle-class for a particular "location". This value-type uniquely
+/// symbolises a register or stack location, allowing manipulation of locations
+/// without concern for where that location is. Practically, this allows us to
+/// treat the state of the machine at a particular point as an array of values,
+/// rather than a map of values.
+class LocIdx {
+ unsigned Location;
+
+ // Default constructor is private, initializing to an illegal location number.
+ // Use only for "not an entry" elements in IndexedMaps.
+ LocIdx() : Location(UINT_MAX) {}
+
+public:
+#define NUM_LOC_BITS 24
+ LocIdx(unsigned L) : Location(L) {
+ assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+ }
+
+ static LocIdx MakeIllegalLoc() { return LocIdx(); }
+ static LocIdx MakeTombstoneLoc() {
+ LocIdx L = LocIdx();
+ --L.Location;
+ return L;
+ }
+
+ bool isIllegal() const { return Location == UINT_MAX; }
+
+ uint64_t asU64() const { return Location; }
+
+ bool operator==(unsigned L) const { return Location == L; }
+
+ bool operator==(const LocIdx &L) const { return Location == L.Location; }
+
+ bool operator!=(unsigned L) const { return !(*this == L); }
+
+ bool operator!=(const LocIdx &L) const { return !(*this == L); }
+
+ bool operator<(const LocIdx &Other) const {
+ return Location < Other.Location;
+ }
+};
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return std::make_pair(SpillBase, SpillOffset) ==
+ std::make_pair(Other.SpillBase, Other.SpillOffset);
+ }
+ bool operator<(const SpillLoc &Other) const {
+ return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+ SpillOffset.getScalable()) <
+ std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+ Other.SpillOffset.getScalable());
+ }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something less
+/// bit-constrained. Each value identifies the instruction and machine location
+/// where the value is defined, although there may be no corresponding machine
+/// operand for it (e.g. regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
+class ValueIDNum {
+ union {
+ struct {
+ uint64_t BlockNo : 20; /// The block where the def happens.
+ uint64_t InstNo : 20; /// The Instruction where the def happens.
+ /// One based, is distance from start of block.
+ uint64_t LocNo
+ : NUM_LOC_BITS; /// The machine location where the def happens.
+ } s;
+ uint64_t Value;
+ } u;
+
+ static_assert(sizeof(u) == 8, "Badly packed ValueIDNum?");
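+ // For example, the value defined by the third instruction of block five in
+ // machine location seven is encoded as {BlockNo=5, InstNo=3, LocNo=7}, and
+ // round-trips through asU64() / fromU64() unchanged.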
+
+public:
+ // Default-initialize to EmptyValue. This is necessary to make IndexedMaps
+ // of values work.
+ ValueIDNum() { u.Value = EmptyValue.asU64(); }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) {
+ u.s = {Block, Inst, Loc};
+ }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) {
+ u.s = {Block, Inst, Loc.asU64()};
+ }
+
+ uint64_t getBlock() const { return u.s.BlockNo; }
+ uint64_t getInst() const { return u.s.InstNo; }
+ uint64_t getLoc() const { return u.s.LocNo; }
+ bool isPHI() const { return u.s.InstNo == 0; }
+
+ uint64_t asU64() const { return u.Value; }
+
+ static ValueIDNum fromU64(uint64_t v) {
+ ValueIDNum Val;
+ Val.u.Value = v;
+ return Val;
+ }
+
+ bool operator<(const ValueIDNum &Other) const {
+ return asU64() < Other.asU64();
+ }
+
+ bool operator==(const ValueIDNum &Other) const {
+ return u.Value == Other.u.Value;
+ }
+
+ bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+ std::string asString(const std::string &mlocname) const {
+ return Twine("Value{bb: ")
+ .concat(Twine(u.s.BlockNo)
+ .concat(Twine(", inst: ")
+ .concat((u.s.InstNo ? Twine(u.s.InstNo)
+ : Twine("live-in"))
+ .concat(Twine(", loc: ").concat(
+ Twine(mlocname)))
+ .concat(Twine("}")))))
+ .str();
+ }
+
+ static ValueIDNum EmptyValue;
+ static ValueIDNum TombstoneValue;
+};
+
+} // End namespace LiveDebugValues
+
+namespace llvm {
+using namespace LiveDebugValues;
+
+template <> struct DenseMapInfo<LocIdx> {
+ static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); }
+ static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); }
+
+ static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); }
+
+ static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; }
+};
+
+template <> struct DenseMapInfo<ValueIDNum> {
+ static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; }
+ static inline ValueIDNum getTombstoneKey() {
+ return ValueIDNum::TombstoneValue;
+ }
+
+ static unsigned getHashValue(const ValueIDNum &Val) {
+ return hash_value(Val.asU64());
+ }
+
+ static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
+ return A == B;
+ }
+};
+
+} // end namespace llvm
+
+namespace LiveDebugValues {
+using namespace llvm;
+
+/// Type for a table of values in a block.
+using ValueTable = std::unique_ptr<ValueIDNum[]>;
+
+/// Type for a table-of-table-of-values, i.e., the collection of either
+/// live-in or live-out values for each block in the function.
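+/// Indexed first by block number and then by machine location index, e.g.
+/// MInLocs[MBB.getNumber()][Loc.asU64()].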
+using FuncValueTable = std::unique_ptr<ValueTable[]>;
+
+/// Thin wrapper around an integer -- designed to give more type safety to
+/// spill location numbers.
+class SpillLocationNo {
+public:
+ explicit SpillLocationNo(unsigned SpillNo) : SpillNo(SpillNo) {}
+ unsigned SpillNo;
+ unsigned id() const { return SpillNo; }
+
+ bool operator<(const SpillLocationNo &Other) const {
+ return SpillNo < Other.SpillNo;
+ }
+
+ bool operator==(const SpillLocationNo &Other) const {
+ return SpillNo == Other.SpillNo;
+ }
+ bool operator!=(const SpillLocationNo &Other) const {
+ return !(*this == Other);
+ }
+};
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and Boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect, bool IsVariadic)
+ : DIExpr(DIExpr), Indirect(Indirect), IsVariadic(IsVariadic) {}
+
+ /// Extract properties from an existing DBG_VALUE instruction.
+ DbgValueProperties(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ assert(MI.getDebugExpression()->getNumLocationOperands() == 0 ||
+ MI.isDebugValueList() || MI.isUndefDebugValue());
+ IsVariadic = MI.isDebugValueList();
+ DIExpr = MI.getDebugExpression();
+ Indirect = MI.isDebugOffsetImm();
+ }
+
+ bool isJoinable(const DbgValueProperties &Other) const {
+ return DIExpression::isEqualExpression(DIExpr, Indirect, Other.DIExpr,
+ Other.Indirect);
+ }
+
+ bool operator==(const DbgValueProperties &Other) const {
+ return std::tie(DIExpr, Indirect, IsVariadic) ==
+ std::tie(Other.DIExpr, Other.Indirect, Other.IsVariadic);
+ }
+
+ bool operator!=(const DbgValueProperties &Other) const {
+ return !(*this == Other);
+ }
+
+ unsigned getLocationOpCount() const {
+ return IsVariadic ? DIExpr->getNumLocationOperands() : 1;
+ }
+
+ const DIExpression *DIExpr;
+ bool Indirect;
+ bool IsVariadic;
+};
+
+/// TODO: Might pack better if we changed this to a Struct of Arrays, since
+/// MachineOperand is width 32, making this struct width 33. We could also
+/// potentially avoid storing the whole MachineOperand (sizeof=32), instead
+/// choosing to store just the contents portion (sizeof=8) and a Kind enum,
+/// since we already know it is some type of immediate value.
+/// Stores a single debug operand, which can either be a MachineOperand for
+/// directly storing immediate values, or a ValueIDNum representing some value
+/// computed at some point in the program. IsConst is used as a discriminator.
+struct DbgOp {
+ union {
+ ValueIDNum ID;
+ MachineOperand MO;
+ };
+ bool IsConst;
+
+ DbgOp() : ID(ValueIDNum::EmptyValue), IsConst(false) {}
+ DbgOp(ValueIDNum ID) : ID(ID), IsConst(false) {}
+ DbgOp(MachineOperand MO) : MO(MO), IsConst(true) {}
+
+ bool isUndef() const { return !IsConst && ID == ValueIDNum::EmptyValue; }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
+};
+
+/// A DbgOp whose ID (if any) has resolved to an actual location, LocIdx. Used
+/// when working with concrete debug values, i.e. when joining MLocs and VLocs
+/// in the TransferTracker or emitting DBG_VALUE/DBG_VALUE_LIST instructions in
+/// the MLocTracker.
+struct ResolvedDbgOp {
+ union {
+ LocIdx Loc;
+ MachineOperand MO;
+ };
+ bool IsConst;
+
+ ResolvedDbgOp(LocIdx Loc) : Loc(Loc), IsConst(false) {}
+ ResolvedDbgOp(MachineOperand MO) : MO(MO), IsConst(true) {}
+
+ bool operator==(const ResolvedDbgOp &Other) const {
+ if (IsConst != Other.IsConst)
+ return false;
+ if (IsConst)
+ return MO.isIdenticalTo(Other.MO);
+ return Loc == Other.Loc;
+ }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
+};
+
+/// An ID used in the DbgOpIDMap (below) to lookup a stored DbgOp. This is used
+/// in place of actual DbgOps inside of a DbgValue to reduce its size, as
+/// DbgValue is very frequently used and passed around, and the actual DbgOp is
+/// over 8x larger than this class, due to storing a MachineOperand. This ID
+/// should be equal for all equal DbgOps, and also encodes whether the mapped
+/// DbgOp is a constant, meaning that for simple equality or const-ness checks
+/// it is not necessary to lookup this ID.
+struct DbgOpID {
+ struct IsConstIndexPair {
+ uint32_t IsConst : 1;
+ uint32_t Index : 31;
+ };
+
+ union {
+ struct IsConstIndexPair ID;
+ uint32_t RawID;
+ };
+
+ DbgOpID() : RawID(UndefID.RawID) {
+ static_assert(sizeof(DbgOpID) == 4, "DbgOpID should fit within 4 bytes.");
+ }
+ DbgOpID(uint32_t RawID) : RawID(RawID) {}
+ DbgOpID(bool IsConst, uint32_t Index) : ID({IsConst, Index}) {}
+
+ static DbgOpID UndefID;
+
+ bool operator==(const DbgOpID &Other) const { return RawID == Other.RawID; }
+ bool operator!=(const DbgOpID &Other) const { return !(*this == Other); }
+
+ uint32_t asU32() const { return RawID; }
+
+ bool isUndef() const { return *this == UndefID; }
+ bool isConst() const { return ID.IsConst && !isUndef(); }
+ uint32_t getIndex() const { return ID.Index; }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack, const DbgOpIDMap *OpStore) const;
+#endif
+};
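+
+// Illustrative sketch, not part of the interface: the IsConst/Index bitfield
+// occupies the same 4 bytes as RawID, so an ID can be built from either view.
+// For example:
+//
+//   DbgOpID A(/*IsConst=*/false, /*Index=*/12); // refers to value-op #12
+//   assert(!A.isConst() && A.getIndex() == 12 && sizeof(A) == 4);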
+
+/// Class storing the complete set of values that are observed by DbgValues
+/// within the current function. Allows 2-way lookup, with `find` returning the
+/// Op for a given ID and `insert` returning the ID for a given Op (creating one
+/// if none exists).
+class DbgOpIDMap {
+
+ SmallVector<ValueIDNum, 0> ValueOps;
+ SmallVector<MachineOperand, 0> ConstOps;
+
+ DenseMap<ValueIDNum, DbgOpID> ValueOpToID;
+ DenseMap<MachineOperand, DbgOpID> ConstOpToID;
+
+public:
+ /// If \p Op does not already exist in this map, it is inserted and the
+ /// corresponding DbgOpID is returned. If Op already exists in this map, then
+ /// no change is made and the existing ID for Op is returned.
+ /// Calling this with the undef DbgOp will always return DbgOpID::UndefID.
+ DbgOpID insert(DbgOp Op) {
+ if (Op.isUndef())
+ return DbgOpID::UndefID;
+ if (Op.IsConst)
+ return insertConstOp(Op.MO);
+ return insertValueOp(Op.ID);
+ }
+ /// Returns the DbgOp associated with \p ID. Should only be used for IDs
+ /// returned from calling `insert` from this map or DbgOpID::UndefID.
+ DbgOp find(DbgOpID ID) const {
+ if (ID == DbgOpID::UndefID)
+ return DbgOp();
+ if (ID.isConst())
+ return DbgOp(ConstOps[ID.getIndex()]);
+ return DbgOp(ValueOps[ID.getIndex()]);
+ }
+
+ void clear() {
+ ValueOps.clear();
+ ConstOps.clear();
+ ValueOpToID.clear();
+ ConstOpToID.clear();
+ }
+
+private:
+ DbgOpID insertConstOp(MachineOperand &MO) {
+ auto ExistingIt = ConstOpToID.find(MO);
+ if (ExistingIt != ConstOpToID.end())
+ return ExistingIt->second;
+ DbgOpID ID(true, ConstOps.size());
+ ConstOpToID.insert(std::make_pair(MO, ID));
+ ConstOps.push_back(MO);
+ return ID;
+ }
+ DbgOpID insertValueOp(ValueIDNum VID) {
+ auto ExistingIt = ValueOpToID.find(VID);
+ if (ExistingIt != ValueOpToID.end())
+ return ExistingIt->second;
+ DbgOpID ID(false, ValueOps.size());
+ ValueOpToID.insert(std::make_pair(VID, ID));
+ ValueOps.push_back(VID);
+ return ID;
+ }
+};
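+
+// Illustrative sketch; SomeValue stands in for a real ValueIDNum. Inserting
+// the same operand twice yields the same ID, and find() recovers the operand:
+//
+//   DbgOpIDMap Map;
+//   DbgOpID ID1 = Map.insert(DbgOp(SomeValue));
+//   DbgOpID ID2 = Map.insert(DbgOp(SomeValue));
+//   assert(ID1 == ID2 && Map.find(ID1).ID == SomeValue);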
+
+// We set the maximum number of operands that we will handle to keep DbgValue
+// within a reasonable size (64 bytes), as we store and pass a lot of them
+// around.
+#define MAX_DBG_OPS 8
+
+/// Class recording the (high level) _value_ of a variable. Identifies the value
+/// of the variable as a list of ValueIDNums and constant MachineOperands, or as
+/// an empty list for undef debug values or VPHI values which we have not found
+/// valid locations for.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+private:
+ /// If Kind is Def or VPHI, the set of IDs corresponding to the DbgOps that
+ /// are used. VPHIs set every ID to EmptyID when we have not found a valid
+  /// machine-value for every operand, and set them to the corresponding
+ /// machine-values when we have found all of them.
+ DbgOpID DbgOps[MAX_DBG_OPS];
+ unsigned OpCount;
+
+public:
+ /// For a NoVal or VPHI DbgValue, which block it was generated in.
+ int BlockNo;
+
+ /// Qualifiers for the ValueIDNum above.
+ DbgValueProperties Properties;
+
+ typedef enum {
+ Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by some combination of constants,
+ // instructions, or PHI values.
+ VPHI, // Incoming values to BlockNo differ, those values must be joined by
+ // a PHI in this block.
+ NoVal, // Empty DbgValue indicating an unknown value. Used as initializer,
+ // before dominating blocks values are propagated in.
+ } KindT;
+  /// Discriminator for the kind of value this DbgValue represents; see KindT.
+ KindT Kind;
+
+ DbgValue(ArrayRef<DbgOpID> DbgOps, const DbgValueProperties &Prop)
+ : OpCount(DbgOps.size()), BlockNo(0), Properties(Prop), Kind(Def) {
+ static_assert(sizeof(DbgValue) <= 64,
+ "DbgValue should fit within 64 bytes.");
+ assert(DbgOps.size() == Prop.getLocationOpCount());
+ if (DbgOps.size() > MAX_DBG_OPS ||
+ any_of(DbgOps, [](DbgOpID ID) { return ID.isUndef(); })) {
+ Kind = Undef;
+ OpCount = 0;
+#define DEBUG_TYPE "LiveDebugValues"
+ if (DbgOps.size() > MAX_DBG_OPS) {
+ LLVM_DEBUG(dbgs() << "Found DbgValue with more than maximum allowed "
+ "operands.\n");
+ }
+#undef DEBUG_TYPE
+ } else {
+ for (unsigned Idx = 0; Idx < DbgOps.size(); ++Idx)
+ this->DbgOps[Idx] = DbgOps[Idx];
+ }
+ }
+
+ DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
+ : OpCount(0), BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
+ assert(Kind == NoVal || Kind == VPHI);
+ }
+
+ DbgValue(const DbgValueProperties &Prop, KindT Kind)
+ : OpCount(0), BlockNo(0), Properties(Prop), Kind(Kind) {
+ assert(Kind == Undef &&
+ "Empty DbgValue constructor must pass in Undef kind");
+ }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack = nullptr,
+ const DbgOpIDMap *OpStore = nullptr) const;
+#endif
+
+ bool operator==(const DbgValue &Other) const {
+ if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
+ return false;
+ else if (Kind == Def && !equal(getDbgOpIDs(), Other.getDbgOpIDs()))
+ return false;
+ else if (Kind == NoVal && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == VPHI && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == VPHI && !equal(getDbgOpIDs(), Other.getDbgOpIDs()))
+ return false;
+
+ return true;
+ }
+
+ bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+
+ // Returns an array of all the machine values used to calculate this variable
+ // value, or an empty list for an Undef or unjoined VPHI.
+ ArrayRef<DbgOpID> getDbgOpIDs() const { return {DbgOps, OpCount}; }
+
+ // Returns either DbgOps[Index] if this DbgValue has Debug Operands, or
+ // the ID for ValueIDNum::EmptyValue otherwise (i.e. if this is an Undef,
+ // NoVal, or an unjoined VPHI).
+ DbgOpID getDbgOpID(unsigned Index) const {
+ if (!OpCount)
+ return DbgOpID::UndefID;
+ assert(Index < OpCount);
+ return DbgOps[Index];
+ }
+ // Replaces this DbgValue's existing DbgOpIDs (if any) with the contents of
+ // \p NewIDs. The number of DbgOpIDs passed must be equal to the number of
+ // arguments expected by this DbgValue's properties (the return value of
+ // `getLocationOpCount()`).
+ void setDbgOpIDs(ArrayRef<DbgOpID> NewIDs) {
+ // We can go from no ops to some ops, but not from some ops to no ops.
+ assert(NewIDs.size() == getLocationOpCount() &&
+ "Incorrect number of Debug Operands for this DbgValue.");
+ OpCount = NewIDs.size();
+ for (unsigned Idx = 0; Idx < NewIDs.size(); ++Idx)
+ DbgOps[Idx] = NewIDs[Idx];
+ }
+
+ // The number of debug operands expected by this DbgValue's expression.
+ // getDbgOpIDs() should return an array of this length, unless this is an
+ // Undef or an unjoined VPHI.
+ unsigned getLocationOpCount() const {
+ return Properties.getLocationOpCount();
+ }
+
+  // Returns true if the Loc Ops of this and Other are joinable: that is, if
+  // either is an unjoined PHI (and so has no defined Loc Ops), or if each
+  // corresponding pair of Loc Ops has the same constness.
+ bool hasJoinableLocOps(const DbgValue &Other) const {
+ if (isUnjoinedPHI() || Other.isUnjoinedPHI())
+ return true;
+ for (unsigned Idx = 0; Idx < getLocationOpCount(); ++Idx) {
+ if (getDbgOpID(Idx).isConst() != Other.getDbgOpID(Idx).isConst())
+ return false;
+ }
+ return true;
+ }
+
+ bool isUnjoinedPHI() const { return Kind == VPHI && OpCount == 0; }
+
+ bool hasIdenticalValidLocOps(const DbgValue &Other) const {
+ if (!OpCount)
+ return false;
+ return equal(getDbgOpIDs(), Other.getDbgOpIDs());
+ }
+};
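+
+// Illustrative sketch; Expr and SomeOpID are placeholders for a DIExpression
+// and a non-undef DbgOpID. A concrete (Kind == Def) DbgValue is built from
+// its operand IDs plus the qualifying properties:
+//
+//   DbgValueProperties Props(Expr, /*Indirect=*/false, /*IsVariadic=*/false);
+//   DbgValue DV({SomeOpID}, Props); // one operand, as Props expects
+//   assert(DV.Kind == DbgValue::Def && DV.getLocationOpCount() == 1);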
+
+class LocIdxToIndexFunctor {
+public:
+ using argument_type = LocIdx;
+ unsigned operator()(const LocIdx &L) const { return L.asU64(); }
+};
+
+/// Tracker for what values are in machine locations. Listens to the Things
+/// being Done by various instructions, and maintains a table of what machine
+/// locations have what values (as defined by a ValueIDNum).
+///
+/// There are potentially a much larger number of machine locations on the
+/// target machine than the actual working-set size of the function. On x86 for
+/// example, we're extremely unlikely to want to track values through control
+/// or debug registers. To avoid doing so, MLocTracker has several layers of
+/// indirection going on, described below, to avoid unnecessarily tracking
+/// any location.
+///
+/// Here's a sort of diagram of the indexes, read from the bottom up:
+///
+///           Size on stack   Offset on stack
+///                       \   /
+///          Stack Idx (Where in slot is this?)
+///                        /
+///                       /
+/// Slot Num (%stack.0)  /
+/// FrameIdx => SpillNum /
+///              \      /
+///            SpillID (int)   Register number (int)
+///                       \      /
+///                       LocationID => LocIdx
+///                              |
+///                       LocIdx => ValueIDNum
+///
+/// The aim here is that the LocIdx => ValueIDNum vector is just an array of
+/// values in numbered locations, so that later analyses can ignore whether the
+/// location is a register or otherwise. To map a register / spill location to
+/// a LocIdx, you have to use the (sparse) LocationID => LocIdx map. And to
+/// build a LocationID for a stack slot, you need to combine identifiers for
+/// which stack slot it is and where within that slot is being described.
+///
+/// Register mask operands cause trouble by technically defining every register;
+/// various hacks are used to avoid tracking registers that are never read and
+/// only written by regmasks.
+class MLocTracker {
+public:
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const TargetLowering &TLI;
+
+ /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+ /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+ /// packed, entries only exist for locations that are being tracked.
+ LocToValueType LocIdxToIDNum;
+
+ /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+ /// LocIdx key / number for that location. There are always at least as many
+ /// as the number of registers on the target -- if the value in the register
+ /// is not being tracked, then the LocIdx value will be zero. New entries are
+ /// appended if a new spill slot begins being tracked.
+  /// This map and the corresponding reverse map persist for the analysis of
+  /// the whole function, and are necessary for decoding various vectors of
+  /// values.
+ std::vector<LocIdx> LocIDToLocIdx;
+
+ /// Inverse map of LocIDToLocIdx.
+ IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+  /// When clobbering register masks, we choose not to believe the machine model
+  /// and don't clobber SP. Do the same for SP aliases, and for efficiency,
+ /// keep a set of them here.
+ SmallSet<Register, 8> SPAliases;
+
+  /// Unique-ification of spill locations. Used to number them -- their LocID
+  /// number is the index in SpillLocs minus one plus NumRegs.
+ UniqueVector<SpillLoc> SpillLocs;
+
+ // If we discover a new machine location, assign it an mphi with this
+ // block number.
+ unsigned CurBB = -1;
+
+ /// Cached local copy of the number of registers the target has.
+ unsigned NumRegs;
+
+ /// Number of slot indexes the target has -- distinct segments of a stack
+ /// slot that can take on the value of a subregister, when a super-register
+ /// is written to the stack.
+ unsigned NumSlotIdxes;
+
+ /// Collection of register mask operands that have been observed. Second part
+ /// of pair indicates the instruction that they happened in. Used to
+ /// reconstruct where defs happened if we start tracking a location later
+ /// on.
+ SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+ /// Pair for describing a position within a stack slot -- first the size in
+ /// bits, then the offset.
+ typedef std::pair<unsigned short, unsigned short> StackSlotPos;
+
+ /// Map from a size/offset pair describing a position in a stack slot, to a
+ /// numeric identifier for that position. Allows easier identification of
+ /// individual positions.
+ DenseMap<StackSlotPos, unsigned> StackSlotIdxes;
+
+ /// Inverse of StackSlotIdxes.
+ DenseMap<unsigned, StackSlotPos> StackIdxesToPos;
+
+ /// Iterator for locations and the values they contain. Dereferencing
+ /// produces a struct/pair containing the LocIdx key for this location,
+ /// and a reference to the value currently stored. Simplifies the process
+ /// of seeking a particular location.
+ class MLocIterator {
+ LocToValueType &ValueMap;
+ LocIdx Idx;
+
+ public:
+ class value_type {
+ public:
+ value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) {}
+ const LocIdx Idx; /// Read-only index of this location.
+ ValueIDNum &Value; /// Reference to the stored value at this location.
+ };
+
+ MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+ : ValueMap(ValueMap), Idx(Idx) {}
+
+ bool operator==(const MLocIterator &Other) const {
+ assert(&ValueMap == &Other.ValueMap);
+ return Idx == Other.Idx;
+ }
+
+ bool operator!=(const MLocIterator &Other) const {
+ return !(*this == Other);
+ }
+
+ void operator++() { Idx = LocIdx(Idx.asU64() + 1); }
+
+ value_type operator*() { return value_type(Idx, ValueMap[LocIdx(Idx)]); }
+ };
+
+ MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI, const TargetLowering &TLI);
+
+ /// Produce location ID number for a Register. Provides some small amount of
+ /// type safety.
+ /// \param Reg The register we're looking up.
+ unsigned getLocID(Register Reg) { return Reg.id(); }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+ /// \param SpillSubReg Subregister within the spill we're addressing.
+ unsigned getLocID(SpillLocationNo Spill, unsigned SpillSubReg) {
+ unsigned short Size = TRI.getSubRegIdxSize(SpillSubReg);
+ unsigned short Offs = TRI.getSubRegIdxOffset(SpillSubReg);
+ return getLocID(Spill, {Size, Offs});
+ }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+  /// \param Idx Size/offset within the spill slot to be addressed.
+ unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ assert(StackSlotIdxes.contains(Idx));
+ SlotNo += StackSlotIdxes[Idx];
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Given a spill number, and a slot within the spill, calculate the ID number
+ /// for that location.
+ unsigned getSpillIDWithIdx(SpillLocationNo Spill, unsigned Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ SlotNo += Idx;
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Return the spill number that a location ID corresponds to.
+ SpillLocationNo locIDToSpill(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ // Truncate away the index part, leaving only the spill number.
+ ID /= NumSlotIdxes;
+ return SpillLocationNo(ID + 1); // The UniqueVector is one-based.
+ }
+
+ /// Returns the spill-slot size/offs that a location ID corresponds to.
+ StackSlotPos locIDToSpillIdx(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ unsigned Idx = ID % NumSlotIdxes;
+ return StackIdxesToPos.find(Idx)->second;
+ }
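+
+  // Worked example with illustrative numbers: with NumRegs == 100 and
+  // NumSlotIdxes == 4, spill number 2 at slot index 3 yields
+  //   getSpillIDWithIdx(SpillLocationNo(2), 3) == (2 - 1) * 4 + 3 + 100 == 107,
+  // and the inverses recover the pieces:
+  //   locIDToSpill(107) == SpillLocationNo(2), since (107 - 100) / 4 + 1 == 2,
+  //   locIDToSpillIdx(107) looks up slot index (107 - 100) % 4 == 3.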
+
+ unsigned getNumLocs() const { return LocIdxToIDNum.size(); }
+
+ /// Reset all locations to contain a PHI value at the designated block. Used
+  /// sometimes for actual PHI values, at other times to indicate the block entry
+ /// value (before any more information is known).
+ void setMPhis(unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ for (auto Location : locations())
+ Location.Value = {CurBB, 0, Location.Idx};
+ }
+
+ /// Load values for each location from array of ValueIDNums. Take current
+ /// bbnum just in case we read a value from a hitherto untouched register.
+ void loadFromArray(ValueTable &Locs, unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ // Iterate over all tracked locations, and load each locations live-in
+ // value into our local index.
+ for (auto Location : locations())
+ Location.Value = Locs[Location.Idx.asU64()];
+ }
+
+  /// Wipe any unnecessary location records after traversing a block.
+ void reset() {
+ // We could reset all the location values too; however either loadFromArray
+ // or setMPhis should be called before this object is re-used. Just
+ // clear Masks, they're definitely not needed.
+ Masks.clear();
+ }
+
+ /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+ /// the information in this pass uninterpretable.
+ void clear() {
+ reset();
+ LocIDToLocIdx.clear();
+ LocIdxToLocID.clear();
+ LocIdxToIDNum.clear();
+ // SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from
+ // 0
+ SpillLocs = decltype(SpillLocs)();
+ StackSlotIdxes.clear();
+ StackIdxesToPos.clear();
+
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ }
+
+  /// Set a location to a certain value.
+ void setMLoc(LocIdx L, ValueIDNum Num) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ LocIdxToIDNum[L] = Num;
+ }
+
+ /// Read the value of a particular location
+ ValueIDNum readMLoc(LocIdx L) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ return LocIdxToIDNum[L];
+ }
+
+ /// Create a LocIdx for an untracked register ID. Initialize it to either an
+ /// mphi value representing a live-in, or a recent register mask clobber.
+ LocIdx trackRegister(unsigned ID);
+
+ LocIdx lookupOrTrackRegister(unsigned ID) {
+ LocIdx &Index = LocIDToLocIdx[ID];
+ if (Index.isIllegal())
+ Index = trackRegister(ID);
+ return Index;
+ }
+
+ /// Is register R currently tracked by MLocTracker?
+ bool isRegisterTracked(Register R) {
+ LocIdx &Index = LocIDToLocIdx[R];
+ return !Index.isIllegal();
+ }
+
+ /// Record a definition of the specified register at the given block / inst.
+ /// This doesn't take a ValueIDNum, because the definition and its location
+ /// are synonymous.
+ void defReg(Register R, unsigned BB, unsigned Inst) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ ValueIDNum ValueID = {BB, Inst, Idx};
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Set a register to a value number. To be used if the value number is
+ /// known in advance.
+ void setReg(Register R, ValueIDNum ValueID) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ ValueIDNum readReg(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ return LocIdxToIDNum[Idx];
+ }
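+
+  // Illustrative sketch; SomeReg is a placeholder Register. After a def has
+  // been recorded, reading the register returns the value defined there:
+  //
+  //   defReg(SomeReg, /*BB=*/2, /*Inst=*/5);
+  //   assert(readReg(SomeReg) == ValueIDNum(2, 5, getRegMLoc(SomeReg)));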
+
+ /// Reset a register value to zero / empty. Needed to replicate the
+ /// VarLoc implementation where a copy to/from a register effectively
+ /// clears the contents of the source register. (Values can only have one
+ /// machine location in VarLocBasedImpl).
+ void wipeRegister(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = LocIDToLocIdx[ID];
+ LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+ }
+
+ /// Determine the LocIdx of an existing register.
+ LocIdx getRegMLoc(Register R) {
+ unsigned ID = getLocID(R);
+ assert(ID < LocIDToLocIdx.size());
+    assert(LocIDToLocIdx[ID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[ID];
+ }
+
+ /// Record a RegMask operand being executed. Defs any register we currently
+ /// track, stores a pointer to the mask in case we have to account for it
+ /// later.
+ void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
+
+ /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
+ /// Returns std::nullopt when in scenarios where a spill slot could be
+ /// tracked, but we would likely run into resource limitations.
+ std::optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L);
+
+ // Get LocIdx of a spill ID.
+ LocIdx getSpillMLoc(unsigned SpillID) {
+    assert(LocIDToLocIdx[SpillID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[SpillID];
+ }
+
+ /// Return true if Idx is a spill machine location.
+ bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+
+ /// How large is this location (aka, how wide is a value defined there?).
+ unsigned getLocSizeInBits(LocIdx L) const {
+ unsigned ID = LocIdxToLocID[L];
+ if (!isSpill(L)) {
+ return TRI.getRegSizeInBits(Register(ID), MF.getRegInfo());
+ } else {
+ // The slot location on the stack is uninteresting, we care about the
+ // position of the value within the slot (which comes with a size).
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ return Pos.first;
+ }
+ }
+
+ MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
+
+ MLocIterator end() {
+ return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+ }
+
+ /// Return a range over all locations currently tracked.
+ iterator_range<MLocIterator> locations() {
+ return llvm::make_range(begin(), end());
+ }
+
+ std::string LocIdxToName(LocIdx Idx) const;
+
+ std::string IDAsString(const ValueIDNum &Num) const;
+
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump();
+
+ LLVM_DUMP_METHOD void dump_mloc_map();
+#endif
+
+ /// Create a DBG_VALUE based on debug operands \p DbgOps. Qualify it with the
+  /// information in \p Properties, for variable \p Var. Don't insert it
+  /// anywhere, just return the builder for it.
+ MachineInstrBuilder emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties);
+};
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+ /// Map DebugVariable to the latest Value it's defined to have.
+ /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+ /// the order in this container.
+ /// We only retain the last DbgValue in each block for each variable, to
+  /// determine the block's live-out variable value. The Vars container forms the
+ /// transfer function for this block, as part of the dataflow analysis. The
+ /// movement of values between locations inside of a block is handled at a
+ /// much later stage, in the TransferTracker class.
+ MapVector<DebugVariable, DbgValue> Vars;
+ SmallDenseMap<DebugVariable, const DILocation *, 8> Scopes;
+ MachineBasicBlock *MBB = nullptr;
+ const OverlapMap &OverlappingFragments;
+ DbgValueProperties EmptyProperties;
+
+public:
+ VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr)
+ : OverlappingFragments(O), EmptyProperties(EmptyExpr, false, false) {}
+
+ void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ const SmallVectorImpl<DbgOpID> &DebugOps) {
+ assert(MI.isDebugValueLike());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValue Rec = (DebugOps.size() > 0)
+ ? DbgValue(DebugOps, Properties)
+ : DbgValue(Properties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
+ }
+
+ void considerOverlaps(const DebugVariable &Var, const DILocation *Loc) {
+ auto Overlaps = OverlappingFragments.find(
+ {Var.getVariable(), Var.getFragmentOrDefault()});
+ if (Overlaps == OverlappingFragments.end())
+ return;
+
+ // Otherwise: terminate any overlapped variable locations.
+ for (auto FragmentInfo : Overlaps->second) {
+      // The "empty" fragment is stored as DebugVariable::DefaultFragment, so
+      // that it overlaps with everything; however, its canonical
+      // representation in a DebugVariable is "None".
+ std::optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo;
+ if (DebugVariable::isDefaultFragment(FragmentInfo))
+ OptFragmentInfo = std::nullopt;
+
+ DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo,
+ Var.getInlinedAt());
+ DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Overlapped, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Overlapped] = Loc;
+ }
+ }
+
+ void clear() {
+ Vars.clear();
+ Scopes.clear();
+ }
+};
+
+/// The instruction-reference-based LiveDebugValues implementation. Tracks the
+/// machine values defined by instructions through machine locations, then
+/// computes which value each variable refers to in each block and where that
+/// value can be found, finally emitting DBG_VALUEs to describe it.
+class InstrRefBasedLDV : public LDVImpl {
+public:
+ friend class ::InstrRefLDVTest;
+
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = std::optional<DIExpression::FragmentInfo>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+ /// Machine location/value transfer function, a mapping of which locations
+ /// are assigned which new values.
+ using MLocTransferMap = SmallDenseMap<LocIdx, ValueIDNum>;
+
+ /// Live in/out structure for the variable values: a per-block map of
+ /// variables to their values.
+ using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
+
+ using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+ /// Type for a live-in value: the predecessor block, and its value.
+ using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+ /// Vector (per block) of a collection (inner smallvector) of live-ins.
+ /// Used as the result type for the variable value dataflow problem.
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+ /// Mapping from lexical scopes to a DILocation in that scope.
+ using ScopeToDILocT = DenseMap<const LexicalScope *, const DILocation *>;
+
+ /// Mapping from lexical scopes to variables in that scope.
+ using ScopeToVarsT = DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>>;
+
+ /// Mapping from lexical scopes to blocks where variables in that scope are
+  /// assigned. Such blocks aren't necessarily "in" the lexical scope; they're
+  /// just blocks where an assignment happens.
+ using ScopeToAssignBlocksT = DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>;
+
+private:
+ MachineDominatorTree *DomTree;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ const MachineFrameInfo *MFI;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ TargetPassConfig *TPC;
+
+  // An empty DIExpression. Used for default / placeholder DbgValueProperties
+  // objects, as we can't have null expressions.
+ const DIExpression *EmptyExpr;
+
+ /// Object to track machine locations as we step through a block. Could
+ /// probably be a field rather than a pointer, as it's always used.
+ MLocTracker *MTracker = nullptr;
+
+ /// Number of the current block LiveDebugValues is stepping through.
+ unsigned CurBB = -1;
+
+ /// Number of the current instruction LiveDebugValues is evaluating.
+ unsigned CurInst;
+
+ /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+ /// steps through a block. Reads the values at each location from the
+ /// MLocTracker object.
+ VLocTracker *VTracker = nullptr;
+
+ /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+ /// between locations during stepping, creates new DBG_VALUEs when values move
+ /// location.
+ TransferTracker *TTracker = nullptr;
+
+ /// Blocks which are artificial, i.e. blocks which exclusively contain
+ /// instructions without DebugLocs, or with line 0 locations.
+ SmallPtrSet<MachineBasicBlock *, 16> ArtificialBlocks;
+
+ // Mapping of blocks to and from their RPOT order.
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
+ DenseMap<unsigned, unsigned> BBNumToRPO;
+
+ /// Pair of MachineInstr, and its 1-based offset into the containing block.
+ using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+ /// Map from debug instruction number to the MachineInstr labelled with that
+ /// number, and its location within the function. Used to transform
+ /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+ std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+ /// Record of where we observed a DBG_PHI instruction.
+ class DebugPHIRecord {
+ public:
+ /// Instruction number of this DBG_PHI.
+ uint64_t InstrNum;
+ /// Block where DBG_PHI occurred.
+ MachineBasicBlock *MBB;
+ /// The value number read by the DBG_PHI -- or std::nullopt if it didn't
+ /// refer to a value.
+ std::optional<ValueIDNum> ValueRead;
+ /// Register/Stack location the DBG_PHI reads -- or std::nullopt if it
+ /// referred to something unexpected.
+ std::optional<LocIdx> ReadLoc;
+
+ operator unsigned() const { return InstrNum; }
+ };
+
+ /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+ /// DBG_PHI read and where. Populated and edited during the machine value
+  /// location problem -- we use LLVM's SSA Updater to fix changes made by
+ /// optimizations that destroy PHI instructions.
+ SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
+ // Map of overlapping variable fragments.
+ OverlapMap OverlapFragments;
+ VarToFragments SeenFragments;
+
+ /// Mapping of DBG_INSTR_REF instructions to their values, for those
+ /// DBG_INSTR_REFs that call resolveDbgPHIs. These variable references solve
+  /// a mini SSA problem caused by DBG_PHIs being cloned; this collection
+  /// caches the result.
+ DenseMap<std::pair<MachineInstr *, unsigned>, std::optional<ValueIDNum>>
+ SeenDbgPHIs;
+
+ DbgOpIDMap DbgOpStore;
+
+ /// True if we need to examine call instructions for stack clobbers. We
+ /// normally assume that they don't clobber SP, but stack probes on Windows
+ /// do.
+ bool AdjustsStackInCalls = false;
+
+ /// If AdjustsStackInCalls is true, this holds the name of the target's stack
+ /// probe function, which is the function we expect will alter the stack
+ /// pointer.
+ StringRef StackProbeSymbolName;
+
+ /// Tests whether this instruction is a spill to a stack slot.
+ std::optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF);
+
+ /// Decide if @MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
+  /// If a given instruction is identified as a restore from a stack slot,
+  /// return the spill location it reads from and set \p Reg to the restored
+  /// register.
+ std::optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF,
+ unsigned &Reg);
+
+ /// Given a spill instruction, extract the spill slot information, ensure it's
+ /// tracked, and return the spill number.
+ std::optional<SpillLocationNo>
+ extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+ /// For an instruction reference given by \p InstNo and \p OpNo in instruction
+ /// \p MI returns the Value pointed to by that instruction reference if any
+ /// exists, otherwise returns std::nullopt.
+ std::optional<ValueIDNum> getValueForInstrRef(unsigned InstNo, unsigned OpNo,
+ MachineInstr &MI,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
+
+ /// Observe a single instruction while stepping through a block.
+ void process(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
+
+ /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugValue(const MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugInstrRef(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
+
+ /// Stores value-information about where this PHI occurred, and what
+ /// instruction number is associated with it.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugPHI(MachineInstr &MI);
+
+ /// Examines whether \p MI is copy instruction, and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferRegisterCopy(MachineInstr &MI);
+
+ /// Examines whether \p MI is stack spill or restore instruction, and
+ /// notifies trackers. \returns true if MI was recognized and processed.
+ bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+ /// Examines \p MI for any registers that it defines, and notifies trackers.
+ void transferRegisterDef(MachineInstr &MI);
+
+ /// Copy one location to the other, accounting for movement of subregisters
+ /// too.
+ void performCopy(Register Src, Register Dst);
+
+ void accumulateFragmentMap(MachineInstr &MI);
+
+ /// Determine the machine value number referred to by (potentially several)
+ /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+ /// DBG_PHIs, shifting the position where values in registers merge, and
+ /// forming another mini-ssa problem to solve.
+ /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+ /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+ /// \returns The machine value number at position Here, or std::nullopt.
+ std::optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here,
+ uint64_t InstrNum);
+
+ std::optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here,
+ uint64_t InstrNum);
+
+ /// Step through the function, recording register definitions and movements
+ /// in an MLocTracker. Convert the observations into a per-block transfer
+ /// function in \p MLocTransfer, suitable for using with the machine value
+ /// location dataflow problem.
+ void
+ produceMLocTransferFunction(MachineFunction &MF,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks);
+
+ /// Solve the machine value location dataflow problem. Takes as input the
+ /// transfer functions in \p MLocTransfer. Writes the output live-in and
+ /// live-out arrays to the (initialized to zero) multidimensional arrays in
+ /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+ /// number, the inner by LocIdx.
+ void buildMLocValueMap(MachineFunction &MF, FuncValueTable &MInLocs,
+ FuncValueTable &MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Examine the stack indexes (i.e. offsets within the stack) to find the
+ /// basic units of interference -- like reg units, but for the stack.
+ void findStackIndexInterference(SmallVectorImpl<unsigned> &Slots);
+
+ /// Install PHI values into the live-in array for each block, according to
+ /// the IDF of each register.
+ void placeMLocPHIs(MachineFunction &MF,
+ SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ FuncValueTable &MInLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Propagate variable values to blocks in the common case where there's
+ /// only one value assigned to the variable. This function has better
+ /// performance as it doesn't have to find the dominance frontier between
+ /// different assignments.
+ void placePHIsForSingleVarDefinition(
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ const DebugVariable &Var, LiveInsT &Output);
+
+ /// Calculate the iterated-dominance-frontier for a set of defs, using the
+ /// existing LLVM facilities for this. Works for a single "value" or
+ /// machine/variable location.
+ /// \p AllBlocks Set of blocks where we might consume the value.
+ /// \p DefBlocks Set of blocks where the value/location is defined.
+ /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+ void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
+
+ /// Perform a control flow join (lattice value meet) of the values in machine
+ /// locations at \p MBB. Follows the algorithm described in the file-comment,
+ /// reading live-outs of predecessors from \p OutLocs, the current live ins
+ /// from \p InLocs, and assigning the newly computed live ins back into
+ /// \p InLocs. \returns two bools -- the first indicates whether a change
+ /// was made, the second whether a lattice downgrade occurred. If the latter
+ /// is true, revisiting this block is necessary.
+ bool mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ FuncValueTable &OutLocs, ValueTable &InLocs);
+
+ /// Produce a set of blocks that are in the current lexical scope. This means
+ /// those blocks that contain instructions "in" the scope, blocks where
+ /// assignments to variables in scope occur, and artificial blocks that are
+ /// successors to any of the earlier blocks. See https://llvm.org/PR48091 for
+  /// more commentary on what "in scope" means.
+ /// \p DILoc A location in the scope that we're fetching blocks for.
+ /// \p Output Set to put in-scope-blocks into.
+ /// \p AssignBlocks Blocks known to contain assignments of variables in scope.
+ void
+ getBlocksForScope(const DILocation *DILoc,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Output,
+ const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks);
+
+ /// Solve the variable value dataflow problem, for a single lexical scope.
+ /// Uses the algorithm from the file comment to resolve control flow joins
+ /// using PHI placement and value propagation. Reads the locations of machine
+ /// values from the \p MInLocs and \p MOutLocs arrays (see buildMLocValueMap)
+ /// and reads the variable values transfer function from \p AllTheVlocs.
+ /// Live-in and Live-out variable values are stored locally, with the live-ins
+ /// permanently stored to \p Output once a fixedpoint is reached.
+ /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+ /// that we should be tracking.
+ /// \p AssignBlocks contains the set of blocks that aren't in \p DILoc's
+ /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
+ /// locations through.
+ void buildVLocValueMap(const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs,
+ FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+  /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
+  /// live-in values coming from predecessors' live-outs, and replaces any PHIs
+  /// already present in this block's live-ins with a live-through value if the
+ /// PHI isn't needed.
+ /// \p LiveIn Old live-in value, overwritten with new one if live-in changes.
+ /// \returns true if any live-ins change value, either from value propagation
+ /// or PHI elimination.
+ bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DbgValue &LiveIn);
+
+ /// For the given block and live-outs feeding into it, try to find
+ /// machine locations for each debug operand where all the values feeding
+ /// into that operand join together.
+ /// \returns true if a joined location was found for every value that needed
+ /// to be joined.
+ bool
+ pickVPHILoc(SmallVectorImpl<DbgOpID> &OutValues, const MachineBasicBlock &MBB,
+ const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+ std::optional<ValueIDNum> pickOperandPHILoc(
+ unsigned DbgOpIdx, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts,
+ FuncValueTable &MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+ /// Take collections of DBG_VALUE instructions stored in TTracker, and
+ /// install them into their output blocks. Preserves a stable order of
+ /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
+ /// the AllVarsNumbering order.
+ bool emitTransfers(DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+
+  /// Boilerplate computation of some initial sets, artificial blocks and
+ /// RPOT block ordering.
+ void initialSetup(MachineFunction &MF);
+
+  /// Produce a map of the last lexical scope that uses a block, using the
+  /// scope's DFSOut number. Mapping is block-number to DFSOut.
+  /// \p EjectionMap Pre-allocated vector in which to install the built map.
+ /// \p ScopeToDILocation Mapping of LexicalScopes to their DILocations.
+ /// \p AssignBlocks Map of blocks where assignments happen for a scope.
+ void makeDepthFirstEjectionMap(SmallVectorImpl<unsigned> &EjectionMap,
+ const ScopeToDILocT &ScopeToDILocation,
+ ScopeToAssignBlocksT &AssignBlocks);
+
+ /// When determining per-block variable values and emitting to DBG_VALUEs,
+ /// this function explores by lexical scope depth. Doing so means that per
+ /// block information can be fully computed before exploration finishes,
+ /// allowing us to emit it and free data structures earlier than otherwise.
+ /// It's also good for locality.
+ bool depthFirstVLocAndEmit(
+ unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
+ const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC);
+
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
+
+public:
+ /// Default construct and initialize the pass.
+ InstrRefBasedLDV();
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+ bool isCalleeSaved(LocIdx L) const;
+ bool isCalleeSavedReg(Register R) const;
+
+ bool hasFoldedStackStore(const MachineInstr &MI) {
+ // Instruction must have a memory operand that's a stack slot, and isn't
+ // aliased, meaning it's a spill from regalloc instead of a variable.
+ // If it's aliased, we can't guarantee its value.
+ if (!MI.hasOneMemOperand())
+ return false;
+ auto *MemOperand = *MI.memoperands_begin();
+ return MemOperand->isStore() &&
+ MemOperand->getPseudoValue() &&
+ MemOperand->getPseudoValue()->kind() == PseudoSourceValue::FixedStack
+ && !MemOperand->getPseudoValue()->isAliased(MFI);
+ }
+
+ std::optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
+};
+
+} // namespace LiveDebugValues
+
+#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
new file mode 100644
index 000000000000..0c0a4e13c7c9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -0,0 +1,139 @@
+//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveDebugValues.h"
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+
+/// \file LiveDebugValues.cpp
+///
+/// The LiveDebugValues pass extends the range of variable locations
+/// (specified by DBG_VALUE instructions) from single blocks to successors
+/// and any other code locations where the variable location is valid.
+/// There are currently two implementations: the "VarLoc" implementation
+/// explicitly tracks the location of a variable, while the "InstrRef"
+/// implementation tracks the values defined by instructions through locations.
+///
+/// This file implements neither; it merely registers the pass and allows the
+/// user to pick which implementation will be used to propagate variable
+/// locations.
+
+#define DEBUG_TYPE "livedebugvalues"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ ForceInstrRefLDV("force-instr-ref-livedebugvalues", cl::Hidden,
+ cl::desc("Use instruction-ref based LiveDebugValues with "
+ "normal DBG_VALUE inputs"),
+ cl::init(false));
+
+static cl::opt<cl::boolOrDefault> ValueTrackingVariableLocations(
+ "experimental-debug-variable-locations",
+ cl::desc("Use experimental new value-tracking variable locations"));
+
+// Options to prevent pathological compile-time behavior. If InputBBLimit and
+// InputDbgValueLimit are both exceeded, range extension is disabled.
+static cl::opt<unsigned> InputBBLimit(
+ "livedebugvalues-input-bb-limit",
+ cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
+ cl::init(10000), cl::Hidden);
+static cl::opt<unsigned> InputDbgValueLimit(
+ "livedebugvalues-input-dbg-value-limit",
+ cl::desc(
+ "Maximum input DBG_VALUE insts supported by debug range extension"),
+ cl::init(50000), cl::Hidden);
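+
+// For illustration only: as cl::opts, these knobs can be set on an LLVM tool's
+// command line (or via clang's -mllvm), e.g. a hypothetical invocation:
+//   llc -force-instr-ref-livedebugvalues -livedebugvalues-input-bb-limit=5000 foo.ll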
+
+namespace {
+/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
+/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
+/// base class.
+class LiveDebugValues : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveDebugValues();
+ ~LiveDebugValues() = default;
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ std::unique_ptr<LDVImpl> InstrRefImpl;
+ std::unique_ptr<LDVImpl> VarLocImpl;
+ TargetPassConfig *TPC = nullptr;
+ MachineDominatorTree MDT;
+};
+} // namespace
+
+char LiveDebugValues::ID = 0;
+
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+
+INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,
+ false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+ initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+ InstrRefImpl =
+ std::unique_ptr<LDVImpl>(llvm::makeInstrRefBasedLiveDebugValues());
+ VarLocImpl = std::unique_ptr<LDVImpl>(llvm::makeVarLocBasedLiveDebugValues());
+}
+
+bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+  // Except for Wasm, all targets should be using only physical registers at
+  // this point. Wasm only uses virtual registers throughout its pipeline, but
+  // its virtual registers don't participate in this LiveDebugValues analysis;
+  // only its target indices do.
+ assert(MF.getTarget().getTargetTriple().isWasm() ||
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
+ bool InstrRefBased = MF.useDebugInstrRef();
+ // Allow the user to force selection of InstrRef LDV.
+ InstrRefBased |= ForceInstrRefLDV;
+
+ TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ LDVImpl *TheImpl = &*VarLocImpl;
+
+ MachineDominatorTree *DomTree = nullptr;
+ if (InstrRefBased) {
+ DomTree = &MDT;
+ MDT.calculate(MF);
+ TheImpl = &*InstrRefImpl;
+ }
+
+ return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit,
+ InputDbgValueLimit);
+}
+
+bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
+ // Enable by default on x86_64, disable if explicitly turned off on cmdline.
+ if (T.getArch() == llvm::Triple::x86_64 &&
+ ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
+ return true;
+
+ // Enable if explicitly requested on command line.
+ return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
+}
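+
+// Illustrative note: per the logic above, a triple such as
+// x86_64-unknown-linux-gnu gets instruction referencing by default (unless the
+// flag is explicitly set to false), while other architectures use it only when
+// the flag is explicitly set to true.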
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
new file mode 100644
index 000000000000..6cc1685c0022
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -0,0 +1,43 @@
+//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+
+namespace llvm {
+class MachineDominatorTree;
+class MachineFunction;
+class TargetPassConfig;
+class Triple;
+
+// Inline namespace for types / symbols shared between different
+// LiveDebugValues implementations.
+inline namespace SharedLiveDebugValues {
+
+// Expose a base class for LiveDebugValues interfaces to inherit from. This
+// allows the generic LiveDebugValues pass to call into the chosen
+// implementation.
+class LDVImpl {
+public:
+ virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) = 0;
+ virtual ~LDVImpl() = default;
+};
+
+} // namespace SharedLiveDebugValues
+
+// Factory functions for LiveDebugValues implementations.
+extern LDVImpl *makeVarLocBasedLiveDebugValues();
+extern LDVImpl *makeInstrRefBasedLiveDebugValues();
+
+extern bool debuginfoShouldUseDebugInstrRef(const Triple &T);
+
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
new file mode 100644
index 000000000000..116c6b7e2d19
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -0,0 +1,2405 @@
+//===- VarLocBasedImpl.cpp - Tracking Debug Value MIs with VarLoc class----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file VarLocBasedImpl.cpp
+///
+/// LiveDebugValues is an optimistic "available expressions" dataflow
+/// algorithm. The set of expressions is the set of machine locations
+/// (registers, spill slots, constants, and target indices) that a variable
+/// fragment might be located in, qualified by a DIExpression and indirect-ness
+/// flag, while each variable is identified by a DebugVariable object. The
+/// availability of an expression begins when a DBG_VALUE instruction specifies
+/// the location of a DebugVariable, and continues until that location is
+/// clobbered or re-specified by a different DBG_VALUE for the same
+/// DebugVariable.
+///
+/// The output of LiveDebugValues is additional DBG_VALUE instructions,
+/// placed to extend variable locations as far as they're available. This file
+/// and the VarLocBasedLDV class are an implementation that explicitly tracks
+/// locations, using the VarLoc class.
+///
+/// The canonical "available expressions" problem doesn't have expression
+/// clobbering, instead when a variable is re-assigned, any expressions using
+/// that variable get invalidated. LiveDebugValues can map onto "available
+/// expressions" by having every register represented by a variable, which is
+/// used in an expression that becomes available at a DBG_VALUE instruction.
+/// When the register is clobbered, its variable is effectively reassigned, and
+/// expressions computed from it become unavailable. A similar construct is
+/// needed when a DebugVariable has its location re-specified, to invalidate
+/// all other locations for that DebugVariable.
+///
+/// Using the dataflow analysis to compute the available expressions, we create
+/// a DBG_VALUE at the beginning of each block where the expression is
+/// live-in. This propagates variable locations into every basic block where
+/// the location can be determined, rather than only having DBG_VALUEs in blocks
+/// where locations are specified due to an assignment or some optimization.
+/// Movements of values between registers and spill slots are annotated with
+/// DBG_VALUEs as well, to track variable values between locations. All this
+/// allows
+/// DbgEntityHistoryCalculator to focus on only the locations within individual
+/// blocks, facilitating testing and improving modularity.
+///
+/// We follow an optimistic dataflow approach, with this lattice:
+///
+/// \verbatim
+/// ┬ "Unknown"
+/// |
+/// v
+/// True
+/// |
+/// v
+/// ⊥ False
+/// \endverbatim Here "True" signifies that the expression is available (and
+/// thus that a DebugVariable's location is the corresponding register), while
+/// "False" signifies that the expression is unavailable. "Unknown"s never
+/// survive to the end of the analysis (see below).
+///
+/// Formally, all DebugVariable locations that are live-out of a block are
+/// initialized to \top. A block's live-in values take the meet of all its
+/// predecessors' live-out values, except for the entry block, where
+/// all live-ins are \bot. The usual dataflow propagation occurs: the transfer
+/// function for a block assigns an expression for a DebugVariable to be "True"
+/// if a DBG_VALUE in the block specifies it; "False" if the location is
+/// clobbered; or the live-in value if it is unaffected by the block. We
+/// visit each block in reverse post-order until a fixed point is reached. The
+/// solution produced is maximal.
+///
+/// Intuitively, we start by assuming that every expression / variable location
+/// is at least "True", and then propagate "False" from the entry block and any
+/// clobbers until there are no more changes to make. This gives us an accurate
+/// solution because all incorrect locations will have a "False" propagated into
+/// them. It also gives us a solution that copes well with loops by assuming
+/// that variable locations are live-through every loop, and then using the
+/// dataflow analysis to remove those that are not.
+///
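+/// As a small worked example (schematic; the register and block names here
+/// are hypothetical): suppose the entry block states that variable "x" lives
+/// in $rdi and then branches to bb.1 and bb.2, which both join at bb.3. If
+/// neither branch clobbers $rdi, the expression {x -> $rdi} is "True" on both
+/// edges into bb.3, the meet is "True", and a DBG_VALUE placing x in $rdi is
+/// emitted at the start of bb.1, bb.2 and bb.3. If instead bb.2 redefines
+/// $rdi, its live-out for {x -> $rdi} becomes "False", the meet at bb.3 is
+/// "False", and no DBG_VALUE is emitted there.
+///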
+/// Within LiveDebugValues: each variable location is represented by a
+/// VarLoc object that identifies the source variable, the set of
+/// machine-locations that currently describe it (a single location for
+/// DBG_VALUE or multiple for DBG_VALUE_LIST), and the DBG_VALUE inst that
+/// specifies the location. Each VarLoc is indexed in the (function-scope) \p
+/// VarLocMap, giving each VarLoc a set of unique indexes, each of which
+/// corresponds to one of the VarLoc's machine-locations and can be used to
+/// lookup the VarLoc in the VarLocMap. Rather than operate directly on machine
+/// locations, the dataflow analysis in this pass identifies locations by their
+/// indices in the VarLocMap, meaning all the variable locations in a block can
+/// be described by a sparse vector of VarLocMap indices.
+///
+/// All the storage for the dataflow analysis is local to the ExtendRanges
+/// method and passed down to helper methods. "OutLocs" and "InLocs" record the
+/// in and out lattice values for each block. "OpenRanges" maintains a list of
+/// variable locations and, with the "process" method, evaluates the transfer
+/// function of each block. "flushPendingLocs" installs debug value instructions
+/// for each live-in location at the start of blocks, while "Transfers" records
+/// transfers of values between machine-locations.
+///
+/// We avoid explicitly representing the "Unknown" (\top) lattice value in the
+/// implementation. Instead, unvisited blocks implicitly have all lattice
+/// values set as "Unknown". After being visited, there will be a path back to
+/// the entry block where the lattice value is "False", and as the transfer
+/// function cannot make new "Unknown" locations, there are no scenarios where
+/// a block can have an "Unknown" location after being visited. Similarly, we
+/// don't enumerate all possible variable locations before exploring the
+/// function: when a new location is discovered, all blocks previously explored
+/// were implicitly "False" but unrecorded, and become explicitly "False" when
+/// a new VarLoc is created with its bit not set in predecessor InLocs or
+/// OutLocs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "LiveDebugValues.h"
+
+#include "llvm/ADT/CoalescingBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <optional>
+#include <queue>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "livedebugvalues"
+
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+
+/// Return true if \p Op is a register other than the stack pointer or the
+/// frame pointer, otherwise return false. This is used to avoid basing the
+/// debug entry values on the registers, since we do not support it at the
+/// moment.
+static bool isRegOtherThanSPAndFP(const MachineOperand &Op,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ if (!Op.isReg())
+ return false;
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ Register Reg = Op.getReg();
+
+ return Reg && Reg != SP && Reg != FP;
+}
+
+namespace {
+
+// Max out the number of statically allocated elements in DefinedRegsSet, as
+// this prevents fallback to std::set::count() operations.
+using DefinedRegsSet = SmallSet<Register, 32>;
+
+// The IDs in this set correspond to MachineLocs in VarLocs, as well as VarLocs
+// that represent Entry Values; every VarLoc in the set will also appear
+// exactly once at Location=0.
+// As a result, each VarLoc may appear more than once in this "set", but each
+// range corresponding to a Reg, SpillLoc, or EntryValue type will still be a
+// "true" set (i.e. each VarLoc may appear only once), and the range Location=0
+// is the set of all VarLocs.
+using VarLocSet = CoalescingBitVector<uint64_t>;
+
+/// A type-checked pair of {Register Location (or 0), Index}, used to index
+/// into a \ref VarLocMap. This can be efficiently converted to a 64-bit int
+/// for insertion into a \ref VarLocSet, and efficiently converted back. The
+/// type-checker helps ensure that the conversions aren't lossy.
+///
+/// Why encode a location /into/ the VarLocMap index? This makes it possible
+/// to find the open VarLocs killed by a register def very quickly. This is a
+/// performance-critical operation for LiveDebugValues.
+struct LocIndex {
+ using u32_location_t = uint32_t;
+ using u32_index_t = uint32_t;
+
+ u32_location_t Location; // Physical registers live in the range [1;2^30) (see
+ // \ref MCRegister), so we have plenty of range left
+ // here to encode non-register locations.
+ u32_index_t Index;
+
+ /// The location that has an entry for every VarLoc in the map.
+ static constexpr u32_location_t kUniversalLocation = 0;
+
+ /// The first location that is reserved for VarLocs with locations of kind
+ /// RegisterKind.
+ static constexpr u32_location_t kFirstRegLocation = 1;
+
+ /// The first location greater than 0 that is not reserved for VarLocs with
+ /// locations of kind RegisterKind.
+ static constexpr u32_location_t kFirstInvalidRegLocation = 1 << 30;
+
+ /// A special location reserved for VarLocs with locations of kind
+ /// SpillLocKind.
+ static constexpr u32_location_t kSpillLocation = kFirstInvalidRegLocation;
+
+ /// A special location reserved for VarLocs of kind EntryValueBackupKind and
+ /// EntryValueCopyBackupKind.
+ static constexpr u32_location_t kEntryValueBackupLocation =
+ kFirstInvalidRegLocation + 1;
+
+ /// A special location reserved for VarLocs with locations of kind
+ /// WasmLocKind.
+ /// TODO Placing all Wasm target index locations in this single kWasmLocation
+ /// may cause slowdown in compilation time in very large functions. Consider
+  /// giving each target index/offset pair its own u32_location_t if this
+ /// becomes a problem.
+ static constexpr u32_location_t kWasmLocation = kFirstInvalidRegLocation + 2;
+
+ LocIndex(u32_location_t Location, u32_index_t Index)
+ : Location(Location), Index(Index) {}
+
+ uint64_t getAsRawInteger() const {
+ return (static_cast<uint64_t>(Location) << 32) | Index;
+ }
+
+ template<typename IntT> static LocIndex fromRawInteger(IntT ID) {
+ static_assert(std::is_unsigned_v<IntT> && sizeof(ID) == sizeof(uint64_t),
+ "Cannot convert raw integer to LocIndex");
+ return {static_cast<u32_location_t>(ID >> 32),
+ static_cast<u32_index_t>(ID)};
+ }
+
+ /// Get the start of the interval reserved for VarLocs of kind RegisterKind
+ /// which reside in \p Reg. The end is at rawIndexForReg(Reg+1)-1.
+ static uint64_t rawIndexForReg(Register Reg) {
+ return LocIndex(Reg, 0).getAsRawInteger();
+ }
+
+ /// Return a range covering all set indices in the interval reserved for
+ /// \p Location in \p Set.
+ static auto indexRangeForLocation(const VarLocSet &Set,
+ u32_location_t Location) {
+ uint64_t Start = LocIndex(Location, 0).getAsRawInteger();
+ uint64_t End = LocIndex(Location + 1, 0).getAsRawInteger();
+ return Set.half_open_range(Start, End);
+ }
+};
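+
+// Worked example of the encoding above (register and index values chosen
+// arbitrarily for illustration): a VarLoc stored at index 7 of the bucket for
+// physical register 5 is identified by LocIndex(5, 7), whose raw form is
+//   (uint64_t(5) << 32) | 7 == 0x0000000500000007,
+// and LocIndex::fromRawInteger(0x0000000500000007) recovers
+// {Location = 5, Index = 7}. Because the location occupies the high 32 bits,
+// every raw ID for register 5 falls in the half-open interval
+// [rawIndexForReg(5), rawIndexForReg(6)), which is exactly what
+// indexRangeForLocation() iterates over.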
+
+// Simple Set for storing all the VarLoc Indices at a Location bucket.
+using VarLocsInRange = SmallSet<LocIndex::u32_index_t, 32>;
+// Vector of all `LocIndex`s for a given VarLoc; the same Location should not
+// appear in any two of these, as each VarLoc appears at most once in any
+// Location bucket.
+using LocIndices = SmallVector<LocIndex, 2>;
+
+class VarLocBasedLDV : public LDVImpl {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ TargetPassConfig *TPC;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ VarLocSet::Allocator Alloc;
+
+ const MachineInstr *LastNonDbgMI;
+
+ enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
+
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = std::optional<DIExpression::FragmentInfo>;
+
+ /// A pair of debug variable and value location.
+ struct VarLoc {
+ // The location at which a spilled variable resides. It consists of a
+ // register and an offset.
+ struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
+ }
+ bool operator!=(const SpillLoc &Other) const {
+ return !(*this == Other);
+ }
+ };
+
+ // Target indices used for wasm-specific locations.
+ struct WasmLoc {
+ // One of TargetIndex values defined in WebAssembly.h. We deal with
+ // local-related TargetIndex in this analysis (TI_LOCAL and
+ // TI_LOCAL_INDIRECT). Stack operands (TI_OPERAND_STACK) will be handled
+ // separately WebAssemblyDebugFixup pass, and we don't associate debug
+ // info with values in global operands (TI_GLOBAL_RELOC) at the moment.
+ int Index;
+ int64_t Offset;
+ bool operator==(const WasmLoc &Other) const {
+ return Index == Other.Index && Offset == Other.Offset;
+ }
+ bool operator!=(const WasmLoc &Other) const { return !(*this == Other); }
+ };
+
+ /// Identity of the variable at this location.
+ const DebugVariable Var;
+
+ /// The expression applied to this location.
+ const DIExpression *Expr;
+
+ /// DBG_VALUE to clone var/expr information from if this location
+ /// is moved.
+ const MachineInstr &MI;
+
+ enum class MachineLocKind {
+ InvalidKind = 0,
+ RegisterKind,
+ SpillLocKind,
+ ImmediateKind,
+ WasmLocKind
+ };
+
+ enum class EntryValueLocKind {
+ NonEntryValueKind = 0,
+ EntryValueKind,
+ EntryValueBackupKind,
+ EntryValueCopyBackupKind
+ } EVKind = EntryValueLocKind::NonEntryValueKind;
+
+ /// The value location. Stored separately to avoid repeatedly
+ /// extracting it from MI.
+ union MachineLocValue {
+ uint64_t RegNo;
+ SpillLoc SpillLocation;
+ uint64_t Hash;
+ int64_t Immediate;
+ const ConstantFP *FPImm;
+ const ConstantInt *CImm;
+ WasmLoc WasmLocation;
+ MachineLocValue() : Hash(0) {}
+ };
+
+ /// A single machine location; its Kind is either a register, spill
+ /// location, or immediate value.
+    /// If the VarLoc is an entry value kind (i.e. not NonEntryValueKind), it
+    /// will use only a single MachineLoc of RegisterKind.
+ struct MachineLoc {
+ MachineLocKind Kind;
+ MachineLocValue Value;
+ bool operator==(const MachineLoc &Other) const {
+ if (Kind != Other.Kind)
+ return false;
+ switch (Kind) {
+ case MachineLocKind::SpillLocKind:
+ return Value.SpillLocation == Other.Value.SpillLocation;
+ case MachineLocKind::WasmLocKind:
+ return Value.WasmLocation == Other.Value.WasmLocation;
+ case MachineLocKind::RegisterKind:
+ case MachineLocKind::ImmediateKind:
+ return Value.Hash == Other.Value.Hash;
+ default:
+ llvm_unreachable("Invalid kind");
+ }
+ }
+ bool operator<(const MachineLoc &Other) const {
+ switch (Kind) {
+ case MachineLocKind::SpillLocKind:
+ return std::make_tuple(
+ Kind, Value.SpillLocation.SpillBase,
+ Value.SpillLocation.SpillOffset.getFixed(),
+ Value.SpillLocation.SpillOffset.getScalable()) <
+ std::make_tuple(
+ Other.Kind, Other.Value.SpillLocation.SpillBase,
+ Other.Value.SpillLocation.SpillOffset.getFixed(),
+ Other.Value.SpillLocation.SpillOffset.getScalable());
+ case MachineLocKind::WasmLocKind:
+ return std::make_tuple(Kind, Value.WasmLocation.Index,
+ Value.WasmLocation.Offset) <
+ std::make_tuple(Other.Kind, Other.Value.WasmLocation.Index,
+ Other.Value.WasmLocation.Offset);
+ case MachineLocKind::RegisterKind:
+ case MachineLocKind::ImmediateKind:
+ return std::tie(Kind, Value.Hash) <
+ std::tie(Other.Kind, Other.Value.Hash);
+ default:
+ llvm_unreachable("Invalid kind");
+ }
+ }
+ };
+
+ /// The set of machine locations used to determine the variable's value, in
+ /// conjunction with Expr. Initially populated with MI's debug operands,
+ /// but may be transformed independently afterwards.
+ SmallVector<MachineLoc, 8> Locs;
+ /// Used to map the index of each location in Locs back to the index of its
+ /// original debug operand in MI. Used when multiple location operands are
+ /// coalesced and the original MI's operands need to be accessed while
+ /// emitting a debug value.
+ SmallVector<unsigned, 8> OrigLocMap;
+
+ VarLoc(const MachineInstr &MI)
+ : Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt()),
+ Expr(MI.getDebugExpression()), MI(MI) {
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ assert((MI.isDebugValueList() || MI.getNumOperands() == 4) &&
+ "malformed DBG_VALUE");
+ for (const MachineOperand &Op : MI.debug_operands()) {
+ MachineLoc ML = GetLocForOp(Op);
+ auto It = find(Locs, ML);
+ if (It == Locs.end()) {
+ Locs.push_back(ML);
+ OrigLocMap.push_back(MI.getDebugOperandIndex(&Op));
+ } else {
+ // ML duplicates an element in Locs; replace references to Op
+ // with references to the duplicating element.
+ unsigned OpIdx = Locs.size();
+ unsigned DuplicatingIdx = std::distance(Locs.begin(), It);
+ Expr = DIExpression::replaceArg(Expr, OpIdx, DuplicatingIdx);
+ }
+ }
+
+ // We create the debug entry values from the factory functions rather
+ // than from this ctor.
+ assert(EVKind != EntryValueLocKind::EntryValueKind &&
+ !isEntryBackupLoc());
+ }
+
+ static MachineLoc GetLocForOp(const MachineOperand &Op) {
+ MachineLocKind Kind;
+ MachineLocValue Loc;
+ if (Op.isReg()) {
+ Kind = MachineLocKind::RegisterKind;
+ Loc.RegNo = Op.getReg();
+ } else if (Op.isImm()) {
+ Kind = MachineLocKind::ImmediateKind;
+ Loc.Immediate = Op.getImm();
+ } else if (Op.isFPImm()) {
+ Kind = MachineLocKind::ImmediateKind;
+ Loc.FPImm = Op.getFPImm();
+ } else if (Op.isCImm()) {
+ Kind = MachineLocKind::ImmediateKind;
+ Loc.CImm = Op.getCImm();
+ } else if (Op.isTargetIndex()) {
+ Kind = MachineLocKind::WasmLocKind;
+ Loc.WasmLocation = {Op.getIndex(), Op.getOffset()};
+ } else
+ llvm_unreachable("Invalid Op kind for MachineLoc.");
+ return {Kind, Loc};
+ }
+
+ /// Take the variable and machine-location in DBG_VALUE MI, and build an
+ /// entry location using the given expression.
+ static VarLoc CreateEntryLoc(const MachineInstr &MI,
+ const DIExpression *EntryExpr, Register Reg) {
+ VarLoc VL(MI);
+ assert(VL.Locs.size() == 1 &&
+ VL.Locs[0].Kind == MachineLocKind::RegisterKind);
+ VL.EVKind = EntryValueLocKind::EntryValueKind;
+ VL.Expr = EntryExpr;
+ VL.Locs[0].Value.RegNo = Reg;
+ return VL;
+ }
+
+ /// Take the variable and machine-location from the DBG_VALUE (from the
+ /// function entry), and build an entry value backup location. The backup
+ /// location will turn into the normal location if the backup is valid at
+ /// the time of the primary location clobbering.
+ static VarLoc CreateEntryBackupLoc(const MachineInstr &MI,
+ const DIExpression *EntryExpr) {
+ VarLoc VL(MI);
+ assert(VL.Locs.size() == 1 &&
+ VL.Locs[0].Kind == MachineLocKind::RegisterKind);
+ VL.EVKind = EntryValueLocKind::EntryValueBackupKind;
+ VL.Expr = EntryExpr;
+ return VL;
+ }
+
+ /// Take the variable and machine-location from the DBG_VALUE (from the
+ /// function entry), and build a copy of an entry value backup location by
+ /// setting the register location to NewReg.
+ static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI,
+ const DIExpression *EntryExpr,
+ Register NewReg) {
+ VarLoc VL(MI);
+ assert(VL.Locs.size() == 1 &&
+ VL.Locs[0].Kind == MachineLocKind::RegisterKind);
+ VL.EVKind = EntryValueLocKind::EntryValueCopyBackupKind;
+ VL.Expr = EntryExpr;
+ VL.Locs[0].Value.RegNo = NewReg;
+ return VL;
+ }
+
+ /// Copy the register location in DBG_VALUE MI, updating the register to
+ /// be NewReg.
+ static VarLoc CreateCopyLoc(const VarLoc &OldVL, const MachineLoc &OldML,
+ Register NewReg) {
+ VarLoc VL = OldVL;
+ for (MachineLoc &ML : VL.Locs)
+ if (ML == OldML) {
+ ML.Kind = MachineLocKind::RegisterKind;
+ ML.Value.RegNo = NewReg;
+ return VL;
+ }
+ llvm_unreachable("Should have found OldML in new VarLoc.");
+ }
+
+ /// Take the variable described by DBG_VALUE* MI, and create a VarLoc
+ /// locating it in the specified spill location.
+ static VarLoc CreateSpillLoc(const VarLoc &OldVL, const MachineLoc &OldML,
+ unsigned SpillBase, StackOffset SpillOffset) {
+ VarLoc VL = OldVL;
+ for (MachineLoc &ML : VL.Locs)
+ if (ML == OldML) {
+ ML.Kind = MachineLocKind::SpillLocKind;
+ ML.Value.SpillLocation = {SpillBase, SpillOffset};
+ return VL;
+ }
+ llvm_unreachable("Should have found OldML in new VarLoc.");
+ }
+
+ /// Create a DBG_VALUE representing this VarLoc in the given function.
+ /// Copies variable-specific information such as DILocalVariable and
+ /// inlining information from the original DBG_VALUE instruction, which may
+ /// have been several transfers ago.
+ MachineInstr *BuildDbgValue(MachineFunction &MF) const {
+ assert(!isEntryBackupLoc() &&
+ "Tried to produce DBG_VALUE for backup VarLoc");
+ const DebugLoc &DbgLoc = MI.getDebugLoc();
+ bool Indirect = MI.isIndirectDebugValue();
+ const auto &IID = MI.getDesc();
+ const DILocalVariable *Var = MI.getDebugVariable();
+ NumInserted++;
+
+ const DIExpression *DIExpr = Expr;
+ SmallVector<MachineOperand, 8> MOs;
+ for (unsigned I = 0, E = Locs.size(); I < E; ++I) {
+ MachineLocKind LocKind = Locs[I].Kind;
+ MachineLocValue Loc = Locs[I].Value;
+ const MachineOperand &Orig = MI.getDebugOperand(OrigLocMap[I]);
+ switch (LocKind) {
+ case MachineLocKind::RegisterKind:
+ // An entry value is a register location -- but with an updated
+ // expression. The register location of such DBG_VALUE is always the
+        // one from the entry DBG_VALUE; it does not matter if the entry value
+        // was copied into another register due to some optimizations.
+ // Non-entry value register locations are like the source
+ // DBG_VALUE, but with the register number from this VarLoc.
+ MOs.push_back(MachineOperand::CreateReg(
+ EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg()
+ : Register(Loc.RegNo),
+ false));
+ break;
+ case MachineLocKind::SpillLocKind: {
+ // Spills are indirect DBG_VALUEs, with a base register and offset.
+        // Use the original DBG_VALUE's expression to build the spill location
+ // on top of. FIXME: spill locations created before this pass runs
+ // are not recognized, and not handled here.
+ unsigned Base = Loc.SpillLocation.SpillBase;
+ auto *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MI.isNonListDebugValue()) {
+ auto Deref = Indirect ? DIExpression::DerefAfter : 0;
+ DIExpr = TRI->prependOffsetExpression(
+ DIExpr, DIExpression::ApplyOffset | Deref,
+ Loc.SpillLocation.SpillOffset);
+ Indirect = true;
+ } else {
+ SmallVector<uint64_t, 4> Ops;
+ TRI->getOffsetOpcodes(Loc.SpillLocation.SpillOffset, Ops);
+ Ops.push_back(dwarf::DW_OP_deref);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I);
+ }
+ MOs.push_back(MachineOperand::CreateReg(Base, false));
+ break;
+ }
+ case MachineLocKind::ImmediateKind: {
+ MOs.push_back(Orig);
+ break;
+ }
+ case MachineLocKind::WasmLocKind: {
+ MOs.push_back(Orig);
+ break;
+ }
+ case MachineLocKind::InvalidKind:
+ llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
+ }
+ }
+ return BuildMI(MF, DbgLoc, IID, Indirect, MOs, Var, DIExpr);
+ }
+
+ /// Is the Loc field a constant or constant object?
+ bool isConstant(MachineLocKind Kind) const {
+ return Kind == MachineLocKind::ImmediateKind;
+ }
+
+ /// Check if the Loc field is an entry backup location.
+ bool isEntryBackupLoc() const {
+ return EVKind == EntryValueLocKind::EntryValueBackupKind ||
+ EVKind == EntryValueLocKind::EntryValueCopyBackupKind;
+ }
+
+ /// If this variable is described by register \p Reg holding the entry
+ /// value, return true.
+ bool isEntryValueBackupReg(Register Reg) const {
+ return EVKind == EntryValueLocKind::EntryValueBackupKind && usesReg(Reg);
+ }
+
+ /// If this variable is described by register \p Reg holding a copy of the
+ /// entry value, return true.
+ bool isEntryValueCopyBackupReg(Register Reg) const {
+ return EVKind == EntryValueLocKind::EntryValueCopyBackupKind &&
+ usesReg(Reg);
+ }
+
+ /// If this variable is described in whole or part by \p Reg, return true.
+ bool usesReg(Register Reg) const {
+ MachineLoc RegML;
+ RegML.Kind = MachineLocKind::RegisterKind;
+ RegML.Value.RegNo = Reg;
+ return is_contained(Locs, RegML);
+ }
+
+    /// Return the index of the MachineLoc in Locs that uses register \p Reg.
+ unsigned getRegIdx(Register Reg) const {
+ for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
+ if (Locs[Idx].Kind == MachineLocKind::RegisterKind &&
+ Register{static_cast<unsigned>(Locs[Idx].Value.RegNo)} == Reg)
+ return Idx;
+ llvm_unreachable("Could not find given Reg in Locs");
+ }
+
+ /// If this variable is described in whole or part by 1 or more registers,
+ /// add each of them to \p Regs and return true.
+ bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {
+ bool AnyRegs = false;
+ for (const auto &Loc : Locs)
+ if (Loc.Kind == MachineLocKind::RegisterKind) {
+ Regs.push_back(Loc.Value.RegNo);
+ AnyRegs = true;
+ }
+ return AnyRegs;
+ }
+
+ bool containsSpillLocs() const {
+ return any_of(Locs, [](VarLoc::MachineLoc ML) {
+ return ML.Kind == VarLoc::MachineLocKind::SpillLocKind;
+ });
+ }
+
+ /// If this variable is described in whole or part by \p SpillLocation,
+ /// return true.
+ bool usesSpillLoc(SpillLoc SpillLocation) const {
+ MachineLoc SpillML;
+ SpillML.Kind = MachineLocKind::SpillLocKind;
+ SpillML.Value.SpillLocation = SpillLocation;
+ return is_contained(Locs, SpillML);
+ }
+
+ /// If this variable is described in whole or part by \p SpillLocation,
+    /// return the index of the corresponding MachineLoc in Locs.
+ unsigned getSpillLocIdx(SpillLoc SpillLocation) const {
+ for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
+ if (Locs[Idx].Kind == MachineLocKind::SpillLocKind &&
+ Locs[Idx].Value.SpillLocation == SpillLocation)
+ return Idx;
+ llvm_unreachable("Could not find given SpillLoc in Locs");
+ }
+
+ bool containsWasmLocs() const {
+ return any_of(Locs, [](VarLoc::MachineLoc ML) {
+ return ML.Kind == VarLoc::MachineLocKind::WasmLocKind;
+ });
+ }
+
+ /// If this variable is described in whole or part by \p WasmLocation,
+ /// return true.
+ bool usesWasmLoc(WasmLoc WasmLocation) const {
+ MachineLoc WasmML;
+ WasmML.Kind = MachineLocKind::WasmLocKind;
+ WasmML.Value.WasmLocation = WasmLocation;
+ return is_contained(Locs, WasmML);
+ }
+
+ /// Determine whether the lexical scope of this value's debug location
+ /// dominates MBB.
+ bool dominates(LexicalScopes &LS, MachineBasicBlock &MBB) const {
+ return LS.dominates(MI.getDebugLoc().get(), &MBB);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ // TRI and TII can be null.
+ void dump(const TargetRegisterInfo *TRI, const TargetInstrInfo *TII,
+ raw_ostream &Out = dbgs()) const {
+ Out << "VarLoc(";
+ for (const MachineLoc &MLoc : Locs) {
+ if (Locs.begin() != &MLoc)
+ Out << ", ";
+ switch (MLoc.Kind) {
+ case MachineLocKind::RegisterKind:
+ Out << printReg(MLoc.Value.RegNo, TRI);
+ break;
+ case MachineLocKind::SpillLocKind:
+ Out << printReg(MLoc.Value.SpillLocation.SpillBase, TRI);
+ Out << "[" << MLoc.Value.SpillLocation.SpillOffset.getFixed() << " + "
+ << MLoc.Value.SpillLocation.SpillOffset.getScalable()
+ << "x vscale"
+ << "]";
+ break;
+ case MachineLocKind::ImmediateKind:
+ Out << MLoc.Value.Immediate;
+ break;
+ case MachineLocKind::WasmLocKind: {
+ if (TII) {
+ auto Indices = TII->getSerializableTargetIndices();
+ auto Found =
+ find_if(Indices, [&](const std::pair<int, const char *> &I) {
+ return I.first == MLoc.Value.WasmLocation.Index;
+ });
+ assert(Found != Indices.end());
+ Out << Found->second;
+ if (MLoc.Value.WasmLocation.Offset > 0)
+ Out << " + " << MLoc.Value.WasmLocation.Offset;
+ } else {
+ Out << "WasmLoc";
+ }
+ break;
+ }
+ case MachineLocKind::InvalidKind:
+ llvm_unreachable("Invalid VarLoc in dump method");
+ }
+ }
+
+ Out << ", \"" << Var.getVariable()->getName() << "\", " << *Expr << ", ";
+ if (Var.getInlinedAt())
+ Out << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
+ else
+ Out << "(null))";
+
+ if (isEntryBackupLoc())
+ Out << " (backup loc)\n";
+ else
+ Out << "\n";
+ }
+#endif
+
+ bool operator==(const VarLoc &Other) const {
+ return std::tie(EVKind, Var, Expr, Locs) ==
+ std::tie(Other.EVKind, Other.Var, Other.Expr, Other.Locs);
+ }
+
+ /// This operator guarantees that VarLocs are sorted by Variable first.
+ bool operator<(const VarLoc &Other) const {
+ return std::tie(Var, EVKind, Locs, Expr) <
+ std::tie(Other.Var, Other.EVKind, Other.Locs, Other.Expr);
+ }
+ };
+
+#ifndef NDEBUG
+ using VarVec = SmallVector<VarLoc, 32>;
+#endif
+
+ /// VarLocMap is used for two things:
+ /// 1) Assigning LocIndices to a VarLoc. The LocIndices can be used to
+ /// virtually insert a VarLoc into a VarLocSet.
+ /// 2) Given a LocIndex, look up the unique associated VarLoc.
+ class VarLocMap {
+ /// Map a VarLoc to an index within the vector reserved for its location
+ /// within Loc2Vars.
+ std::map<VarLoc, LocIndices> Var2Indices;
+
+ /// Map a location to a vector which holds VarLocs which live in that
+ /// location.
+ SmallDenseMap<LocIndex::u32_location_t, std::vector<VarLoc>> Loc2Vars;
+
+ public:
+ /// Retrieve LocIndices for \p VL.
+ LocIndices insert(const VarLoc &VL) {
+ LocIndices &Indices = Var2Indices[VL];
+ // If Indices is not empty, VL is already in the map.
+ if (!Indices.empty())
+ return Indices;
+ SmallVector<LocIndex::u32_location_t, 4> Locations;
+ // LocIndices are determined by EVKind and MLs; each Register has a
+ // unique location, while all SpillLocs use a single bucket, and any EV
+ // VarLocs use only the Backup bucket or none at all (except the
+ // compulsory entry at the universal location index). LocIndices will
+ // always have an index at the universal location index as the last index.
+ if (VL.EVKind == VarLoc::EntryValueLocKind::NonEntryValueKind) {
+ VL.getDescribingRegs(Locations);
+ assert(all_of(Locations,
+ [](auto RegNo) {
+ return RegNo < LocIndex::kFirstInvalidRegLocation;
+ }) &&
+ "Physreg out of range?");
+ if (VL.containsSpillLocs())
+ Locations.push_back(LocIndex::kSpillLocation);
+ if (VL.containsWasmLocs())
+ Locations.push_back(LocIndex::kWasmLocation);
+ } else if (VL.EVKind != VarLoc::EntryValueLocKind::EntryValueKind) {
+ LocIndex::u32_location_t Loc = LocIndex::kEntryValueBackupLocation;
+ Locations.push_back(Loc);
+ }
+ Locations.push_back(LocIndex::kUniversalLocation);
+ for (LocIndex::u32_location_t Location : Locations) {
+ auto &Vars = Loc2Vars[Location];
+ Indices.push_back(
+ {Location, static_cast<LocIndex::u32_index_t>(Vars.size())});
+ Vars.push_back(VL);
+ }
+ return Indices;
+ }
+
+ LocIndices getAllIndices(const VarLoc &VL) const {
+ auto IndIt = Var2Indices.find(VL);
+ assert(IndIt != Var2Indices.end() && "VarLoc not tracked");
+ return IndIt->second;
+ }
+
+ /// Retrieve the unique VarLoc associated with \p ID.
+ const VarLoc &operator[](LocIndex ID) const {
+ auto LocIt = Loc2Vars.find(ID.Location);
+ assert(LocIt != Loc2Vars.end() && "Location not tracked");
+ return LocIt->second[ID.Index];
+ }
+ };
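+
+  // Worked example of the bucketing performed by insert() above (the register
+  // number is hypothetical): a non-entry-value VarLoc whose Locs contain
+  // register 5 and one spill slot receives three LocIndices: one in the
+  // bucket for register 5, one in the shared kSpillLocation bucket, and one
+  // in the kUniversalLocation bucket. The universal entry is always pushed
+  // last, so getAllIndices(VL).back() is the single ID to use when a caller
+  // needs exactly one index per VarLoc.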
+
+ using VarLocInMBB =
+ SmallDenseMap<const MachineBasicBlock *, std::unique_ptr<VarLocSet>>;
+ struct TransferDebugPair {
+ MachineInstr *TransferInst; ///< Instruction where this transfer occurs.
+ LocIndex LocationID; ///< Location number for the transfer dest.
+ };
+ using TransferMap = SmallVector<TransferDebugPair, 4>;
+ // Types for recording Entry Var Locations emitted by a single MachineInstr,
+  // as well as recording the MachineInstr which last defined a register.
+ using InstToEntryLocMap = std::multimap<const MachineInstr *, LocIndex>;
+ using RegDefToInstMap = DenseMap<Register, MachineInstr *>;
+
+ // Types for recording sets of variable fragments that overlap. For a given
+ // local variable, we record all other fragments of that variable that could
+ // overlap it, to reduce search time.
+ using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+ using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
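+
+  // For illustration (hypothetical fragments): if one DBG_VALUE describes
+  // variable "x" with !DIExpression(DW_OP_LLVM_fragment, 0, 32) and another
+  // with !DIExpression(DW_OP_LLVM_fragment, 16, 32), the fragments cover bits
+  // [0, 32) and [16, 48) and so overlap in [16, 32). OverlapMap records that
+  // overlap, so that ending the open range of one fragment can also end the
+  // open ranges of the fragments it overlaps.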
+
+ /// Collects all VarLocs from \p CollectFrom. Each unique VarLoc is added
+ /// to \p Collected once, in order of insertion into \p VarLocIDs.
+ static void collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs);
+
+ /// Get the registers which are used by VarLocs of kind RegisterKind tracked
+ /// by \p CollectFrom.
+ void getUsedRegs(const VarLocSet &CollectFrom,
+ SmallVectorImpl<Register> &UsedRegs) const;
+
+ /// This holds the working set of currently open ranges. For fast
+ /// access, this is done both as a set of VarLocIDs, and a map of
+ /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
+ /// previous open ranges for the same variable. In addition, we keep
+ /// two different maps (Vars/EntryValuesBackupVars), so erase/insert
+  /// methods act differently depending on whether a VarLoc is a primary
+  /// location or a backup one. If the VarLoc is a backup location, we
+  /// erase/insert from the EntryValuesBackupVars map; otherwise we perform
+  /// the operation on Vars.
+ class OpenRangesSet {
+ VarLocSet::Allocator &Alloc;
+ VarLocSet VarLocs;
+ // Map the DebugVariable to recent primary location ID.
+ SmallDenseMap<DebugVariable, LocIndices, 8> Vars;
+ // Map the DebugVariable to recent backup location ID.
+ SmallDenseMap<DebugVariable, LocIndices, 8> EntryValuesBackupVars;
+ OverlapMap &OverlappingFragments;
+
+ public:
+ OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap)
+ : Alloc(Alloc), VarLocs(Alloc), OverlappingFragments(_OLapMap) {}
+
+ const VarLocSet &getVarLocs() const { return VarLocs; }
+
+ // Fetches all VarLocs in \p VarLocIDs and inserts them into \p Collected.
+ // This method is needed to get every VarLoc once, as each VarLoc may have
+ // multiple indices in a VarLocMap (corresponding to each applicable
+ // location), but all VarLocs appear exactly once at the universal location
+ // index.
+ void getUniqueVarLocs(SmallVectorImpl<VarLoc> &Collected,
+ const VarLocMap &VarLocIDs) const {
+ collectAllVarLocs(Collected, VarLocs, VarLocIDs);
+ }
+
+ /// Terminate all open ranges for VL.Var by removing it from the set.
+ void erase(const VarLoc &VL);
+
+ /// Terminate all open ranges listed as indices in \c KillSet with
+ /// \c Location by removing them from the set.
+ void erase(const VarLocsInRange &KillSet, const VarLocMap &VarLocIDs,
+ LocIndex::u32_location_t Location);
+
+ /// Insert a new range into the set.
+ void insert(LocIndices VarLocIDs, const VarLoc &VL);
+
+ /// Insert a set of ranges.
+ void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map);
+
+ std::optional<LocIndices> getEntryValueBackup(DebugVariable Var);
+
+ /// Empty the set.
+ void clear() {
+ VarLocs.clear();
+ Vars.clear();
+ EntryValuesBackupVars.clear();
+ }
+
+ /// Return whether the set is empty or not.
+ bool empty() const {
+ assert(Vars.empty() == EntryValuesBackupVars.empty() &&
+ Vars.empty() == VarLocs.empty() &&
+ "open ranges are inconsistent");
+ return VarLocs.empty();
+ }
+
+ /// Get an empty range of VarLoc IDs.
+ auto getEmptyVarLocRange() const {
+ return iterator_range<VarLocSet::const_iterator>(getVarLocs().end(),
+ getVarLocs().end());
+ }
+
+ /// Get all set IDs for VarLocs with MLs of kind RegisterKind in \p Reg.
+ auto getRegisterVarLocs(Register Reg) const {
+ return LocIndex::indexRangeForLocation(getVarLocs(), Reg);
+ }
+
+ /// Get all set IDs for VarLocs with MLs of kind SpillLocKind.
+ auto getSpillVarLocs() const {
+ return LocIndex::indexRangeForLocation(getVarLocs(),
+ LocIndex::kSpillLocation);
+ }
+
+ /// Get all set IDs for VarLocs of EVKind EntryValueBackupKind or
+ /// EntryValueCopyBackupKind.
+ auto getEntryValueBackupVarLocs() const {
+ return LocIndex::indexRangeForLocation(
+ getVarLocs(), LocIndex::kEntryValueBackupLocation);
+ }
+
+ /// Get all set IDs for VarLocs with MLs of kind WasmLocKind.
+ auto getWasmVarLocs() const {
+ return LocIndex::indexRangeForLocation(getVarLocs(),
+ LocIndex::kWasmLocation);
+ }
+ };
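+
+  // Sketch of the intended use of these queries, under the assumption that
+  // callers follow this pattern: when an instruction clobbers the registers
+  // in a DefinedRegsSet Regs, the affected open ranges can be gathered and
+  // closed with
+  //   VarLocsInRange KillSet;
+  //   collectIDsForRegs(KillSet, Regs, OpenRanges.getVarLocs(), VarLocIDs);
+  //   OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kUniversalLocation);
+  // The per-location buckets keep the lookup cheap, while the erase is keyed
+  // on the universal-location indices that collectIDsForRegs collects.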
+
+ /// Collect all VarLoc IDs from \p CollectFrom for VarLocs with MLs of kind
+ /// RegisterKind which are located in any reg in \p Regs. The IDs for each
+ /// VarLoc correspond to entries in the universal location bucket, which every
+ /// VarLoc has exactly 1 entry for. Insert collected IDs into \p Collected.
+ static void collectIDsForRegs(VarLocsInRange &Collected,
+ const DefinedRegsSet &Regs,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs);
+
+ VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) {
+ std::unique_ptr<VarLocSet> &VLS = Locs[MBB];
+ if (!VLS)
+ VLS = std::make_unique<VarLocSet>(Alloc);
+ return *VLS;
+ }
+
+ const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB,
+ const VarLocInMBB &Locs) const {
+ auto It = Locs.find(MBB);
+ assert(It != Locs.end() && "MBB not in map");
+ return *It->second;
+ }
+
+ /// Tests whether this instruction is a spill to a stack location.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if @MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ Register &Reg);
+
+ /// Returns true if the given machine instruction is a debug value which we
+ /// can emit entry values for.
+ ///
+ /// Currently, we generate debug entry values only for parameters that are
+ /// unmodified throughout the function and located in a register.
+ bool isEntryValueCandidate(const MachineInstr &MI,
+ const DefinedRegsSet &Regs) const;
+
+  /// If a given instruction is identified as a restore from a spill slot,
+  /// return the spill location and set \p Reg to the restored register.
+ std::optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF,
+ Register &Reg);
+ /// Given a spill instruction, extract the register and offset used to
+ /// address the spill location in a target independent way.
+ VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
+ void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ TransferMap &Transfers, VarLocMap &VarLocIDs,
+ LocIndex OldVarID, TransferKind Kind,
+ const VarLoc::MachineLoc &OldLoc,
+ Register NewReg = Register());
+
+ void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
+ void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void cleanupEntryValueTransfers(const MachineInstr *MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers);
+ void removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
+ void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ VarLocsInRange &KillSet);
+ void recordEntryValue(const MachineInstr &MI,
+ const DefinedRegsSet &DefinedRegs,
+ OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs);
+ void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
+ void transferWasmDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs);
+ bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+
+ void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
+
+ void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
+ OverlapMap &OLapMap);
+
+ bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
+
+ /// Create DBG_VALUE insts for inlocs that have been propagated but
+ /// had their instruction creation deferred.
+ void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
+
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
+
+public:
+ /// Default construct and initialize the pass.
+ VarLocBasedLDV();
+
+ ~VarLocBasedLDV();
+
+ /// Print to ostream with a message.
+ void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V,
+ const VarLocMap &VarLocIDs, const char *msg,
+ raw_ostream &Out) const;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+VarLocBasedLDV::VarLocBasedLDV() = default;
+
+VarLocBasedLDV::~VarLocBasedLDV() = default;
+
+/// Erase a variable from the set of open ranges, and additionally erase any
+/// fragments that may overlap it. If the VarLoc is a backup location, erase
+/// the variable from the EntryValuesBackupVars set, indicating we should stop
+/// tracking its backup entry location. Otherwise, if the VarLoc is a primary
+/// location, erase the variable from the Vars set.
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
+ // Erasure helper.
+ auto DoErase = [&VL, this](DebugVariable VarToErase) {
+ auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ auto It = EraseFrom->find(VarToErase);
+ if (It != EraseFrom->end()) {
+ LocIndices IDs = It->second;
+ for (LocIndex ID : IDs)
+ VarLocs.reset(ID.getAsRawInteger());
+ EraseFrom->erase(It);
+ }
+ };
+
+ DebugVariable Var = VL.Var;
+
+ // Erase the variable/fragment that ends here.
+ DoErase(Var);
+
+ // Extract the fragment. Interpret an empty fragment as one that covers all
+ // possible bits.
+ FragmentInfo ThisFragment = Var.getFragmentOrDefault();
+
+ // There may be fragments that overlap the designated fragment. Look them up
+ // in the pre-computed overlap map, and erase them too.
+ auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment});
+ if (MapIt != OverlappingFragments.end()) {
+ for (auto Fragment : MapIt->second) {
+ VarLocBasedLDV::OptFragmentInfo FragmentHolder;
+ if (!DebugVariable::isDefaultFragment(Fragment))
+ FragmentHolder = VarLocBasedLDV::OptFragmentInfo(Fragment);
+ DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()});
+ }
+ }
+}
+
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLocsInRange &KillSet,
+ const VarLocMap &VarLocIDs,
+ LocIndex::u32_location_t Location) {
+ VarLocSet RemoveSet(Alloc);
+ for (LocIndex::u32_index_t ID : KillSet) {
+ const VarLoc &VL = VarLocIDs[LocIndex(Location, ID)];
+ auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ EraseFrom->erase(VL.Var);
+ LocIndices VLI = VarLocIDs.getAllIndices(VL);
+ for (LocIndex ID : VLI)
+ RemoveSet.set(ID.getAsRawInteger());
+ }
+ VarLocs.intersectWithComplement(RemoveSet);
+}
+
+void VarLocBasedLDV::OpenRangesSet::insertFromLocSet(const VarLocSet &ToLoad,
+ const VarLocMap &Map) {
+ VarLocsInRange UniqueVarLocIDs;
+ DefinedRegsSet Regs;
+ Regs.insert(LocIndex::kUniversalLocation);
+ collectIDsForRegs(UniqueVarLocIDs, Regs, ToLoad, Map);
+ for (uint64_t ID : UniqueVarLocIDs) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VarL = Map[Idx];
+ const LocIndices Indices = Map.getAllIndices(VarL);
+ insert(Indices, VarL);
+ }
+}
+
+void VarLocBasedLDV::OpenRangesSet::insert(LocIndices VarLocIDs,
+ const VarLoc &VL) {
+ auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ for (LocIndex ID : VarLocIDs)
+ VarLocs.set(ID.getAsRawInteger());
+ InsertInto->insert({VL.Var, VarLocIDs});
+}
+
+/// Return the Loc ID of an entry value backup location, if it exists for the
+/// variable.
+std::optional<LocIndices>
+VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
+ auto It = EntryValuesBackupVars.find(Var);
+ if (It != EntryValuesBackupVars.end())
+ return It->second;
+
+ return std::nullopt;
+}
+
+void VarLocBasedLDV::collectIDsForRegs(VarLocsInRange &Collected,
+ const DefinedRegsSet &Regs,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs) {
+ assert(!Regs.empty() && "Nothing to collect");
+ SmallVector<Register, 32> SortedRegs;
+ append_range(SortedRegs, Regs);
+ array_pod_sort(SortedRegs.begin(), SortedRegs.end());
+ auto It = CollectFrom.find(LocIndex::rawIndexForReg(SortedRegs.front()));
+ auto End = CollectFrom.end();
+ for (Register Reg : SortedRegs) {
+ // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains
+ // all possible VarLoc IDs for VarLocs with MLs of kind RegisterKind which
+ // live in Reg.
+ uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg);
+ uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1);
+ It.advanceToLowerBound(FirstIndexForReg);
+
+ // Iterate through that half-open interval and collect all the set IDs.
+ for (; It != End && *It < FirstInvalidIndex; ++It) {
+ LocIndex ItIdx = LocIndex::fromRawInteger(*It);
+ const VarLoc &VL = VarLocIDs[ItIdx];
+ LocIndices LI = VarLocIDs.getAllIndices(VL);
+ // For now, the back index is always the universal location index.
+ assert(LI.back().Location == LocIndex::kUniversalLocation &&
+ "Unexpected order of LocIndices for VarLoc; was it inserted into "
+ "the VarLocMap correctly?");
+ Collected.insert(LI.back().Index);
+ }
+
+ if (It == End)
+ return;
+ }
+}
+
+void VarLocBasedLDV::getUsedRegs(const VarLocSet &CollectFrom,
+ SmallVectorImpl<Register> &UsedRegs) const {
+ // All register-based VarLocs are assigned indices greater than or equal to
+ // FirstRegIndex.
+ uint64_t FirstRegIndex =
+ LocIndex::rawIndexForReg(LocIndex::kFirstRegLocation);
+ uint64_t FirstInvalidIndex =
+ LocIndex::rawIndexForReg(LocIndex::kFirstInvalidRegLocation);
+ for (auto It = CollectFrom.find(FirstRegIndex),
+ End = CollectFrom.find(FirstInvalidIndex);
+ It != End;) {
+ // We found a VarLoc ID for a VarLoc that lives in a register. Figure out
+ // which register and add it to UsedRegs.
+ uint32_t FoundReg = LocIndex::fromRawInteger(*It).Location;
+ assert((UsedRegs.empty() || FoundReg != UsedRegs.back()) &&
+ "Duplicate used reg");
+ UsedRegs.push_back(FoundReg);
+
+ // Skip to the next /set/ register. Note that this finds a lower bound, so
+ // even if there aren't any VarLocs living in `FoundReg+1`, we're still
+ // guaranteed to move on to the next register (or to end()).
+ uint64_t NextRegIndex = LocIndex::rawIndexForReg(FoundReg + 1);
+ It.advanceToLowerBound(NextRegIndex);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF,
+ const VarLocInMBB &V,
+ const VarLocMap &VarLocIDs,
+ const char *msg,
+ raw_ostream &Out) const {
+ Out << '\n' << msg << '\n';
+ for (const MachineBasicBlock &BB : MF) {
+ if (!V.count(&BB))
+ continue;
+ const VarLocSet &L = getVarLocsInMBB(&BB, V);
+ if (L.empty())
+ continue;
+ SmallVector<VarLoc, 32> VarLocs;
+ collectAllVarLocs(VarLocs, L, VarLocIDs);
+ Out << "MBB: " << BB.getNumber() << ":\n";
+ for (const VarLoc &VL : VarLocs) {
+ Out << " Var: " << VL.Var.getVariable()->getName();
+ Out << " MI: ";
+ VL.dump(TRI, TII, Out);
+ }
+ }
+ Out << "\n";
+}
+#endif
+
+VarLocBasedLDV::VarLoc::SpillLoc
+VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ Register Reg;
+ StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ return {Reg, Offset};
+}
+
+/// Clean up \p EntryValTransfers for \p TRInst by removing the transfer that
+/// uses the to-be-deleted \p EntryVL.
+void VarLocBasedLDV::cleanupEntryValueTransfers(
+ const MachineInstr *TRInst, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL, InstToEntryLocMap &EntryValTransfers) {
+ if (EntryValTransfers.empty() || TRInst == nullptr)
+ return;
+
+ auto TransRange = EntryValTransfers.equal_range(TRInst);
+ for (auto &TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
+ const VarLoc &EmittedEV = VarLocIDs[TDPair.second];
+ if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) ==
+ std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo,
+ EmittedEV.Expr)) {
+ OpenRanges.erase(EmittedEV);
+ EntryValTransfers.erase(TRInst);
+ break;
+ }
+ }
+}
+
+/// Try to salvage the debug entry value if we encounter a new debug value
+/// describing the same parameter; otherwise stop tracking the value and clean
+/// up any Entry Value Transfers that were already emitted for it.
+void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+ // Skip the DBG_VALUE which is the debug entry value itself.
+ if (&MI == &EntryVL.MI)
+ return;
+
+  // If the parameter's location is not a register location, we can no longer
+  // track the entry value. There is no TransferInst defining the register, so
+  // no Entry Value Transfers have been emitted yet.
+ if (!MI.getDebugOperand(0).isReg())
+ return;
+
+ // Try to get non-debug instruction responsible for the DBG_VALUE.
+ const MachineInstr *TransferInst = nullptr;
+ Register Reg = MI.getDebugOperand(0).getReg();
+ if (Reg.isValid() && RegSetInstrs.contains(Reg))
+ TransferInst = RegSetInstrs.find(Reg)->second;
+
+ // Case of the parameter's DBG_VALUE at the start of entry MBB.
+ if (!TransferInst && !LastNonDbgMI && MI.getParent()->isEntryBlock())
+ return;
+
+ // If the debug expression from the DBG_VALUE is not empty, we can assume the
+ // parameter's value has changed indicating that we should stop tracking its
+ // entry value as well.
+ if (MI.getDebugExpression()->getNumElements() == 0 && TransferInst) {
+ // If the DBG_VALUE comes from a copy instruction that copies the entry
+ // value, it means the parameter's value has not changed and we should be
+ // able to use its entry value.
+    // TODO: Try to keep tracking an entry value if we encounter a propagated
+ // DBG_VALUE describing the copy of the entry value. (Propagated entry value
+ // does not indicate the parameter modification.)
+ auto DestSrc = TII->isCopyInstr(*TransferInst);
+ if (DestSrc) {
+ const MachineOperand *SrcRegOp, *DestRegOp;
+ SrcRegOp = DestSrc->Source;
+ DestRegOp = DestSrc->Destination;
+ if (Reg == DestRegOp->getReg()) {
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
+ if (VL.isEntryValueCopyBackupReg(Reg) &&
+ // Entry Values should not be variadic.
+ VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
+ return;
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+ MI.print(dbgs(), /*IsStandalone*/ false,
+ /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+ /*AddNewLine*/ true, TII));
+ cleanupEntryValueTransfers(TransferInst, OpenRanges, VarLocIDs, EntryVL,
+ EntryValTransfers);
+ OpenRanges.erase(EntryVL);
+}
+
+/// End all previous ranges related to @MI and start a new range from @MI
+/// if it is a DBG_VALUE instr.
+void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+ if (!MI.isDebugValue())
+ return;
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+
+ // Check if this DBG_VALUE indicates a parameter's value changing.
+ // If that is the case, we should stop tracking its entry value.
+ auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
+ if (Var->isParameter() && EntryValBackupID) {
+ const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()];
+ removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL, EntryValTransfers,
+ RegSetInstrs);
+ }
+
+ if (all_of(MI.debug_operands(), [](const MachineOperand &MO) {
+ return (MO.isReg() && MO.getReg()) || MO.isImm() || MO.isFPImm() ||
+ MO.isCImm() || MO.isTargetIndex();
+ })) {
+ // Use normal VarLoc constructor for registers and immediates.
+ VarLoc VL(MI);
+ // End all previous ranges of VL.Var.
+ OpenRanges.erase(VL);
+
+ LocIndices IDs = VarLocIDs.insert(VL);
+ // Add the VarLoc to OpenRanges from this DBG_VALUE.
+ OpenRanges.insert(IDs, VL);
+ } else if (MI.memoperands().size() > 0) {
+ llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
+ } else {
+ // This must be an undefined location. If it has an open range, erase it.
+ assert(MI.isUndefDebugValue() &&
+ "Unexpected non-undef DBG_VALUE encountered");
+ VarLoc VL(MI);
+ OpenRanges.erase(VL);
+ }
+}
+
+// This should be removed later; it doesn't fit the new design.
+void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs) {
+  // The half-open interval [FirstIndex, FirstInvalidIndex) covers the
+  // universal location bucket, which contains every VarLoc exactly once.
+ uint64_t FirstIndex = LocIndex::rawIndexForReg(LocIndex::kUniversalLocation);
+ uint64_t FirstInvalidIndex =
+ LocIndex::rawIndexForReg(LocIndex::kUniversalLocation + 1);
+ // Iterate through that half-open interval and collect all the set IDs.
+ for (auto It = CollectFrom.find(FirstIndex), End = CollectFrom.end();
+ It != End && *It < FirstInvalidIndex; ++It) {
+ LocIndex RegIdx = LocIndex::fromRawInteger(*It);
+ Collected.push_back(VarLocIDs[RegIdx]);
+ }
+}
+
+/// Turn the entry value backup locations into primary locations.
+void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ VarLocsInRange &KillSet) {
+ // Do not insert entry value locations after a terminator.
+ if (MI.isTerminator())
+ return;
+
+ for (uint32_t ID : KillSet) {
+ // The KillSet IDs are indices for the universal location bucket.
+ LocIndex Idx = LocIndex(LocIndex::kUniversalLocation, ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ if (!VL.Var.getVariable()->isParameter())
+ continue;
+
+ auto DebugVar = VL.Var;
+ std::optional<LocIndices> EntryValBackupIDs =
+ OpenRanges.getEntryValueBackup(DebugVar);
+
+ // If the parameter has the entry value backup, it means we should
+ // be able to use its entry value.
+ if (!EntryValBackupIDs)
+ continue;
+
+ const VarLoc &EntryVL = VarLocIDs[EntryValBackupIDs->back()];
+ VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, EntryVL.Expr,
+ EntryVL.Locs[0].Value.RegNo);
+ LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
+ assert(EntryValueIDs.size() == 1 &&
+ "EntryValue loc should not be variadic");
+ EntryValTransfers.insert({&MI, EntryValueIDs.back()});
+ OpenRanges.insert(EntryValueIDs, EntryLoc);
+ }
+}
+
+/// Create a new TransferDebugPair and insert it in \p Transfers. The VarLoc
+/// with \p OldVarID is deleted from \p OpenRanges and replaced with the new
+/// VarLoc. If \p NewReg is different from the default zero value, the new
+/// location is a register location created by a copy-like instruction;
+/// otherwise it is the variable's location on the stack.
+void VarLocBasedLDV::insertTransferDebugPair(
+ MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
+ VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind,
+ const VarLoc::MachineLoc &OldLoc, Register NewReg) {
+ const VarLoc &OldVarLoc = VarLocIDs[OldVarID];
+
+ auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) {
+ LocIndices LocIds = VarLocIDs.insert(VL);
+
+ // Close this variable's previous location range.
+ OpenRanges.erase(VL);
+
+ // Record the new location as an open range, and a postponed transfer
+ // inserting a DBG_VALUE for this location.
+ OpenRanges.insert(LocIds, VL);
+ assert(!MI.isTerminator() && "Cannot insert DBG_VALUE after terminator");
+ TransferDebugPair MIP = {&MI, LocIds.back()};
+ Transfers.push_back(MIP);
+ };
+
+ // End all previous ranges of VL.Var.
+ OpenRanges.erase(VarLocIDs[OldVarID]);
+ switch (Kind) {
+ case TransferKind::TransferCopy: {
+ assert(NewReg &&
+ "No register supplied when handling a copy of a debug value");
+ // Create a DBG_VALUE instruction to describe the Var in its new
+ // register location.
+ VarLoc VL = VarLoc::CreateCopyLoc(OldVarLoc, OldLoc, NewReg);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for register copy:";
+ VL.dump(TRI, TII);
+ });
+ return;
+ }
+ case TransferKind::TransferSpill: {
+ // Create a DBG_VALUE instruction to describe the Var in its spilled
+ // location.
+ VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+ VarLoc VL = VarLoc::CreateSpillLoc(
+ OldVarLoc, OldLoc, SpillLocation.SpillBase, SpillLocation.SpillOffset);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for spill:";
+ VL.dump(TRI, TII);
+ });
+ return;
+ }
+ case TransferKind::TransferRestore: {
+ assert(NewReg &&
+ "No register supplied when handling a restore of a debug value");
+ // DebugInstr refers to the pre-spill location, therefore we can reuse
+ // its expression.
+ VarLoc VL = VarLoc::CreateCopyLoc(OldVarLoc, OldLoc, NewReg);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for restore:";
+ VL.dump(TRI, TII);
+ });
+ return;
+ }
+ }
+ llvm_unreachable("Invalid transfer kind");
+}
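+// Illustrative aside: transfers are recorded rather than materialized right
+// away, because inserting instructions while walking a block would invalidate
+// the traversal; the DBG_VALUEs are built from the recorded pairs afterwards.
+// A self-contained sketch of that deferred-emission pattern (hypothetical
+// names, not the LLVM API):
+//
+//   #include <utility>
+//   #include <vector>
+//
+//   struct Instr {};
+//   struct NewLoc {};
+//   std::vector<std::pair<Instr *, NewLoc>> PendingTransfers;
+//
+//   // During the scan: only record what to emit, never mutate the block.
+//   void recordTransfer(Instr *After, NewLoc L) {
+//     PendingTransfers.emplace_back(After, L);
+//   }
+//
+//   // After the scan: materialize everything in one pass.
+//   template <typename EmitFn> void flushTransfers(EmitFn Emit) {
+//     for (auto &P : PendingTransfers)
+//       Emit(P.first, P.second); // e.g. insert a DBG_VALUE right after P.first
+//     PendingTransfers.clear();
+//   }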
+
+/// A definition of a register may mark the end of a range.
+void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+
+ // Meta Instructions do not affect the debug liveness of any register they
+ // define.
+ if (MI.isMetaInstruction())
+ return;
+
+ MachineFunction *MF = MI.getMF();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+
+ // Find the regs killed by MI, and find regmasks of preserved regs.
+ DefinedRegsSet DeadRegs;
+ SmallVector<const uint32_t *, 4> RegMasks;
+ for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def.
+ if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical() &&
+ !(MI.isCall() && MO.getReg() == SP)) {
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ // FIXME: Can we break out of this loop early if no insertion occurs?
+ DeadRegs.insert(*RAI);
+ RegSetInstrs.erase(MO.getReg());
+ RegSetInstrs.insert({MO.getReg(), &MI});
+ } else if (MO.isRegMask()) {
+ RegMasks.push_back(MO.getRegMask());
+ }
+ }
+
+ // Erase VarLocs which reside in one of the dead registers. For performance
+ // reasons, it's critical to not iterate over the full set of open VarLocs.
+ // Iterate over the set of dying/used regs instead.
+ if (!RegMasks.empty()) {
+ SmallVector<Register, 32> UsedRegs;
+ getUsedRegs(OpenRanges.getVarLocs(), UsedRegs);
+ for (Register Reg : UsedRegs) {
+ // Remove ranges of all clobbered registers. Register masks don't usually
+ // list SP as preserved. Assume that call instructions never clobber SP,
+ // because some backends (e.g., AArch64) never list SP in the regmask.
+ // While the debug info may be off for an instruction or two around
+ // callee-cleanup calls, transferring the DEBUG_VALUE across the call is
+ // still a better user experience.
+ if (Reg == SP)
+ continue;
+ bool AnyRegMaskKillsReg =
+ any_of(RegMasks, [Reg](const uint32_t *RegMask) {
+ return MachineOperand::clobbersPhysReg(RegMask, Reg);
+ });
+ if (AnyRegMaskKillsReg) {
+ DeadRegs.insert(Reg);
+ RegSetInstrs.erase(Reg);
+ RegSetInstrs.insert({Reg, &MI});
+ }
+ }
+ }
+
+ if (DeadRegs.empty())
+ return;
+
+ VarLocsInRange KillSet;
+ collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs(), VarLocIDs);
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kUniversalLocation);
+
+ if (TPC) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (TM.Options.ShouldEmitDebugEntryValues())
+ emitEntryValues(MI, OpenRanges, VarLocIDs, EntryValTransfers, KillSet);
+ }
+}
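+// Illustrative aside: the regmask test above relies on the convention that a
+// register mask is a bit vector with one bit per physical register, where a
+// set bit marks the register as preserved across the instruction. Under that
+// assumption, the clobber check reduces to (sketch, not the LLVM source):
+//
+//   #include <cstdint>
+//
+//   // True if PhysReg is clobbered by the call described by RegMask
+//   // (bit set => preserved, bit clear => clobbered).
+//   bool clobbersPhysRegSketch(const uint32_t *RegMask, unsigned PhysReg) {
+//     return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
+//   }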
+
+void VarLocBasedLDV::transferWasmDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
+ // If this is not a Wasm local.set or local.tee, which sets local values,
+ // return.
+ int Index;
+ int64_t Offset;
+ if (!TII->isExplicitTargetIndexDef(MI, Index, Offset))
+ return;
+
+ // Find the target indices killed by MI, and delete those variable locations
+ // from the open range.
+ VarLocsInRange KillSet;
+ VarLoc::WasmLoc Loc{Index, Offset};
+ for (uint64_t ID : OpenRanges.getWasmVarLocs()) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ assert(VL.containsWasmLocs() && "Broken VarLocSet?");
+ if (VL.usesWasmLoc(Loc))
+ KillSet.insert(ID);
+ }
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kWasmLocation);
+}
+
+bool VarLocBasedLDV::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF) {
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+ return false; // This is not a spill instruction, since no valid size was
+ // returned from either function.
+
+ return true;
+}
+
+bool VarLocBasedLDV::isLocationSpill(const MachineInstr &MI,
+ MachineFunction *MF, Register &Reg) {
+ if (!isSpillInstruction(MI, MF))
+ return false;
+
+ auto isKilledReg = [&](const MachineOperand &MO, Register &Reg) {
+ if (!MO.isReg() || !MO.isUse()) {
+ Reg = 0;
+ return false;
+ }
+ Reg = MO.getReg();
+ return MO.isKill();
+ };
+
+ for (const MachineOperand &MO : MI.operands()) {
+ // In a spill instruction generated by the InlineSpiller the spilled
+ // register has its kill flag set.
+ if (isKilledReg(MO, Reg))
+ return true;
+ if (Reg != 0) {
+ // Check whether next instruction kills the spilled register.
+ // FIXME: Current solution does not cover search for killed register in
+ // bundles and instructions further down the chain.
+ auto NextI = std::next(MI.getIterator());
+ // Skip next instruction that points to basic block end iterator.
+ if (MI.getParent()->end() == NextI)
+ continue;
+ Register RegNext;
+ for (const MachineOperand &MONext : NextI->operands()) {
+ // Return true if we came across the register from the
+ // previous spill instruction that is killed in NextI.
+ if (isKilledReg(MONext, RegNext) && RegNext == Reg)
+ return true;
+ }
+ }
+ }
+ // Return false if we didn't find the spilled register.
+ return false;
+}
+
+std::optional<VarLocBasedLDV::VarLoc::SpillLoc>
+VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, Register &Reg) {
+ if (!MI.hasOneMemOperand())
+ return std::nullopt;
+
+ // FIXME: Handle folded restore instructions with more than one memory
+ // operand.
+ if (MI.getRestoreSize(TII)) {
+ Reg = MI.getOperand(0).getReg();
+ return extractSpillBaseRegAndOffset(MI);
+ }
+ return std::nullopt;
+}
+
+/// A spilled register may indicate that we have to end the current range of
+/// a variable and create a new one for the spill location.
+/// A restored register may indicate the reverse situation.
+/// We don't want to insert any instructions in process(), so we just create
+/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
+/// It will be inserted into the BB when we're done iterating over the
+/// instructions.
+void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers) {
+ MachineFunction *MF = MI.getMF();
+ TransferKind TKind;
+ Register Reg;
+ std::optional<VarLoc::SpillLoc> Loc;
+
+ LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+ // First, if there are any DBG_VALUEs pointing at a spill slot that is
+ // written to, then close the variable location. The value in memory
+ // will have changed.
+ VarLocsInRange KillSet;
+ if (isSpillInstruction(MI, MF)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+ for (uint64_t ID : OpenRanges.getSpillVarLocs()) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ assert(VL.containsSpillLocs() && "Broken VarLocSet?");
+ if (VL.usesSpillLoc(*Loc)) {
+ // This location is overwritten by the current instruction -- terminate
+ // the open range, and insert an explicit DBG_VALUE $noreg.
+ //
+ // Doing this at a later stage would require re-interpreting all
+ // DBG_VALUEs and DIExpressions to identify whether they point at
+ // memory, and then analysing all memory writes to see if they
+ // overwrite that memory, which is expensive.
+ //
+ // At this stage, we already know which DBG_VALUEs are for spills and
+ // where they are located; it's best to handle overwrites now.
+ KillSet.insert(ID);
+ unsigned SpillLocIdx = VL.getSpillLocIdx(*Loc);
+ VarLoc::MachineLoc OldLoc = VL.Locs[SpillLocIdx];
+ VarLoc UndefVL = VarLoc::CreateCopyLoc(VL, OldLoc, 0);
+ LocIndices UndefLocIDs = VarLocIDs.insert(UndefVL);
+ Transfers.push_back({&MI, UndefLocIDs.back()});
+ }
+ }
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kSpillLocation);
+ }
+
+ // Try to recognise spill and restore instructions that may create a new
+ // variable location.
+ if (isLocationSpill(MI, MF, Reg)) {
+ TKind = TransferKind::TransferSpill;
+ LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+ << "\n");
+ } else {
+ if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ return;
+ TKind = TransferKind::TransferRestore;
+ LLVM_DEBUG(dbgs() << "Recognized as restore: "; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+ << "\n");
+ }
+ // Check if the register or spill location is the location of a debug value.
+ auto TransferCandidates = OpenRanges.getEmptyVarLocRange();
+ if (TKind == TransferKind::TransferSpill)
+ TransferCandidates = OpenRanges.getRegisterVarLocs(Reg);
+ else if (TKind == TransferKind::TransferRestore)
+ TransferCandidates = OpenRanges.getSpillVarLocs();
+ for (uint64_t ID : TransferCandidates) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ unsigned LocIdx;
+ if (TKind == TransferKind::TransferSpill) {
+ assert(VL.usesReg(Reg) && "Broken VarLocSet?");
+ LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
+ << VL.Var.getVariable()->getName() << ")\n");
+ LocIdx = VL.getRegIdx(Reg);
+ } else {
+ assert(TKind == TransferKind::TransferRestore && VL.containsSpillLocs() &&
+ "Broken VarLocSet?");
+ if (!VL.usesSpillLoc(*Loc))
+ // The spill location is not the location of a debug value.
+ continue;
+ LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
+ << VL.Var.getVariable()->getName() << ")\n");
+ LocIdx = VL.getSpillLocIdx(*Loc);
+ }
+ VarLoc::MachineLoc MLoc = VL.Locs[LocIdx];
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, TKind,
+ MLoc, Reg);
+ // FIXME: A comment should explain why it's correct to return early here,
+ // if that is in fact correct.
+ return;
+ }
+}
+
+/// If \p MI is a register copy instruction that copies a previously tracked
+/// value from one register to another register that is callee saved, we
+/// create a new DBG_VALUE instruction describing the copy destination
+/// register.
+void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers) {
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+
+ if (!DestRegOp->isDef())
+ return;
+
+ auto isCalleeSavedReg = [&](Register Reg) {
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ Register SrcReg = SrcRegOp->getReg();
+ Register DestReg = DestRegOp->getReg();
+
+ // We want to recognize instructions where the destination register is a
+ // callee-saved register. If a register that could be clobbered by the call
+ // were used instead, there would be a good chance that it is clobbered soon.
+ // It is more likely that the previous register location, which is callee
+ // saved, stays unclobbered longer, even if it is killed.
+ if (!isCalleeSavedReg(DestReg))
+ return;
+
+ // Remember an entry value movement. If we encounter a new debug value of
+ // a parameter that only moves the value around, rather than modifying it,
+ // we are still able to use the entry value if needed.
+ if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ if (VL.isEntryValueBackupReg(SrcReg)) {
+ LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
+ VarLoc EntryValLocCopyBackup =
+ VarLoc::CreateEntryCopyBackupLoc(VL.MI, VL.Expr, DestReg);
+ // Stop tracking the original entry value.
+ OpenRanges.erase(VL);
+
+ // Start tracking the entry value copy.
+ LocIndices EntryValCopyLocIDs = VarLocIDs.insert(EntryValLocCopyBackup);
+ OpenRanges.insert(EntryValCopyLocIDs, EntryValLocCopyBackup);
+ break;
+ }
+ }
+ }
+
+ if (!SrcRegOp->isKill())
+ return;
+
+ for (uint64_t ID : OpenRanges.getRegisterVarLocs(SrcReg)) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ assert(VarLocIDs[Idx].usesReg(SrcReg) && "Broken VarLocSet?");
+ VarLoc::MachineLocValue Loc;
+ Loc.RegNo = SrcReg;
+ VarLoc::MachineLoc MLoc{VarLoc::MachineLocKind::RegisterKind, Loc};
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx,
+ TransferKind::TransferCopy, MLoc, DestReg);
+ // FIXME: A comment should explain why it's correct to return early here,
+ // if that is in fact correct.
+ return;
+ }
+}
+
+/// Terminate all open ranges at the end of the current basic block.
+bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB,
+ OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs,
+ const VarLocMap &VarLocIDs) {
+ bool Changed = false;
+ LLVM_DEBUG({
+ VarVec VarLocs;
+ OpenRanges.getUniqueVarLocs(VarLocs, VarLocIDs);
+ for (VarLoc &VL : VarLocs) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
+ VL.dump(TRI, TII);
+ }
+ });
+ VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs);
+ Changed = VLS != OpenRanges.getVarLocs();
+ // New OutLocs set may be different due to spill, restore or register
+ // copy instruction processing.
+ if (Changed)
+ VLS = OpenRanges.getVarLocs();
+ OpenRanges.clear();
+ return Changed;
+}
+
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// fragment usage.
+/// \param SeenFragments Map from DILocalVariable to all fragments of that
+/// Variable which are known to exist.
+/// \param OverlappingFragments The overlap map being constructed, from one
+/// Var/Fragment pair to a vector of fragments known to overlap.
+void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
+ VarToFragments &SeenFragments,
+ OverlapMap &OverlappingFragments) {
+ DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
+
+ // If this is the first sighting of this variable, then we are guaranteed
+ // there are currently no overlapping fragments either. Initialize the set
+ // of seen fragments, record no overlaps for the current one, and return.
+ auto SeenIt = SeenFragments.find(MIVar.getVariable());
+ if (SeenIt == SeenFragments.end()) {
+ SmallSet<FragmentInfo, 4> OneFragment;
+ OneFragment.insert(ThisFragment);
+ SeenFragments.insert({MIVar.getVariable(), OneFragment});
+
+ OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ return;
+ }
+
+ // If this particular Variable/Fragment pair already exists in the overlap
+ // map, it has already been accounted for.
+ auto IsInOLapMap =
+ OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ if (!IsInOLapMap.second)
+ return;
+
+ auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+ auto &AllSeenFragments = SeenIt->second;
+
+ // Otherwise, examine all other seen fragments for this variable, with "this"
+ // fragment being a previously unseen fragment. Record any pair of
+ // overlapping fragments.
+ for (const auto &ASeenFragment : AllSeenFragments) {
+ // Does this previously seen fragment overlap?
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+ // Yes: Mark the current fragment as being overlapped.
+ ThisFragmentsOverlaps.push_back(ASeenFragment);
+ // Mark the previously seen fragment as being overlapped by the current
+ // one.
+ auto ASeenFragmentsOverlaps =
+ OverlappingFragments.find({MIVar.getVariable(), ASeenFragment});
+ assert(ASeenFragmentsOverlaps != OverlappingFragments.end() &&
+ "Previously seen var fragment has no vector of overlaps");
+ ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+ }
+ }
+
+ AllSeenFragments.insert(ThisFragment);
+}
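+// Illustrative aside: two fragments of a variable overlap exactly when their
+// half-open bit ranges [OffsetInBits, OffsetInBits + SizeInBits) intersect;
+// the map built above just caches the answers to that interval test. Sketch
+// of the underlying check (plain C++, hypothetical struct):
+//
+//   #include <cstdint>
+//
+//   struct Frag {
+//     uint64_t OffsetInBits;
+//     uint64_t SizeInBits;
+//   };
+//
+//   // Half-open intervals overlap iff each one starts before the other ends.
+//   bool fragmentsOverlapSketch(const Frag &A, const Frag &B) {
+//     return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
+//            B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
+//   }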
+
+/// Update OpenRanges for a single instruction by applying the transfer
+/// functions for debug values, register defs, copies, spills and restores.
+void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+ if (!MI.isDebugInstr())
+ LastNonDbgMI = &MI;
+ transferDebugValue(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
+ transferWasmDef(MI, OpenRanges, VarLocIDs);
+ transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
+ transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
+}
+
+/// This routine joins the analysis results of all incoming edges in \p MBB by
+/// inserting a new DBG_VALUE instruction at the start of \p MBB, if the same
+/// source variable resides in the same location in all predecessors of \p MBB.
+bool VarLocBasedLDV::join(
+ MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+
+ VarLocSet InLocsT(Alloc); // Temporary incoming locations.
+
+ // For all predecessors of this MBB, find the set of VarLocs that
+ // can be joined.
+ int NumVisited = 0;
+ for (auto *p : MBB.predecessors()) {
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block.
+ if (!Visited.count(p)) {
+ LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber()
+ << "\n");
+ continue;
+ }
+ auto OL = OutLocs.find(p);
+ // Join is null in case of empty OutLocs from any of the pred.
+ if (OL == OutLocs.end())
+ return false;
+
+ // Just copy over the Out locs to incoming locs for the first visited
+ // predecessor, and for all other predecessors join the Out locs.
+ VarLocSet &OutLocVLS = *OL->second;
+ if (!NumVisited)
+ InLocsT = OutLocVLS;
+ else
+ InLocsT &= OutLocVLS;
+
+ LLVM_DEBUG({
+ if (!InLocsT.empty()) {
+ VarVec VarLocs;
+ collectAllVarLocs(VarLocs, InLocsT, VarLocIDs);
+ for (const VarLoc &VL : VarLocs)
+ dbgs() << " gathered candidate incoming var: "
+ << VL.Var.getVariable()->getName() << "\n";
+ }
+ });
+
+ NumVisited++;
+ }
+
+ // Filter out DBG_VALUEs that are out of scope.
+ VarLocSet KillSet(Alloc);
+ bool IsArtificial = ArtificialBlocks.count(&MBB);
+ if (!IsArtificial) {
+ for (uint64_t ID : InLocsT) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ if (!VarLocIDs[Idx].dominates(LS, MBB)) {
+ KillSet.set(ID);
+ LLVM_DEBUG({
+ auto Name = VarLocIDs[Idx].Var.getVariable()->getName();
+ dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
+ });
+ }
+ }
+ }
+ InLocsT.intersectWithComplement(KillSet);
+
+ // As we are processing blocks in reverse post-order we
+ // should have processed at least one predecessor, unless it
+ // is the entry block which has no predecessor.
+ assert((NumVisited || MBB.pred_empty()) &&
+ "Should have processed at least one predecessor");
+
+ VarLocSet &ILS = getVarLocsInMBB(&MBB, InLocs);
+ bool Changed = false;
+ if (ILS != InLocsT) {
+ ILS = InLocsT;
+ Changed = true;
+ }
+
+ return Changed;
+}
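+// Illustrative aside: the join above is the standard "must" confluence of a
+// forward dataflow problem: copy the first visited predecessor's OUT set,
+// then intersect each remaining one. With ordinary sets instead of the sparse
+// bitvector used here, the same operation looks like (sketch, hypothetical
+// IDs):
+//
+//   #include <algorithm>
+//   #include <cstdint>
+//   #include <iterator>
+//   #include <set>
+//   #include <vector>
+//
+//   std::set<uint64_t>
+//   joinPredecessors(const std::vector<std::set<uint64_t>> &PredOuts) {
+//     if (PredOuts.empty())
+//       return {};
+//     std::set<uint64_t> In = PredOuts.front(); // first predecessor: copy
+//     for (size_t I = 1, E = PredOuts.size(); I != E; ++I) {
+//       std::set<uint64_t> Tmp;
+//       std::set_intersection(In.begin(), In.end(), PredOuts[I].begin(),
+//                             PredOuts[I].end(),
+//                             std::inserter(Tmp, Tmp.end()));
+//       In.swap(Tmp);
+//     }
+//     return In;
+//   }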
+
+void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs,
+ VarLocMap &VarLocIDs) {
+ // PendingInLocs records all locations propagated into blocks, which have
+ // not had DBG_VALUE insts created. Go through and create those insts now.
+ for (auto &Iter : PendingInLocs) {
+ // Map is keyed on a constant pointer, unwrap it so we can insert insts.
+ auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
+ VarLocSet &Pending = *Iter.second;
+
+ SmallVector<VarLoc, 32> VarLocs;
+ collectAllVarLocs(VarLocs, Pending, VarLocIDs);
+
+ for (VarLoc DiffIt : VarLocs) {
+ // The ID location is live-in to MBB -- work out what kind of machine
+ // location it is and create a DBG_VALUE.
+ if (DiffIt.isEntryBackupLoc())
+ continue;
+ MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
+ MBB.insert(MBB.instr_begin(), MI);
+
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ }
+ }
+}
+
+bool VarLocBasedLDV::isEntryValueCandidate(
+ const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const {
+ assert(MI.isDebugValue() && "This must be DBG_VALUE.");
+
+ // TODO: Add support for local variables that are expressed in terms of
+ // parameters' entry values.
+ // TODO: Add support for modified arguments that can be expressed
+ // by using their entry values.
+ auto *DIVar = MI.getDebugVariable();
+ if (!DIVar->isParameter())
+ return false;
+
+ // Do not consider parameters that belong to an inlined function.
+ if (MI.getDebugLoc()->getInlinedAt())
+ return false;
+
+ // Only consider parameters that are described using registers. Parameters
+ // that are passed on the stack are not yet supported, so ignore debug
+ // values that are described by the frame or stack pointer.
+ if (!isRegOtherThanSPAndFP(MI.getDebugOperand(0), MI, TRI))
+ return false;
+
+ // If a parameter's value has been propagated from the caller, then the
+ // parameter's DBG_VALUE may be described using a register defined by some
+ // instruction in the entry block, in which case we shouldn't create an
+ // entry value.
+ if (DefinedRegs.count(MI.getDebugOperand(0).getReg()))
+ return false;
+
+ // TODO: Add support for parameters that have a pre-existing debug expression
+ // (e.g. fragments).
+ // A simple deref expression is equivalent to an indirect debug value.
+ const DIExpression *Expr = MI.getDebugExpression();
+ if (Expr->getNumElements() > 0 && !Expr->isDeref())
+ return false;
+
+ return true;
+}
+
+/// Collect all register defines (including aliases) for the given instruction.
+static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (MO.getReg() && MO.getReg().isPhysical()) {
+ Regs.insert(MO.getReg());
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Regs.insert(*AI);
+ }
+ }
+}
+
+/// This routine records the entry values of function parameters. The values
+/// could be used as backup values. If we lose track of some unmodified
+/// parameters, the backup values will be used as their primary locations.
+void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
+ const DefinedRegsSet &DefinedRegs,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
+ if (TPC) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.Options.ShouldEmitDebugEntryValues())
+ return;
+ }
+
+ DebugVariable V(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+
+ if (!isEntryValueCandidate(MI, DefinedRegs) ||
+ OpenRanges.getEntryValueBackup(V))
+ return;
+
+ LLVM_DEBUG(dbgs() << "Creating the backup entry location: "; MI.dump(););
+
+ // Create the entry value and use it as a backup location. It remains
+ // valid until the parameter is modified.
+ DIExpression *NewExpr =
+ DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
+ VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, NewExpr);
+ LocIndices EntryValLocIDs = VarLocIDs.insert(EntryValLocAsBackup);
+ OpenRanges.insert(EntryValLocIDs, EntryValLocAsBackup);
+}
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
+ (void)DomTree;
+ LLVM_DEBUG(dbgs() << "\nDebug Range Extension: " << MF.getName() << "\n");
+
+ if (!MF.getFunction().getSubprogram())
+ // VarLocBaseLDV will already have removed all DBG_VALUEs.
+ return false;
+
+ // Skip functions from NoDebug compilation units.
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ this->TPC = TPC;
+ LS.initialize(MF);
+
+ bool Changed = false;
+ bool OLChanged = false;
+ bool MBBJoined = false;
+
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ OverlapMap OverlapFragments; // Map of overlapping variable fragments.
+ OpenRangesSet OpenRanges(Alloc, OverlapFragments);
+ // Ranges that are open until end of bb.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
+ // spills, copies and restores).
+ // Map responsible MI to attached Transfer emitted from Backup Entry Value.
+ InstToEntryLocMap EntryValTransfers;
+ // Map a Register to the last MI which clobbered it.
+ RegDefToInstMap RegSetInstrs;
+
+ VarToFragments SeenFragments;
+
+ // Blocks which are artificial, i.e. blocks which exclusively contain
+ // instructions without locations, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Pending;
+
+ // Set of register defines that are seen when traversing the entry block
+ // looking for debug entry value candidates.
+ DefinedRegsSet DefinedRegs;
+
+ // Only in the case of entry MBB collect DBG_VALUEs representing
+ // function parameters in order to generate debug entry values for them.
+ MachineBasicBlock &First_MBB = *(MF.begin());
+ for (auto &MI : First_MBB) {
+ collectRegDefs(MI, DefinedRegs, TRI);
+ if (MI.isDebugValue())
+ recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
+ }
+
+ // Initialize per-block structures and scan for fragment overlaps.
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
+
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
+ LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
+ "OutLocs after initialization", dbgs()));
+
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ for (MachineBasicBlock *MBB : RPOT) {
+ OrderToBB[RPONumber] = MBB;
+ BBToOrder[MBB] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
+
+ if (RPONumber > InputBBLimit) {
+ unsigned NumInputDbgValues = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ if (MI.isDebugValue())
+ ++NumInputDbgValues;
+ if (NumInputDbgValues > InputDbgValLimit) {
+ LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()
+ << " has " << RPONumber << " basic blocks and "
+ << NumInputDbgValues
+ << " input DBG_VALUEs, exceeding limits.\n");
+ return false;
+ }
+ }
+
+ // This is a standard "union of predecessor outs" dataflow problem.
+ // To solve it, we perform join() and process() using the two worklist method
+ // until the ranges converge.
+ // Ranges have converged when both worklists are empty.
+ SmallPtrSet<const MachineBasicBlock *, 16> Visited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice. We could avoid this with a custom priority queue, but this
+ // is probably not worth it.
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending;
+ LLVM_DEBUG(dbgs() << "Processing Worklist\n");
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ Worklist.pop();
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited,
+ ArtificialBlocks);
+ MBBJoined |= Visited.insert(MBB).second;
+ if (MBBJoined) {
+ MBBJoined = false;
+ Changed = true;
+ // Now that we have started to extend ranges across BBs we need to
+ // examine spill, copy and restore instructions to see whether they
+ // operate with registers that correspond to user variables.
+ // First load any pending inlocs.
+ OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs);
+ LastNonDbgMI = nullptr;
+ RegSetInstrs.clear();
+ for (auto &MI : *MBB)
+ process(MI, OpenRanges, VarLocIDs, Transfers, EntryValTransfers,
+ RegSetInstrs);
+ OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
+
+ LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
+ "OutLocs after propagating", dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs,
+ "InLocs after propagating", dbgs()));
+
+ if (OLChanged) {
+ OLChanged = false;
+ for (auto *s : MBB->successors())
+ if (OnPending.insert(s).second) {
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+ }
+ Worklist.swap(Pending);
+ // At this point, Pending must be empty, since it was just swapped with the
+ // drained worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ // Add any DBG_VALUE instructions created by location transfers.
+ for (auto &TR : Transfers) {
+ assert(!TR.TransferInst->isTerminator() &&
+ "Cannot insert DBG_VALUE after terminator");
+ MachineBasicBlock *MBB = TR.TransferInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.LocationID];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TR.TransferInst->getIterator(), MI);
+ }
+ Transfers.clear();
+
+ // Add DBG_VALUEs created using Backup Entry Value location.
+ for (auto &TR : EntryValTransfers) {
+ MachineInstr *TRInst = const_cast<MachineInstr *>(TR.first);
+ assert(!TRInst->isTerminator() &&
+ "Cannot insert DBG_VALUE after terminator");
+ MachineBasicBlock *MBB = TRInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.second];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TRInst->getIterator(), MI);
+ }
+ EntryValTransfers.clear();
+
+ // Deferred inlocs will not have had any DBG_VALUE insts created; do
+ // that now.
+ flushPendingLocs(InLocs, VarLocIDs);
+
+ LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
+ return Changed;
+}
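+// Illustrative aside: the driver above is the classic two-worklist fixed-point
+// scheme over reverse post-order: drain the worklist, and whenever a block's
+// OUT set changes, queue its successors for the next round. Condensed sketch
+// of just that control structure (hypothetical Block/Transfer, not the LLVM
+// types):
+//
+//   #include <functional>
+//   #include <queue>
+//   #include <set>
+//   #include <vector>
+//
+//   struct Block { std::vector<int> Succs; };
+//
+//   // Blocks are identified by RPO number; Transfer returns true if the
+//   // block's OUT set changed.
+//   void solve(const std::vector<Block> &Blocks,
+//              const std::function<bool(int)> &Transfer) {
+//     std::priority_queue<int, std::vector<int>, std::greater<int>> Worklist,
+//         Pending;
+//     for (int I = 0, E = (int)Blocks.size(); I != E; ++I)
+//       Worklist.push(I);
+//     while (!Worklist.empty() || !Pending.empty()) {
+//       std::set<int> OnPending;
+//       while (!Worklist.empty()) {
+//         int BB = Worklist.top();
+//         Worklist.pop();
+//         if (Transfer(BB))                 // OUT changed: revisit successors
+//           for (int S : Blocks[BB].Succs)
+//             if (OnPending.insert(S).second)
+//               Pending.push(S);
+//       }
+//       std::swap(Worklist, Pending);       // next round
+//     }
+//   }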
+
+LDVImpl *
+llvm::makeVarLocBasedLiveDebugValues()
+{
+ return new VarLocBasedLDV();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..9603c1f01e08
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,1970 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "livedebugvars"
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+STATISTIC(NumInsertedDebugLabels, "Number of DBG_LABELs inserted");
+
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE,
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE,
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+enum : unsigned { UndefLocNo = ~0U };
+
+namespace {
+/// Describes a debug variable value by location number and expression along
+/// with some flags about the original usage of the location.
+class DbgVariableValue {
+public:
+ DbgVariableValue(ArrayRef<unsigned> NewLocs, bool WasIndirect, bool WasList,
+ const DIExpression &Expr)
+ : WasIndirect(WasIndirect), WasList(WasList), Expression(&Expr) {
+ assert(!(WasIndirect && WasList) &&
+ "DBG_VALUE_LISTs should not be indirect.");
+ SmallVector<unsigned> LocNoVec;
+ for (unsigned LocNo : NewLocs) {
+ auto It = find(LocNoVec, LocNo);
+ if (It == LocNoVec.end())
+ LocNoVec.push_back(LocNo);
+ else {
+ // LocNo duplicates an element in LocNoVec; rewrite the expression so that
+ // references to this operand's index refer to the duplicating element
+ // instead.
+ unsigned OpIdx = LocNoVec.size();
+ unsigned DuplicatingIdx = std::distance(LocNoVec.begin(), It);
+ Expression =
+ DIExpression::replaceArg(Expression, OpIdx, DuplicatingIdx);
+ }
+ }
+ // FIXME: Debug values referencing 64+ unique machine locations are rare and
+ // currently unsupported for performance reasons. If we can verify that
+ // performance is acceptable for such debug values, we can increase the
+ // bit-width of LocNoCount to 14 to enable up to 16384 unique machine
+ // locations. We will also need to verify that this does not cause issues
+ // with LiveDebugVariables' use of IntervalMap.
+ if (LocNoVec.size() < 64) {
+ LocNoCount = LocNoVec.size();
+ if (LocNoCount > 0) {
+ LocNos = std::make_unique<unsigned[]>(LocNoCount);
+ std::copy(LocNoVec.begin(), LocNoVec.end(), loc_nos_begin());
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Found debug value with 64+ unique machine "
+ "locations, dropping...\n");
+ LocNoCount = 1;
+ // Turn this into an undef debug value list; right now, the simplest form
+ // of this is an expression with one arg, and an undef debug operand.
+ Expression =
+ DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0});
+ if (auto FragmentInfoOpt = Expr.getFragmentInfo())
+ Expression = *DIExpression::createFragmentExpression(
+ Expression, FragmentInfoOpt->OffsetInBits,
+ FragmentInfoOpt->SizeInBits);
+ LocNos = std::make_unique<unsigned[]>(LocNoCount);
+ LocNos[0] = UndefLocNo;
+ }
+ }
+
+ DbgVariableValue() : LocNoCount(0), WasIndirect(false), WasList(false) {}
+ DbgVariableValue(const DbgVariableValue &Other)
+ : LocNoCount(Other.LocNoCount), WasIndirect(Other.getWasIndirect()),
+ WasList(Other.getWasList()), Expression(Other.getExpression()) {
+ if (Other.getLocNoCount()) {
+ LocNos.reset(new unsigned[Other.getLocNoCount()]);
+ std::copy(Other.loc_nos_begin(), Other.loc_nos_end(), loc_nos_begin());
+ }
+ }
+
+ DbgVariableValue &operator=(const DbgVariableValue &Other) {
+ if (this == &Other)
+ return *this;
+ if (Other.getLocNoCount()) {
+ LocNos.reset(new unsigned[Other.getLocNoCount()]);
+ std::copy(Other.loc_nos_begin(), Other.loc_nos_end(), loc_nos_begin());
+ } else {
+ LocNos.release();
+ }
+ LocNoCount = Other.getLocNoCount();
+ WasIndirect = Other.getWasIndirect();
+ WasList = Other.getWasList();
+ Expression = Other.getExpression();
+ return *this;
+ }
+
+ const DIExpression *getExpression() const { return Expression; }
+ uint8_t getLocNoCount() const { return LocNoCount; }
+ bool containsLocNo(unsigned LocNo) const {
+ return is_contained(loc_nos(), LocNo);
+ }
+ bool getWasIndirect() const { return WasIndirect; }
+ bool getWasList() const { return WasList; }
+ bool isUndef() const { return LocNoCount == 0 || containsLocNo(UndefLocNo); }
+
+ DbgVariableValue decrementLocNosAfterPivot(unsigned Pivot) const {
+ SmallVector<unsigned, 4> NewLocNos;
+ for (unsigned LocNo : loc_nos())
+ NewLocNos.push_back(LocNo != UndefLocNo && LocNo > Pivot ? LocNo - 1
+ : LocNo);
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
+
+ DbgVariableValue remapLocNos(ArrayRef<unsigned> LocNoMap) const {
+ SmallVector<unsigned> NewLocNos;
+ for (unsigned LocNo : loc_nos())
+ // Undef values don't exist in locations (and thus not in LocNoMap
+ // either) so skip over them. See getLocationNo().
+ NewLocNos.push_back(LocNo == UndefLocNo ? UndefLocNo : LocNoMap[LocNo]);
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
+
+ DbgVariableValue changeLocNo(unsigned OldLocNo, unsigned NewLocNo) const {
+ SmallVector<unsigned> NewLocNos;
+ NewLocNos.assign(loc_nos_begin(), loc_nos_end());
+ auto OldLocIt = find(NewLocNos, OldLocNo);
+ assert(OldLocIt != NewLocNos.end() && "Old location must be present.");
+ *OldLocIt = NewLocNo;
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
+
+ bool hasLocNoGreaterThan(unsigned LocNo) const {
+ return any_of(loc_nos(),
+ [LocNo](unsigned ThisLocNo) { return ThisLocNo > LocNo; });
+ }
+
+ void printLocNos(llvm::raw_ostream &OS) const {
+ for (const unsigned &Loc : loc_nos())
+ OS << (&Loc == loc_nos_begin() ? " " : ", ") << Loc;
+ }
+
+ friend inline bool operator==(const DbgVariableValue &LHS,
+ const DbgVariableValue &RHS) {
+ if (std::tie(LHS.LocNoCount, LHS.WasIndirect, LHS.WasList,
+ LHS.Expression) !=
+ std::tie(RHS.LocNoCount, RHS.WasIndirect, RHS.WasList, RHS.Expression))
+ return false;
+ return std::equal(LHS.loc_nos_begin(), LHS.loc_nos_end(),
+ RHS.loc_nos_begin());
+ }
+
+ friend inline bool operator!=(const DbgVariableValue &LHS,
+ const DbgVariableValue &RHS) {
+ return !(LHS == RHS);
+ }
+
+ unsigned *loc_nos_begin() { return LocNos.get(); }
+ const unsigned *loc_nos_begin() const { return LocNos.get(); }
+ unsigned *loc_nos_end() { return LocNos.get() + LocNoCount; }
+ const unsigned *loc_nos_end() const { return LocNos.get() + LocNoCount; }
+ ArrayRef<unsigned> loc_nos() const {
+ return ArrayRef<unsigned>(LocNos.get(), LocNoCount);
+ }
+
+private:
+ // IntervalMap requires the value object to be very small, to the extent
+ // that we do not have enough room for an std::vector. Using a C-style array
+ // (with a unique_ptr wrapper for convenience) allows us to optimize for this
+ // specific case by packing the array size into only 6 bits (it is highly
+ // unlikely that any debug value will need 64+ locations).
+ std::unique_ptr<unsigned[]> LocNos;
+ uint8_t LocNoCount : 6;
+ bool WasIndirect : 1;
+ bool WasList : 1;
+ const DIExpression *Expression = nullptr;
+};
+} // namespace
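+// Illustrative aside: the bit-fields above exist purely to keep the value type
+// small enough for IntervalMap's inline storage, so the count and the two
+// flags share one byte next to the pointer members. A standalone sketch of
+// the same packing, with a size check (hypothetical struct):
+//
+//   #include <cstdint>
+//   #include <memory>
+//
+//   struct PackedValueSketch {
+//     std::unique_ptr<unsigned[]> LocNos; // out-of-line array of loc numbers
+//     uint8_t LocNoCount : 6;             // up to 63 locations
+//     bool WasIndirect : 1;
+//     bool WasList : 1;
+//     const void *Expression = nullptr;
+//   };
+//
+//   // The three bit-fields share a single byte, so (modulo padding) the
+//   // struct stays within three pointers and remains cheap to store by value.
+//   static_assert(sizeof(PackedValueSketch) <= 3 * sizeof(void *),
+//                 "value type must stay small");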
+
+/// Map of where a user value is live to that value.
+using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>;
+
+/// Map of stack slot offsets for spilled locations.
+/// Non-spilled locations are not added to the map.
+using SpillOffsetMap = DenseMap<unsigned, unsigned>;
+
+/// Cache of the position to use as the starting point when calling
+/// MachineBasicBlock::SkipPHIsLabelsAndDebug. This prevents
+/// MachineBasicBlock::SkipPHIsLabelsAndDebug from repeatedly searching the
+/// same set of PHIs/Labels/Debug instructions if it is called many times for
+/// the same block.
+using BlockSkipInstsMap =
+ DenseMap<MachineBasicBlock *, MachineBasicBlock::iterator>;
+
+namespace {
+
+class LDVImpl;
+
+/// A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they are held by the same virtual register. The
+/// equivalence class is the transitive closure of that relation.
+class UserValue {
+ const DILocalVariable *Variable; ///< The debug info variable we are part of.
+ /// The part of the variable we describe.
+ const std::optional<DIExpression::FragmentInfo> Fragment;
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next = nullptr; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// Set of interval start indexes that have been trimmed to the
+ /// lexical scope.
+ SmallSet<SlotIndex, 2> trimmedDefs;
+
+ /// Insert a DBG_VALUE into MBB at Idx for DbgValue.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
+ SlotIndex StopIdx, DbgVariableValue DbgValue,
+ ArrayRef<bool> LocSpills,
+ ArrayRef<unsigned> SpillOffsets, LiveIntervals &LIS,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ BlockSkipInstsMap &BBSkipInstsMap);
+
+ /// Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
+ LiveIntervals &LIS);
+
+public:
+ /// Create a new UserValue.
+ UserValue(const DILocalVariable *var,
+ std::optional<DIExpression::FragmentInfo> Fragment, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : Variable(var), Fragment(Fragment), dl(std::move(L)), leader(this),
+ locInts(alloc) {}
+
+ /// Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next) {
+ End->leader = L1;
+ End = End->next;
+ }
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
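+ // Illustrative aside: getLeader/merge implement a small union-find: each
+ // value points at a leader, chains are flattened on lookup, and merge
+ // splices one class into the other. The same structure over plain indices
+ // (self-contained sketch, not the LLVM code):
+ //
+ //   #include <numeric>
+ //   #include <vector>
+ //
+ //   struct UnionFindSketch {
+ //     std::vector<int> Leader;
+ //     explicit UnionFindSketch(int N) : Leader(N) {
+ //       std::iota(Leader.begin(), Leader.end(), 0); // each leads itself
+ //     }
+ //     int find(int X) {
+ //       while (Leader[X] != X)
+ //         X = Leader[X] = Leader[Leader[X]];        // path halving
+ //       return X;
+ //     }
+ //     void merge(int A, int B) { Leader[find(A)] = find(B); }
+ //   };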
+
+ /// Return the location number that matches Loc.
+ ///
+ /// For undef values we always return location number UndefLocNo without
+ /// inserting anything in locations. Since locations is a vector and the
+ /// location number is the position in the vector and UndefLocNo is ~0,
+ /// we would need a very big vector to put the value at the right position.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return UndefLocNo;
+ // For register locations we don't care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg()) {
+ if (locations.back().isDef())
+ locations.back().setIsDead(false);
+ locations.back().setIsUse();
+ }
+ return locations.size() - 1;
+ }
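+ // Illustrative aside: getLocationNo is a simple interning scheme: return the
+ // index of an existing identical operand, otherwise append it and return the
+ // new index, with UndefLocNo reserved so undef operands never occupy a slot.
+ // Generic sketch of the pattern (hypothetical element type, not this class):
+ //
+ //   #include <string>
+ //   #include <vector>
+ //
+ //   constexpr unsigned UndefNo = ~0u;
+ //
+ //   // Return a stable small index for Value, reusing an existing slot when
+ //   // the same value was seen before; "" stands in for an undef operand.
+ //   unsigned internSketch(std::vector<std::string> &Table,
+ //                         const std::string &Value) {
+ //     if (Value.empty())
+ //       return UndefNo;                 // never stored in the table
+ //     for (unsigned I = 0, E = Table.size(); I != E; ++I)
+ //       if (Table[I] == Value)
+ //         return I;
+ //     Table.push_back(Value);
+ //     return Table.size() - 1;
+ //   }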
+
+ /// Remove (recycle) a location number. If \p LocNo is still used by
+ /// locInts, nothing is done.
+ void removeLocationIfUnused(unsigned LocNo) {
+ // Bail out if LocNo still is used.
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ const DbgVariableValue &DbgValue = I.value();
+ if (DbgValue.containsLocNo(LocNo))
+ return;
+ }
+ // Remove the entry in the locations vector, and adjust all references to
+ // location numbers above the removed entry.
+ locations.erase(locations.begin() + LocNo);
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ const DbgVariableValue &DbgValue = I.value();
+ if (DbgValue.hasLocNoGreaterThan(LocNo))
+ I.setValueUnchecked(DbgValue.decrementLocNosAfterPivot(LocNo));
+ }
+ }
+
+ /// Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// Add a definition point to this user value.
+ void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,
+ bool IsList, const DIExpression &Expr) {
+ SmallVector<unsigned> Locs;
+ for (const MachineOperand &Op : LocMOs)
+ Locs.push_back(getLocationNo(Op));
+ DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);
+ // Add a singular (Idx,Idx) -> value mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), std::move(DbgValue));
+ else
+ // A later DBG_VALUE at the same SlotIndex overrides the old location.
+ I.setValue(std::move(DbgValue));
+ }
+
+ /// Extend the current definition as far as possible down.
+ ///
+ /// Stop when meeting an existing def or when leaving the live
+ /// range of VNI. End points where VNI is no longer live are added to Kills.
+ ///
+ /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
+ /// data-flow analysis to propagate them beyond basic block boundaries.
+ ///
+ /// \param Idx Starting point for the definition.
+ /// \param DbgValue value to propagate.
+ /// \param LiveIntervalInfo For each location number key in this map,
+ /// restricts liveness to where the LiveRange has the value equal to the
+ /// VNInfo.
+ /// \param [out] Kills Append end points of VNI's live range to Kills.
+ /// \param LIS Live intervals analysis.
+ void
+ extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+ &LiveIntervalInfo,
+ std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+ LiveIntervals &LIS);
+
+ /// The value in LI may be copied to other registers. Determine if
+ /// any of the copies are available at the kill points, and add defs if
+ /// possible.
+ ///
+ /// \param DbgValue Location number of LI->reg, and DIExpression.
+ /// \param LocIntervals Scan for copies of the value for each location in the
+ /// corresponding LiveInterval->reg.
+ /// \param KilledAt The point where the range of DbgValue could be extended.
+ /// \param [in,out] NewDefs Append (Idx, DbgValue) of inserted defs here.
+ void addDefsFromCopies(
+ DbgVariableValue DbgValue,
+ SmallVectorImpl<std::pair<unsigned, LiveInterval *>> &LocIntervals,
+ SlotIndex KilledAt,
+ SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS);
+
+ /// Compute the live intervals of all locations after collecting all their
+ /// def points.
+ void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS, LexicalScopes &LS);
+
+ /// Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// live. Returns true if any changes were made.
+ bool splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
+ LiveIntervals &LIS);
+
+ /// Rewrite virtual register locations according to the provided virtual
+ /// register map. Record the stack slot offsets for the locations that
+ /// were spilled.
+ void rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets);
+
+ /// Recreate DBG_VALUE instruction from data structures.
+ void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const SpillOffsetMap &SpillOffsets,
+ BlockSkipInstsMap &BBSkipInstsMap);
+
+ /// Return DebugLoc of this UserValue.
+ const DebugLoc &getDebugLoc() { return dl; }
+
+ void print(raw_ostream &, const TargetRegisterInfo *);
+};
+
+/// A user label is a part of a debug info user label.
+class UserLabel {
+ const DILabel *Label; ///< The debug info label we are part of.
+ DebugLoc dl; ///< The debug location for the label. This is
+ ///< used by dwarf writer to find lexical scope.
+ SlotIndex loc; ///< Slot used by the debug label.
+
+ /// Insert a DBG_LABEL into MBB at Idx.
+ void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap);
+
+public:
+ /// Create a new UserLabel.
+ UserLabel(const DILabel *label, DebugLoc L, SlotIndex Idx)
+ : Label(label), dl(std::move(L)), loc(Idx) {}
+
+ /// Does this UserLabel match the parameters?
+ bool matches(const DILabel *L, const DILocation *IA,
+ const SlotIndex Index) const {
+ return Label == L && dl->getInlinedAt() == IA && loc == Index;
+ }
+
+ /// Recreate DBG_LABEL instruction from data structures.
+ void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap);
+
+ /// Return DebugLoc of this UserLabel.
+ const DebugLoc &getDebugLoc() { return dl; }
+
+ void print(raw_ostream &, const TargetRegisterInfo *);
+};
+
+/// Implementation of the LiveDebugVariables pass.
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF = nullptr;
+ LiveIntervals *LIS;
+ const TargetRegisterInfo *TRI;
+
+ /// Position and VReg of a PHI instruction during register allocation.
+ struct PHIValPos {
+ SlotIndex SI; /// Slot where this PHI occurs.
+ Register Reg; /// VReg this PHI occurs in.
+ unsigned SubReg; /// Qualifying subregister for Reg.
+ };
+
+ /// Map from debug instruction number to PHI position during allocation.
+ std::map<unsigned, PHIValPos> PHIValToPos;
+ /// For each VReg, an index of which debug instruction numbers and
+ /// corresponding PHIs are sensitive to splitting. Each VReg may have
+ /// multiple PHI defs, at different positions.
+ DenseMap<Register, std::vector<unsigned>> RegToPHIIdx;
+
+ /// Record for any debug instructions unlinked from their blocks during
+ /// regalloc. Stores the instr and its location, so that they can be
+ /// re-inserted after regalloc is over.
+ struct InstrPos {
+ MachineInstr *MI; ///< Debug instruction, unlinked from its block.
+ SlotIndex Idx; ///< Slot position where MI should be re-inserted.
+ MachineBasicBlock *MBB; ///< Block that MI was in.
+ };
+
+ /// Collection of stored debug instructions, preserved until after regalloc.
+ SmallVector<InstrPos, 32> StashedDebugInstrs;
+
+ /// Whether emitDebugValues is called.
+ bool EmitDone = false;
+
+ /// Whether the machine function is modified during the pass.
+ bool ModifiedMF = false;
+
+ /// All allocated UserValue instances.
+ SmallVector<std::unique_ptr<UserValue>, 8> userValues;
+
+ /// All allocated UserLabel instances.
+ SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
+
+ /// Map virtual register to eq class leader.
+ using VRMap = DenseMap<unsigned, UserValue *>;
+ VRMap virtRegToEqClass;
+
+ /// Map to find existing UserValue instances.
+ using UVMap = DenseMap<DebugVariable, UserValue *>;
+ UVMap userVarMap;
+
+ /// Find or create a UserValue.
+ UserValue *getUserValue(const DILocalVariable *Var,
+ std::optional<DIExpression::FragmentInfo> Fragment,
+ const DebugLoc &DL);
+
+ /// Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(Register VirtReg);
+
+ /// Add DBG_VALUE instruction to our maps.
+ ///
+ /// \param MI DBG_VALUE instruction
+ /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
+
+ /// Track variable location debug instructions while using the instruction
+ /// referencing implementation. Such debug instructions do not need to be
+ /// updated during regalloc because they identify instructions rather than
+ /// register locations. However, they need to be removed from the
+ /// MachineFunction during regalloc, then re-inserted later, to avoid
+ /// disrupting the allocator.
+ ///
+ /// \param MI Any DBG_VALUE / DBG_INSTR_REF / DBG_PHI instruction
+ /// \param Idx Last valid SlotIndex before instruction
+ ///
+ /// \returns Iterator to continue processing from after unlinking.
+ MachineBasicBlock::iterator handleDebugInstr(MachineInstr &MI, SlotIndex Idx);
+
+ /// Add DBG_LABEL instruction to UserLabel.
+ ///
+ /// \param MI DBG_LABEL instruction
+ /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_LABEL instruction should be deleted.
+ bool handleDebugLabel(MachineInstr &MI, SlotIndex Idx);
+
+ /// Collect and erase all DBG_VALUE instructions, adding a UserValue def
+ /// for each instruction.
+ ///
+ /// \param mf MachineFunction to be scanned.
+ /// \param InstrRef Whether to operate in instruction referencing mode. If
+ /// true, most of LiveDebugVariables doesn't run.
+ ///
+ /// \returns True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf, bool InstrRef);
+
+ /// Compute the live intervals of all user values after collecting all
+ /// their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+
+ bool runOnMachineFunction(MachineFunction &mf, bool InstrRef);
+
+ /// Release all memory.
+ void clear() {
+ MF = nullptr;
+ PHIValToPos.clear();
+ RegToPHIIdx.clear();
+ StashedDebugInstrs.clear();
+ userValues.clear();
+ userLabels.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) &&
+ "Dbg values are not emitted in LDV");
+ EmitDone = false;
+ ModifiedMF = false;
+ }
+
+ /// Map virtual register to an equivalence class.
+ void mapVirtReg(Register VirtReg, UserValue *EC);
+
+ /// Replace any PHI referring to OldReg with its corresponding NewReg, if
+ /// present.
+ void splitPHIRegister(Register OldReg, ArrayRef<Register> NewRegs);
+
+ /// Replace all references to OldReg with NewRegs.
+ void splitRegister(Register OldReg, ArrayRef<Register> NewRegs);
+
+ /// Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+
+} // end anonymous namespace
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL)
+ return;
+
+ auto *Scope = cast<DIScope>(DL.getScope());
+ // Omit the directory, because it's likely to be long and uninteresting.
+ CommentOS << Scope->getFilename();
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+
+ DebugLoc InlinedAtDL = DL.getInlinedAt();
+ if (!InlinedAtDL)
+ return;
+
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+}
+
+static void printExtendedName(raw_ostream &OS, const DINode *Node,
+ const DILocation *DL) {
+ const LLVMContext &Ctx = Node->getContext();
+ StringRef Res;
+ unsigned Line = 0;
+ if (const auto *V = dyn_cast<const DILocalVariable>(Node)) {
+ Res = V->getName();
+ Line = V->getLine();
+ } else if (const auto *L = dyn_cast<const DILabel>(Node)) {
+ Res = L->getName();
+ Line = L->getLine();
+ }
+
+ if (!Res.empty())
+ OS << Res << "," << Line;
+ auto *InlinedAt = DL ? DL->getInlinedAt() : nullptr;
+ if (InlinedAt) {
+ if (DebugLoc InlinedAtDL = InlinedAt) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ OS << "!\"";
+ printExtendedName(OS, Variable, dl);
+
+ OS << "\"\t";
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value().isUndef())
+ OS << " undef";
+ else {
+ I.value().printLocNos(OS);
+ if (I.value().getWasIndirect())
+ OS << " ind";
+ else if (I.value().getWasList())
+ OS << " list";
+ }
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TRI);
+ }
+ OS << '\n';
+}
+
+void UserLabel::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ OS << "!\"";
+ printExtendedName(OS, Label, dl);
+
+ OS << "\"\t";
+ OS << loc;
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (auto &userValue : userValues)
+ userValue->print(OS, TRI);
+ OS << "********** DEBUG LABELS **********\n";
+ for (auto &userLabel : userLabels)
+ userLabel->print(OS, TRI);
+}
+#endif
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() && locations[i].getReg().isVirtual())
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *
+LDVImpl::getUserValue(const DILocalVariable *Var,
+ std::optional<DIExpression::FragmentInfo> Fragment,
+ const DebugLoc &DL) {
+ // FIXME: Handle partially overlapping fragments. See
+ // https://reviews.llvm.org/D70121#1849741.
+ DebugVariable ID(Var, Fragment, DL->getInlinedAt());
+ UserValue *&UV = userVarMap[ID];
+ if (!UV) {
+ userValues.push_back(
+ std::make_unique<UserValue>(Var, Fragment, DL, allocator));
+ UV = userValues.back().get();
+ }
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) {
+ assert(VirtReg.isVirtual() && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
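+ // Union the class already recorded for VirtReg (if any) with EC, so every
+ // UserValue sharing a virtual register ends up under one leader.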
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(Register VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return nullptr;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable, expr
+ // DBG_VALUE_LIST variable, expr, locs...
+ if (!MI.isDebugValue()) {
+ LLVM_DEBUG(dbgs() << "Can't handle non-DBG_VALUE*: " << MI);
+ return false;
+ }
+ if (!MI.getDebugVariableOp().isMetadata()) {
+ LLVM_DEBUG(dbgs() << "Can't handle DBG_VALUE* with invalid variable: "
+ << MI);
+ return false;
+ }
+ if (MI.isNonListDebugValue() &&
+ (MI.getNumOperands() != 4 ||
+ !(MI.getDebugOffset().isImm() || MI.getDebugOffset().isReg()))) {
+ LLVM_DEBUG(dbgs() << "Can't handle malformed DBG_VALUE: " << MI);
+ return false;
+ }
+
+ // Detect invalid DBG_VALUE instructions, with a debug-use of a virtual
+ // register that hasn't been defined yet. If we do not remove those here, then
+ // the re-insertion of the DBG_VALUE instruction after register allocation
+ // will be incorrect.
+ bool Discard = false;
+ for (const MachineOperand &Op : MI.debug_operands()) {
+ if (Op.isReg() && Op.getReg().isVirtual()) {
+ const Register Reg = Op.getReg();
+ if (!LIS->hasInterval(Reg)) {
+ // The DBG_VALUE is described by a virtual register that does not have a
+ // live interval. Discard the DBG_VALUE.
+ Discard = true;
+ LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx
+ << " " << MI);
+ } else {
+ // The DBG_VALUE is only valid if either Reg is live out from Idx, or
+ // Reg is defined dead at Idx (where Idx is the slot index for the
+ // instruction preceding the DBG_VALUE).
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ if (!LRQ.valueOutOrDead()) {
+ // We have found a DBG_VALUE with the value in a virtual register that
+ // is not live. Discard the DBG_VALUE.
+ Discard = true;
+ LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx
+ << " " << MI);
+ }
+ }
+ }
+ }
+
+ // Get or create the UserValue for (variable, fragment) here.
+ bool IsIndirect = MI.isDebugOffsetImm();
+ if (IsIndirect)
+ assert(MI.getDebugOffset().getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
+ bool IsList = MI.isDebugValueList();
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ UserValue *UV = getUserValue(Var, Expr->getFragmentInfo(), MI.getDebugLoc());
+ if (!Discard)
+ UV->addDef(Idx,
+ ArrayRef<MachineOperand>(MI.debug_operands().begin(),
+ MI.debug_operands().end()),
+ IsIndirect, IsList, *Expr);
+ else {
+ MachineOperand MO = MachineOperand::CreateReg(0U, false);
+ MO.setIsDebug();
+ // We should still pass a list the same size as MI.debug_operands() even if
+ // all MOs are undef, so that DbgVariableValue can correctly adjust the
+ // expression while removing the duplicated undefs.
+ SmallVector<MachineOperand, 4> UndefMOs(MI.getNumDebugOperands(), MO);
+ UV->addDef(Idx, UndefMOs, false, IsList, *Expr);
+ }
+ return true;
+}
+
+MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI,
+ SlotIndex Idx) {
+ assert(MI.isDebugValueLike() || MI.isDebugPHI());
+
+ // In instruction referencing mode, there should be no DBG_VALUE instructions
+ // that refer to virtual registers. They might still refer to constants.
+ if (MI.isDebugValueLike())
+ assert(none_of(MI.debug_operands(),
+ [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isVirtual();
+ }) &&
+ "MIs should not refer to Virtual Registers in InstrRef mode.");
+
+ // Unlink the instruction, store it in the debug instructions collection.
+ auto NextInst = std::next(MI.getIterator());
+ auto *MBB = MI.getParent();
+ MI.removeFromParent();
+ StashedDebugInstrs.push_back({&MI, Idx, MBB});
+ return NextInst;
+}
+
+bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
+ // DBG_LABEL label
+ if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) {
+ LLVM_DEBUG(dbgs() << "Can't handle " << MI);
+ return false;
+ }
+
+ // Get or create the UserLabel for label here.
+ const DILabel *Label = MI.getDebugLabel();
+ const DebugLoc &DL = MI.getDebugLoc();
+ bool Found = false;
+ for (auto const &L : userLabels) {
+ if (L->matches(Label, DL->getInlinedAt(), Idx)) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found)
+ userLabels.push_back(std::make_unique<UserLabel>(Label, DL, Idx));
+
+ return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf, bool InstrRef) {
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : mf) {
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
+ MBBI != MBBE;) {
+ // Use the first debug instruction in the sequence to get a SlotIndex
+ // for following consecutive debug instructions.
+ if (!MBBI->isDebugOrPseudoInstr()) {
+ ++MBBI;
+ continue;
+ }
+ // Debug instructions have no slot index. Use the previous
+ // non-debug instruction's SlotIndex as their SlotIndex.
+ SlotIndex Idx =
+ MBBI == MBB.begin()
+ ? LIS->getMBBStartIdx(&MBB)
+ : LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
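+ // E.g. DBG_VALUEs at the top of a block share the block's start index,
+ // while a DBG_VALUE following a normal instruction gets that
+ // instruction's register slot.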
+ // Handle consecutive debug instructions with the same slot index.
+ do {
+ // In instruction referencing mode, pass each instr to handleDebugInstr
+ // to be unlinked. Ignore DBG_VALUE_LISTs -- they refer to vregs, and
+ // need to go through the normal live interval splitting process.
+ if (InstrRef && (MBBI->isNonListDebugValue() || MBBI->isDebugPHI() ||
+ MBBI->isDebugRef())) {
+ MBBI = handleDebugInstr(*MBBI, Idx);
+ Changed = true;
+ // In normal debug mode, use the dedicated DBG_VALUE / DBG_LABEL handler
+ // to track things through register allocation, and erase the instr.
+ } else if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+ (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
+ MBBI = MBB.erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugOrPseudoInstr());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(
+ SlotIndex Idx, DbgVariableValue DbgValue,
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+ &LiveIntervalInfo,
+ std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+ LiveIntervals &LIS) {
+ SlotIndex Start = Idx;
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to the intersection of the VNIs' live ranges.
+ for (auto &LII : LiveIntervalInfo) {
+ LiveRange *LR = LII.second.first;
+ assert(LR && LII.second.second && "Missing range info for Idx.");
+ LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
+ assert(Segment && Segment->valno == LII.second.second &&
+ "Invalid VNInfo for Idx given?");
+ if (Segment->end < Stop) {
+ Stop = Segment->end;
+ Kills = {Stop, {LII.first}};
+ } else if (Segment->end == Stop && Kills) {
+ // If multiple locations end at the same place, track all of them in
+ // Kills.
+ Kills->second.push_back(LII.first);
+ }
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != DbgValue || I.stop() != Start) {
+ // Clear `Kills`, as we have a new def available.
+ Kills = std::nullopt;
+ return;
+ }
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop) {
+ Stop = I.start();
+ // Clear `Kills`, as we have a new def available.
+ Kills = std::nullopt;
+ }
+
+ if (Start < Stop) {
+ DbgVariableValue ExtDbgValue(DbgValue);
+ I.insert(Start, Stop, std::move(ExtDbgValue));
+ }
+}
+
+void UserValue::addDefsFromCopies(
+ DbgVariableValue DbgValue,
+ SmallVectorImpl<std::pair<unsigned, LiveInterval *>> &LocIntervals,
+ SlotIndex KilledAt,
+ SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ // Don't track copies from physregs, there are too many uses.
+ if (any_of(LocIntervals,
+ [](auto LocI) { return !LocI.second->reg().isVirtual(); }))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallDenseMap<unsigned,
+ SmallVector<std::pair<LiveInterval *, const VNInfo *>, 4>>
+ CopyValues;
+ for (auto &LocInterval : LocIntervals) {
+ unsigned LocNo = LocInterval.first;
+ LiveInterval *LI = LocInterval.second;
+ for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) {
+ MachineInstr *MI = MO.getParent();
+ // Copies of the full value.
+ if (MO.getSubReg() || !MI->isCopy())
+ continue;
+ Register DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+ // arguments, and the argument registers are always call clobbered. We are
+ // better off in the source register which could be a callee-saved
+ // register, or it could be spilled.
+ if (!DstReg.isVirtual())
+ continue;
+
+ // Is the value extended to reach this copy? If not, another def may be
+ // blocking it, or we are looking at the wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
+ if (!I.valid() || I.value() != DbgValue)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
+ CopyValues[LocNo].push_back(std::make_pair(DstLI, DstVNI));
+ }
+ }
+
+ if (CopyValues.empty())
+ return;
+
+#if !defined(NDEBUG)
+ for (auto &LocInterval : LocIntervals)
+ LLVM_DEBUG(dbgs() << "Got " << CopyValues[LocInterval.first].size()
+ << " copies of " << *LocInterval.second << '\n');
+#endif
+
+ // Try to add defs of the copied values for the kill point. Check that there
+ // isn't already a def at Idx.
+ LocMap::iterator I = locInts.find(KilledAt);
+ if (I.valid() && I.start() <= KilledAt)
+ return;
+ DbgVariableValue NewValue(DbgValue);
+ for (auto &LocInterval : LocIntervals) {
+ unsigned LocNo = LocInterval.first;
+ bool FoundCopy = false;
+ for (auto &LIAndVNI : CopyValues[LocNo]) {
+ LiveInterval *DstLI = LIAndVNI.first;
+ const VNInfo *DstVNI = LIAndVNI.second;
+ if (DstLI->getVNInfoAt(KilledAt) != DstVNI)
+ continue;
+ LLVM_DEBUG(dbgs() << "Kill at " << KilledAt << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
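+ // Redirect this def's location to the copy's destination register.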
+ unsigned NewLocNo = getLocationNo(CopyMI->getOperand(0));
+ NewValue = NewValue.changeLocNo(LocNo, NewLocNo);
+ FoundCopy = true;
+ break;
+ }
+ // If there are any killed locations we can't find a copy for, we can't
+ // extend the variable value.
+ if (!FoundCopy)
+ return;
+ }
+ I.insert(KilledAt, KilledAt.getNextSlot(), NewValue);
+ NewDefs.push_back(std::make_pair(KilledAt, NewValue));
+}
+
+void UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS, LexicalScopes &LS) {
+ SmallVector<std::pair<SlotIndex, DbgVariableValue>, 16> Defs;
+
+ // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (!I.value().isUndef())
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ DbgVariableValue DbgValue = Defs[i].second;
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>> LIs;
+ SmallVector<const VNInfo *, 4> VNIs;
+ bool ShouldExtendDef = false;
+ for (unsigned LocNo : DbgValue.loc_nos()) {
+ const MachineOperand &LocMO = locations[LocNo];
+ if (!LocMO.isReg() || !LocMO.getReg().isVirtual()) {
+ ShouldExtendDef |= !LocMO.isReg();
+ continue;
+ }
+ ShouldExtendDef = true;
+ LiveInterval *LI = nullptr;
+ const VNInfo *VNI = nullptr;
+ if (LIS.hasInterval(LocMO.getReg())) {
+ LI = &LIS.getInterval(LocMO.getReg());
+ VNI = LI->getVNInfoAt(Idx);
+ }
+ if (LI && VNI)
+ LIs[LocNo] = {LI, VNI};
+ }
+ if (ShouldExtendDef) {
+ std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills;
+ extendDef(Idx, DbgValue, LIs, Kills, LIS);
+
+ if (Kills) {
+ SmallVector<std::pair<unsigned, LiveInterval *>, 2> KilledLocIntervals;
+ bool AnySubreg = false;
+ for (unsigned LocNo : Kills->second) {
+ const MachineOperand &LocMO = this->locations[LocNo];
+ if (LocMO.getSubReg()) {
+ AnySubreg = true;
+ break;
+ }
+ LiveInterval *LI = &LIS.getInterval(LocMO.getReg());
+ KilledLocIntervals.push_back({LocNo, LI});
+ }
+
+ // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
+ // if the original location is, for example, %vreg0:sub_hi, and we find a
+ // full register copy in addDefsFromCopies (at the moment it only
+ // handles full register copies), then we must add the sub_hi sub-register
+ // index to the new location. However, that is only possible if the new
+ // virtual register is of the same regclass (or if there is an
+ // equivalent sub-register in that regclass). For now, simply skip
+ // handling copies if a sub-register is involved.
+ if (!AnySubreg)
+ addDefsFromCopies(DbgValue, KilledLocIntervals, Kills->first, Defs,
+ MRI, LIS);
+ }
+ }
+
+ // For physregs, we only mark the start slot idx. DwarfDebug will see it
+ // as if the DBG_VALUE is valid up until the end of the basic block, or
+ // the next def of the physical register. So we do not need to extend the
+ // range. It might actually happen that the DBG_VALUE is the last use of
+ // the physical register (e.g. if this is an unused input argument to a
+ // function).
+ }
+
+ // The computed intervals may extend beyond the range of the debug
+ // location's lexical scope. In this case, splitting of an interval
+ // can result in an interval outside of the scope being created,
+ // causing extra unnecessary DBG_VALUEs to be emitted. To prevent
+ // this, trim the intervals to the lexical scope in the case of inlined
+ // variables, since heavy inlining may otherwise produce a dramatically
+ // large number of DBG_VALUEs.
+ if (!dl.getInlinedAt())
+ return;
+
+ LexicalScope *Scope = LS.findLexicalScope(dl);
+ if (!Scope)
+ return;
+
+ SlotIndex PrevEnd;
+ LocMap::iterator I = locInts.begin();
+
+ // Iterate over the lexical scope ranges. Each time round the loop
+ // we check the intervals for overlap with the end of the previous
+ // range and the start of the next. The first range is handled as
+ // a special case where there is no PrevEnd.
+ for (const InsnRange &Range : Scope->getRanges()) {
+ SlotIndex RStart = LIS.getInstructionIndex(*Range.first);
+ SlotIndex REnd = LIS.getInstructionIndex(*Range.second);
+
+ // Variable locations at the first instruction of a block should be
+ // based on the block's SlotIndex, not the first instruction's index.
+ if (Range.first == Range.first->getParent()->begin())
+ RStart = LIS.getSlotIndexes()->getIndexBefore(*Range.first);
+
+ // At the start of each iteration I has been advanced so that
+ // I.stop() >= PrevEnd. Check for overlap.
+ if (PrevEnd && I.start() < PrevEnd) {
+ SlotIndex IStop = I.stop();
+ DbgVariableValue DbgValue = I.value();
+
+ // Stop overlaps previous end - trim the end of the interval to
+ // the scope range.
+ I.setStopUnchecked(PrevEnd);
+ ++I;
+
+ // If the interval also overlaps the start of the "next" (i.e.
+ // current) range create a new interval for the remainder (which
+ // may be further trimmed).
+ if (RStart < IStop)
+ I.insert(RStart, IStop, DbgValue);
+ }
+
+ // Advance I so that I.stop() >= RStart, and check for overlap.
+ I.advanceTo(RStart);
+ if (!I.valid())
+ return;
+
+ if (I.start() < RStart) {
+ // Interval start overlaps range - trim to the scope range.
+ I.setStartUnchecked(RStart);
+ // Remember that this interval was trimmed.
+ trimmedDefs.insert(RStart);
+ }
+
+ // The end of a lexical scope range is the last instruction in the
+ // range. To convert to an interval we need the index of the
+ // instruction after it.
+ REnd = REnd.getNextIndex();
+
+ // Advance I to first interval outside current range.
+ I.advanceTo(REnd);
+ if (!I.valid())
+ return;
+
+ PrevEnd = REnd;
+ }
+
+ // Check for overlap with end of final range.
+ if (PrevEnd && I.start() < PrevEnd)
+ I.setStopUnchecked(PrevEnd);
+}
+
+void LDVImpl::computeIntervals() {
+ LexicalScopes LS;
+ LS.initialize(*MF);
+
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {
+ clear();
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ TRI = mf.getSubtarget().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << mf.getName() << " **********\n");
+
+ bool Changed = collectDebugValues(mf, InstrRef);
+ computeIntervals();
+ LLVM_DEBUG(print(dbgs()));
+
+ // Collect the set of VReg / SlotIndex pairs where PHIs occur; index the
+ // sensitive VRegs too, for when we're notified of a range split.
+ SlotIndexes *Slots = LIS->getSlotIndexes();
+ for (const auto &PHIIt : MF->DebugPHIPositions) {
+ const MachineFunction::DebugPHIRegallocPos &Position = PHIIt.second;
+ MachineBasicBlock *MBB = Position.MBB;
+ Register Reg = Position.Reg;
+ unsigned SubReg = Position.SubReg;
+ SlotIndex SI = Slots->getMBBStartIdx(MBB);
+ PHIValPos VP = {SI, Reg, SubReg};
+ PHIValToPos.insert(std::make_pair(PHIIt.first, VP));
+ RegToPHIIdx[Reg].push_back(PHIIt.first);
+ }
+
+ ModifiedMF = Changed;
+ return Changed;
+}
+
+static void removeDebugInstrs(MachineFunction &mf) {
+ for (MachineBasicBlock &MBB : mf) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ if (MI.isDebugInstr())
+ MBB.erase(&MI);
+ }
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!mf.getFunction().getSubprogram()) {
+ removeDebugInstrs(mf);
+ return false;
+ }
+
+ // Have we been asked to track variable locations using instruction
+ // referencing?
+ bool InstrRef = mf.useDebugInstrRef();
+
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl *>(pImpl)->runOnMachineFunction(mf, InstrRef);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
+ LiveIntervals& LIS) {
+ LLVM_DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), nullptr);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (Register NewReg : NewRegs) {
+ LiveInterval *LI = &LIS.getInterval(NewReg);
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = UndefLocNo;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value().containsLocNo(OldLocNo) &&
+ LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == UndefLocNo) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg(), false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+ DbgVariableValue OldDbgValue = LocMapI.value();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(OldDbgValue.changeLocNo(OldLocNo, NewLocNo));
+
+ // Re-insert any removed OldDbgValue ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldDbgValue);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldDbgValue);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove OldLocNo unless it is still used by some interval in the
+ // locInts map. One case where OldLocNo is still in use is when the register
+ // has been spilled. In such situations, the spilled register is kept as a
+ // location until rewriteLocations is called (VirtRegMap is mapping the old
+ // register to the spill slot). So for a while we can have locations that map
+ // to virtual registers that have been removed from both the MachineFunction
+ // and from LiveIntervals.
+ //
+ // We may also just be using the location for a value with a different
+ // expression.
+ removeLocationIfUnused(OldLocNo);
+
+ LLVM_DEBUG({
+ dbgs() << "Split result: \t";
+ print(dbgs(), nullptr);
+ });
+ return DidChange;
+}
+
+bool
+UserValue::splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
+ LiveIntervals &LIS) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+ // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs, LIS);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitPHIRegister(Register OldReg, ArrayRef<Register> NewRegs) {
+ auto RegIt = RegToPHIIdx.find(OldReg);
+ if (RegIt == RegToPHIIdx.end())
+ return;
+
+ std::vector<std::pair<Register, unsigned>> NewRegIdxes;
+ // Iterate over all the debug instruction numbers affected by this split.
+ for (unsigned InstrID : RegIt->second) {
+ auto PHIIt = PHIValToPos.find(InstrID);
+ assert(PHIIt != PHIValToPos.end());
+ const SlotIndex &Slot = PHIIt->second.SI;
+ assert(OldReg == PHIIt->second.Reg);
+
+ // Find the new register that covers this position.
+ for (auto NewReg : NewRegs) {
+ const LiveInterval &LI = LIS->getInterval(NewReg);
+ auto LII = LI.find(Slot);
+ if (LII != LI.end() && LII->start <= Slot) {
+ // This new register covers this PHI position, record this for indexing.
+ NewRegIdxes.push_back(std::make_pair(NewReg, InstrID));
+ // Record that this value lives in a different VReg now.
+ PHIIt->second.Reg = NewReg;
+ break;
+ }
+ }
+
+ // If we do not find a new register covering this PHI, then register
+ // allocation has dropped its location, for example because it's not live.
+ // The old VReg will not be mapped to a physreg, and the instruction
+ // number will have been optimized out.
+ }
+
+ // Re-create register index using the new register numbers.
+ RegToPHIIdx.erase(RegIt);
+ for (auto &RegAndInstr : NewRegIdxes)
+ RegToPHIIdx[RegAndInstr.first].push_back(RegAndInstr.second);
+}
+
+void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) {
+ // Consider whether this split range affects any PHI locations.
+ splitPHIRegister(OldReg, NewRegs);
+
+ // Check whether any intervals mapped by a DBG_VALUE were split and need
+ // updating.
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (Register NewReg : NewRegs)
+ mapVirtReg(NewReg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(Register OldReg, ArrayRef<Register> NewRegs, LiveIntervals &LIS) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets) {
+ // Build a set of new locations with new numbers so we can coalesce our
+ // IntervalMap if two vreg intervals collapse to the same physical location.
+ // Use MapVector instead of SetVector because MapVector::insert returns the
+ // position of the previously or newly inserted element. The boolean value
+ // tracks if the location was produced by a spill.
+ // FIXME: This will be problematic if we ever support direct and indirect
+ // frame index locations, i.e. expressing both variables in memory and
+ // 'int x, *px = &x'. The "spilled" bit must become part of the location.
+ MapVector<MachineOperand, std::pair<bool, unsigned>> NewLocations;
+ SmallVector<unsigned, 4> LocNoMap(locations.size());
+ for (unsigned I = 0, E = locations.size(); I != E; ++I) {
+ bool Spilled = false;
+ unsigned SpillOffset = 0;
+ MachineOperand Loc = locations[I];
+ // Only virtual registers are rewritten.
+ if (Loc.isReg() && Loc.getReg() && Loc.getReg().isVirtual()) {
+ Register VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ Register::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+ // Retrieve the stack slot offset.
+ unsigned SpillSize;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *TRC = MRI.getRegClass(VirtReg);
+ bool Success = TII.getStackSlotRange(TRC, Loc.getSubReg(), SpillSize,
+ SpillOffset, MF);
+
+ // FIXME: Invalidate the location if the offset couldn't be calculated.
+ (void)Success;
+
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ Spilled = true;
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ }
+
+ // Insert this location if it doesn't already exist and record a mapping
+ // from the old number to the new number.
+ auto InsertResult = NewLocations.insert({Loc, {Spilled, SpillOffset}});
+ unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first);
+ LocNoMap[I] = NewLocNo;
+ }
+
+ // Rewrite the locations and record the stack slot offsets for spills.
+ locations.clear();
+ SpillOffsets.clear();
+ for (auto &Pair : NewLocations) {
+ bool Spilled;
+ unsigned SpillOffset;
+ std::tie(Spilled, SpillOffset) = Pair.second;
+ locations.push_back(Pair.first);
+ if (Spilled) {
+ unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair);
+ SpillOffsets[NewLocNo] = SpillOffset;
+ }
+ }
+
+ // Update the interval map, but only coalesce left, since intervals to the
+ // right use the old location numbers. This should merge two contiguous
+ // DBG_VALUE intervals with different vregs that were allocated to the same
+ // physical register.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ I.setValueUnchecked(I.value().remapLocNos(LocNoMap));
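+ // Re-setting the start to its current value prompts the IntervalMap to
+ // coalesce this interval with the already-remapped interval on its left.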
+ I.setStart(I.start());
+ }
+}
+
+/// Find an iterator for inserting a DBG_VALUE instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, LiveIntervals &LIS,
+ BlockSkipInstsMap &BBSkipInstsMap) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ // Retrieve the last PHI/Label/Debug location found when calling
+ // SkipPHIsLabelsAndDebug last time. Start searching from there.
+ //
+ // Note that the iterator kept in BBSkipInstsMap is one step back from
+ // the iterator returned by SkipPHIsLabelsAndDebug last time.
+ // One exception: when SkipPHIsLabelsAndDebug returns MBB->begin(),
+ // BBSkipInstsMap won't save it. This handles the case where new
+ // instructions are inserted at the beginning of MBB after the
+ // last call of SkipPHIsLabelsAndDebug. If we saved MBB->begin() in
+ // BBSkipInstsMap, then after new non-phi/non-label/non-debug instructions
+ // are inserted at the beginning of the MBB, the iterator in
+ // BBSkipInstsMap would no longer point to the beginning of the MBB.
+ // The next search in SkipPHIsLabelsAndDebug would then skip those
+ // newly added instructions, which is unwanted.
+ MachineBasicBlock::iterator BeginIt;
+ auto MapIt = BBSkipInstsMap.find(MBB);
+ if (MapIt == BBSkipInstsMap.end())
+ BeginIt = MBB->begin();
+ else
+ BeginIt = std::next(MapIt->second);
+ auto I = MBB->SkipPHIsLabelsAndDebug(BeginIt);
+ if (I != BeginIt)
+ BBSkipInstsMap[MBB] = std::prev(I);
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ std::next(MachineBasicBlock::iterator(MI));
+}
+
+/// Find an iterator for inserting the next DBG_VALUE instruction
+/// (or end if no more insert locations found).
+static MachineBasicBlock::iterator
+findNextInsertLocation(MachineBasicBlock *MBB, MachineBasicBlock::iterator I,
+ SlotIndex StopIdx, ArrayRef<MachineOperand> LocMOs,
+ LiveIntervals &LIS, const TargetRegisterInfo &TRI) {
+ SmallVector<Register, 4> Regs;
+ for (const MachineOperand &LocMO : LocMOs)
+ if (LocMO.isReg())
+ Regs.push_back(LocMO.getReg());
+ if (Regs.empty())
+ return MBB->instr_end();
+
+ // Find the next instruction in the MBB that defines one of the registers
+ // in Regs.
+ while (I != MBB->end() && !I->isTerminator()) {
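+ // Stop at StopIdx: past it the debug value no longer applies, so no
+ // further DBG_VALUE is inserted.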
+ if (!LIS.isNotInMIMap(*I) &&
+ SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I)))
+ break;
+ if (any_of(Regs, [&I, &TRI](Register &Reg) {
+ return I->definesRegister(Reg, &TRI);
+ }))
+ // The insert location is directly after the instruction/bundle.
+ return std::next(I);
+ ++I;
+ }
+ return MBB->end();
+}
+
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
+ SlotIndex StopIdx, DbgVariableValue DbgValue,
+ ArrayRef<bool> LocSpills,
+ ArrayRef<unsigned> SpillOffsets,
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ BlockSkipInstsMap &BBSkipInstsMap) {
+ SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
+ // Only search within the current MBB.
+ StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx;
+ MachineBasicBlock::iterator I =
+ findInsertLocation(MBB, StartIdx, LIS, BBSkipInstsMap);
+ // Undef values don't exist in locations, so create new "noreg" register MOs
+ // for them. See getLocationNo().
+ SmallVector<MachineOperand, 8> MOs;
+ if (DbgValue.isUndef()) {
+ MOs.assign(DbgValue.loc_nos().size(),
+ MachineOperand::CreateReg(
+ /* Reg */ 0, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true));
+ } else {
+ for (unsigned LocNo : DbgValue.loc_nos())
+ MOs.push_back(locations[LocNo]);
+ }
+
+ ++NumInsertedDebugValues;
+
+ assert(cast<DILocalVariable>(Variable)
+ ->isValidLocationForIntrinsic(getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ // If the location was spilled, the new DBG_VALUE will be indirect. If the
+ // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
+ // that the original virtual register was a pointer. Also, add the stack slot
+ // offset for the spilled register to the expression.
+ const DIExpression *Expr = DbgValue.getExpression();
+ bool IsIndirect = DbgValue.getWasIndirect();
+ bool IsList = DbgValue.getWasList();
+ for (unsigned I = 0, E = LocSpills.size(); I != E; ++I) {
+ if (LocSpills[I]) {
+ if (!IsList) {
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
+ if (IsIndirect)
+ DIExprFlags |= DIExpression::DerefAfter;
+ Expr = DIExpression::prepend(Expr, DIExprFlags, SpillOffsets[I]);
+ IsIndirect = true;
+ } else {
+ SmallVector<uint64_t, 4> Ops;
+ DIExpression::appendOffset(Ops, SpillOffsets[I]);
+ Ops.push_back(dwarf::DW_OP_deref);
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, I);
+ }
+ }
+
+ assert((!LocSpills[I] || MOs[I].isFI()) &&
+ "a spilled location must be a frame index");
+ }
+
+ unsigned DbgValueOpcode =
+ IsList ? TargetOpcode::DBG_VALUE_LIST : TargetOpcode::DBG_VALUE;
+ do {
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(DbgValueOpcode), IsIndirect, MOs,
+ Variable, Expr);
+
+ // Continue and insert DBG_VALUEs after every redefinition of a register
+ // associated with the debug value within the range.
+ I = findNextInsertLocation(MBB, I, StopIdx, MOs, LIS, TRI);
+ } while (I != MBB->end());
+}
+
+void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap) {
+ MachineBasicBlock::iterator I =
+ findInsertLocation(MBB, Idx, LIS, BBSkipInstsMap);
+ ++NumInsertedDebugLabels;
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL))
+ .addMetadata(Label);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const SpillOffsetMap &SpillOffsets,
+ BlockSkipInstsMap &BBSkipInstsMap) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ DbgVariableValue DbgValue = I.value();
+
+ SmallVector<bool> SpilledLocs;
+ SmallVector<unsigned> LocSpillOffsets;
+ for (unsigned LocNo : DbgValue.loc_nos()) {
+ auto SpillIt =
+ !DbgValue.isUndef() ? SpillOffsets.find(LocNo) : SpillOffsets.end();
+ bool Spilled = SpillIt != SpillOffsets.end();
+ SpilledLocs.push_back(Spilled);
+ LocSpillOffsets.push_back(Spilled ? SpillIt->second : 0);
+ }
+
+ // If the interval start was trimmed to the lexical scope, insert the
+ // DBG_VALUE at the previous index (otherwise it would appear after the
+ // first instruction in the range).
+ if (trimmedDefs.count(Start))
+ Start = Start.getPrevIndex();
+
+ LLVM_DEBUG(auto &dbg = dbgs(); dbg << "\t[" << Start << ';' << Stop << "):";
+ DbgValue.printLocNos(dbg));
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
+
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+ insertDebugValue(&*MBB, Start, Stop, DbgValue, SpilledLocs, LocSpillOffsets,
+ LIS, TII, TRI, BBSkipInstsMap);
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+ while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(&*MBB);
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+ insertDebugValue(&*MBB, Start, Stop, DbgValue, SpilledLocs,
+ LocSpillOffsets, LIS, TII, TRI, BBSkipInstsMap);
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
+
+void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap) {
+ LLVM_DEBUG(dbgs() << "\t" << loc);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator();
+
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB));
+ insertDebugLabel(&*MBB, loc, LIS, TII, BBSkipInstsMap);
+
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ if (!MF)
+ return;
+
+ BlockSkipInstsMap BBSkipInstsMap;
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ SpillOffsetMap SpillOffsets;
+ for (auto &userValue : userValues) {
+ LLVM_DEBUG(userValue->print(dbgs(), TRI));
+ userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+ userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets,
+ BBSkipInstsMap);
+ }
+ LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n");
+ for (auto &userLabel : userLabels) {
+ LLVM_DEBUG(userLabel->print(dbgs(), TRI));
+ userLabel->emitDebugLabel(*LIS, *TII, BBSkipInstsMap);
+ }
+
+ LLVM_DEBUG(dbgs() << "********** EMITTING DEBUG PHIS **********\n");
+
+ auto Slots = LIS->getSlotIndexes();
+ for (auto &It : PHIValToPos) {
+ // For each ex-PHI, identify its physreg location or stack slot, and emit
+ // a DBG_PHI for it.
+ unsigned InstNum = It.first;
+ auto Slot = It.second.SI;
+ Register Reg = It.second.Reg;
+ unsigned SubReg = It.second.SubReg;
+
+ MachineBasicBlock *OrigMBB = Slots->getMBBFromIndex(Slot);
+ if (VRM->isAssignedReg(Reg) &&
+ Register::isPhysicalRegister(VRM->getPhys(Reg))) {
+ unsigned PhysReg = VRM->getPhys(Reg);
+ if (SubReg != 0)
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
+
+ auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
+ TII->get(TargetOpcode::DBG_PHI));
+ Builder.addReg(PhysReg);
+ Builder.addImm(InstNum);
+ } else if (VRM->getStackSlot(Reg) != VirtRegMap::NO_STACK_SLOT) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ unsigned SpillSize, SpillOffset;
+
+ unsigned regSizeInBits = TRI->getRegSizeInBits(*TRC);
+ if (SubReg)
+ regSizeInBits = TRI->getSubRegIdxSize(SubReg);
+
+ // Test whether this location is legal with the given subreg. If the
+ // subregister has a nonzero offset, drop this location, it's too complex
+ // to describe. (TODO: future work).
+ bool Success =
+ TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset, *MF);
+
+ if (Success && SpillOffset == 0) {
+ auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
+ TII->get(TargetOpcode::DBG_PHI));
+ Builder.addFrameIndex(VRM->getStackSlot(Reg));
+ Builder.addImm(InstNum);
+ // Record how large the original value is. The stack slot might be
+ // merged and altered during optimization, but we will want to know how
+ // large the value is at this DBG_PHI.
+ Builder.addImm(regSizeInBits);
+ }
+
+ LLVM_DEBUG(
+ if (SpillOffset != 0) {
+ dbgs() << "DBG_PHI for Vreg " << Reg << " subreg " << SubReg <<
+ " has nonzero offset\n";
+ }
+ );
+ }
+ // If there was no mapping for a value ID, it has been optimized out. Create
+ // no DBG_PHI, and any variables using this value will be reported as
+ // optimized out.
+ }
+ MF->DebugPHIPositions.clear();
+
+ LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
+
+ // Re-insert any debug instrs back in the positions they were in. We must
+ // re-insert in the same order to ensure that debug instructions don't swap,
+ // which could re-order assignments. Do so in a batch -- once we find the
+ // insert position, insert all instructions at the same SlotIdx. They are
+ // guaranteed to appear in-sequence in StashedDebugInstrs because we insert
+ // them in order.
+ for (auto *StashIt = StashedDebugInstrs.begin();
+ StashIt != StashedDebugInstrs.end(); ++StashIt) {
+ SlotIndex Idx = StashIt->Idx;
+ MachineBasicBlock *MBB = StashIt->MBB;
+ MachineInstr *MI = StashIt->MI;
+
+ auto EmitInstsHere = [this, &StashIt, MBB, Idx,
+ MI](MachineBasicBlock::iterator InsertPos) {
+ // Insert this debug instruction.
+ MBB->insert(InsertPos, MI);
+
+ // Look at subsequent stashed debug instructions: if they're at the same
+ // index, insert those too.
+ auto NextItem = std::next(StashIt);
+ while (NextItem != StashedDebugInstrs.end() && NextItem->Idx == Idx) {
+ assert(NextItem->MBB == MBB && "Instrs with same slot index should be "
+ "in the same block");
+ MBB->insert(InsertPos, NextItem->MI);
+ StashIt = NextItem;
+ NextItem = std::next(StashIt);
+ }
+ };
+
+ // Start block index: find the first non-debug instr in the block, and
+ // insert before it.
+ if (Idx == Slots->getMBBStartIdx(MBB)) {
+ MachineBasicBlock::iterator InsertPos =
+ findInsertLocation(MBB, Idx, *LIS, BBSkipInstsMap);
+ EmitInstsHere(InsertPos);
+ continue;
+ }
+
+ if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) {
+ // Insert at the end of any debug instructions.
+ auto PostDebug = std::next(Pos->getIterator());
+ PostDebug = skipDebugInstructionsForward(PostDebug, MBB->instr_end());
+ EmitInstsHere(PostDebug);
+ } else {
+ // Insert position disappeared; walk forwards through slots until we
+ // find a new one.
+ SlotIndex End = Slots->getMBBEndIdx(MBB);
+ for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) {
+ Pos = Slots->getInstructionFromIndex(Idx);
+ if (Pos) {
+ EmitInstsHere(Pos->getIterator());
+ break;
+ }
+ }
+
+ // We have reached the end of the block and didn't find anywhere to
+ // insert! It's not safe to discard any debug instructions; place them
+ // in front of the first terminator, or in front of end().
+ if (Idx >= End) {
+ auto TermIt = MBB->getFirstTerminator();
+ EmitInstsHere(TermIt);
+ }
+ }
+ }
+
+ EmitDone = true;
+ BBSkipInstsMap.clear();
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..9998ce9e8dad
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,68 @@
+//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+template <typename T> class ArrayRef;
+class LiveIntervals;
+class VirtRegMap;
+
+class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables() override;
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
+ LiveIntervals &LIS);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump() const;
+
+private:
+ bool runOnMachineFunction(MachineFunction &) override;
+ void releaseMemory() override;
+ void getAnalysisUsage(AnalysisUsage &) const override;
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 000000000000..1cf354349c56
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,1409 @@
+//===- LiveInterval.cpp - Live Interval Representation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+ // numbering of each of the machine instructions, an interval [i, j) is said
+ // to be a
+// live range for register v if there is no instruction with number j' >= j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation ranges can have holes,
+// i.e. a range might look like [1,20), [50,65), [1000,1001). Each
+// individual segment is represented as an instance of LiveRange::Segment,
+// and the whole range is represented as an instance of LiveRange.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "LiveRangeUtils.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Implementation of various methods necessary for calculation of live ranges.
+// The implementation of the methods abstracts from the concrete type of the
+// segment collection.
+//
+// Implementation of the class follows the Template design pattern. The base
+// class contains generic algorithms that call collection-specific methods,
+// which are provided in concrete subclasses. In order to avoid virtual calls
+// these methods are provided by means of C++ template instantiation.
+// The base class calls the methods of the subclass through method impl(),
+// which casts 'this' pointer to the type of the subclass.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename ImplT, typename IteratorT, typename CollectionT>
+class CalcLiveRangeUtilBase {
+protected:
+ LiveRange *LR;
+
+protected:
+ CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {}
+
+public:
+ using Segment = LiveRange::Segment;
+ using iterator = IteratorT;
+
+ /// A counterpart of LiveRange::createDeadDef: Make sure the range has a
+ /// value defined at @p Def.
+ /// If @p ForVNI is null, and there is no value defined at @p Def, a new
+ /// value will be allocated using @p VNInfoAllocator.
+ /// If @p ForVNI is null, the return value is the value defined at @p Def,
+ /// either a pre-existing one, or the one newly created.
+ /// If @p ForVNI is not null, then @p Def should be the location where
+ /// @p ForVNI is defined. If the range does not have a value defined at
+ /// @p Def, the value @p ForVNI will be used instead of allocating a new
+ /// one. If the range already has a value defined at @p Def, it must be
+ /// same as @p ForVNI. In either case, @p ForVNI will be the return value.
+ VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator *VNInfoAllocator,
+ VNInfo *ForVNI) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+ assert((!ForVNI || ForVNI->def == Def) &&
+ "If ForVNI is specified, it must match Def");
+ iterator I = impl().find(Def);
+ if (I == segments().end()) {
+ VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator);
+ impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ Segment *S = segmentAt(I);
+ if (SlotIndex::isSameInstr(Def, S->start)) {
+ assert((!ForVNI || ForVNI == S->valno) && "Value number mismatch");
+ assert(S->valno->def == S->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, S->start);
+ if (Def != S->start)
+ S->start = S->valno->def = Def;
+ return S->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def");
+ VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator);
+ segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
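+ /// If the range has a segment that starts before @p Use and extends past
+ /// @p StartIdx, extend it to reach @p Use and return its value number;
+ /// otherwise return nullptr.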
+ VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return nullptr;
+ iterator I =
+ impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
+ if (I == segments().begin())
+ return nullptr;
+ --I;
+ if (I->end <= StartIdx)
+ return nullptr;
+ if (I->end < Use)
+ extendSegmentEndTo(I, Use);
+ return I->valno;
+ }
+
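+ /// As above, but additionally report whether an undef point from @p Undefs
+ /// lies in the gap being extended over; if so, the segment is not extended.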
+ std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return std::make_pair(nullptr, false);
+ SlotIndex BeforeUse = Use.getPrevSlot();
+ iterator I = impl().findInsertPos(Segment(BeforeUse, Use, nullptr));
+ if (I == segments().begin())
+ return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse));
+ --I;
+ if (I->end <= StartIdx)
+ return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse));
+ if (I->end < Use) {
+ if (LR->isUndefIn(Undefs, I->end, BeforeUse))
+ return std::make_pair(nullptr, true);
+ extendSegmentEndTo(I, Use);
+ }
+ return std::make_pair(I->valno, false);
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to end at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap
+ /// with. The iterator is not invalidated.
+ void extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = std::next(I);
+ for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo)
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+
+ // If NewEnd was in the middle of a segment, make sure to get its endpoint.
+ S->end = std::max(NewEnd, std::prev(MergeTo)->end);
+
+ // If the newly formed segment now touches the segment after it and if they
+ // have the same value number, merge the two segments into one segment.
+ if (MergeTo != segments().end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ S->end = MergeTo->end;
+ ++MergeTo;
+ }
+
+ // Erase any dead segments.
+ segments().erase(std::next(I), MergeTo);
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to start at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap with.
+ iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = I;
+ do {
+ if (MergeTo == segments().begin()) {
+ S->start = NewStart;
+ segments().erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another segment, just delete a range and
+ // extend that segment.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ segmentAt(MergeTo)->end = S->end;
+ } else {
+ // Otherwise, extend the segment right after.
+ ++MergeTo;
+ Segment *MergeToSeg = segmentAt(MergeTo);
+ MergeToSeg->start = NewStart;
+ MergeToSeg->end = S->end;
+ }
+
+ segments().erase(std::next(MergeTo), std::next(I));
+ return MergeTo;
+ }
+
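+ /// Insert the segment S into this range, coalescing it with neighboring or
+ /// overlapping segments that carry the same value number. Illustrative
+ /// example (hypothetical segments): adding [10,14:0) to [4,10:0)[16,20:1)
+ /// extends the first segment and yields [4,14:0)[16,20:1).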
+ iterator addSegment(Segment S) {
+ SlotIndex Start = S.start, End = S.end;
+ iterator I = impl().findInsertPos(S);
+
+ // If the inserted segment starts in the middle of, or right at the end of,
+ // another segment, just extend that segment to cover S as well.
+ if (I != segments().begin()) {
+ iterator B = std::prev(I);
+ if (S.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendSegmentEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two segments with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this segment ends in the middle of, or right next
+ // to, another segment, merge it into that segment.
+ if (I != segments().end()) {
+ if (S.valno == I->valno) {
+ if (I->start <= End) {
+ I = extendSegmentStartTo(I, Start);
+
+ // If S is a complete superset of a segment, we may need to grow its
+ // endpoint as well.
+ if (End > I->end)
+ extendSegmentEndTo(I, End);
+ return I;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(I->start >= End &&
+ "Cannot overlap two segments with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new segment that doesn't interact with
+ // anything. Insert it.
+ return segments().insert(I, S);
+ }
+
+private:
+ ImplT &impl() { return *static_cast<ImplT *>(this); }
+
+ CollectionT &segments() { return impl().segmentsColl(); }
+
+ Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment vector.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilVector;
+using CalcLiveRangeUtilVectorBase =
+ CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
+ LiveRange::Segments>;
+
+class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase {
+public:
+ CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilVectorBase;
+
+ LiveRange::Segments &segmentsColl() { return LR->segments; }
+
+ void insertAtEnd(const Segment &S) { LR->segments.push_back(S); }
+
+ iterator find(SlotIndex Pos) { return LR->find(Pos); }
+
+ iterator findInsertPos(Segment S) { return llvm::upper_bound(*LR, S.start); }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment set.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilSet;
+using CalcLiveRangeUtilSetBase =
+ CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, LiveRange::SegmentSet::iterator,
+ LiveRange::SegmentSet>;
+
+class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase {
+public:
+ CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilSetBase;
+
+ LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; }
+
+ void insertAtEnd(const Segment &S) {
+ LR->segmentSet->insert(LR->segmentSet->end(), S);
+ }
+
+ iterator find(SlotIndex Pos) {
+ iterator I =
+ LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr));
+ if (I == LR->segmentSet->begin())
+ return I;
+ iterator PrevI = std::prev(I);
+ if (Pos < (*PrevI).end)
+ return PrevI;
+ return I;
+ }
+
+ iterator findInsertPos(Segment S) {
+ iterator I = LR->segmentSet->upper_bound(S);
+ if (I != LR->segmentSet->end() && !(S.start < *I))
+ ++I;
+ return I;
+ }
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// LiveRange methods
+//===----------------------------------------------------------------------===//
+
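+// find - Return an iterator to the first segment whose end is after Pos: the
+// segment containing Pos if one exists, otherwise the first segment starting
+// after Pos (or end() if there is none).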
+LiveRange::iterator LiveRange::find(SlotIndex Pos) {
+ return llvm::partition_point(*this,
+ [&](const Segment &X) { return X.end <= Pos; });
+}
+
+VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(Def, &VNIAlloc, nullptr);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(Def, &VNIAlloc, nullptr);
+}
+
+VNInfo *LiveRange::createDeadDef(VNInfo *VNI) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(VNI->def, nullptr, VNI);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(VNI->def, nullptr, VNI);
+}
+
+// overlaps - Return true if the intersection of the two live ranges is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live ranges should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveRange::overlapsFrom(const LiveRange& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty range");
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
+ const SlotIndexes &Indexes) const {
+ assert(!empty() && "empty range");
+ if (Other.empty())
+ return false;
+
+ // Use binary searches to find initial positions.
+ const_iterator I = find(Other.beginIndex());
+ const_iterator IE = end();
+ if (I == IE)
+ return false;
+ const_iterator J = Other.find(I->start);
+ const_iterator JE = Other.end();
+ if (J == JE)
+ return false;
+
+ while (true) {
+ // J has just been advanced to satisfy:
+ assert(J->end > I->start);
+ // Check for an overlap.
+ if (J->start < I->end) {
+ // I and J are overlapping. Find the later start.
+ SlotIndex Def = std::max(I->start, J->start);
+ // Allow the overlap if Def is a coalescable copy.
+ if (Def.isBlock() ||
+ !CP.isCoalescable(Indexes.getInstructionFromIndex(Def)))
+ return true;
+ }
+ // Advance the iterator that ends first to check for more overlaps.
+ if (J->end > I->end) {
+ std::swap(I, J);
+ std::swap(IE, JE);
+ }
+ // Advance J until J->end > I->start.
+ do
+ if (++J == JE)
+ return false;
+ while (J->end <= I->start);
+ }
+}
+
+/// overlaps - Return true if the live range overlaps an interval specified
+/// by [Start, End).
+bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = lower_bound(*this, End);
+ return I != begin() && (--I)->end > Start;
+}
+
+bool LiveRange::covers(const LiveRange &Other) const {
+ if (empty())
+ return Other.empty();
+
+ const_iterator I = begin();
+ for (const Segment &O : Other.segments) {
+ I = advanceTo(I, O.start);
+ if (I == end() || I->start > O.start)
+ return false;
+
+ // Check adjacent live segments and see if we can get behind O.end.
+ while (I->end < O.end) {
+ const_iterator Last = I;
+ // Get next segment and abort if it was not adjacent.
+ ++I;
+ if (I == end() || Last->end != I->start)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// ValNo is dead; remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it); otherwise mark it as ~1U so
+/// it can be nuked later.
+void LiveRange::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->markUnused();
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveRange::RenumberValues() {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ valnos.clear();
+ for (const Segment &S : segments) {
+ VNInfo *VNI = S.valno;
+ if (!Seen.insert(VNI).second)
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live segment");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ }
+}
+
+void LiveRange::addSegmentToSet(Segment S) {
+ CalcLiveRangeUtilSet(this).addSegment(S);
+}
+
+LiveRange::iterator LiveRange::addSegment(Segment S) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr) {
+ addSegmentToSet(S);
+ return end();
+ }
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).addSegment(S);
+}
+
+void LiveRange::append(const Segment S) {
+ // Check that the segment belongs to the back of the list.
+ assert(segments.empty() || segments.back().end <= S.start);
+ segments.push_back(S);
+}
+
+std::pair<VNInfo*,bool> LiveRange::extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Kill) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(Undefs, StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(Undefs, StartIdx, Kill);
+}
+
+VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill);
+}
+
+/// Remove the specified span from this range. Note that the span must lie
+/// entirely within a single Segment.
+void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the Segment containing this span.
+ iterator I = find(Start);
+ assert(I != end() && "Segment is not in range!");
+ assert(I->containsInterval(Start, End)
+ && "Segment is not entirely in range!");
+
+ // If the span we are removing is at the start of the Segment, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ segments.erase(I); // Removed the whole Segment.
+
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the Segment,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the Segment into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old segment.
+
+ // Insert the new one.
+ segments.insert(std::next(I), Segment(End, OldEnd, ValNo));
+}
+
+LiveRange::iterator LiveRange::removeSegment(iterator I, bool RemoveDeadValNo) {
+ VNInfo *ValNo = I->valno;
+ I = segments.erase(I);
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
+ return I;
+}
+
+void LiveRange::removeValNoIfDead(VNInfo *ValNo) {
+ if (none_of(*this, [=](const Segment &S) { return S.valno == ValNo; }))
+ markValNoForDeletion(ValNo);
+}
+
+/// removeValNo - Remove all the segments defined by the specified value#.
+/// Also remove the value# from the value# list.
+void LiveRange::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ llvm::erase_if(segments,
+ [ValNo](const Segment &S) { return S.valno == ValNo; });
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+void LiveRange::join(LiveRange &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVectorImpl<VNInfo *> &NewVNInfo) {
+ verify();
+
+ // Determine if any of our values are mapped. This is uncommon, so we want
+ // to avoid the range scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
+ MustMapCurValNos = true;
+ break;
+ }
+ }
+
+ // If we have to apply a mapping to our base range assignment, rewrite it now.
+ if (MustMapCurValNos && !empty()) {
+ // Map the first live range.
+
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ for (iterator I = std::next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ assert(nextValNo && "Huh?");
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one Segment. This happens when we
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
+ } else {
+ // Didn't merge. Move OutIt to the next segment.
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+ }
+ }
+ // If we merge some segments, chop off the end.
+ ++OutIt;
+ segments.erase(OutIt, end());
+ }
+
+ // Rewrite Other values before changing the VNInfo ids.
+ // This can leave Other in an invalid state because we're not coalescing
+ // touching segments that now have identical values. That's OK since Other is
+// not supposed to be valid after calling join().
+ for (Segment &S : Other.segments)
+ S.valno = NewVNInfo[RHSValNoAssignments[S.valno->id]];
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveRange now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live segments into the LHS.
+ LiveRangeUpdater Updater(this);
+ for (Segment &S : Other.segments)
+ Updater.add(S);
+}
+
+/// Merge all of the segments in RHS into this live range as the specified
+/// value number. The segments in RHS are allowed to overlap with segments in
+/// the current range, but only if the overlapping segments have the
+/// specified value number.
+void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const Segment &S : RHS.segments)
+ Updater.add(S.start, S.end, LHSValNo);
+}
+
+/// MergeValueInAsValue - Merge all of the live segments of a specific val#
+/// in RHS into this live range as the specified value number.
+/// The segments in RHS are allowed to overlap with segments in the
+/// current range; the value numbers of the overlapped segments are
+/// replaced with the specified value number.
+void LiveRange::MergeValueInAsValue(const LiveRange &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const Segment &S : RHS.segments)
+ if (S.valno == RHSValNo)
+ Updater.add(S.start, S.end, LHSValNo);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. It eliminates V1, replacing all
+/// segments carrying the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 segments into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator S = I++;
+ if (S->valno != V1) continue; // Not a V1 Segment.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (S != begin()) {
+ iterator Prev = S-1;
+ if (Prev->valno == V2 && Prev->end == S->start) {
+ Prev->end = S->end;
+
+ // Erase this live-range.
+ segments.erase(S);
+ I = Prev+1;
+ S = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ S->valno = V2;
+
+ // If we can merge it into later V2 segments, do so now. We ignore any
+ // following V1 segments, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == S->end && I->valno == V2) {
+ S->end = I->end;
+ segments.erase(I);
+ I = S+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
+
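+// Move the segments accumulated in the temporary segment set into the segment
+// vector and drop the set; from this point on the range uses the vector
+// representation only.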
+void LiveRange::flushSegmentSet() {
+ assert(segmentSet != nullptr && "segment set must have been created");
+ assert(
+ segments.empty() &&
+ "segment set can be used only initially before switching to the array");
+ segments.append(segmentSet->begin(), segmentSet->end());
+ segmentSet = nullptr;
+ verify();
+}
+
+bool LiveRange::isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const {
+ ArrayRef<SlotIndex>::iterator SlotI = Slots.begin();
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // If there are no regmask slots, we have nothing to search.
+ if (SlotI == SlotE)
+ return false;
+
+ // Start our search at the first segment that ends after the first slot.
+ const_iterator SegmentI = find(*SlotI);
+ const_iterator SegmentE = end();
+
+ // If there are no segments that end after the first slot, we're done.
+ if (SegmentI == SegmentE)
+ return false;
+
+ // Look for each slot in the live range.
+ for ( ; SlotI != SlotE; ++SlotI) {
+ // Go to the next segment that ends after the current slot.
+ // The slot may be within a hole in the range.
+ SegmentI = advanceTo(SegmentI, *SlotI);
+ if (SegmentI == SegmentE)
+ return false;
+
+ // If this segment contains the slot, we're done.
+ if (SegmentI->contains(*SlotI))
+ return true;
+ // Otherwise, look for the next slot.
+ }
+
+ // We didn't find a segment containing any of the slots.
+ return false;
+}
+
+void LiveInterval::freeSubRange(SubRange *S) {
+ S->~SubRange();
+ // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
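+// Walk the subrange list, unlinking and freeing every subrange that has
+// become empty.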
+void LiveInterval::removeEmptySubRanges() {
+ SubRange **NextPtr = &SubRanges;
+ SubRange *I = *NextPtr;
+ while (I != nullptr) {
+ if (!I->empty()) {
+ NextPtr = &I->Next;
+ I = *NextPtr;
+ continue;
+ }
+ // Skip empty subranges until we find the first nonempty one.
+ do {
+ SubRange *Next = I->Next;
+ freeSubRange(I);
+ I = Next;
+ } while (I != nullptr && I->empty());
+ *NextPtr = I;
+ }
+}
+
+void LiveInterval::clearSubRanges() {
+ for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+ Next = I->Next;
+ freeSubRange(I);
+ }
+ SubRanges = nullptr;
+}
+
+/// For each VNI in \p SR, check whether or not that value defines part
+/// of the mask described by \p LaneMask and, if not, remove that value
+/// from \p SR.
+static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
+ LaneBitmask LaneMask,
+ const SlotIndexes &Indexes,
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
+ // Phys reg should not be tracked at subreg level.
+ // Same for noreg (Reg == 0).
+ if (!Register::isVirtualRegister(Reg) || !Reg)
+ return;
+ // Remove the values that don't define those lanes.
+ SmallVector<VNInfo *, 8> ToBeRemoved;
+ for (VNInfo *VNI : SR.valnos) {
+ if (VNI->isUnused())
+ continue;
+ // PHI definitions don't have MI attached, so there is nothing
+ // we can use to strip the VNI.
+ if (VNI->isPHIDef())
+ continue;
+ const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def);
+ assert(MI && "Cannot find the definition of a value");
+ bool hasDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (MOI->getReg() != Reg)
+ continue;
+ LaneBitmask OrigMask = TRI.getSubRegIndexLaneMask(MOI->getSubReg());
+ LaneBitmask ExpectedDefMask =
+ ComposeSubRegIdx
+ ? TRI.composeSubRegIndexLaneMask(ComposeSubRegIdx, OrigMask)
+ : OrigMask;
+ if ((ExpectedDefMask & LaneMask).none())
+ continue;
+ hasDef = true;
+ break;
+ }
+
+ if (!hasDef)
+ ToBeRemoved.push_back(VNI);
+ }
+ for (VNInfo *VNI : ToBeRemoved)
+ SR.removeValNo(VNI);
+
+ // If the subrange is empty at this point, the MIR is invalid. Do not assert
+ // and let the verifier catch this case.
+}
+
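+// Make sure the lanes in LaneMask are covered by subranges whose lane masks
+// lie entirely within LaneMask: existing subranges that only partially
+// overlap LaneMask are split, and a new subrange is created for any lanes
+// that remain uncovered. Apply is invoked on every subrange that ends up
+// covering part of LaneMask.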
+void LiveInterval::refineSubRanges(
+ BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
+ std::function<void(LiveInterval::SubRange &)> Apply,
+ const SlotIndexes &Indexes, const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
+ LaneBitmask ToApply = LaneMask;
+ for (SubRange &SR : subranges()) {
+ LaneBitmask SRMask = SR.LaneMask;
+ LaneBitmask Matching = SRMask & LaneMask;
+ if (Matching.none())
+ continue;
+
+ SubRange *MatchingRange;
+ if (SRMask == Matching) {
+ // The subrange fits (it does not cover bits outside \p LaneMask).
+ MatchingRange = &SR;
+ } else {
+ // We have to split the subrange into a matching and non-matching part.
+ // Reduce the lane mask of the existing subrange to the non-matching part.
+ SR.LaneMask = SRMask & ~Matching;
+ // Create a new subrange for the matching part
+ MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+ // Now that the subrange is split in two, make sure each half only
+ // keeps the VNIs that actually define part of its lane mask.
+ stripValuesNotDefiningMask(reg(), *MatchingRange, Matching, Indexes, TRI,
+ ComposeSubRegIdx);
+ stripValuesNotDefiningMask(reg(), SR, SR.LaneMask, Indexes, TRI,
+ ComposeSubRegIdx);
+ }
+ Apply(*MatchingRange);
+ ToApply &= ~Matching;
+ }
+ // Create a new subrange if there are uncovered bits left.
+ if (ToApply.any()) {
+ SubRange *NewRange = createSubRange(Allocator, ToApply);
+ Apply(*NewRange);
+ }
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const Segment &S : segments)
+ Sum += S.start.distance(S.end);
+ return Sum;
+}
+
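+// Collect the slot indexes of subregister defs carrying the <undef> flag that
+// leave some lane in LaneMask undefined. Callers such as
+// LiveIntervalCalc::extendToUses use these positions as "known undef" markers
+// when extending subrange liveness.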
+void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
+ LaneBitmask LaneMask,
+ const MachineRegisterInfo &MRI,
+ const SlotIndexes &Indexes) const {
+ assert(reg().isVirtual());
+ LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg());
+ assert((VRegMask & LaneMask).any());
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.def_operands(reg())) {
+ if (!MO.isUndef())
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ assert(SubReg != 0 && "Undef should only be set on subreg defs");
+ LaneBitmask DefMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask UndefMask = VRegMask & ~DefMask;
+ if ((UndefMask & LaneMask).any()) {
+ const MachineInstr &MI = *MO.getParent();
+ bool EarlyClobber = MO.isEarlyClobber();
+ SlotIndex Pos = Indexes.getInstructionIndex(MI).getRegSlot(EarlyClobber);
+ Undefs.push_back(Pos);
+ }
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& OS, const LiveRange::Segment &S) {
+ return OS << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveRange::Segment::dump() const {
+ dbgs() << *this << '\n';
+}
+#endif
+
+void LiveRange::print(raw_ostream &OS) const {
+ if (empty())
+ OS << "EMPTY";
+ else {
+ for (const Segment &S : segments) {
+ OS << S;
+ assert(S.valno == getValNumInfo(S.valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << ' ';
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << ' ';
+ OS << vnum << '@';
+ if (vni->isUnused()) {
+ OS << 'x';
+ } else {
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phi";
+ }
+ }
+ }
+}
+
+void LiveInterval::SubRange::print(raw_ostream &OS) const {
+ OS << " L" << PrintLaneMask(LaneMask) << ' '
+ << static_cast<const LiveRange &>(*this);
+}
+
+void LiveInterval::print(raw_ostream &OS) const {
+ OS << printReg(reg()) << ' ';
+ super::print(OS);
+ // Print subranges
+ for (const SubRange &SR : subranges())
+ OS << SR;
+ OS << " weight:" << Weight;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveRange::dump() const {
+ dbgs() << *this << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveInterval::SubRange::dump() const {
+ dbgs() << *this << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveInterval::dump() const {
+ dbgs() << *this << '\n';
+}
+#endif
+
+#ifndef NDEBUG
+void LiveRange::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != nullptr);
+ assert(I->valno->id < valnos.size());
+ assert(I->valno == valnos[I->valno->id]);
+ if (std::next(I) != E) {
+ assert(I->end <= std::next(I)->start);
+ if (I->end == std::next(I)->start)
+ assert(I->valno != std::next(I)->valno);
+ }
+ }
+}
+
+void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
+ super::verify();
+
+ // Make sure SubRanges are fine and LaneMasks are disjoint.
+ LaneBitmask Mask;
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg())
+ : LaneBitmask::getAll();
+ for (const SubRange &SR : subranges()) {
+ // Subrange lanemasks should be disjoint from any previous subrange masks.
+ assert((Mask & SR.LaneMask).none());
+ Mask |= SR.LaneMask;
+
+ // Subrange masks should be contained in the maximum lane mask for the vreg.
+ assert((Mask & ~MaxMask).none());
+ // empty subranges must be removed.
+ assert(!SR.empty());
+
+ SR.verify();
+ // Main liverange should cover subrange.
+ assert(covers(SR));
+ }
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+// This is the initial state, and the state created by flush().
+// In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LR.
+// 2. [ReadI; end) at the back of LR.
+// 3. Spills.
+//
+// - LR.begin() <= WriteI <= ReadI <= LR.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+// 1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
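+//
+// Typical usage (see LiveRange::join above): construct a LiveRangeUpdater for
+// the destination range, feed it segments with roughly increasing start
+// indexes via add(), and flush() the merged result back into the range when
+// done; join() relies on the updater flushing when it goes out of scope.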
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+ if (!isDirty()) {
+ if (LR)
+ OS << "Clean updater: " << *LR << '\n';
+ else
+ OS << "Null updater.\n";
+ return;
+ }
+ assert(LR && "Can't have null LR in dirty updater.");
+ OS << " updater with gap = " << (ReadI - WriteI)
+ << ", last start = " << LastStart
+ << ":\n Area 1:";
+ for (const auto &S : make_range(LR->begin(), WriteI))
+ OS << ' ' << S;
+ OS << "\n Spills:";
+ for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+ OS << ' ' << Spills[I];
+ OS << "\n Area 2:";
+ for (const auto &S : make_range(ReadI, LR->end()))
+ OS << ' ' << S;
+ OS << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const {
+ print(errs());
+}
+#endif
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange::Segment &A,
+ const LiveRange::Segment &B) {
+ assert(A.start <= B.start && "Unordered live segments.");
+ if (A.end == B.start)
+ return A.valno == B.valno;
+ if (A.end < B.start)
+ return false;
+ assert(A.valno == B.valno && "Cannot overlap different values");
+ return true;
+}
+
+void LiveRangeUpdater::add(LiveRange::Segment Seg) {
+ assert(LR && "Cannot add to a null destination");
+
+ // Fall back to the regular add method if the live range
+ // is using the segment set instead of the segment vector.
+ if (LR->segmentSet != nullptr) {
+ LR->addSegmentToSet(Seg);
+ return;
+ }
+
+ // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) {
+ if (isDirty())
+ flush();
+ // This brings us to an uninitialized state. Reinitialize.
+ assert(Spills.empty() && "Leftover spilled segments");
+ WriteI = ReadI = LR->begin();
+ }
+
+ // Remember start for next time.
+ LastStart = Seg.start;
+
+ // Advance ReadI until it ends after Seg.start.
+ LiveRange::iterator E = LR->end();
+ if (ReadI != E && ReadI->end <= Seg.start) {
+ // First try to close the gap between WriteI and ReadI with spills.
+ if (ReadI != WriteI)
+ mergeSpills();
+ // Then advance ReadI.
+ if (ReadI == WriteI)
+ ReadI = WriteI = LR->find(Seg.start);
+ else
+ while (ReadI != E && ReadI->end <= Seg.start)
+ *WriteI++ = *ReadI++;
+ }
+
+ assert(ReadI == E || ReadI->end > Seg.start);
+
+ // Check if the ReadI segment begins early.
+ if (ReadI != E && ReadI->start <= Seg.start) {
+ assert(ReadI->valno == Seg.valno && "Cannot overlap different values");
+ // Bail if Seg is completely contained in ReadI.
+ if (ReadI->end >= Seg.end)
+ return;
+ // Coalesce into Seg.
+ Seg.start = ReadI->start;
+ ++ReadI;
+ }
+
+ // Coalesce as much as possible from ReadI into Seg.
+ while (ReadI != E && coalescable(Seg, *ReadI)) {
+ Seg.end = std::max(Seg.end, ReadI->end);
+ ++ReadI;
+ }
+
+ // Try coalescing Spills.back() into Seg.
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) {
+ Seg.start = Spills.back().start;
+ Seg.end = std::max(Spills.back().end, Seg.end);
+ Spills.pop_back();
+ }
+
+ // Try coalescing Seg into WriteI[-1].
+ if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) {
+ WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
+ return;
+ }
+
+ // Seg doesn't coalesce with anything, and needs to be inserted somewhere.
+ if (WriteI != ReadI) {
+ *WriteI++ = Seg;
+ return;
+ }
+
+ // Finally, append to LR or Spills.
+ if (WriteI == E) {
+ LR->segments.push_back(Seg);
+ WriteI = ReadI = LR->end();
+ } else
+ Spills.push_back(Seg);
+}
+
+// Merge as many spilled segments as possible into the gap between WriteI
+// and ReadI. Advance WriteI to reflect the inserted segments.
+void LiveRangeUpdater::mergeSpills() {
+ // Perform a backwards merge of Spills and the area 1 segments in [begin;WriteI).
+ size_t GapSize = ReadI - WriteI;
+ size_t NumMoved = std::min(Spills.size(), GapSize);
+ LiveRange::iterator Src = WriteI;
+ LiveRange::iterator Dst = Src + NumMoved;
+ LiveRange::iterator SpillSrc = Spills.end();
+ LiveRange::iterator B = LR->begin();
+
+ // This is the new WriteI position after merging spills.
+ WriteI = Dst;
+
+ // Now merge Src and Spills backwards.
+ while (Src != Dst) {
+ if (Src != B && Src[-1].start > SpillSrc[-1].start)
+ *--Dst = *--Src;
+ else
+ *--Dst = *--SpillSrc;
+ }
+ assert(NumMoved == size_t(Spills.end() - SpillSrc));
+ Spills.erase(SpillSrc, Spills.end());
+}
+
+void LiveRangeUpdater::flush() {
+ if (!isDirty())
+ return;
+ // Clear the dirty state.
+ LastStart = SlotIndex();
+
+ assert(LR && "Cannot add to a null destination");
+
+ // Nothing to merge?
+ if (Spills.empty()) {
+ LR->segments.erase(WriteI, ReadI);
+ LR->verify();
+ return;
+ }
+
+ // Resize the WriteI - ReadI gap to match Spills.
+ size_t GapSize = ReadI - WriteI;
+ if (GapSize < Spills.size()) {
+ // The gap is too small. Make some room.
+ size_t WritePos = WriteI - LR->begin();
+ LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment());
+ // This also invalidated ReadI, but it is recomputed below.
+ WriteI = LR->begin() + WritePos;
+ } else {
+ // Shrink the gap if necessary.
+ LR->segments.erase(WriteI + Spills.size(), ReadI);
+ }
+ ReadI = WriteI + Spills.size();
+ mergeSpills();
+ LR->verify();
+}
+
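+// Compute the connected components of the values in LR. Two values end up in
+// the same class when liveness can flow between them: a PHI-def joins the
+// values live out of its predecessor blocks, and a non-PHI value joins the
+// value live immediately before its def (e.g. a two-address redefinition).
+// Unused values are grouped together and folded into the class of a used
+// value when one exists. Returns the number of classes.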
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LR.getNumValNums());
+
+ const VNInfo *used = nullptr, *unused = nullptr;
+
+ // Determine connections.
+ for (const VNInfo *VNI : LR.valnos) {
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(Pred)))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LR.getVNInfoBefore(VNI->def))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ // Rewrite instructions.
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI.reg_operands(LI.reg()))) {
+ MachineInstr *MI = MO.getParent();
+ const VNInfo *VNI;
+ if (MI->isDebugValue()) {
+ // DBG_VALUE instructions don't have slot indexes, so get the index of
+ // the instruction before them. The value is defined there too.
+ SlotIndex Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
+ VNI = LI.Query(Idx).valueOut();
+ } else {
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ }
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
+ if (!VNI)
+ continue;
+ if (unsigned EqClass = getEqClass(VNI))
+ MO.setReg(LIV[EqClass - 1]->reg());
+ }
+
+ // Distribute subregister liveranges.
+ if (LI.hasSubRanges()) {
+ unsigned NumComponents = EqClass.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ // Create new subranges in the split intervals and construct a mapping
+ // for the VNInfos in the subrange.
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumComponents-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ unsigned ComponentNum;
+ if (VNI.isUnused()) {
+ ComponentNum = 0;
+ } else {
+ const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def);
+ assert(MainRangeVNI != nullptr
+ && "SubRange def must have corresponding main range def");
+ ComponentNum = getEqClass(MainRangeVNI);
+ if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) {
+ SubRanges[ComponentNum-1]
+ = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask);
+ }
+ }
+ VNIMapping.push_back(ComponentNum);
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
+ }
+ LI.removeEmptySubRanges();
+ }
+
+ // Distribute main liverange.
+ DistributeRange(LI, LIV, EqClass);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
new file mode 100644
index 000000000000..ccc5ae98086e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -0,0 +1,196 @@
+//===- LiveIntervalCalc.cpp - Calculate live interval --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveIntervalCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveIntervalCalc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
+static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
+ LiveRange &LR, const MachineOperand &MO) {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex DefIdx =
+ Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+
+ // Create the def in LR. This may find an existing def.
+ LR.createDeadDef(DefIdx, Alloc);
+}
+
+void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
+ const MachineRegisterInfo *MRI = getRegInfo();
+ SlotIndexes *Indexes = getIndexes();
+ VNInfo::Allocator *Alloc = getVNAlloc();
+
+ assert(MRI && Indexes && "call reset() first");
+
+ // Step 1: Create minimal live segments for every definition of Reg.
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // createDeadDef() will deduplicate.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ Register Reg = LI.reg();
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
+ LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ // If this is the first time we see a subregister def, initialize
+ // subranges by creating a copy of the main range.
+ if (!LI.hasSubRanges() && !LI.empty()) {
+ LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LI.createSubRangeFrom(*Alloc, ClassMask, LI);
+ }
+
+ LI.refineSubRanges(
+ *Alloc, SubMask,
+ [&MO, Indexes, Alloc](LiveInterval::SubRange &SR) {
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ },
+ *Indexes, TRI);
+ }
+
+ // Create the def in the main liverange. We do not have to do this if
+ // subranges are tracked as we recreate the main range later in this case.
+ if (MO.isDef() && !LI.hasSubRanges())
+ createDeadDef(*Indexes, *Alloc, LI, MO);
+ }
+
+ // We may have created empty live ranges for partially undefined uses; we
+ // can't keep them because we won't find defs in them later.
+ LI.removeEmptySubRanges();
+
+ const MachineFunction *MF = getMachineFunction();
+ MachineDominatorTree *DomTree = getDomTree();
+ // Step 2: Extend live segments to all uses, constructing SSA form as
+ // necessary.
+ if (LI.hasSubRanges()) {
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveIntervalCalc SubLIC;
+ SubLIC.reset(MF, Indexes, DomTree, Alloc);
+ SubLIC.extendToUses(S, Reg, S.LaneMask, &LI);
+ }
+ LI.clear();
+ constructMainRangeFromSubranges(LI);
+ } else {
+ resetLiveOutMap();
+ extendToUses(LI, Reg, LaneBitmask::getAll());
+ }
+}
+
+void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
+ // First create dead defs at all defs found in subranges.
+ LiveRange &MainRange = LI;
+ assert(MainRange.segments.empty() && MainRange.valnos.empty() &&
+ "Expect empty main liverange");
+
+ VNInfo::Allocator *Alloc = getVNAlloc();
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ for (const VNInfo *VNI : SR.valnos) {
+ if (!VNI->isUnused() && !VNI->isPHIDef())
+ MainRange.createDeadDef(VNI->def, *Alloc);
+ }
+ }
+ resetLiveOutMap();
+ extendToUses(MainRange, LI.reg(), LaneBitmask::getAll(), &LI);
+}
+
+void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) {
+ const MachineRegisterInfo *MRI = getRegInfo();
+ SlotIndexes *Indexes = getIndexes();
+ VNInfo::Allocator *Alloc = getVNAlloc();
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LR.createDeadDef() will deduplicate.
+ for (MachineOperand &MO : MRI->def_operands(Reg))
+ createDeadDef(*Indexes, *Alloc, LR, MO);
+}
+
+void LiveIntervalCalc::extendToUses(LiveRange &LR, Register Reg,
+ LaneBitmask Mask, LiveInterval *LI) {
+ const MachineRegisterInfo *MRI = getRegInfo();
+ SlotIndexes *Indexes = getIndexes();
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LI != nullptr)
+ LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes);
+
+ // Visit all operands that read Reg. This may include partial defs.
+ bool IsSubRange = !Mask.all();
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ // Clear all kill flags. They will be reinserted after register allocation
+ // by LiveIntervals::addKillFlags().
+ if (MO.isUse())
+ MO.setIsKill(false);
+ // MO::readsReg returns "true" for subregister defs. This is for keeping
+ // liveness of the entire register (i.e. for the main range of the live
+ // interval). For subranges, definitions of non-overlapping subregisters
+ // do not count as uses.
+ if (!MO.readsReg() || (IsSubRange && MO.isDef()))
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef())
+ SLM = ~SLM;
+ // Ignore uses not reading the current (sub)range.
+ if ((SLM & Mask).none())
+ continue;
+ }
+
+ // Determine the actual place of the use.
+ const MachineInstr *MI = MO.getParent();
+ unsigned OpNo = (&MO - &MI->getOperand(0));
+ SlotIndex UseIdx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // The actual place where a phi operand is used is the end of the pred
+ // MBB. PHI operands are paired: (Reg, PredMBB).
+ UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo + 1).getMBB());
+ } else {
+ // Check for early-clobber redefs.
+ bool isEarlyClobber = false;
+ unsigned DefIdx;
+ if (MO.isDef())
+ isEarlyClobber = MO.isEarlyClobber();
+ else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
+ }
+ UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber);
+ }
+
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK, extend() is idempotent.
+ extend(LR, UseIdx, Reg, Undefs);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..11a4ecf0bef9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,215 @@
+//===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdlib>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(const LiveInterval &VirtReg,
+ const LiveRange &Range) {
+ if (Range.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveRange::const_iterator RegPos = Range.begin();
+ LiveRange::const_iterator RegEnd = Range.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(const LiveInterval &VirtReg,
+ const LiveRange &Range) {
+ if (Range.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveRange::const_iterator RegPos = Range.begin();
+ LiveRange::const_iterator RegEnd = Range.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (true) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = Range.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop()
+ << "):" << printReg(SI.value()->reg(), TRI);
+ }
+ OS << '\n';
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg());
+}
+#endif //!NDEBUG
+
+const LiveInterval *LiveIntervalUnion::getOneVReg() const {
+ if (empty())
+ return nullptr;
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ // return the first valid live interval
+ return SI.value();
+ }
+ return nullptr;
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(
+ const LiveInterval *VirtReg) const {
+ return is_contained(InterferingVRegs, VirtReg);
+}
+
+// Collect virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The query state is one of:
+//
+// 1. CheckedFirstInterference == false: Iterators are uninitialized.
+// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
+// 3. Iterators left at the last seen intersection.
+//
+unsigned
+LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ // Fast path return if we already have the desired information.
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+
+ // Set up iterators on the first call.
+ if (!CheckedFirstInterference) {
+ CheckedFirstInterference = true;
+
+ // Quickly skip interference check for empty sets.
+ if (LR->empty() || LiveUnion->empty()) {
+ SeenAllInterferences = true;
+ return 0;
+ }
+
+ // In most cases, the union will start before LR.
+ LRI = LR->begin();
+ LiveUnionI.setMap(LiveUnion->getMap());
+ LiveUnionI.find(LRI->start);
+ }
+
+ LiveRange::const_iterator LREnd = LR->end();
+ const LiveInterval *RecentReg = nullptr;
+ while (LiveUnionI.valid()) {
+ assert(LRI != LREnd && "Reached end of LR");
+
+ // Check for overlapping interference.
+ while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) {
+ // This is an overlap, record the interfering register.
+ const LiveInterval *VReg = LiveUnionI.value();
+ if (VReg != RecentReg && !isSeenInterference(VReg)) {
+ RecentReg = VReg;
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+ }
+ // This LiveUnion segment is no longer interesting.
+ if (!(++LiveUnionI).valid()) {
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+ }
+ }
+
+ // The iterators are now not overlapping; LiveUnionI has been advanced
+ // beyond LRI.
+ assert(LRI->end <= LiveUnionI.start() && "Expected non-overlap");
+
+ // Advance the iterator that ends first.
+ LRI = LR->advanceTo(LRI, LiveUnionI.start());
+ if (LRI == LREnd)
+ break;
+
+ // Detect an overlap; it is handled at the top of the loop.
+ if (LRI->start < LiveUnionI.stop())
+ continue;
+
+ // Still not overlapping. Catch up LiveUnionI.
+ LiveUnionI.advanceTo(LRI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
+
+void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
+ unsigned NSize) {
+ // Reuse existing allocation.
+ if (NSize == Size)
+ return;
+ clear();
+ Size = NSize;
+ LIUs = static_cast<LiveIntervalUnion*>(
+ safe_malloc(sizeof(LiveIntervalUnion)*NSize));
+ for (unsigned i = 0; i != Size; ++i)
+ new(LIUs + i) LiveIntervalUnion(Alloc);
+}
+
+void LiveIntervalUnion::Array::clear() {
+ if (!LIUs)
+ return;
+ for (unsigned i = 0; i != Size; ++i)
+ LIUs[i].~LiveIntervalUnion();
+ free(LIUs);
+ Size = 0;
+ LIUs = nullptr;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
new file mode 100644
index 000000000000..da55e7f7284b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -0,0 +1,1748 @@
+//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LiveInterval analysis pass which is used
+/// by the Linear Scan Register allocator. This pass linearizes the
+/// basic blocks of the function in DFS order and computes live intervals for
+/// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+#ifndef NDEBUG
+static cl::opt<bool> EnablePrecomputePhysRegs(
+ "precompute-phys-liveness", cl::Hidden,
+ cl::desc("Eagerly compute live intervals for all physreg units."));
+#else
+static bool EnablePrecomputePhysRegs = false;
+#endif // NDEBUG
+
+namespace llvm {
+
+cl::opt<bool> UseSegmentSetForPhysRegs(
+ "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use segment set for the computation of the live ranges of physregs."));
+
+} // end namespace llvm
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequiredTransitiveID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() { delete LICalc; }
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+ delete VirtRegIntervals[Register::index2VirtReg(i)];
+ VirtRegIntervals.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
+
+ for (LiveRange *LR : RegUnitRanges)
+ delete LR;
+ RegUnitRanges.clear();
+
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+}
+
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+
+ if (!LICalc)
+ LICalc = new LiveIntervalCalc();
+
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ computeVirtRegs();
+ computeRegMasks();
+ computeLiveInRegUnits();
+
+ if (EnablePrecomputePhysRegs) {
+ // For stress testing, precompute live ranges of all physical register
+ // units, including reserved registers.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ getRegUnit(i);
+ }
+ LLVM_DEBUG(dump());
+ return false;
+}
+
+void LiveIntervals::print(raw_ostream &OS, const Module *) const {
+ OS << "********** INTERVALS **********\n";
+
+ // Dump the regunits.
+ for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit)
+ if (LiveRange *LR = RegUnitRanges[Unit])
+ OS << printRegUnit(Unit, TRI) << ' ' << *LR << '\n';
+
+ // Dump the virtregs.
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (hasInterval(Reg))
+ OS << getInterval(Reg) << '\n';
+ }
+
+ OS << "RegMasks:";
+ for (SlotIndex Idx : RegMaskSlots)
+ OS << ' ' << Idx;
+ OS << '\n';
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+ MF->print(OS, Indexes);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+#endif
+
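+// Physical registers get an effectively infinite spill weight (huge_valf) so
+// they are never chosen for spilling; virtual registers start with weight
+// zero and get a meaningful weight computed later during allocation.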
+LiveInterval *LiveIntervals::createInterval(Register reg) {
+ float Weight = reg.isPhysical() ? huge_valf : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// Compute the live interval of a virtual register, based on defs and uses.
+bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
+ assert(LICalc && "LICalc not initialized.");
+ assert(LI.empty() && "Should only compute empty intervals.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg()));
+ return computeDeadValues(LI, nullptr);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval &LI = createEmptyInterval(Reg);
+ bool NeedSplit = computeVirtRegInterval(LI);
+ if (NeedSplit) {
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ splitSeparateComponents(LI, SplitLIs);
+ }
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (const MachineBasicBlock &MBB : *MF) {
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
+ RMB.first = RegMaskSlots.size();
+
+ // Some block starts, such as EH funclets, create masks.
+ if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
+ // Unwinders may clobber additional registers.
+ // FIXME: This functionality can possibly be merged into
+ // MachineBasicBlock::getBeginClobberMask().
+ if (MBB.isEHPad())
+ if (auto *Mask = TRI->getCustomEHPadPreservedMask(*MBB.getParent())) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
+ }
+ }
+
+ // Some block ends, such as funclet returns, create masks. Put the mask on
+ // the last instruction of the block, because MBB slot index intervals are
+ // half-open.
+ if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) {
+ assert(!MBB.empty() && "empty return block?");
+ RegMaskSlots.push_back(
+ Indexes->getInstructionIndex(MBB.back()).getRegSlot());
+ RegMaskBits.push_back(Mask);
+ }
+
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
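+// As an illustration, a value returned by a call is produced in a fixed
+// register chosen by the calling convention and is usually copied into a
+// virtual register immediately afterwards; the short live segment on that
+// physical register's units is what the allocator sees as fixed interference.
+//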
+
+/// Compute the live range of a register unit, based on the uses and defs of
+/// aliasing registers. The range should be empty, or contain only dead
+/// phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
+ assert(LICalc && "LICalc not initialized.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+ // The physregs aliasing Unit are the roots and their super-registers.
+ // Create all values as dead defs before extending to uses. Note that roots
+ // may share super-registers. That's OK because createDeadDefs() is
+ // idempotent. It is very rare for a register unit to have multiple roots, so
+ // uniquing super-registers is probably not worthwhile.
+ bool IsReserved = false;
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ bool IsRootReserved = true;
+ for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
+ if (!MRI->reg_empty(Reg))
+ LICalc->createDeadDefs(LR, Reg);
+ // A register unit is considered reserved if all its roots and all their
+ // super registers are reserved.
+ if (!MRI->isReserved(Reg))
+ IsRootReserved = false;
+ }
+ IsReserved |= IsRootReserved;
+ }
+ assert(IsReserved == MRI->isReservedRegUnit(Unit) &&
+ "reserved computation mismatch");
+
+ // Now extend LR to reach all uses.
+ // Ignore uses of reserved registers. We only track defs of those.
+ if (!IsReserved) {
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
+ if (!MRI->reg_empty(Reg))
+ LICalc->extendToUses(LR, Reg);
+ }
+ }
+ }
+
+ // Flush the segment set to the segment vector.
+ if (UseSegmentSetForPhysRegs)
+ LR.flushSegmentSet();
+}
+
+/// Precompute the live ranges of any register units that are live-in to an ABI
+/// block somewhere. Register values can appear without a corresponding def when
+/// entering the entry block or a landing pad.
+void LiveIntervals::computeLiveInRegUnits() {
+ RegUnitRanges.resize(TRI->getNumRegUnits());
+ LLVM_DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+
+ // Keep track of the live range sets allocated.
+ SmallVector<unsigned, 8> NewRanges;
+
+ // Check all basic blocks for live-ins.
+ for (const MachineBasicBlock &MBB : *MF) {
+ // We only care about ABI blocks: Entry + landing pads.
+ if ((&MBB != &MF->front() && !MBB.isEHPad()) || MBB.livein_empty())
+ continue;
+
+ // Create phi-defs at Begin for all live-in registers.
+ SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
+ LLVM_DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
+ for (const auto &LI : MBB.liveins()) {
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
+ LiveRange *LR = RegUnitRanges[Unit];
+ if (!LR) {
+ // Use a segment set to speed up the initial computation of the live range.
+ LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
+ NewRanges.push_back(Unit);
+ }
+ VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
+ (void)VNI;
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id);
+ }
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+ LLVM_DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
+
+ // Compute the 'normal' part of the ranges.
+ for (unsigned Unit : NewRanges)
+ computeRegUnitRange(*RegUnitRanges[Unit], Unit);
+}
+
+static void createSegmentsForValues(LiveRange &LR,
+ iterator_range<LiveInterval::vni_iterator> VNIs) {
+ for (VNInfo *VNI : VNIs) {
+ if (VNI->isUnused())
+ continue;
+ SlotIndex Def = VNI->def;
+ LR.addSegment(LiveRange::Segment(Def, Def.getDeadSlot(), VNI));
+ }
+}
+
+void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
+ ShrinkToUsesWorkList &WorkList,
+ Register Reg, LaneBitmask LaneMask) {
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<const MachineBasicBlock*, 16> LiveOut;
+
+ auto getSubRange = [](const LiveInterval &I, LaneBitmask M)
+ -> const LiveRange& {
+ if (M.none())
+ return I;
+ for (const LiveInterval::SubRange &SR : I.subranges()) {
+ if ((SR.LaneMask & M).any()) {
+ assert(SR.LaneMask == M && "Expecting lane masks to match exactly");
+ return SR;
+ }
+ }
+ llvm_unreachable("Subrange for mask not found");
+ };
+
+ const LiveInterval &LI = getInterval(Reg);
+ const LiveRange &OldRange = getSubRange(LI, LaneMask);
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Idx.getPrevSlot());
+ SlotIndex BlockStart = Indexes->getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = Segments.extendInBlock(BlockStart, Idx)) {
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ (void)ExtVNI;
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart ||
+ !UsedPHIs.insert(VNI).second)
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!LiveOut.insert(Pred).second)
+ continue;
+ SlotIndex Stop = Indexes->getMBBEndIdx(Pred);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop))
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ LLVM_DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ Segments.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!LiveOut.insert(Pred).second)
+ continue;
+ SlotIndex Stop = Indexes->getMBBEndIdx(Pred);
+ if (VNInfo *OldVNI = OldRange.getVNInfoBefore(Stop)) {
+ assert(OldVNI == VNI && "Wrong value out of predecessor");
+ (void)OldVNI;
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ } else {
+#ifndef NDEBUG
+ // There was no old VNI. Verify that Stop is jointly dominated
+ // by <undef>s for this live range.
+ assert(LaneMask.any() &&
+ "Missing value out of predecessor for main range");
+ SmallVector<SlotIndex,8> Undefs;
+ LI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes);
+ assert(LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes) &&
+ "Missing value out of predecessor for subrange");
+#endif
+ }
+ }
+ }
+}
+
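+// Shrinking rebuilds a virtual register's interval from its remaining uses:
+// every value starts out as a minimal dead-def segment, the segments are then
+// extended to reach each use that is still present, and values that end up
+// dead are reported through computeDeadValues() (and the optional 'dead'
+// instruction list).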
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(li->reg().isVirtual() && "Can only shrink virtual registers");
+
+ // Shrink subregister live ranges.
+ bool NeedsCleanup = false;
+ for (LiveInterval::SubRange &S : li->subranges()) {
+ shrinkToUses(S, li->reg());
+ if (S.empty())
+ NeedsCleanup = true;
+ }
+ if (NeedsCleanup)
+ li->removeEmptySubRanges();
+
+ // Find all the values used, including PHI kills.
+ ShrinkToUsesWorkList WorkList;
+
+ // Visit all instructions reading li->reg().
+ Register Reg = li->reg();
+ for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
+ if (UseMI.isDebugInstr() || !UseMI.readsVirtualRegister(Reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ LiveQueryResult LRQ = li->Query(Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ LLVM_DEBUG(
+ dbgs() << Idx << '\t' << UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create new live ranges with only minimal live segments per def.
+ LiveRange NewLR;
+ createSegmentsForValues(NewLR, li->vnis());
+ extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone());
+
+ // Move the trimmed segments back.
+ li->segments.swap(NewLR.segments);
+
+ // Handle dead values.
+ bool CanSeparate = computeDeadValues(*li, dead);
+ LLVM_DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+bool LiveIntervals::computeDeadValues(LiveInterval &LI,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ bool MayHaveSplitComponents = false;
+
+ for (VNInfo *VNI : LI.valnos) {
+ if (VNI->isUnused())
+ continue;
+ SlotIndex Def = VNI->def;
+ LiveRange::iterator I = LI.FindSegmentContaining(Def);
+ assert(I != LI.end() && "Missing segment for VNI");
+
+ // Is the register live before? Otherwise we may have to add a read-undef
+ // flag for subregister defs.
+ Register VReg = LI.reg();
+ if (MRI->shouldTrackSubRegLiveness(VReg)) {
+ if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ MI->setRegisterDefReadUndef(VReg);
+ }
+ }
+
+ if (I->end != Def.getDeadSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->markUnused();
+ LI.removeSegment(I);
+ LLVM_DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(LI.reg(), TRI);
+
+ if (dead && MI->allDefsAreDead()) {
+ LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ MayHaveSplitComponents = true;
+ }
+ return MayHaveSplitComponents;
+}
+
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
+ LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
+ assert(Reg.isVirtual() && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ ShrinkToUsesWorkList WorkList;
+
+ // Visit all instructions reading Reg.
+ SlotIndex LastIdx;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ // Skip "undef" uses.
+ if (!MO.readsReg())
+ continue;
+ // Maybe the operand is for a subregister we don't care about.
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((LaneMask & SR.LaneMask).none())
+ continue;
+ }
+ // We only need to visit each instruction once.
+ MachineInstr *UseMI = MO.getParent();
+ SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
+ if (Idx == LastIdx)
+ continue;
+ LastIdx = Idx;
+
+ LiveQueryResult LRQ = SR.Query(Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ // For subranges it is possible that only undef values are left in that
+ // part of the subregister, so there is no real live range at the use.
+ if (!VNI)
+ continue;
+
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live range with only minimal live segments per def.
+ LiveRange NewLR;
+ createSegmentsForValues(NewLR, SR.vnis());
+ extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask);
+
+ // Move the trimmed ranges back.
+ SR.segments.swap(NewLR.segments);
+
+ // Remove dead PHI value numbers
+ for (VNInfo *VNI : SR.valnos) {
+ if (VNI->isUnused())
+ continue;
+ const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
+ assert(Segment != nullptr && "Missing segment for VNI");
+ if (Segment->end != VNI->def.getDeadSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ LLVM_DEBUG(dbgs() << "Dead PHI at " << VNI->def
+ << " may separate interval\n");
+ VNI->markUnused();
+ SR.removeSegment(*Segment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Shrunk: " << SR << '\n');
+}
+
+void LiveIntervals::extendToIndices(LiveRange &LR,
+ ArrayRef<SlotIndex> Indices,
+ ArrayRef<SlotIndex> Undefs) {
+ assert(LICalc && "LICalc not initialized.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (SlotIndex Idx : Indices)
+ LICalc->extend(LR, Idx, /*PhysReg=*/0, Undefs);
+}
+
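+// pruneValue() cuts the liveness of the value live at Kill from Kill onwards,
+// following the CFG into every block that the value reaches. The slot indexes
+// where liveness was truncated are collected in EndPoints so a caller can
+// re-extend the range later, e.g. with extendToIndices().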
+void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveQueryResult LRQ = LR.Query(Kill);
+ VNInfo *VNI = LRQ.valueOutOrDead();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LR.removeSegment(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LR.removeSegment(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ using VisitedTy = df_iterator_default_set<MachineBasicBlock*,9>;
+ VisitedTy Visited;
+ for (MachineBasicBlock *Succ : KillMBB->successors()) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(Succ, Visited), E = df_ext_end(Succ, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ SlotIndex MBBStart, MBBEnd;
+ std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveQueryResult LRQ = LR.Query(MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI segment. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LR.removeSegment(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LR.removeSegment(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+ // Keep track of regunit ranges.
+ SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ const LiveInterval &LI = getInterval(Reg);
+ if (LI.empty())
+ continue;
+
+ // The target may not have allocated this yet.
+ Register PhysReg = VRM->getPhys(Reg);
+ if (!PhysReg)
+ continue;
+
+ // Find the regunit intervals for the assigned register. They may overlap
+ // the virtual register live range, cancelling any kills.
+ RU.clear();
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ const LiveRange &RURange = getRegUnit(Unit);
+ if (RURange.empty())
+ continue;
+ RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
+ }
+ // Every instruction that kills Reg corresponds to a segment range end
+ // point.
+ for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
+ ++RI) {
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+
+ // Check if any of the regunits are live beyond the end of RI. That could
+ // happen when a physreg is defined as a copy of a virtreg:
+ //
+ // %eax = COPY %5
+ // FOO %5 <--- MI, cancel kill because %eax is live.
+ // BAR killed %eax
+ //
+ // There should be no kill flag on FOO when %5 is rewritten as %eax.
+ for (auto &RUP : RU) {
+ const LiveRange &RURange = *RUP.first;
+ LiveRange::const_iterator &I = RUP.second;
+ if (I == RURange.end())
+ continue;
+ I = RURange.advanceTo(I, RI->end);
+ if (I == RURange.end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI.
+ goto CancelKill;
+ }
+
+ if (MRI->subRegLivenessEnabled()) {
+ // When reading a partial undefined value we must not add a kill flag.
+ // The regalloc might have used the undef lane for something else.
+ // Example:
+ // %1 = ... ; R32: %1
+ // %2:high16 = ... ; R64: %2
+ // = read killed %2 ; R64: %2
+ // = read %1 ; R32: %1
+ // The <kill> flag is correct for %2, but the register allocator may
+ // assign R0L to %1, and R0 to %2 because the low 32bits of R0
+ // are actually never written by %2. After assignment the <kill>
+ // flag at the read instruction is invalid.
+ LaneBitmask DefinedLanesMask;
+ if (LI.hasSubRanges()) {
+ // Compute a mask of lanes that are defined.
+ DefinedLanesMask = LaneBitmask::getNone();
+ for (const LiveInterval::SubRange &SR : LI.subranges())
+ for (const LiveRange::Segment &Segment : SR.segments) {
+ if (Segment.start >= RI->end)
+ break;
+ if (Segment.end == RI->end) {
+ DefinedLanesMask |= SR.LaneMask;
+ break;
+ }
+ }
+ } else
+ DefinedLanesMask = LaneBitmask::getAll();
+
+ bool IsFullWrite = false;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (MO.isUse()) {
+ // Reading any undefined lanes?
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask UseMask = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ if ((UseMask & ~DefinedLanesMask).any())
+ goto CancelKill;
+ } else if (MO.getSubReg() == 0) {
+ // Writing to the full register?
+ assert(MO.isDef());
+ IsFullWrite = true;
+ }
+ }
+
+ // If an instruction writes to a subregister, a new segment starts in
+ // the LiveInterval. But as this only overrides part of the register,
+ // adding kill flags is not correct here after registers have been
+ // assigned.
+ if (!IsFullWrite) {
+ // Next segment has to be adjacent in the subregister write case.
+ LiveRange::const_iterator N = std::next(RI);
+ if (N != LI.end() && N->start == RI->end)
+ goto CancelKill;
+ }
+ }
+
+ MI->addRegisterKilled(Reg, nullptr);
+ continue;
+CancelKill:
+ MI->clearRegisterKills(Reg, nullptr);
+ }
+ }
+}
+
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ assert(!LI.empty() && "LiveInterval is empty.");
+
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return nullptr in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return nullptr;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return nullptr;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : nullptr;
+}
+
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (const VNInfo *PHI : LI.valnos) {
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (const MachineBasicBlock *Pred : PHIMBB->predecessors())
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(Pred)))
+ return true;
+ }
+ return false;
+}
+
+float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+ const MachineBlockFrequencyInfo *MBFI,
+ const MachineInstr &MI) {
+ return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
+}
+
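+// The weight of a single def or use is the block's execution frequency
+// relative to the entry block; an instruction that both reads and writes the
+// register therefore contributes twice as much as one that only reads or only
+// writes it.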
+float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+ const MachineBlockFrequencyInfo *MBFI,
+ const MachineBasicBlock *MBB) {
+ return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
+}
+
+LiveRange::Segment
+LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) {
+ LiveInterval &Interval = createEmptyInterval(Reg);
+ VNInfo *VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ LiveRange::Segment S(SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst.getParent()), VN);
+ Interval.addSegment(S);
+
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+/// Check whether a use of Reg in MI is live-through. Live-through means that
+/// the value is live on exit from the machine instruction. An example of such
+/// a use is a deopt value in a statepoint instruction.
+static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) {
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ StatepointOpers SO(MI);
+ if (SO.getFlags() & (uint64_t)StatepointFlags::DeoptLiveIn)
+ return false;
+ for (unsigned Idx = SO.getNumDeoptArgsIdx(), E = SO.getNumGCPtrIdx(); Idx < E;
+ ++Idx) {
+ const MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isReg() && MO.getReg() == Reg)
+ return true;
+ }
+ return false;
+}
+
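+// Returns true if LI overlaps at least one regmask slot (typically a call).
+// In that case UsableRegs is resized to the number of physical registers and
+// narrowed down to the registers preserved by every overlapping mask, so the
+// allocator can avoid assigning LI a register that any of those masks
+// clobbers.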
+bool LiveIntervals::checkRegMaskInterference(const LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
+ return false;
+ LiveInterval::const_iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
+
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI = llvm::lower_bound(Slots, LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
+ return false;
+
+ bool Found = false;
+ // Utility to union regmasks.
+ auto unionBitMask = [&](unsigned Idx) {
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(TRI->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[Idx]);
+ };
+ while (true) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ unionBitMask(SlotI - Slots.begin());
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // If the segment ends with a live-through use, we need to collect its
+ // regmask.
+ if (*SlotI == LiveI->end)
+ if (MachineInstr *MI = getInstructionFromIndex(*SlotI))
+ if (hasLiveThroughUse(MI, LI.reg()))
+ unionBitMask(SlotI++ - Slots.begin());
+ // *SlotI is beyond the current LI segment.
+ // Special advance implementation to not miss next LiveI->end.
+ if (++LiveI == LiveE || SlotI == SlotE || *SlotI > LI.endIndex())
+ return Found;
+ while (LiveI->end < *SlotI)
+ ++LiveI;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IntervalUpdate class.
+//===----------------------------------------------------------------------===//
+
+/// Toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
+ SlotIndex NewIdx;
+ SmallPtrSet<LiveRange*, 8> Updated;
+ bool UpdateFlags;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveRange *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags && !MRI.isReservedRegUnit(Unit))
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
+ }
+
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": "
+ << *MI);
+ bool hasRegMask = false;
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isRegMask())
+ hasRegMask = true;
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse()) {
+ if (!MO.readsReg())
+ continue;
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ MO.setIsKill(false);
+ }
+
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg.isVirtual()) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none())
+ continue;
+ updateRange(S, Reg, S.LaneMask);
+ }
+ }
+ updateRange(LI, Reg, LaneBitmask::getNone());
+ // If the main range has a hole and we are moving a subrange use across
+ // the hole, updateRange() cannot properly handle it since it only
+ // gets the LiveRange and not the whole LiveInterval. As a result
+ // we may end up with a main range not covering all subranges.
+ // This is an extremely rare case, so let's check and reconstruct the
+ // main range.
+ if (LI.hasSubRanges()) {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none() || LI.covers(S))
+ continue;
+ LI.clear();
+ LIS.constructMainRangeFromSubranges(LI);
+ break;
+ }
+ }
+
+ continue;
+ }
+
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ if (LiveRange *LR = getRegUnitLI(Unit))
+ updateRange(*LR, Unit, LaneBitmask::getNone());
+ }
+ if (hasRegMask)
+ updateRegMaskSlots();
+ }
+
+private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveRange &LR, Register Reg, LaneBitmask LaneMask) {
+ if (!Updated.insert(&LR).second)
+ return;
+ LLVM_DEBUG({
+ dbgs() << " ";
+ if (Reg.isVirtual()) {
+ dbgs() << printReg(Reg);
+ if (LaneMask.any())
+ dbgs() << " L" << PrintLaneMask(LaneMask);
+ } else {
+ dbgs() << printRegUnit(Reg, &TRI);
+ }
+ dbgs() << ":\t" << LR << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LR);
+ else
+ handleMoveUp(LR, Reg, LaneMask);
+ LLVM_DEBUG(dbgs() << " -->\t" << LR << '\n');
+ LR.verify();
+ }
+
+ /// Update LR to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx (OldIdx < NewIdx).
+ void handleMoveDown(LiveRange &LR) {
+ LiveRange::iterator E = LR.end();
+ // Segment going into OldIdx.
+ LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
+
+ // No value live before or after OldIdx? Nothing to do.
+ if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start))
+ return;
+
+ LiveRange::iterator OldIdxOut;
+ // Do we have a value live-in to OldIdx?
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) {
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (SlotIndex::isEarlierEqualInstr(NewIdx, OldIdxIn->end))
+ return;
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist, they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end))
+ for (MachineOperand &MOP : mi_bundle_ops(*KillMI))
+ if (MOP.isReg() && MOP.isUse())
+ MOP.setIsKill(false);
+
+ // Is there a def before NewIdx which is not OldIdx?
+ LiveRange::iterator Next = std::next(OldIdxIn);
+ if (Next != E && !SlotIndex::isSameInstr(OldIdx, Next->start) &&
+ SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // If we are here then OldIdx was just a use but not a def. We only have
+ // to ensure liveness extends to NewIdx.
+ LiveRange::iterator NewIdxIn =
+ LR.advanceTo(Next, NewIdx.getBaseIndex());
+ // Extend the segment before NewIdx if necessary.
+ if (NewIdxIn == E ||
+ !SlotIndex::isEarlierInstr(NewIdxIn->start, NewIdx)) {
+ LiveRange::iterator Prev = std::prev(NewIdxIn);
+ Prev->end = NewIdx.getRegSlot();
+ }
+ // Extend OldIdxIn.
+ OldIdxIn->end = Next->start;
+ return;
+ }
+
+ // Adjust OldIdxIn->end to reach NewIdx. This may temporarily make LR
+ // invalid by overlapping ranges.
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end);
+ OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber());
+ // If this was not a kill, then there was no def and we're done.
+ if (!isKill)
+ return;
+
+ // Did we have a Def at OldIdx?
+ OldIdxOut = Next;
+ if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start))
+ return;
+ } else {
+ OldIdxOut = OldIdxIn;
+ }
+
+ // If we are here then there is a Definition at OldIdx. OldIdxOut points
+ // to the segment starting there.
+ assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) &&
+ "No def?");
+ VNInfo *OldIdxVNI = OldIdxOut->valno;
+ assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def");
+
+ // If the defined value extends beyond NewIdx, just move the beginning
+ // of the segment to NewIdx.
+ SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber());
+ if (SlotIndex::isEarlierInstr(NewIdxDef, OldIdxOut->end)) {
+ OldIdxVNI->def = NewIdxDef;
+ OldIdxOut->start = OldIdxVNI->def;
+ return;
+ }
+
+ // If we are here then we have a Definition at OldIdx which ends before
+ // NewIdx.
+
+ // Is there an existing Def at NewIdx?
+ LiveRange::iterator AfterNewIdx
+ = LR.advanceTo(OldIdxOut, NewIdx.getRegSlot());
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+ if (!OldIdxDefIsDead &&
+ SlotIndex::isEarlierInstr(OldIdxOut->end, NewIdxDef)) {
+ // OldIdx is not a dead def, and NewIdxDef is inside a new interval.
+ VNInfo *DefVNI;
+ if (OldIdxOut != LR.begin() &&
+ !SlotIndex::isEarlierInstr(std::prev(OldIdxOut)->end,
+ OldIdxOut->start)) {
+ // There is no gap between OldIdxOut and its predecessor anymore;
+ // merge them.
+ LiveRange::iterator IPrev = std::prev(OldIdxOut);
+ DefVNI = OldIdxVNI;
+ IPrev->end = OldIdxOut->end;
+ } else {
+ // The value is live in to OldIdx
+ LiveRange::iterator INext = std::next(OldIdxOut);
+ assert(INext != E && "Must have following segment");
+ // We merge OldIdxOut and its successor. As we're dealing with subreg
+ // reordering, there is always a successor to OldIdxOut in the same BB.
+ // We don't need INext->valno anymore and will reuse it for the new
+ // segment we create later.
+ DefVNI = OldIdxVNI;
+ INext->start = OldIdxOut->end;
+ INext->valno->def = INext->start;
+ }
+ // If NewIdx is behind the last segment, extend that and append a new one.
+ if (AfterNewIdx == E) {
+ // OldIdxOut is undef at this point; slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn -| end
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS -| end
+ std::copy(std::next(OldIdxOut), E, OldIdxOut);
+ // The last segment is undefined now, reuse it for a dead def.
+ LiveRange::iterator NewSegment = std::prev(E);
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ DefVNI);
+ DefVNI->def = NewIdxDef;
+
+ LiveRange::iterator Prev = std::prev(NewSegment);
+ Prev->end = NewIdxDef;
+ } else {
+ // OldIdxOut is undef at this point; slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn/AfterNewIdx -| |- Next -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- Xn/AfterNewIdx -| |- Next -|
+ std::copy(std::next(OldIdxOut), std::next(AfterNewIdx), OldIdxOut);
+ LiveRange::iterator Prev = std::prev(AfterNewIdx);
+ // We have two cases:
+ if (SlotIndex::isEarlierInstr(Prev->start, NewIdxDef)) {
+ // Case 1: NewIdx is inside a liverange. Split this liverange at
+ // NewIdxDef into the segment "Prev" followed by "NewSegment".
+ LiveRange::iterator NewSegment = AfterNewIdx;
+ *NewSegment = LiveRange::Segment(NewIdxDef, Prev->end, Prev->valno);
+ Prev->valno->def = NewIdxDef;
+
+ *Prev = LiveRange::Segment(Prev->start, NewIdxDef, DefVNI);
+ DefVNI->def = Prev->start;
+ } else {
+ // Case 2: NewIdx is in a lifetime hole. Keep AfterNewIdx as is and
+ // turn Prev into a segment from NewIdx to AfterNewIdx->start.
+ *Prev = LiveRange::Segment(NewIdxDef, AfterNewIdx->start, DefVNI);
+ DefVNI->def = NewIdxDef;
+ assert(DefVNI != AfterNewIdx->valno);
+ }
+ }
+ return;
+ }
+
+ if (AfterNewIdx != E &&
+ SlotIndex::isSameInstr(AfterNewIdx->start, NewIdxDef)) {
+ // There is an existing def at NewIdx. The def at OldIdx is coalesced into
+ // that value.
+ assert(AfterNewIdx->valno != OldIdxVNI && "Multiple defs of value?");
+ LR.removeValNo(OldIdxVNI);
+ } else {
+ // There was no existing def at NewIdx. We need to create a dead def
+ // at NewIdx. Shift segments over the old OldIdxOut segment, this frees
+ // a new segment at the place where we want to construct the dead def.
+ // |- OldIdxOut -| |- X0 -| ... |- Xn -| |- AfterNewIdx -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS. -| |- AfterNewIdx -|
+ assert(AfterNewIdx != OldIdxOut && "Inconsistent iterators");
+ std::copy(std::next(OldIdxOut), AfterNewIdx, OldIdxOut);
+ // We can reuse OldIdxVNI now.
+ LiveRange::iterator NewSegment = std::prev(AfterNewIdx);
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ NewSegmentVNI->def = NewIdxDef;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ }
+ }
+
+ /// Update LR to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx (NewIdx < OldIdx).
+ void handleMoveUp(LiveRange &LR, Register Reg, LaneBitmask LaneMask) {
+ LiveRange::iterator E = LR.end();
+ // Segment going into OldIdx.
+ LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
+
+ // No value live before or after OldIdx? Nothing to do.
+ if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start))
+ return;
+
+ LiveRange::iterator OldIdxOut;
+ // Do we have a value live-in to OldIdx?
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) {
+ // If the live-in value isn't killed here, then we have no Def at
+ // OldIdx; moreover, the value must be live at NewIdx, so there is
+ // nothing to do.
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end);
+ if (!isKill)
+ return;
+
+ // At this point we have to move OldIdxIn->end back to the nearest
+ // previous use or (dead-)def but no further than NewIdx.
+ SlotIndex DefBeforeOldIdx
+ = std::max(OldIdxIn->start.getDeadSlot(),
+ NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()));
+ OldIdxIn->end = findLastUseBefore(DefBeforeOldIdx, Reg, LaneMask);
+
+ // Did we have a Def at OldIdx? If not we are done now.
+ OldIdxOut = std::next(OldIdxIn);
+ if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start))
+ return;
+ } else {
+ OldIdxOut = OldIdxIn;
+ OldIdxIn = OldIdxOut != LR.begin() ? std::prev(OldIdxOut) : E;
+ }
+
+ // If we are here then there is a Definition at OldIdx. OldIdxOut points
+ // to the segment starting there.
+ assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) &&
+ "No def?");
+ VNInfo *OldIdxVNI = OldIdxOut->valno;
+ assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def");
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+
+ // Is there an existing def at NewIdx?
+ SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber());
+ LiveRange::iterator NewIdxOut = LR.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewIdxOut->start, NewIdx)) {
+ assert(NewIdxOut->valno != OldIdxVNI &&
+ "Same value defined more than once?");
+ // If OldIdx was a dead def remove it.
+ if (!OldIdxDefIsDead) {
+ // Remove segment starting at NewIdx and move begin of OldIdxOut to
+ // NewIdx so it can take its place.
+ OldIdxVNI->def = NewIdxDef;
+ OldIdxOut->start = NewIdxDef;
+ LR.removeValNo(NewIdxOut->valno);
+ } else {
+ // Simply remove the dead def at OldIdx.
+ LR.removeValNo(OldIdxVNI);
+ }
+ } else {
+ // Previously nothing was live after NewIdx, so all we have to do now is
+ // move the begin of OldIdxOut to NewIdx.
+ if (!OldIdxDefIsDead) {
+ // Do we have any intermediate Defs between OldIdx and NewIdx?
+ if (OldIdxIn != E &&
+ SlotIndex::isEarlierInstr(NewIdxDef, OldIdxIn->start)) {
+ // OldIdx is not a dead def and NewIdx is before predecessor start.
+ LiveRange::iterator NewIdxIn = NewIdxOut;
+ assert(NewIdxIn == LR.find(NewIdx.getBaseIndex()));
+ const SlotIndex SplitPos = NewIdxDef;
+ OldIdxVNI = OldIdxIn->valno;
+
+ SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
+ LiveRange::iterator Prev = std::prev(OldIdxIn);
+ if (OldIdxIn != LR.begin() &&
+ SlotIndex::isEarlierInstr(NewIdx, Prev->end)) {
+ // If the segment before OldIdx read a value defined earlier than
+ // NewIdx, the moved instruction also reads and forwards that
+ // value. Extend the lifetime of the new def point.
+
+ // Extend to where the previous range started, unless there is
+ // another redef first.
+ NewDefEndPoint = std::min(OldIdxIn->start,
+ std::next(NewIdxOut)->start);
+ }
+
+ // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
+ OldIdxOut->valno->def = OldIdxIn->start;
+ *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
+ OldIdxOut->valno);
+ // OldIdxIn and OldIdxVNI are now undef and can be overridden.
+ // We slide [NewIdxIn, OldIdxIn) down one position.
+ // |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
+ // => |- undef/NewIdxIn -| |- X0 -| ... |- Xn-1 -| |- Xn/OldIdxOut -|
+ std::copy_backward(NewIdxIn, OldIdxIn, OldIdxOut);
+ // NewIdxIn is now considered undef so we can reuse it for the moved
+ // value.
+ LiveRange::iterator NewSegment = NewIdxIn;
+ LiveRange::iterator Next = std::next(NewSegment);
+ if (SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // There is no gap between NewSegment and its predecessor.
+ *NewSegment = LiveRange::Segment(Next->start, SplitPos,
+ Next->valno);
+
+ *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
+ Next->valno->def = SplitPos;
+ } else {
+ // There is a gap between NewSegment and its predecessor;
+ // the value becomes live-in.
+ *NewSegment = LiveRange::Segment(SplitPos, Next->start, OldIdxVNI);
+ NewSegment->valno->def = SplitPos;
+ }
+ } else {
+ // Leave the end point of a live def.
+ OldIdxOut->start = NewIdxDef;
+ OldIdxVNI->def = NewIdxDef;
+ if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))
+ OldIdxIn->end = NewIdxDef;
+ }
+ } else if (OldIdxIn != E
+ && SlotIndex::isEarlierInstr(NewIdxOut->start, NewIdx)
+ && SlotIndex::isEarlierInstr(NewIdx, NewIdxOut->end)) {
+ // OldIdxVNI is a dead def that has been moved into the middle of
+ // another value in LR. That can happen when LR is a whole register,
+ // but the dead def is a write to a subreg that is dead at NewIdx.
+ // The dead def may have been moved across other values
+ // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
+ // down one position.
+ // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - |
+ // => |- X0/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -|
+ std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
+ // Modify the segment at NewIdxOut and the following segment to meet at
+ // the point of the dead def, with the following segment getting
+ // OldIdxVNI as its value number.
+ *NewIdxOut = LiveRange::Segment(
+ NewIdxOut->start, NewIdxDef.getRegSlot(), NewIdxOut->valno);
+ *(NewIdxOut + 1) = LiveRange::Segment(
+ NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI);
+ OldIdxVNI->def = NewIdxDef;
+ // Modify subsequent segments to be defined by the moved def OldIdxVNI.
+ for (auto *Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx)
+ Idx->valno = OldIdxVNI;
+ // Aggressively remove all dead flags from the former dead definition.
+ // Kill/dead flags shouldn't be used while live intervals exist; they
+ // will be reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(NewIdx))
+ for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && !MO->isUse())
+ MO->setIsDead(false);
+ } else {
+ // OldIdxVNI is a dead def. It may have been moved across other values
+ // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
+ // down one position.
+ // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - |
+ // => |- undef/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -|
+ std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
+ // OldIdxVNI can be reused now to build a new dead def segment.
+ LiveRange::iterator NewSegment = NewIdxOut;
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ NewSegmentVNI->def = NewIdxDef;
+ }
+ }
+ }
+
+ void updateRegMaskSlots() {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ llvm::lower_bound(LIS.RegMaskSlots, OldIdx);
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*std::prev(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((std::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *std::next(RI))) &&
+ "Cannot move regmask instruction below another call");
+ }
+
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(SlotIndex Before, Register Reg,
+ LaneBitmask LaneMask) {
+ if (Reg.isVirtual()) {
+ SlotIndex LastUse = Before;
+ for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0 && LaneMask.any()
+ && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask).none())
+ continue;
+
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot.getRegSlot();
+ }
+ return LastUse;
+ }
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(Before < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Before);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugOrPseudoInstr())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(*MII);
+
+ // Stop searching when Before is reached.
+ if (!SlotIndex::isEarlierInstr(Before, Idx))
+ return Before;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
+ if (MO->isReg() && !MO->isUndef() && MO->getReg().isPhysical() &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx.getRegSlot();
+ }
+ // Didn't reach Before. It must be the first instruction in the block.
+ return Before;
+ }
+};
+
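+// handleMove() is called after an instruction has already been moved to its
+// new position within the same basic block (for example by the machine
+// scheduler): it reassigns the instruction's slot index and lets HMEditor
+// trim or extend every live range the instruction touches.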
+void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
+ // It is fine to move a bundle as a whole, but not an individual instruction
+ // inside it.
+ assert((!MI.isBundled() || MI.getOpcode() == TargetOpcode::BUNDLE) &&
+ "Cannot move instruction in bundle");
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ Indexes->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI.getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI.getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(&MI);
+}
+
+void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,
+ bool UpdateFlags) {
+ assert((BundleStart.getOpcode() == TargetOpcode::BUNDLE) &&
+ "Bundle start is not a bundle");
+ SmallVector<SlotIndex, 16> ToProcess;
+ const SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(BundleStart);
+ auto BundleEnd = getBundleEnd(BundleStart.getIterator());
+
+ // Collect the old slot indexes of the instructions folded into the bundle
+ // and remove them from the maps; instructions that were never indexed are
+ // skipped.
+ for (auto I = std::next(BundleStart.getIterator()); I != BundleEnd; ++I) {
+ if (!Indexes->hasIndex(*I))
+ continue;
+ SlotIndex OldIndex = Indexes->getInstructionIndex(*I, true);
+ ToProcess.push_back(OldIndex);
+ Indexes->removeMachineInstrFromMaps(*I, true);
+ }
+ for (SlotIndex OldIndex : ToProcess) {
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(&BundleStart);
+ }
+
+ // Fix up dead defs
+ const SlotIndex Index = getInstructionIndex(BundleStart);
+ for (unsigned Idx = 0, E = BundleStart.getNumOperands(); Idx != E; ++Idx) {
+ MachineOperand &MO = BundleStart.getOperand(Idx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual() && hasInterval(Reg) && !MO.isUndef()) {
+ LiveInterval &LI = getInterval(Reg);
+ LiveQueryResult LRQ = LI.Query(Index);
+ if (LRQ.isDeadDef())
+ MO.setIsDead();
+ }
+ }
+}
+
+void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
+ const MachineBasicBlock::iterator End,
+ const SlotIndex EndIdx, LiveRange &LR,
+ const Register Reg,
+ LaneBitmask LaneMask) {
+ LiveInterval::iterator LII = LR.find(EndIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LR.end() && LII->start < EndIdx) {
+ lastUseIdx = LII->end;
+ } else if (LII == LR.begin()) {
+ // We may not have a live range at all if this is a subregister untouched
+ // between \p Begin and \p End.
+ } else {
+ --LII;
+ }
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr &MI = *I;
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((Mask & LaneMask).none())
+ continue;
+
+ if (MO.isDef()) {
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ LII = LR.removeSegment(LII, true);
+ if (LII != LR.begin())
+ --LII;
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator);
+ LiveRange::Segment S(instrIdx.getRegSlot(),
+ instrIdx.getDeadSlot(), VNI);
+ LII = LR.addSegment(S);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator);
+ LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LR.addSegment(S);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ if (!isStartValid && LII->end.isDead())
+ LR.removeSegment(*LII, true);
+}
+
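+// Intended for passes that insert or remove instructions between Begin and
+// End without keeping the analyses up to date: the range is first widened to
+// instructions that already have slot indexes, the indexes in between are
+// repaired, and then the live intervals of OrigRegs are recomputed or
+// repaired in place.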
+void
+LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<Register> OrigRegs) {
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !Indexes->hasIndex(*std::prev(Begin)))
+ --Begin;
+ while (End != MBB->end() && !Indexes->hasIndex(*End))
+ ++End;
+
+ SlotIndex EndIdx;
+ if (End == MBB->end())
+ EndIdx = getMBBEndIdx(MBB).getPrevSlot();
+ else
+ EndIdx = getInstructionIndex(*End);
+
+ Indexes->repairIndexesInRange(MBB, Begin, End);
+
+ // Make sure a live interval exists for all register operands in the range.
+ SmallVector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end());
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr &MI = *I;
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ Register Reg = MO.getReg();
+ // If the new instructions refer to subregs but the old instructions did
+ // not, throw away any old live interval so it will be recomputed with
+ // subranges.
+ if (MO.getSubReg() && hasInterval(Reg) &&
+ !getInterval(Reg).hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ removeInterval(Reg);
+ if (!hasInterval(Reg)) {
+ createAndComputeVirtRegInterval(Reg);
+ // Don't bother to repair a freshly calculated live interval.
+ erase_value(RegsToRepair, Reg);
+ }
+ }
+ }
+ }
+
+ for (Register Reg : RegsToRepair) {
+ if (!Reg.isVirtual())
+ continue;
+
+ LiveInterval &LI = getInterval(Reg);
+ // FIXME: Should we support undefs that gain defs?
+ if (!LI.hasAtLeastOneValue())
+ continue;
+
+ for (LiveInterval::SubRange &S : LI.subranges())
+ repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask);
+ LI.removeEmptySubRanges();
+
+ repairOldRegInRange(Begin, End, EndIdx, LI, Reg);
+ }
+}
+
+void LiveIntervals::removePhysRegDefAt(MCRegister Reg, SlotIndex Pos) {
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (LiveRange *LR = getCachedRegUnit(Unit))
+ if (VNInfo *VNI = LR->getVNInfoAt(Pos))
+ LR->removeValNo(VNI);
+ }
+}
+
+void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
+ // LI may not have the main range computed yet, but its subranges may
+ // be present.
+ VNInfo *VNI = LI.getVNInfoAt(Pos);
+ if (VNI != nullptr) {
+ assert(VNI->def.getBaseIndex() == Pos.getBaseIndex());
+ LI.removeValNo(VNI);
+ }
+
+ // Also remove the value defined in subranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (VNInfo *SVNI = S.getVNInfoAt(Pos))
+ if (SVNI->def.getBaseIndex() == Pos.getBaseIndex())
+ S.removeValNo(SVNI);
+ }
+ LI.removeEmptySubRanges();
+}
+
+void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
+ SmallVectorImpl<LiveInterval*> &SplitLIs) {
+ ConnectedVNInfoEqClasses ConEQ(*this);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ return;
+ LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
+ Register Reg = LI.reg();
+ for (unsigned I = 1; I < NumComp; ++I) {
+ Register NewVReg = MRI->cloneVirtualRegister(Reg);
+ LiveInterval &NewLI = createEmptyInterval(NewVReg);
+ SplitLIs.push_back(&NewLI);
+ }
+ ConEQ.Distribute(LI, SplitLIs.data(), *MRI);
+}
+
+void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) {
+ assert(LICalc && "LICalc not initialized.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LICalc->constructMainRangeFromSubranges(LI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
new file mode 100644
index 000000000000..96380d408482
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -0,0 +1,340 @@
+//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LivePhysRegs utility for tracking liveness of
+// physical registers across machine instructions in forward or backward order.
+// A more detailed description can be found in the corresponding header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+/// Remove all registers from the set that get clobbered by the register
+/// mask.
+/// The clobbers set will be the list of live registers clobbered
+/// by the regmask.
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers) {
+ RegisterSet::iterator LRI = LiveRegs.begin();
+ while (LRI != LiveRegs.end()) {
+ if (MO.clobbersPhysReg(*LRI)) {
+ if (Clobbers)
+ Clobbers->push_back(std::make_pair(*LRI, &MO));
+ LRI = LiveRegs.erase(LRI);
+ } else
+ ++LRI;
+ }
+}
+
+/// Remove defined registers and regmask kills from the set.
+void LivePhysRegs::removeDefs(const MachineInstr &MI) {
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ removeRegsInMask(MOP);
+ continue;
+ }
+
+ if (MOP.isDef())
+ removeReg(MOP.getReg());
+ }
+}
+
+/// Add uses to the set.
+void LivePhysRegs::addUses(const MachineInstr &MI) {
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (!MOP.isReg() || !MOP.readsReg())
+ continue;
+ addReg(MOP.getReg());
+ }
+}
+
+/// Simulates liveness when stepping backwards over an instruction (bundle):
+/// remove defs, add uses. This is the recommended way of calculating liveness.
+void LivePhysRegs::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ removeDefs(MI);
+
+ // Add uses to the set.
+ addUses(MI);
+}
+
+/// Simulates liveness when stepping forward over an instruction (bundle):
+/// remove killed uses, add defs. This is not the recommended way, because it
+/// depends on accurate kill flags. If possible, use stepBackward() instead of
+/// this function.
+void LivePhysRegs::stepForward(const MachineInstr &MI,
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
+ // Remove killed registers from the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ if (O->isDebug())
+ continue;
+ Register Reg = O->getReg();
+ if (!Reg.isPhysical())
+ continue;
+ if (O->isDef()) {
+ // Note, dead defs are still recorded. The caller should decide how to
+ // handle them.
+ Clobbers.push_back(std::make_pair(Reg, &*O));
+ } else {
+ assert(O->isUse());
+ if (O->isKill())
+ removeReg(Reg);
+ }
+ } else if (O->isRegMask()) {
+ removeRegsInMask(*O, &Clobbers);
+ }
+ }
+
+ // Add defs to the set.
+ for (auto Reg : Clobbers) {
+ // Skip dead defs and registers clobbered by regmasks. They shouldn't
+ // be added to the set.
+ if (Reg.second->isReg() && Reg.second->isDead())
+ continue;
+ if (Reg.second->isRegMask() &&
+ MachineOperand::clobbersPhysReg(Reg.second->getRegMask(), Reg.first))
+ continue;
+ addReg(Reg.first);
+ }
+}
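+
+// Usage sketch (illustrative; TRI and MBB assumed in scope): the backward walk
+// is the preferred pattern because it does not depend on kill flags:
+//
+//   LivePhysRegs LiveRegs(*TRI);
+//   LiveRegs.addLiveOuts(MBB);                  // seed with MBB's live-outs
+//   for (const MachineInstr &MI : llvm::reverse(MBB))
+//     LiveRegs.stepBackward(MI);                // LiveRegs now holds MBB's live-ins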
+
+/// Print the currently live registers to OS.
+void LivePhysRegs::print(raw_ostream &OS) const {
+ OS << "Live Registers:";
+ if (!TRI) {
+ OS << " (uninitialized)\n";
+ return;
+ }
+
+ if (empty()) {
+ OS << " (empty)\n";
+ return;
+ }
+
+ for (MCPhysReg R : *this)
+ OS << " " << printReg(R, TRI);
+ OS << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
+ dbgs() << " " << *this;
+}
+#endif
+
+bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
+ MCPhysReg Reg) const {
+ if (LiveRegs.count(Reg))
+ return false;
+ if (MRI.isReserved(Reg))
+ return false;
+ for (MCRegAliasIterator R(Reg, TRI, false); R.isValid(); ++R) {
+ if (LiveRegs.count(*R))
+ return false;
+ }
+ return true;
+}
+
+/// Add live-in registers of basic block \p MBB to \p LiveRegs.
+void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins()) {
+ MCPhysReg Reg = LI.PhysReg;
+ LaneBitmask Mask = LI.LaneMask;
+ MCSubRegIndexIterator S(Reg, TRI);
+ assert(Mask.any() && "Invalid livein mask");
+ if (Mask.all() || !S.isValid()) {
+ addReg(Reg);
+ continue;
+ }
+ for (; S.isValid(); ++S) {
+ unsigned SI = S.getSubRegIndex();
+ if ((Mask & TRI->getSubRegIndexLaneMask(SI)).any())
+ addReg(S.getSubReg());
+ }
+ }
+}
+
+/// Adds all callee saved registers to \p LiveRegs.
+static void addCalleeSavedRegs(LivePhysRegs &LiveRegs,
+ const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
+ LiveRegs.addReg(*CSR);
+}
+
+void LivePhysRegs::addPristines(const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+  /// This function will usually be called on an empty object; handle this
+  /// as a special case.
+ if (empty()) {
+ /// Add all callee saved regs, then remove the ones that are saved and
+ /// restored.
+ addCalleeSavedRegs(*this, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ removeReg(Info.getReg());
+ return;
+ }
+ /// If a callee-saved register that is not pristine is already present
+ /// in the set, we should make sure that it stays in it. Precompute the
+ /// set of pristine registers in a separate object.
+ /// Add all callee saved regs, then remove the ones that are saved+restored.
+ LivePhysRegs Pristine(*TRI);
+ addCalleeSavedRegs(Pristine, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ Pristine.removeReg(Info.getReg());
+ for (MCPhysReg R : Pristine)
+ addReg(R);
+}
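+
+// Worked example (hypothetical target state): if the callee-saved set is
+// {R4, R5, R6} but the function only saves and restores R4, then R5 and R6 are
+// pristine; addPristines() adds them to the set, while R4 is handled by the
+// explicit save/restore code.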
+
+void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*Succ);
+ if (MBB.isReturnBlock()) {
+ // Return blocks are a special case because we currently don't mark up
+ // return instructions completely: specifically, there is no explicit
+ // use for callee-saved registers. So we add all callee saved registers
+ // that are saved and restored (somewhere). This does not include
+ // callee saved registers that are unused and hence not saved and
+ // restored; they are called pristine.
+ // FIXME: PEI should add explicit markings to return instructions
+ // instead of implicitly handling them here.
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ if (Info.isRestored())
+ addReg(Info.getReg());
+ }
+ }
+}
+
+void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ addPristines(MF);
+ addLiveOutsNoPristines(MBB);
+}
+
+void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ addPristines(MF);
+ addBlockLiveIns(MBB);
+}
+
+void LivePhysRegs::addLiveInsNoPristines(const MachineBasicBlock &MBB) {
+ addBlockLiveIns(MBB);
+}
+
+void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
+ const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ for (const MachineInstr &MI : llvm::reverse(MBB))
+ LiveRegs.stepBackward(MI);
+}
+
+void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) {
+ assert(MBB.livein_empty() && "Expected empty live-in list");
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (MCPhysReg Reg : LiveRegs) {
+ if (MRI.isReserved(Reg))
+ continue;
+ // Skip the register if we are about to add one of its super registers.
+ if (any_of(TRI.superregs(Reg), [&](MCPhysReg SReg) {
+ return LiveRegs.contains(SReg) && !MRI.isReserved(SReg);
+ }))
+ continue;
+ MBB.addLiveIn(Reg);
+ }
+}
+
+void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // We walk through the block backwards and start with the live outs.
+ LivePhysRegs LiveRegs;
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ // Recompute dead flags.
+ for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef() || MO->isDebug())
+ continue;
+
+ Register Reg = MO->getReg();
+ if (Reg == 0)
+ continue;
+ assert(Reg.isPhysical());
+
+ bool IsNotLive = LiveRegs.available(MRI, Reg);
+
+ // Special-case return instructions for cases when a return is not
+ // the last instruction in the block.
+ if (MI.isReturn() && MFI.isCalleeSavedInfoValid()) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
+ if (Info.getReg() == Reg) {
+ IsNotLive = !Info.isRestored();
+ break;
+ }
+ }
+ }
+
+ MO->setIsDead(IsNotLive);
+ }
+
+ // Step backward over defs.
+ LiveRegs.removeDefs(MI);
+
+ // Recompute kill flags.
+ for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->readsReg() || MO->isDebug())
+ continue;
+
+ Register Reg = MO->getReg();
+ if (Reg == 0)
+ continue;
+ assert(Reg.isPhysical());
+
+ bool IsNotLive = LiveRegs.available(MRI, Reg);
+ MO->setIsKill(IsNotLive);
+ }
+
+    // Complete the backward step.
+ LiveRegs.addUses(MI);
+ }
+}
+
+void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs,
+ MachineBasicBlock &MBB) {
+ computeLiveIns(LiveRegs, MBB);
+ addLiveIns(MBB, LiveRegs);
+}
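+
+// Usage sketch (illustrative; NewMBB assumed to be a freshly created block
+// with an empty live-in list, e.g. after splitting an edge):
+//
+//   LivePhysRegs LiveRegs;
+//   computeAndAddLiveIns(LiveRegs, *NewMBB);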
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
new file mode 100644
index 000000000000..26f6e1ede1ad
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -0,0 +1,451 @@
+//===- LiveRangeCalc.cpp - Calculate live ranges -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveRangeCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRangeCalc.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
+void LiveRangeCalc::resetLiveOutMap() {
+ unsigned NumBlocks = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(NumBlocks);
+ EntryInfos.clear();
+ Map.resize(NumBlocks);
+}
+
+void LiveRangeCalc::reset(const MachineFunction *mf,
+ SlotIndexes *SI,
+ MachineDominatorTree *MDT,
+ VNInfo::Allocator *VNIA) {
+ MF = mf;
+ MRI = &MF->getRegInfo();
+ Indexes = SI;
+ DomTree = MDT;
+ Alloc = VNIA;
+ resetLiveOutMap();
+ LiveIn.clear();
+}
+
+void LiveRangeCalc::updateFromLiveIns() {
+ LiveRangeUpdater Updater;
+ for (const LiveInBlock &I : LiveIn) {
+ if (!I.DomNode)
+ continue;
+ MachineBasicBlock *MBB = I.DomNode->getBlock();
+ assert(I.Value && "No live-in value found");
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(MBB);
+
+ if (I.Kill.isValid())
+ // Value is killed inside this block.
+ End = I.Kill;
+ else {
+ // The value is live-through, update LiveOut as well.
+ // Defer the Domtree lookup until it is needed.
+ assert(Seen.test(MBB->getNumber()));
+ Map[MBB] = LiveOutPair(I.Value, nullptr);
+ }
+ Updater.setDest(&I.LR);
+ Updater.add(Start, End, I.Value);
+ }
+ LiveIn.clear();
+}
+
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs) {
+ assert(Use.isValid() && "Invalid SlotIndex");
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot());
+ assert(UseMBB && "No MBB at Use");
+
+ // Is there a def in the same MBB we can extend?
+ auto EP = LR.extendInBlock(Undefs, Indexes->getMBBStartIdx(UseMBB), Use);
+ if (EP.first != nullptr || EP.second)
+ return;
+
+ // Find the single reaching def, or determine if Use is jointly dominated by
+ // multiple values, and we may need to create even more phi-defs to preserve
+ // VNInfo SSA form. Perform a search for all predecessor blocks where we
+ // know the dominating VNInfo.
+ if (findReachingDefs(LR, *UseMBB, Use, PhysReg, Undefs))
+ return;
+
+ // When there were multiple different values, we may need new PHIs.
+ calculateValues();
+}
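+
+// Usage sketch (illustrative; LRCalc, MF, Indexes, DomTree and VNIAlloc
+// assumed in scope), mirroring how LiveIntervals::extendToIndices drives this
+// API:
+//
+//   LRCalc.reset(MF, Indexes, DomTree, &VNIAlloc);
+//   LRCalc.extend(LR, UseIdx, /*PhysReg=*/0, /*Undefs=*/{});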
+
+// This function is called by a client after using the low-level API to add
+// live-out and live-in blocks. The unique value optimization is not
+// available; SplitEditor::transferValues handles that case directly anyway.
+void LiveRangeCalc::calculateValues() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+ updateSSA();
+ updateFromLiveIns();
+}
+
+bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
+ MachineBasicBlock &MBB, BitVector &DefOnEntry,
+ BitVector &UndefOnEntry) {
+ unsigned BN = MBB.getNumber();
+ if (DefOnEntry[BN])
+ return true;
+ if (UndefOnEntry[BN])
+ return false;
+
+ auto MarkDefined = [BN, &DefOnEntry](MachineBasicBlock &B) -> bool {
+ for (MachineBasicBlock *S : B.successors())
+ DefOnEntry[S->getNumber()] = true;
+ DefOnEntry[BN] = true;
+ return true;
+ };
+
+ SetVector<unsigned> WorkList;
+ // Checking if the entry of MBB is reached by some def: add all predecessors
+ // that are potentially defined-on-exit to the work list.
+ for (MachineBasicBlock *P : MBB.predecessors())
+ WorkList.insert(P->getNumber());
+
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ // Determine if the exit from the block is reached by some def.
+ unsigned N = WorkList[i];
+ MachineBasicBlock &B = *MF->getBlockNumbered(N);
+ if (Seen[N]) {
+ const LiveOutPair &LOB = Map[&B];
+ if (LOB.first != nullptr && LOB.first != &UndefVNI)
+ return MarkDefined(B);
+ }
+ SlotIndex Begin, End;
+ std::tie(Begin, End) = Indexes->getMBBRange(&B);
+ // Treat End as not belonging to B.
+ // If LR has a segment S that starts at the next block, i.e. [End, ...),
+ // std::upper_bound will return the segment following S. Instead,
+ // S should be treated as the first segment that does not overlap B.
+ LiveRange::iterator UB = upper_bound(LR, End.getPrevSlot());
+ if (UB != LR.begin()) {
+ LiveRange::Segment &Seg = *std::prev(UB);
+ if (Seg.end > Begin) {
+ // There is a segment that overlaps B. If the range is not explicitly
+ // undefined between the end of the segment and the end of the block,
+ // treat the block as defined on exit. If it is, go to the next block
+ // on the work list.
+ if (LR.isUndefIn(Undefs, Seg.end, End))
+ continue;
+ return MarkDefined(B);
+ }
+ }
+
+ // No segment overlaps with this block. If this block is not defined on
+ // entry, or it undefines the range, do not process its predecessors.
+ if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) {
+ UndefOnEntry[N] = true;
+ continue;
+ }
+ if (DefOnEntry[N])
+ return MarkDefined(B);
+
+ // Still don't know: add all predecessors to the work list.
+ for (MachineBasicBlock *P : B.predecessors())
+ WorkList.insert(P->getNumber());
+ }
+
+ UndefOnEntry[BN] = true;
+ return false;
+}
+
+bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
+ SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs) {
+ unsigned UseMBBNum = UseMBB.getNumber();
+
+ // Block numbers where LR should be live-in.
+ SmallVector<unsigned, 16> WorkList(1, UseMBBNum);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = nullptr;
+
+ bool FoundUndef = false;
+
+ // Using Seen as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo())
+ << " does not have a corresponding definition on every path:\n";
+ const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
+ if (MI != nullptr)
+ errs() << Use << " " << *MI;
+ report_fatal_error("Use not jointly dominated by defs.");
+ }
+
+ if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ errs() << "The register " << printReg(PhysReg, TRI)
+ << " needs to be live in to " << printMBBReference(*MBB)
+ << ", but is missing from the live-in list.\n";
+ report_fatal_error("Invalid global physical register");
+ }
+#endif
+ FoundUndef |= MBB->pred_empty();
+
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ // Is this a known live-out block?
+ if (Seen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = Map[Pred].first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(Pred);
+
+ // First time we see Pred. Try to determine the live-out value, but set
+ // it as null if Pred is live-through with an unknown value.
+ auto EP = LR.extendInBlock(Undefs, Start, End);
+ VNInfo *VNI = EP.first;
+ FoundUndef |= EP.second;
+ setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI);
+ if (VNI) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ if (VNI || EP.second)
+ continue;
+
+ // No, we need a live-in value for Pred as well
+ if (Pred != &UseMBB)
+ WorkList.push_back(Pred->getNumber());
+ else
+ // Loopback to UseMBB, so value is really live through.
+ Use = SlotIndex();
+ }
+ }
+
+ LiveIn.clear();
+ FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
+ if (!Undefs.empty() && FoundUndef)
+ UniqueVNI = false;
+
+ // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+  // neither requires it. Skip the sorting overhead for small updates.
+ if (WorkList.size() > 4)
+ array_pod_sort(WorkList.begin(), WorkList.end());
+
+ // If a unique reaching def was found, blit in the live ranges immediately.
+ if (UniqueVNI) {
+ assert(TheVNI != nullptr && TheVNI != &UndefVNI);
+ LiveRangeUpdater Updater(&LR);
+ for (unsigned BN : WorkList) {
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(BN);
+ // Trim the live range in UseMBB.
+ if (BN == UseMBBNum && Use.isValid())
+ End = Use;
+ else
+ Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr);
+ Updater.add(Start, End, TheVNI);
+ }
+ return true;
+ }
+
+ // Prepare the defined/undefined bit vectors.
+ EntryInfoMap::iterator Entry;
+ bool DidInsert;
+ std::tie(Entry, DidInsert) = EntryInfos.insert(
+ std::make_pair(&LR, std::make_pair(BitVector(), BitVector())));
+ if (DidInsert) {
+ // Initialize newly inserted entries.
+ unsigned N = MF->getNumBlockIDs();
+ Entry->second.first.resize(N);
+ Entry->second.second.resize(N);
+ }
+ BitVector &DefOnEntry = Entry->second.first;
+ BitVector &UndefOnEntry = Entry->second.second;
+
+ // Multiple values were found, so transfer the work list to the LiveIn array
+  // where updateSSA() will use it as a work list.
+ LiveIn.reserve(WorkList.size());
+ for (unsigned BN : WorkList) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
+ if (!Undefs.empty() &&
+ !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ continue;
+ addLiveInBlock(LR, DomTree->getNode(MBB));
+ if (MBB == &UseMBB)
+ LiveIn.back().Kill = Use;
+ }
+
+ return false;
+}
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+  // Iterate until convergence.
+ bool Changed;
+ do {
+ Changed = false;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (LiveInBlock &I : LiveIn) {
+ MachineDomTreeNode *Node = I.DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = Map[IDom->getBlock()];
+
+ // Cache the DomTree node that defined the value.
+ if (IDomValue.first && IDomValue.first != &UndefVNI &&
+ !IDomValue.second) {
+ Map[IDom->getBlock()].second = IDomValue.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+ }
+
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveOutPair &Value = Map[Pred];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+ if (Value.first == &UndefVNI) {
+ needPHI = true;
+ break;
+ }
+
+ // Cache the DomTree node that defined the value.
+ if (!Value.second)
+ Value.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (DomTree->dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extendRange. In that case LiveOutSeen is true, and
+ // LiveOut indicates a foreign or missing value.
+ LiveOutPair &LOP = Map[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ Changed = true;
+ assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(MBB);
+ LiveRange &LR = I.LR;
+ VNInfo *VNI = LR.getNextValue(Start, *Alloc);
+ I.Value = VNI;
+ // This block is done, we know the final value.
+ I.DomNode = nullptr;
+
+ // Add liveness since updateFromLiveIns now skips this node.
+ if (I.Kill.isValid()) {
+ if (VNI)
+ LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
+ } else {
+ if (VNI)
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first && IDomValue.first != &UndefVNI) {
+ // No phi-def here. Remember incoming value.
+ I.Value = IDomValue.first;
+
+ // If the IDomValue is killed in the block, don't propagate through.
+ if (I.Kill.isValid())
+ continue;
+
+ // Propagate IDomValue if it isn't killed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.first == IDomValue.first)
+ continue;
+ Changed = true;
+ LOP = IDomValue;
+ }
+ }
+ } while (Changed);
+}
+
+bool LiveRangeCalc::isJointlyDominated(const MachineBasicBlock *MBB,
+ ArrayRef<SlotIndex> Defs,
+ const SlotIndexes &Indexes) {
+ const MachineFunction &MF = *MBB->getParent();
+ BitVector DefBlocks(MF.getNumBlockIDs());
+ for (SlotIndex I : Defs)
+ DefBlocks.set(Indexes.getMBBFromIndex(I)->getNumber());
+
+ SetVector<unsigned> PredQueue;
+ PredQueue.insert(MBB->getNumber());
+ for (unsigned i = 0; i != PredQueue.size(); ++i) {
+ unsigned BN = PredQueue[i];
+ if (DefBlocks[BN])
+ return true;
+ const MachineBasicBlock *B = MF.getBlockNumbered(BN);
+ for (const MachineBasicBlock *P : B->predecessors())
+ PredQueue.insert(P->getNumber());
+ }
+ return false;
+}
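+
+// Usage sketch (illustrative; UseMBB, DefIdxs and Indexes assumed in scope):
+// this predicate is typically used for verification-style checks, e.g.
+//
+//   assert(LiveRangeCalc::isJointlyDominated(UseMBB, DefIdxs, *Indexes) &&
+//          "Use not jointly dominated by defs");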
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..c3477cd8ce34
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,508 @@
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+STATISTIC(NumReMaterialization, "Number of instructions rematerialized");
+
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
+ bool createSubRanges) {
+ Register VReg = MRI.cloneVirtualRegister(OldReg);
+ if (VRM)
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+
+ LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ if (Parent && !Parent->isSpillable())
+ LI.markNotSpillable();
+ if (createSubRanges) {
+    // Create empty subranges if OldReg's interval has them. Do not create
+    // the main range here; it will be constructed later, after the subranges
+    // have been finalized.
+ LiveInterval &OldLI = LIS.getInterval(OldReg);
+ VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &S : OldLI.subranges())
+ LI.createSubRange(Alloc, S.LaneMask);
+ }
+ return LI;
+}
+
+Register LiveRangeEdit::createFrom(Register OldReg) {
+ Register VReg = MRI.cloneVirtualRegister(OldReg);
+ if (VRM) {
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
+ // FIXME: Getting the interval here actually computes it.
+ // In theory, this may not be what we want, but in practice
+ // the createEmptyIntervalFrom API is used when this is not
+ // the case. Generally speaking we just want to annotate the
+ // LiveInterval when it gets created but we cannot do that at
+ // the moment.
+ if (Parent && !Parent->isSpillable())
+ LIS.getInterval(VReg).markNotSpillable();
+ return VReg;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI) {
+ assert(DefMI && "Missing instruction");
+ ScannedRemattable = true;
+ if (!TII.isTriviallyReMaterializable(*DefMI))
+ return false;
+ Remattable.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable() {
+ for (VNInfo *VNI : getParent().valnos) {
+ if (VNI->isUnused())
+ continue;
+ Register Original = VRM->getOriginal(getReg());
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ if (!OrigVNI)
+ continue;
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(OrigVNI, DefMI);
+ }
+ ScannedRemattable = true;
+}
+
+bool LiveRangeEdit::anyRematerializable() {
+ if (!ScannedRemattable)
+ scanRemattable();
+ return !Remattable.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) const {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
+ for (const MachineOperand &MO : OrigMI->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+    // We can't remat physreg uses, unless it is a constant or the target
+    // wants to ignore this use.
+ if (MO.getReg().isPhysical()) {
+ if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO))
+ continue;
+ return false;
+ }
+
+ LiveInterval &li = LIS.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+
+ // Check that subrange is live at UseIdx.
+ if (li.hasSubRanges()) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(MO.getReg());
+ for (LiveInterval::SubRange &SR : li.subranges()) {
+ if ((SR.LaneMask & LM).none())
+ continue;
+ if (!SR.liveAt(UseIdx))
+ return false;
+ // Early exit if all used lanes are checked. No need to continue.
+ LM &= ~SR.LaneMask;
+ if (LM.none())
+ break;
+ }
+ }
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
+ SlotIndex UseIdx, bool cheapAsAMove) {
+ assert(ScannedRemattable && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!Remattable.count(OrigVNI))
+ return false;
+
+ // No defining instruction provided.
+ SlotIndex DefIdx;
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !TII.isAsCheapAsAMove(*RM.OrigMI))
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register DestReg, const Remat &RM,
+ const TargetRegisterInfo &tri,
+ bool Late, unsigned SubIdx,
+ MachineInstr *ReplaceIndexMI) {
+ assert(RM.OrigMI && "Invalid remat");
+ TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri);
+ // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
+ // to false anyway in case the isDead flag of RM.OrigMI's dest register
+ // is true.
+ (*--MI).getOperand(0).setIsDead(false);
+ Rematted.insert(RM.ParentVNI);
+ ++NumReMaterialization;
+
+ if (ReplaceIndexMI)
+ return LIS.ReplaceMachineInstrInMaps(*ReplaceIndexMI, *MI).getRegSlot();
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
+}
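+
+// Usage sketch (illustrative; Edit, ParentVNI, OrigVNI, DefMI, UseMI, UseIdx,
+// NewVReg and TRI assumed in scope): the typical query/insert flow a spiller
+// follows with this API:
+//
+//   if (Edit.anyRematerializable()) {
+//     LiveRangeEdit::Remat RM(ParentVNI);
+//     RM.OrigMI = DefMI;                                 // defining instruction
+//     if (Edit.canRematerializeAt(RM, OrigVNI, UseIdx, /*cheapAsAMove=*/false))
+//       Edit.rematerializeAt(*UseMI->getParent(), UseMI->getIterator(), NewVReg,
+//                            RM, *TRI);
+//   }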
+
+void LiveRangeEdit::eraseVirtReg(Register Reg) {
+ if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead) {
+ MachineInstr *DefMI = nullptr, *UseMI = nullptr;
+
+ // Check that there is a single def and a single use.
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg())) {
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI, LIS.getInstructionIndex(*DefMI),
+ LIS.getInstructionIndex(*UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(nullptr, SawStore))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg(), &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);
+ if (!FoldMI)
+ return false;
+ LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
+ // Update the call site info.
+ if (UseMI->shouldUpdateCallSiteInfo())
+ UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg(), nullptr);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
+
+bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
+ const MachineOperand &MO) const {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (LI.Query(Idx).isKill())
+ return true;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).any() && S.Query(Idx).isKill())
+ return true;
+ }
+ return false;
+}
+
+/// Find all live intervals that need to shrink, then remove the instruction.
+void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+
+ // Never delete a bundled instruction.
+ if (MI->isBundled()) {
+ // TODO: Handle deleting copy bundles
+ LLVM_DEBUG(dbgs() << "Won't delete dead bundled inst: " << Idx << '\t'
+ << *MI);
+ return;
+ }
+
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ return;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(nullptr, SawStore)) {
+ LLVM_DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Collect virtual registers to be erased after MI is gone.
+ SmallVector<Register, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
+ bool isOrigDef = false;
+ Register Dest;
+ unsigned DestSubReg;
+  // Only optimize the rematerialization case when the instruction has one def,
+  // since otherwise we could leave some dead defs in the code. This case is
+  // extremely rare.
+ if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ MI->getDesc().getNumDefs() == 1) {
+ Dest = MI->getOperand(0).getReg();
+ DestSubReg = MI->getOperand(0).getSubReg();
+ Register Original = VRM->getOriginal(Dest);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ // The original live-range may have been shrunk to
+ // an empty live-range. It happens when it is dead, but
+ // we still keep it around to be able to rematerialize
+ // other values that depend on it.
+ if (OrigVNI)
+ isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
+ }
+
+ bool HasLiveVRegUses = false;
+
+ // Check for live intervals that may shrink
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual()) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MO.readsReg() && !MRI.isReserved(Reg))
+ ReadsPhysRegs = true;
+ else if (MO.isDef())
+ LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
+ ToShrink.insert(&LI);
+ else if (MO.readsReg())
+ HasLiveVRegUses = true;
+
+ // Remove defined value.
+ if (MO.isDef()) {
+ if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg());
+ LIS.removeVRegDefAt(LI, Idx);
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
+ }
+ }
+
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.getReg().isPhysical())
+ continue;
+ MI->removeOperand(i-1);
+ }
+ LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ // If the dest of MI is an original reg and MI is reMaterializable,
+ // don't delete the inst. Replace the dest with a new reg, and keep
+ // the inst for remat of other siblings. The inst is saved in
+ // LiveRangeEdit::DeadRemats and will be deleted after all the
+ // allocations of the func are done.
+ // However, immediately delete instructions which have unshrunk virtual
+ // register uses. That may provoke RA to split an interval at the KILL
+ // and later result in an invalid live segment end.
+ if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
+ TII.isTriviallyReMaterializable(*MI)) {
+ LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
+ VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+ VNInfo *VNI = NewLI.getNextValue(Idx, Alloc);
+ NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
+
+ if (DestSubReg) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ auto *SR = NewLI.createSubRange(
+ Alloc, TRI->getSubRegIndexLaneMask(DestSubReg));
+ SR->addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(),
+ SR->getNextValue(Idx, Alloc)));
+ }
+
+ pop_back();
+ DeadRemats->insert(MI);
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ MI->substituteRegister(Dest, NewLI.reg(), 0, TRI);
+ MI->getOperand(0).setIsDead(true);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+ }
+
+ // Erase any virtregs that are now empty and unused. There may be <undef>
+ // uses around. Keep the empty live range in that case.
+ for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+ Register Reg = RegsToErase[i];
+ if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+ ToShrink.remove(&LIS.getInterval(Reg));
+ eraseVirtReg(Reg);
+ }
+ }
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
+ ArrayRef<Register> RegsBeingSpilled) {
+ ToShrinkSet ToShrink;
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty())
+ eliminateDeadDef(Dead.pop_back_val(), ToShrink);
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.pop_back_val();
+ if (foldAsLoad(LI, Dead))
+ continue;
+ Register VReg = LI->reg();
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(VReg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // Don't create new intervals for a register being spilled.
+    // The new intervals would have to be spilled anyway, so it's not worth it.
+    // Also, they currently aren't spilled, so creating them and not spilling
+    // them results in incorrect code.
+ if (llvm::is_contained(RegsBeingSpilled, VReg))
+ continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues();
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(*LI, SplitLIs);
+ if (!SplitLIs.empty())
+ ++NumFracRanges;
+
+ Register Original = VRM ? VRM->getOriginal(VReg) : Register();
+ for (const LiveInterval *SplitLI : SplitLIs) {
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (Original != VReg && Original != 0)
+ VRM->setIsSplitFromReg(SplitLI->reg(), Original);
+ if (TheDelegate)
+ TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg(), VReg);
+ }
+ }
+}
+
+// Keep track of new virtual registers created via
+// MachineRegisterInfo::createVirtualRegister.
+void
+LiveRangeEdit::MRI_NoteNewVirtualRegister(Register VReg) {
+ if (VRM)
+ VRM->grow();
+
+ NewRegs.push_back(VReg);
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ VirtRegAuxInfo &VRAI) {
+ for (unsigned I = 0, Size = size(); I < Size; ++I) {
+ LiveInterval &LI = LIS.getInterval(get(I));
+ if (MRI.recomputeRegClass(LI.reg()))
+ LLVM_DEBUG({
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ dbgs() << "Inflated " << printReg(LI.reg()) << " to "
+ << TRI->getRegClassName(MRI.getRegClass(LI.reg())) << '\n';
+ });
+ VRAI.calculateSpillWeightAndHint(LI);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
new file mode 100644
index 000000000000..93f5314539cd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -0,0 +1,245 @@
+//===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+///===---------------------------------------------------------------------===//
+///
+/// \file
+/// This pass moves instructions close to the definitions of their operands to
+/// shrink the live range of the def instruction. The code motion is limited to
+/// the basic block. The moved instruction should have a single def and more
+/// than one use, each of which is the only use of its def.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lrshrink"
+
+STATISTIC(NumInstrsHoistedToShrinkLiveRange,
+          "Number of instructions hoisted to shrink live range.");
+
+namespace {
+
+class LiveRangeShrink : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveRangeShrink() : MachineFunctionPass(ID) {
+ initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Live Range Shrink"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char LiveRangeShrink::ID = 0;
+
+char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
+
+INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
+ false)
+
+using InstOrderMap = DenseMap<MachineInstr *, unsigned>;
+
+/// Returns \p New if it is dominated by \p Old; otherwise returns \p Old.
+/// \p M maps each instruction to a dominating-order number such that
+/// M[A] > M[B] implies that A is dominated by B.
+/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return
+/// \p New.
+static MachineInstr *FindDominatedInstruction(MachineInstr &New,
+ MachineInstr *Old,
+ const InstOrderMap &M) {
+ auto NewIter = M.find(&New);
+ if (NewIter == M.end())
+ return Old;
+ if (Old == nullptr)
+ return &New;
+ unsigned OrderOld = M.find(Old)->second;
+ unsigned OrderNew = NewIter->second;
+ if (OrderOld != OrderNew)
+ return OrderOld < OrderNew ? &New : Old;
+  // OrderOld == OrderNew, so we need to iterate down from Old to see if it
+  // can reach New; if it can, New is dominated by Old.
+ for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew;
+ I = I->getNextNode())
+ if (I == &New)
+ return &New;
+ return Old;
+}
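+
+// Worked example (assumed map contents): with M = {A: 0, B: 1} and A preceding
+// B in the block, FindDominatedInstruction(B, A, M) returns B, because
+// M[A] < M[B] means B is dominated by A; when the orders are equal, the
+// forward scan from Old decides.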
+
+/// Builds Instruction to its dominating order number map \p M by traversing
+/// from instruction \p Start.
+static void BuildInstOrderMap(MachineBasicBlock::iterator Start,
+ InstOrderMap &M) {
+ M.clear();
+ unsigned i = 0;
+ for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
+ M[&I] = i++;
+}
+
+bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ InstOrderMap IOM;
+  // Map from register to the instruction order (value of IOM) at which the
+  // register is last used. When moving an instruction up, we need to make
+  // sure that none of its defs (including dead defs) cross the last use of
+  // the corresponding register.
+ DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ bool SawStore = false;
+ BuildInstOrderMap(MBB.begin(), IOM);
+ UseMap.clear();
+
+ for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
+ MachineInstr &MI = *Next;
+ ++Next;
+ if (MI.isPHI() || MI.isDebugOrPseudoInstr())
+ continue;
+ if (MI.mayStore())
+ SawStore = true;
+
+ unsigned CurrentOrder = IOM[&MI];
+ unsigned Barrier = 0;
+ MachineInstr *BarrierMI = nullptr;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDebug())
+ continue;
+ if (MO.isUse())
+ UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
+ else if (MO.isDead() && UseMap.count(MO.getReg()))
+          // Barrier is the last instruction where MO gets used. MI should not
+ // be moved above Barrier.
+ if (Barrier < UseMap[MO.getReg()].first) {
+ Barrier = UseMap[MO.getReg()].first;
+ BarrierMI = UseMap[MO.getReg()].second;
+ }
+ }
+
+ if (!MI.isSafeToMove(nullptr, SawStore)) {
+ // If MI has side effects, it should become a barrier for code motion.
+        // IOM is rebuilt from the next instruction to prevent later
+ // instructions from being moved before this MI.
+ if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
+ Next != MBB.end()) {
+ BuildInstOrderMap(Next, IOM);
+ SawStore = false;
+ }
+ continue;
+ }
+
+ const MachineOperand *DefMO = nullptr;
+ MachineInstr *Insert = nullptr;
+
+      // Number of live-ranges that will be shortened. We do not count
+      // live-ranges that are defined by a COPY, as those could be coalesced
+      // later.
+ unsigned NumEligibleUse = 0;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDead() || MO.isDebug())
+ continue;
+ Register Reg = MO.getReg();
+        // Do not move the instruction if it defines or uses a physical
+        // register, unless it is a constant physical register or NoRegister.
+ if (!Reg.isVirtual()) {
+ if (!Reg || MRI.isConstantPhysReg(Reg))
+ continue;
+ Insert = nullptr;
+ break;
+ }
+ if (MO.isDef()) {
+ // Do not move if there is more than one def.
+ if (DefMO) {
+ Insert = nullptr;
+ break;
+ }
+ DefMO = &MO;
+ } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
+ MRI.getRegClass(DefMO->getReg()) ==
+ MRI.getRegClass(MO.getReg())) {
+ // The heuristic does not handle different register classes yet
+ // (registers of different sizes, looser/tighter constraints). This
+          // is because it needs a more accurate model to handle register
+ // pressure correctly.
+ MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
+ if (!DefInstr.isCopy())
+ NumEligibleUse++;
+ Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
+ } else {
+ Insert = nullptr;
+ break;
+ }
+ }
+
+      // If Barrier equals IOM[I], traverse forward to find whether BarrierMI
+      // is after Insert; if so, we should not hoist.
+ for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
+ I = I->getNextNode())
+ if (I == BarrierMI) {
+ Insert = nullptr;
+ break;
+ }
+ // Move the instruction when # of shrunk live range > 1.
+ if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
+ MachineBasicBlock::iterator I = std::next(Insert->getIterator());
+ // Skip all the PHI and debug instructions.
+ while (I != MBB.end() && (I->isPHI() || I->isDebugOrPseudoInstr()))
+ I = std::next(I);
+ if (I == MI.getIterator())
+ continue;
+
+ // Update the dominator order to be the same as the insertion point.
+        // We do this to maintain a non-decreasing order without needing to update
+ // all instruction orders after the insertion point.
+ unsigned NewOrder = IOM[&*I];
+ IOM[&MI] = NewOrder;
+ NumInstrsHoistedToShrinkLiveRange++;
+
+ // Find MI's debug value following MI.
+ MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
+ if (MI.getOperand(0).isReg())
+ for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
+ EndIter->hasDebugOperandForReg(MI.getOperand(0).getReg());
+ ++EndIter, ++Next)
+ IOM[&*EndIter] = NewOrder;
+ MBB.splice(I, &MBB, MI.getIterator(), EndIter);
+ }
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
new file mode 100644
index 000000000000..ada5c5be484a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -0,0 +1,61 @@
+//===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains helper functions to modify live ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
+#define LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Helper function that distributes live range value numbers and the
+/// corresponding segments of a primary live range \p LR to a list of newly
+/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p
+/// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific
+/// live range in the \p SplitLRs array.
+template<typename LiveRangeT, typename EqClassesT>
+static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[],
+ EqClassesT VNIClasses) {
+ // Move segments to new intervals.
+ typename LiveRangeT::iterator J = LR.begin(), E = LR.end();
+ while (J != E && VNIClasses[J->valno->id] == 0)
+ ++J;
+ for (typename LiveRangeT::iterator I = J; I != E; ++I) {
+ if (unsigned eq = VNIClasses[I->valno->id]) {
+ assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ SplitLRs[eq-1]->segments.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LR.segments.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LR.getNumValNums();
+ while (j != e && VNIClasses[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (unsigned eq = VNIClasses[i]) {
+ VNI->id = SplitLRs[eq-1]->getNumValNums();
+ SplitLRs[eq-1]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LR.valnos[j++] = VNI;
+ }
+ }
+ LR.valnos.resize(j);
+}
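+
+// Worked example (hypothetical input): with VNIClasses = {0, 1, 1, 2}, value
+// numbers 1 and 2 (and their segments) move to SplitLRs[0], value number 3
+// moves to SplitLRs[1], and value number 0 stays in LR; ids are then
+// renumbered densely within each resulting range.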
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
new file mode 100644
index 000000000000..6df7e5c10862
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -0,0 +1,248 @@
+//===- LiveRegMatrix.cpp - Track register interference --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LiveRegMatrix analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+
+char LiveRegMatrix::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID) {}
+
+void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ if (NumRegUnits != Matrix.size())
+ Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
+ Matrix.init(LIUAlloc, NumRegUnits);
+
+ // Make sure no stale queries get reused.
+ invalidateVirtRegs();
+ return false;
+}
+
+void LiveRegMatrix::releaseMemory() {
+ for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
+ Matrix[i].clear();
+    // No need to clear Queries here, since LiveIntervalUnion::Query doesn't
+    // have anything important to clear and LiveRegMatrix's
+    // runOnMachineFunction() does a std::unique_ptr::reset anyway.
+ }
+}
+
+template <typename Callable>
+static bool foreachUnit(const TargetRegisterInfo *TRI,
+ const LiveInterval &VRegInterval, MCRegister PhysReg,
+ Callable Func) {
+ if (VRegInterval.hasSubRanges()) {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ LaneBitmask Mask = (*Units).second;
+ for (const LiveInterval::SubRange &S : VRegInterval.subranges()) {
+ if ((S.LaneMask & Mask).any()) {
+ if (Func(Unit, S))
+ return true;
+ break;
+ }
+ }
+ }
+ } else {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (Func(Unit, VRegInterval))
+ return true;
+ }
+ }
+ return false;
+}
+
+void LiveRegMatrix::assign(const LiveInterval &VirtReg, MCRegister PhysReg) {
+ LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to "
+ << printReg(PhysReg, TRI) << ':');
+ assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg(), PhysReg);
+
+ foreachUnit(
+ TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) {
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range);
+ Matrix[Unit].unify(VirtReg, Range);
+ return false;
+ });
+
+ ++NumAssigned;
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
+void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
+ Register PhysReg = VRM->getPhys(VirtReg.reg());
+ LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI)
+ << " from " << printReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg());
+
+ foreachUnit(TRI, VirtReg, PhysReg,
+ [&](unsigned Unit, const LiveRange &Range) {
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI));
+ Matrix[Unit].extract(VirtReg, Range);
+ return false;
+ });
+
+ ++NumUnassigned;
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
+bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (!Matrix[Unit].empty())
+ return true;
+ }
+ return false;
+}
+
+bool LiveRegMatrix::checkRegMaskInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg) {
+ // Check if the cached information is valid.
+ // The same BitVector can be reused for all PhysRegs.
+ // We could cache multiple VirtRegs if it becomes necessary.
+ if (RegMaskVirtReg != VirtReg.reg() || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg();
+ RegMaskTag = UserTag;
+ RegMaskUsable.clear();
+ LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
+ }
+
+ // The BitVector is indexed by PhysReg, not register unit.
+ // Regmask interference is more fine grained than regunits.
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8.
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
+}
+
+bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg) {
+ if (VirtReg.empty())
+ return false;
+ CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI);
+
+ bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+ return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes());
+ });
+ return Result;
+}
+
+LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
+ MCRegister RegUnit) {
+ LiveIntervalUnion::Query &Q = Queries[RegUnit];
+ Q.init(UserTag, LR, Matrix[RegUnit]);
+ return Q;
+}
+
+LiveRegMatrix::InterferenceKind
+LiveRegMatrix::checkInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg) {
+ if (VirtReg.empty())
+ return IK_Free;
+
+ // Regmask interference is the fastest check.
+ if (checkRegMaskInterference(VirtReg, PhysReg))
+ return IK_RegMask;
+
+ // Check for fixed interference.
+ if (checkRegUnitInterference(VirtReg, PhysReg))
+ return IK_RegUnit;
+
+ // Check the matrix for virtual register interference.
+ bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
+ [&](MCRegister Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
+ if (Interference)
+ return IK_VirtReg;
+
+ return IK_Free;
+}
+
+bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
+ MCRegister PhysReg) {
+ // Construct artificial live range containing only one segment [Start, End).
+ VNInfo valno(0, Start);
+ LiveRange::Segment Seg(Start, End, &valno);
+ LiveRange LR;
+ LR.addSegment(Seg);
+
+ // Check for interference with that segment
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ // LR is stack-allocated. LiveRegMatrix caches queries by a key that
+ // includes the address of the live range. If (for the same reg unit) this
+ // checkInterference overload is called twice, without any other query()
+ // calls in between (on heap-allocated LiveRanges) - which would invalidate
+ // the cached query - the LR address seen the second time may well be the
+ // same as that seen the first time, while the Start/End/valno may not - yet
+ // the same cached result would be fetched. To avoid that, we don't cache
+ // this query.
+ //
+ // FIXME: the usability of the Query API needs to be improved to avoid
+ // subtle bugs due to query identity. Avoiding caching, for example, would
+ // greatly simplify things.
+ LiveIntervalUnion::Query Q;
+ Q.reset(UserTag, LR, Matrix[Unit]);
+ if (Q.checkInterference())
+ return true;
+ }
+ return false;
+}
+
+Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
+ const LiveInterval *VRegInterval = nullptr;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if ((VRegInterval = Matrix[Unit].getOneVReg()))
+ return VRegInterval->reg();
+ }
+
+ return MCRegister::NoRegister;
+}
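The LiveRegMatrix interface above is normally consulted by a register allocator before it commits an assignment. The following sketch is not part of the patch; it only shows the intended call pattern using the entry points defined above, and tryAssignHint is an illustrative name.

#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
using namespace llvm;

// Hypothetical helper: probe the matrix for a hinted physical register and
// commit the assignment only when the register is completely free.
static bool tryAssignHint(LiveRegMatrix &Matrix, const LiveInterval &VirtReg,
                          MCRegister Hint) {
  switch (Matrix.checkInterference(VirtReg, Hint)) {
  case LiveRegMatrix::IK_Free:
    Matrix.assign(VirtReg, Hint); // also records the mapping in VirtRegMap
    return true;
  case LiveRegMatrix::IK_VirtReg:
    // Only virtual registers are in the way; a real allocator could consider
    // evicting them (unassign() above undoes an assignment).
    return false;
  default:
    // IK_RegUnit or IK_RegMask: fixed interference, nothing to evict.
    return false;
  }
}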
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
new file mode 100644
index 000000000000..34de09dd2944
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -0,0 +1,159 @@
+//===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LiveRegUnits set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) {
+ Units.reset(U);
+ break;
+ }
+ }
+ }
+}
+
+void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) {
+ Units.set(U);
+ break;
+ }
+ }
+ }
+}
+
+void LiveRegUnits::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isReg()) {
+ if (MOP.isDef() && MOP.getReg().isPhysical())
+ removeReg(MOP.getReg());
+ continue;
+ }
+
+ if (MOP.isRegMask()) {
+ removeRegsNotPreserved(MOP.getRegMask());
+ continue;
+ }
+ }
+
+ // Add uses to the set.
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (!MOP.isReg() || !MOP.readsReg())
+ continue;
+
+ if (MOP.getReg().isPhysical())
+ addReg(MOP.getReg());
+ }
+}
+
+void LiveRegUnits::accumulate(const MachineInstr &MI) {
+ // Add defs, uses and regmask clobbers to the set.
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isReg()) {
+ if (!MOP.getReg().isPhysical())
+ continue;
+ if (MOP.isDef() || MOP.readsReg())
+ addReg(MOP.getReg());
+ continue;
+ }
+
+ if (MOP.isRegMask()) {
+ addRegsInMask(MOP.getRegMask());
+ continue;
+ }
+ }
+}
+
+/// Add live-in registers of basic block \p MBB to \p LiveUnits.
+static void addBlockLiveIns(LiveRegUnits &LiveUnits,
+ const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask);
+}
+
+/// Adds all callee saved registers to \p LiveUnits.
+static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
+ const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) {
+ const unsigned N = *CSR;
+
+ const auto &CSI = MFI.getCalleeSavedInfo();
+ auto Info =
+ llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; });
+    // If we have no info for this callee-saved register, assume it is live-out.
+ if (Info == CSI.end() || Info->isRestored())
+ LiveUnits.addReg(N);
+ }
+}
+
+void LiveRegUnits::addPristines(const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+ /// This function will usually be called on an empty object, handle this
+ /// as a special case.
+ if (empty()) {
+ /// Add all callee saved regs, then remove the ones that are saved and
+ /// restored.
+ addCalleeSavedRegs(*this, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ removeReg(Info.getReg());
+ return;
+ }
+ /// If a callee-saved register that is not pristine is already present
+ /// in the set, we should make sure that it stays in it. Precompute the
+ /// set of pristine registers in a separate object.
+ /// Add all callee saved regs, then remove the ones that are saved+restored.
+ LiveRegUnits Pristine(*TRI);
+ addCalleeSavedRegs(Pristine, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ Pristine.removeReg(Info.getReg());
+ addUnits(Pristine.getBitVector());
+}
+
+void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+
+ addPristines(MF);
+
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*this, *Succ);
+
+ // For the return block: Add all callee saved registers.
+ if (MBB.isReturnBlock()) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addCalleeSavedRegs(*this, MF);
+ }
+}
+
+void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ addPristines(MF);
+ addBlockLiveIns(*this, MBB);
+}
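LiveRegUnits is built for the kind of bottom-up scan that stepBackward() above supports. The sketch below is not part of the patch and assumes the declarations in LiveRegUnits.h, in particular LiveRegUnits::available(), which is not visible in this file; isPhysRegFreeBefore is an illustrative name.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical helper: is PhysReg free immediately before MI?
static bool isPhysRegFreeBefore(const MachineInstr &MI, MCRegister PhysReg,
                                const TargetRegisterInfo &TRI) {
  const MachineBasicBlock &MBB = *MI.getParent();
  LiveRegUnits Units(TRI);
  Units.addLiveOuts(MBB);          // seed with live-outs and pristine CSRs
  for (const MachineInstr &I : reverse(MBB)) {
    if (&I == &MI)
      break;
    Units.stepBackward(I);         // remove defs, add uses, apply regmasks
  }
  return Units.available(PhysReg); // assumed header API: no unit of PhysReg set
}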
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
new file mode 100644
index 000000000000..8fc5a929d77b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
@@ -0,0 +1,85 @@
+//===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "livestacks"
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE,
+ "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE,
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+  assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap
+ .emplace(
+ std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F))
+ .first;
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << TRI->getRegClassName(RC) << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
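A short sketch of how a spiller-side client might drive the interface above; this is not part of the patch, and recordStackInterval is an illustrative name.

#include "llvm/CodeGen/LiveStacks.h"
using namespace llvm;

// Hypothetical helper: ensure a stack interval exists for Slot and record the
// register class of the values spilled into it. Calling this repeatedly with
// different classes narrows the stored class to the common subclass, exactly
// as getOrCreateInterval() above does.
static LiveInterval &recordStackInterval(LiveStacks &LSS, int Slot,
                                         const TargetRegisterClass *RC) {
  LiveInterval &SI = LSS.getOrCreateInterval(Slot, RC);
  // The caller (e.g. a spiller) would now add segments covering the slot's
  // live range; the pass itself computes nothing, per the FIXME above.
  return SI;
}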
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 000000000000..9cd74689ba10
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,888 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (MachineInstr *MI : Kills)
+ if (MI->getParent() == MBB)
+ return MI;
+ return nullptr;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
+ dbgs() << " Alive in blocks: ";
+ for (unsigned AB : AliveBlocks)
+ dbgs() << AB << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+}
+#endif
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(Register Reg) {
+ assert(Reg.isVirtual() && "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(Reg);
+ return VirtRegInfo[Reg];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(
+ VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ SmallVector<MachineBasicBlock *, 16> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.pop_back_val();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB,
+ MachineInstr &MI) {
+ assert(MRI->getVRegDef(Reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = &MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (MachineInstr *Kill : VRInfo.Kills)
+ assert(Kill->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(Reg)->getParent())
+ return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(&MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), Pred);
+}
+
+void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+    // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(&MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
+MachineInstr *
+LiveVariables::FindLastPartialDef(Register Reg,
+ SmallSet<unsigned, 4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = nullptr;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return nullptr;
+
+ PartDefRegs.insert(LastDefReg);
+ for (MachineOperand &MO : LastDef->all_defs()) {
+ if (MO.getReg() == 0)
+ continue;
+ Register DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(DefReg))
+ PartDefRegs.insert(SubReg);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... implicit-def EAX, implicit killed AH
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (MCPhysReg SS : TRI->subregs(SubReg))
+ Processed.insert(SS);
+ }
+ }
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+
+ // Remember this use.
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ PhysRegUse[SubReg] = &MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(Register Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return nullptr;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, implicit killed AX
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // dead AX =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // dead AX = implicit-def AL
+ // = killed AL
+ // AX =
+ MachineInstr *LastPartDef = nullptr;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ PartUses.insert(SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // dead EAX = op implicit-def AL
+    // That is, EAX def is dead but AL def extends past it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ PhysRegUse[SS] = LastRefOrPartRef;
+ }
+ for (MCPhysReg SS : TRI->subregs(SubReg))
+ PartUses.erase(SS);
+ }
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+  // Clobbered registers are always dead, so there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (MCPhysReg SR : TRI->superregs(Reg))
+ if ((PhysRegDef[SR] || PhysRegUse[SR]) && MO.clobbersPhysReg(SR))
+ Super = SR;
+ HandlePhysRegKill(Super, nullptr);
+ }
+}
+
+void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ Live.insert(SubReg);
+ } else {
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+      // If a register isn't itself defined, but all of the parts that make it
+      // up are defined, then consider it defined as well.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ Live.insert(SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ while (!Defs.empty()) {
+ Register Reg = Defs.pop_back_val();
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ PhysRegDef[SubReg] = &MI;
+ PhysRegUse[SubReg] = nullptr;
+ }
+ }
+}
+
+void LiveVariables::runOnInstr(MachineInstr &MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ assert(!MI.isDebugOrPseudoInstr());
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI.getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI.isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ Register MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!(MOReg.isPhysical() && MRI->isReserved(MOReg)))
+ MO.setIsKill(false);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
+ } else {
+ assert(MO.isDef());
+ // FIXME: We should not remove any dead flags. However the MIPS RDDSP
+ // instruction needs it at the moment: http://llvm.org/PR27116.
+ if (MOReg.isPhysical() && !MRI->isReserved(MOReg))
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ // Process all uses.
+ for (unsigned MOReg : UseRegs) {
+ if (Register::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all masked registers. (Call clobbers).
+ for (unsigned Mask : RegMasks)
+ HandleRegMask(MI.getOperand(Mask));
+
+ // Process all defs.
+ for (unsigned MOReg : DefRegs) {
+ if (Register::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegDef(MOReg, &MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+}
+
+void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (const auto &LI : MBB->liveins()) {
+ assert(Register::isPhysicalRegister(LI.PhysReg) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+
+ runOnInstr(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (unsigned I : VarInfoVec)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(I), MRI->getVRegDef(I)->getParent(),
+ MBB);
+ }
+
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (const MachineBasicBlock *SuccMBB : MBB->successors()) {
+ if (SuccMBB->isEHPad())
+ continue;
+ for (const auto &LI : SuccMBB->liveins()) {
+ if (!TRI->isInAllocatableClass(LI.PhysReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LI.PhysReg);
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
+ HandlePhysRegDef(i, nullptr, Defs);
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
+ PHIVarInfo.resize(MF->getNumBlockIDs());
+ PHIJoins.clear();
+
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = &MF->front();
+ df_iterator_default_set<MachineBasicBlock*,16> Visited;
+
+ for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
+ runOnBlock(MBB, NumRegs);
+
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const Register Reg = Register::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+ else
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If so, it is due to a bug in the instruction selector or some
+ // other part of the code generator if this happens.
+#ifndef NDEBUG
+ for (const MachineBasicBlock &MBB : *MF)
+ assert(Visited.contains(&MBB) && "unreachable basic block found");
+#endif
+
+ PhysRegDef.clear();
+ PhysRegUse.clear();
+ PHIVarInfo.clear();
+
+ return false;
+}
+
+void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
+ assert(Reg.isVirtual());
+
+ VarInfo &VI = getVarInfo(Reg);
+ VI.AliveBlocks.clear();
+ VI.Kills.clear();
+
+ MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg);
+ MachineBasicBlock &DefBB = *DefMI.getParent();
+
+ // Handle the case where all uses have been removed.
+ if (MRI->use_nodbg_empty(Reg)) {
+ VI.Kills.push_back(&DefMI);
+ DefMI.addRegisterDead(Reg, nullptr);
+ return;
+ }
+ DefMI.clearRegisterDeads(Reg);
+
+ // Initialize a worklist of BBs that Reg is live-to-end of. (Here
+ // "live-to-end" means Reg is live at the end of a block even if it is only
+ // live because of phi uses in a successor. This is different from isLiveOut()
+ // which does not consider phi uses.)
+ SmallVector<MachineBasicBlock *> LiveToEndBlocks;
+ SparseBitVector<> UseBlocks;
+ for (auto &UseMO : MRI->use_nodbg_operands(Reg)) {
+ UseMO.setIsKill(false);
+ MachineInstr &UseMI = *UseMO.getParent();
+ MachineBasicBlock &UseBB = *UseMI.getParent();
+ UseBlocks.set(UseBB.getNumber());
+ if (UseMI.isPHI()) {
+ // If Reg is used in a phi then it is live-to-end of the corresponding
+ // predecessor.
+ unsigned Idx = UseMO.getOperandNo();
+ LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB());
+ } else if (&UseBB == &DefBB) {
+ // A non-phi use in the same BB as the single def must come after the def.
+ } else {
+ // Otherwise Reg must be live-to-end of all predecessors.
+ LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end());
+ }
+ }
+
+ // Iterate over the worklist adding blocks to AliveBlocks.
+ bool LiveToEndOfDefBB = false;
+ while (!LiveToEndBlocks.empty()) {
+ MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val();
+ if (&BB == &DefBB) {
+ LiveToEndOfDefBB = true;
+ continue;
+ }
+ if (VI.AliveBlocks.test(BB.getNumber()))
+ continue;
+ VI.AliveBlocks.set(BB.getNumber());
+ LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end());
+ }
+
+ // Recompute kill flags. For each block in which Reg is used but is not
+ // live-through, find the last instruction that uses Reg. Ignore phi nodes
+ // because they should not be included in Kills.
+ for (unsigned UseBBNum : UseBlocks) {
+ if (VI.AliveBlocks.test(UseBBNum))
+ continue;
+ MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum);
+ if (&UseBB == &DefBB && LiveToEndOfDefBB)
+ continue;
+ for (auto &MI : reverse(UseBB)) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+ if (MI.isPHI())
+ break;
+ if (MI.readsRegister(Reg)) {
+ assert(!MI.killsRegister(Reg));
+ MI.addRegisterKilled(Reg, nullptr);
+ VI.Kills.push_back(&MI);
+ break;
+ }
+ }
+ }
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
+ MachineInstr &NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ (void)removed;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (const auto &MBB : Fn)
+ for (const auto &BBI : MBB) {
+ if (!BBI.isPHI())
+ break;
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
+ if (BBI.getOperand(i).readsReg())
+ PHIVarInfo[BBI.getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI.getOperand(i).getReg());
+ }
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ Register Reg, MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ SmallPtrSet<const MachineBasicBlock *, 8> Kills;
+ for (MachineInstr *MI : VI.Kills)
+ Kills.insert(MI->getParent());
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ // Or is it live because there is a use in a successor that kills it?
+ if (Kills.count(SuccMBB))
+ return true;
+ }
+
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ DenseSet<unsigned> Defs, Kills;
+
+ MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+ for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+ // Record the def of the PHI node.
+ Defs.insert(BBI->getOperand(0).getReg());
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+ }
+
+ // Record all vreg defs and kills of all instructions in SuccBB.
+ for (; BBI != BBE; ++BBI) {
+ for (const MachineOperand &Op : BBI->operands()) {
+ if (Op.isReg() && Op.getReg().isVirtual()) {
+ if (Op.isDef())
+ Defs.insert(Op.getReg());
+ else if (Op.isKill())
+ Kills.insert(Op.getReg());
+ }
+ }
+ }
+
+ // Update info for all live variables
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+
+    // If the register is defined in the successor, it can't be live into BB.
+ if (Defs.count(Reg))
+ continue;
+
+ // If the register is either killed in or live through SuccBB it's also live
+ // through BB.
+ VarInfo &VI = getVarInfo(Reg);
+ if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB. LiveInSets[BB] is *not* updated (because it is not needed during
+/// PHIElimination).
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB,
+ std::vector<SparseBitVector<>> &LiveInSets) {
+ const unsigned NumNew = BB->getNumber();
+
+ SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()];
+ for (unsigned R : BV) {
+ Register VirtReg = Register::index2VirtReg(R);
+ LiveVariables::VarInfo &VI = getVarInfo(VirtReg);
+ VI.AliveBlocks.set(NumNew);
+ }
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
+ BBE = SuccBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i + 1).getMBB() == BB &&
+ BBI->getOperand(i).readsReg())
+ getVarInfo(BBI->getOperand(i).getReg())
+ .AliveBlocks.set(NumNew);
+ }
+}
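The queries a client typically makes against this analysis are getVarInfo(), isLiveOut() and VarInfo::findKill(), all defined above. A small sketch of that usage follows; it is not part of the patch, and printVRegLiveness is an illustrative name.

#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: report how a virtual register relates to one block.
static void printVRegLiveness(LiveVariables &LV, Register VReg,
                              const MachineBasicBlock &MBB) {
  LiveVariables::VarInfo &VI = LV.getVarInfo(VReg); // virtual registers only
  if (LV.isLiveOut(VReg, MBB))
    errs() << printReg(VReg) << " is live-out of bb." << MBB.getNumber() << '\n';
  else if (MachineInstr *Kill = VI.findKill(&MBB))
    errs() << printReg(VReg) << " is killed in the block by: " << *Kill;
  else
    errs() << printReg(VReg) << " has no kill in and is not live-out of bb."
           << MBB.getNumber() << '\n';
}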
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 000000000000..e491ed12034d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,442 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "localstackalloc"
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ int FrameIdx; // The frame index
+
+    // Order in which the reference instruction appears in the program. Used
+    // to ensure a deterministic order when multiple instructions reference
+    // the same location.
+ unsigned Order;
+
+ public:
+ FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) :
+ MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {}
+
+ bool operator<(const FrameRef &RHS) const {
+ return std::tie(LocalOffset, FrameIdx, Order) <
+ std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
+ }
+
+ MachineBasicBlock::iterator getMachineInstr() const { return MI; }
+ int64_t getLocalOffset() const { return LocalOffset; }
+ int getFrameIndex() const { return FrameIdx; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t, 16> LocalOffsets;
+
+ /// StackObjSet - A set of stack object indexes
+ using StackObjSet = SmallSetVector<int, 8>;
+
+ void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, Align &MaxAlign);
+ void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
+ int64_t &Offset, Align &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
+ initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE,
+ "Local Stack Slot Allocation", false, false)
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI.getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF))
+ return false;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI.getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a bit better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI.setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+ int64_t &Offset, bool StackGrowsDown,
+ Align &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI.getObjectSize(FrameIdx);
+
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Alignment);
+
+ // Adjust to alignment boundary.
+ Offset = alignTo(Offset, Alignment);
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI.mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI.getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+void LocalStackSlotPass::AssignProtectedObjSet(
+ const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset,
+ Align &MaxAlign) {
+ for (int i : UnassignedObjs) {
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ ProtectedObjs.insert(i);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ Align MaxAlign;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> ProtectedObjs;
+ if (MFI.hasStackProtectorIndex()) {
+ int StackProtectorFI = MFI.getStackProtectorIndex();
+
+ // We need to make sure we didn't pre-allocate the stack protector when
+ // doing this.
+ // If we already have a stack protector, this will re-assign it to a slot
+ // that is **not** covering the protected objects.
+ assert(!MFI.isObjectPreAllocated(StackProtectorFI) &&
+ "Stack protector pre-allocated in LocalStackSlotAllocation");
+
+ StackObjSet LargeArrayObjs;
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+ // Only place the stack protector in the local stack area if the target
+ // allows it.
+ if (TFI.isStackIdSafeForLocalArea(MFI.getStackID(StackProtectorFI)))
+ AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown,
+ MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isDeadObjectIndex(i))
+ continue;
+ if (StackProtectorFI == (int)i)
+ continue;
+ if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
+ continue;
+
+ switch (MFI.getObjectSSPLayout(i)) {
+ case MachineFrameInfo::SSPLK_None:
+ continue;
+ case MachineFrameInfo::SSPLK_SmallArray:
+ SmallArrayObjs.insert(i);
+ continue;
+ case MachineFrameInfo::SSPLK_AddrOf:
+ AddrOfObjs.insert(i);
+ continue;
+ case MachineFrameInfo::SSPLK_LargeArray:
+ LargeArrayObjs.insert(i);
+ continue;
+ }
+ llvm_unreachable("Unexpected SSPLayoutKind.");
+ }
+
+ AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isDeadObjectIndex(i))
+ continue;
+ if (MFI.getStackProtectorIndex() == (int)i)
+ continue;
+ if (ProtectedObjs.count(i))
+ continue;
+ if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI.setLocalFrameSize(Offset);
+ MFI.setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(unsigned BaseReg,
+ int64_t BaseOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+  // Check if the relative offset from where the base register points to the
+  // target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
+ return TRI->isFrameOffsetLegal(&MI, BaseReg, Offset);
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ unsigned Order = 0;
+
+ for (MachineBasicBlock &BB : Fn) {
+ for (MachineInstr &MI : BB) {
+ // Debug value, stackmap and patchpoint instructions can't be out of
+ // range, so they don't need any updates.
+ if (MI.isDebugInstr() || MI.getOpcode() == TargetOpcode::STATEPOINT ||
+ MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (const MachineOperand &MO : MI.operands()) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MO.isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI.isObjectPreAllocated(MO.getIndex()))
+ break;
+ int Idx = MO.getIndex();
+ int64_t LocalOffset = LocalOffsets[Idx];
+ if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
+ break;
+ FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset.
+ // Use frame index as a tie-breaker in case MI's have the same offset.
+ llvm::sort(FrameReferenceInsns);
+
+ MachineBasicBlock *Entry = &Fn.front();
+
+ Register BaseReg;
+ int64_t BaseOffset = 0;
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ FrameRef &FR = FrameReferenceInsns[ref];
+ MachineInstr &MI = *FR.getMachineInstr();
+ int64_t LocalOffset = FR.getLocalOffset();
+ int FrameIdx = FR.getFrameIndex();
+ assert(MFI.isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ // We need to keep the references to the stack protector slot through frame
+ // index operands so that it gets resolved by PEI rather than this pass.
+ // This avoids accesses to the stack protector through virtual base
+ // registers, and forces PEI to address it using fp/sp/bp.
+ if (MFI.hasStackProtectorIndex() &&
+ FrameIdx == MFI.getStackProtectorIndex())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Considering: " << MI);
+
+ unsigned idx = 0;
+ for (unsigned f = MI.getNumOperands(); idx != f; ++idx) {
+ if (!MI.getOperand(idx).isFI())
+ continue;
+
+ if (FrameIdx == MI.getOperand(idx).getIndex())
+ break;
+ }
+
+ assert(idx < MI.getNumOperands() && "Cannot find FI operand");
+
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0;
+
+ LLVM_DEBUG(dbgs() << " Replacing FI in: " << MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ if (BaseReg.isValid() &&
+ lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,
+ LocalOffset, MI, TRI)) {
+ LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
+ // We found a register to reuse.
+ Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
+ } else {
+ // No previously defined register was in range, so create a new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, idx);
+
+ int64_t CandBaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+
+ // We'd like to avoid creating single-use virtual base registers.
+ // Because the FrameRefs are in sorted order, and we've already
+ // processed all FrameRefs before this one, just check whether or not
+ // the next FrameRef will be able to reuse this new register. If not,
+ // then don't bother creating it.
+ if (ref + 1 >= e ||
+ !lookupCandidateBaseReg(
+ BaseReg, CandBaseOffset, FrameSizeAdjust,
+ FrameReferenceInsns[ref + 1].getLocalOffset(),
+ *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI))
+ continue;
+
+ // Save the base offset.
+ BaseOffset = CandBaseOffset;
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
+
+ LLVM_DEBUG(dbgs() << " Materialized base register at frame local offset "
+ << LocalOffset + InstrOffset
+ << " into " << printReg(BaseReg, TRI) << '\n');
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ ++NumBaseRegisters;
+ }
+ assert(BaseReg && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(MI, BaseReg, Offset);
+ LLVM_DEBUG(dbgs() << "Resolved: " << MI);
+
+ ++NumReplacements;
+ }
+
+ return BaseReg.isValid();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp
new file mode 100644
index 000000000000..0d400253c652
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LoopTraversal.cpp
@@ -0,0 +1,75 @@
+//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LoopTraversal.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) {
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
+ return MBBInfos[MBBNumber].PrimaryCompleted &&
+ MBBInfos[MBBNumber].IncomingCompleted ==
+ MBBInfos[MBBNumber].PrimaryIncoming &&
+ MBBInfos[MBBNumber].IncomingProcessed == MBB->pred_size();
+}
+
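+// Visit the blocks in reverse post-order. Each block gets one "primary"
+// visit; successors for which isBlockDone() becomes true afterwards are
+// re-queued for an additional non-primary visit, so blocks inside loops can
+// be revisited once information from all of their predecessors is available.
+// (This is a summary of the code below; the traversal scheme is documented in
+// detail in llvm/CodeGen/LoopTraversal.h.)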
+LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
+ // Initialize the MBBInfos.
+ MBBInfos.assign(MF.getNumBlockIDs(), MBBInfo());
+
+ MachineBasicBlock *Entry = &*MF.begin();
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(Entry);
+ SmallVector<MachineBasicBlock *, 4> Workqueue;
+ SmallVector<TraversedMBBInfo, 4> MBBTraversalOrder;
+ for (MachineBasicBlock *MBB : RPOT) {
+ // N.B: IncomingProcessed and IncomingCompleted were already updated while
+ // processing this block's predecessors.
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
+ MBBInfos[MBBNumber].PrimaryCompleted = true;
+ MBBInfos[MBBNumber].PrimaryIncoming = MBBInfos[MBBNumber].IncomingProcessed;
+ bool Primary = true;
+ Workqueue.push_back(MBB);
+ while (!Workqueue.empty()) {
+ MachineBasicBlock *ActiveMBB = Workqueue.pop_back_val();
+ bool Done = isBlockDone(ActiveMBB);
+ MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));
+ for (MachineBasicBlock *Succ : ActiveMBB->successors()) {
+ unsigned SuccNumber = Succ->getNumber();
+ assert(SuccNumber < MBBInfos.size() &&
+ "Unexpected basic block number.");
+ if (!isBlockDone(Succ)) {
+ if (Primary)
+ MBBInfos[SuccNumber].IncomingProcessed++;
+ if (Done)
+ MBBInfos[SuccNumber].IncomingCompleted++;
+ if (isBlockDone(Succ))
+ Workqueue.push_back(Succ);
+ }
+ }
+ Primary = false;
+ }
+ }
+
+ // We need to go through again and finalize any blocks that are not done yet.
+ // This is possible if blocks have dead predecessors, so we didn't visit them
+ // above.
+ for (MachineBasicBlock *MBB : RPOT) {
+ if (!isBlockDone(MBB))
+ MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true));
+ // Don't update successors here. We'll get to them anyway through this
+ // loop.
+ }
+
+ MBBInfos.clear();
+
+ return MBBTraversalOrder;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
new file mode 100644
index 000000000000..24c30b756737
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -0,0 +1,66 @@
+//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy bits of the LLT class to
+/// avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+LLT::LLT(MVT VT) {
+ if (VT.isVector()) {
+ bool asVector = VT.getVectorMinNumElements() > 1;
+ init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
+ VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
+ /*AddressSpace=*/0);
+ } else if (VT.isValid() && !VT.isScalableTargetExtVT()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true,
+ ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0);
+ } else {
+ IsScalar = false;
+ IsPointer = false;
+ IsVector = false;
+ RawData = 0;
+ }
+}
+
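+// Example textual forms produced by print(): "s32" for a 32-bit scalar,
+// "p0" for a pointer in address space 0, and "<4 x s32>" for a fixed vector
+// of four 32-bit scalars.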
+void LLT::print(raw_ostream &OS) const {
+ if (isVector()) {
+ OS << "<";
+ OS << getElementCount() << " x " << getElementType() << ">";
+ } else if (isPointer())
+ OS << "p" << getAddressSpace();
+ else if (isValid()) {
+ assert(isScalar() && "unexpected type");
+ OS << "s" << getScalarSizeInBits();
+ } else
+ OS << "LLT_invalid";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LLT::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
new file mode 100644
index 000000000000..bc2ea3f05b6d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -0,0 +1,85 @@
+//===-- llvm/CodeGen/LowLevelTypeUtils.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy LLT/MVT/EVT conversion
+/// utilities to avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
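+// Illustrative mappings (assuming a DataLayout with 64-bit pointers in
+// address space 0): i32 -> s32, ptr -> p0, <4 x float> -> <4 x s32>, and
+// unsized or scalable target extension types map to the invalid LLT().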
+LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
+ if (auto VTy = dyn_cast<VectorType>(&Ty)) {
+ auto EC = VTy->getElementCount();
+ LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
+ if (EC.isScalar())
+ return ScalarTy;
+ return LLT::vector(EC, ScalarTy);
+ }
+
+ if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ unsigned AddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ }
+
+ if (Ty.isSized() && !Ty.isScalableTargetExtTy()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ auto SizeInBits = DL.getTypeSizeInBits(&Ty);
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ return LLT::scalar(SizeInBits);
+ }
+
+ return LLT();
+}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
+ LLVMContext &Ctx) {
+ if (Ty.isVector()) {
+ EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
+ return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
+ }
+
+ return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::scalarOrVector(Ty.getVectorElementCount(),
+ Ty.getVectorElementType().getSizeInBits());
+}
+
+const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
+ assert(Ty.isScalar() && "Expected a scalar type.");
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return APFloat::IEEEhalf();
+ case 32:
+ return APFloat::IEEEsingle();
+ case 64:
+ return APFloat::IEEEdouble();
+ case 128:
+ return APFloat::IEEEquad();
+ }
+ llvm_unreachable("Invalid FP type size.");
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
new file mode 100644
index 000000000000..a517ee3794ca
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -0,0 +1,158 @@
+//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is required for targets depending on libgcc style
+// emulated thread local storage variables. For every defined TLS variable xyz,
+// an __emutls_v.xyz is generated. If there is a non-zero initial value,
+// an __emutls_t.xyz is also generated.
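+//
+// For example (illustrative only): given
+//   @xyz = thread_local global i32 7, align 4
+// the pass creates a control variable __emutls_v.xyz = { 4, 4, null,
+// @__emutls_t.xyz } and, because the initializer is non-zero, a constant
+// template @__emutls_t.xyz holding the value 7.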
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loweremutls"
+
+namespace {
+
+class LowerEmuTLS : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerEmuTLS() : ModulePass(ID) {
+ initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+private:
+ bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
+ static void copyLinkageVisibility(Module &M,
+ const GlobalVariable *from,
+ GlobalVariable *to) {
+ to->setLinkage(from->getLinkage());
+ to->setVisibility(from->getVisibility());
+ to->setDSOLocal(from->isDSOLocal());
+ if (from->hasComdat()) {
+ to->setComdat(M.getOrInsertComdat(to->getName()));
+ to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
+ }
+ }
+};
+}
+
+char LowerEmuTLS::ID = 0;
+
+INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE,
+ "Add __emutls_[vt]. variables for emulated TLS model", false,
+ false)
+
+ModulePass *llvm::createLowerEmuTLSPass() { return new LowerEmuTLS(); }
+
+bool LowerEmuTLS::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.useEmulatedTLS())
+ return false;
+
+ bool Changed = false;
+ SmallVector<const GlobalVariable*, 8> TlsVars;
+ for (const auto &G : M.globals()) {
+ if (G.isThreadLocal())
+ TlsVars.append({&G});
+ }
+ for (const auto *const G : TlsVars)
+ Changed |= addEmuTlsVar(M, G);
+ return Changed;
+}
+
+bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
+ LLVMContext &C = M.getContext();
+ PointerType *VoidPtrType = Type::getInt8PtrTy(C);
+
+ std::string EmuTlsVarName = ("__emutls_v." + GV->getName()).str();
+ GlobalVariable *EmuTlsVar = M.getNamedGlobal(EmuTlsVarName);
+ if (EmuTlsVar)
+ return false; // It has been added before.
+
+ const DataLayout &DL = M.getDataLayout();
+ Constant *NullPtr = ConstantPointerNull::get(VoidPtrType);
+
+ // Get non-zero initializer from GV's initializer.
+ const Constant *InitValue = nullptr;
+ if (GV->hasInitializer()) {
+ InitValue = GV->getInitializer();
+ const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+ // When GV's init value is all 0, omit the EmuTlsTmplVar and let
+ // the emutls library function reset newly allocated TLS variables.
+ if (isa<ConstantAggregateZero>(InitValue) ||
+ (InitIntValue && InitIntValue->isZero()))
+ InitValue = nullptr;
+ }
+
+ // Create the __emutls_v. symbol, whose type has 4 fields:
+ // word size; // size of GV in bytes
+ // word align; // alignment of GV
+ // void *ptr; // initialized to 0; set at run time per thread.
+ // void *templ; // 0 or point to __emutls_t.*
+ // sizeof(word) should be the same as sizeof(void*) on target.
+ IntegerType *WordType = DL.getIntPtrType(C);
+ PointerType *InitPtrType = InitValue ?
+ PointerType::getUnqual(InitValue->getType()) : VoidPtrType;
+ Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
+ ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
+ StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+ EmuTlsVar = cast<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
+ copyLinkageVisibility(M, GV, EmuTlsVar);
+
+ // Define "__emutls_t.*" and "__emutls_v.*" only if GV is defined.
+ if (!GV->hasInitializer())
+ return true;
+
+ Type *GVType = GV->getValueType();
+ Align GVAlignment = DL.getValueOrABITypeAlignment(GV->getAlign(), GVType);
+
+ // Define "__emutls_t.*" if there is InitValue
+ GlobalVariable *EmuTlsTmplVar = nullptr;
+ if (InitValue) {
+ std::string EmuTlsTmplName = ("__emutls_t." + GV->getName()).str();
+ EmuTlsTmplVar = dyn_cast_or_null<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsTmplName, GVType));
+ assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
+ EmuTlsTmplVar->setConstant(true);
+ EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
+ EmuTlsTmplVar->setAlignment(GVAlignment);
+ copyLinkageVisibility(M, GV, EmuTlsTmplVar);
+ }
+
+ // Define "__emutls_v.*" with initializer and alignment.
+ Constant *ElementValues[4] = {
+ ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
+ ConstantInt::get(WordType, GVAlignment.value()), NullPtr,
+ EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr};
+ ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
+ EmuTlsVar->setInitializer(
+ ConstantStruct::get(EmuTlsVarType, ElementValueArray));
+ Align MaxAlignment =
+ std::max(DL.getABITypeAlign(WordType), DL.getABITypeAlign(VoidPtrType));
+ EmuTlsVar->setAlignment(MaxAlignment);
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
new file mode 100644
index 000000000000..5b388be27839
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -0,0 +1,62 @@
+//===- MBFIWrapper.cpp - MachineBlockFrequencyInfo wrapper ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class keeps track of branch frequencies of newly created blocks and
+// tail-merged blocks. Used by TailDuplication and MachineBlockPlacement.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include <optional>
+
+using namespace llvm;
+
+BlockFrequency MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ if (I != MergedBBFreq.end())
+ return I->second;
+
+ return MBFI.getBlockFreq(MBB);
+}
+
+void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
+ BlockFrequency F) {
+ MergedBBFreq[MBB] = F;
+}
+
+std::optional<uint64_t>
+MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ // Modified block frequency also impacts profile count. So we should compute
+ // profile count from new block frequency if it has been changed.
+ if (I != MergedBBFreq.end())
+ return MBFI.getProfileCountFromFreq(I->second.getFrequency());
+
+ return MBFI.getBlockProfileCount(MBB);
+}
+
+raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
+}
+
+raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI.printBlockFreq(OS, Freq);
+}
+
+void MBFIWrapper::view(const Twine &Name, bool isSimple) {
+ MBFI.view(Name, isSimple);
+}
+
+uint64_t MBFIWrapper::getEntryFreq() const {
+ return MBFI.getEntryFreq();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
new file mode 100644
index 000000000000..21b849244d9b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -0,0 +1,423 @@
+//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to employ a canonical code transformation so
+// that code compiled with slightly different IR passes can be diffed more
+// effectively than otherwise. This is done by renaming vregs in a given
+// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
+// move defs closer to their uses in order to reduce diffs caused by slightly
+// different schedules.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-canonicalizer example.mir
+//
+// Reorders instructions canonically.
+// Renames virtual register operands canonically.
+// Strips certain MIR artifacts (optionally).
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mir-canonicalizer"
+
+static cl::opt<unsigned>
+ CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
+ cl::value_desc("N"),
+ cl::desc("Function number to canonicalize."));
+
+namespace {
+
+class MIRCanonicalizer : public MachineFunctionPass {
+public:
+ static char ID;
+ MIRCanonicalizer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rename register operands in a canonical ordering.";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char MIRCanonicalizer::ID;
+
+char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
+
+INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
+ "Rename Register Operands Canonically", false, false)
+
+INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
+ "Rename Register Operands Canonically", false, false)
+
+static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+ if (MF.empty())
+ return {};
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ std::vector<MachineBasicBlock *> RPOList;
+ append_range(RPOList, RPOT);
+
+ return RPOList;
+}
+
+static bool
+rescheduleLexographically(std::vector<MachineInstr *> instructions,
+ MachineBasicBlock *MBB,
+ std::function<MachineBasicBlock::iterator()> getPos) {
+
+ bool Changed = false;
+ using StringInstrPair = std::pair<std::string, MachineInstr *>;
+ std::vector<StringInstrPair> StringInstrMap;
+
+ for (auto *II : instructions) {
+ std::string S;
+ raw_string_ostream OS(S);
+ II->print(OS);
+ OS.flush();
+
+ // Trim the assignment, or start from the beginning in the case of a store.
+ const size_t i = S.find('=');
+ StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
+ }
+
+ llvm::sort(StringInstrMap, llvm::less_first());
+
+ for (auto &II : StringInstrMap) {
+
+ LLVM_DEBUG({
+ dbgs() << "Splicing ";
+ II.second->dump();
+ dbgs() << " right before: ";
+ getPos()->dump();
+ });
+
+ Changed = true;
+ MBB->splice(getPos(), MBB, II.second);
+ }
+
+ return Changed;
+}
+
+static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
+ MachineBasicBlock *MBB) {
+
+ bool Changed = false;
+
+ // Calculates the distance of MI from the beginning of its parent BB.
+ auto getInstrIdx = [](const MachineInstr &MI) {
+ unsigned i = 0;
+ for (const auto &CurMI : *MI.getParent()) {
+ if (&CurMI == &MI)
+ return i;
+ i++;
+ }
+ return ~0U;
+ };
+
+ // Pre-Populate vector of instructions to reschedule so that we don't
+ // clobber the iterator.
+ std::vector<MachineInstr *> Instructions;
+ for (auto &MI : *MBB) {
+ Instructions.push_back(&MI);
+ }
+
+ std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
+ std::map<unsigned, MachineInstr *> MultiUserLookup;
+ unsigned UseToBringDefCloserToCount = 0;
+ std::vector<MachineInstr *> PseudoIdempotentInstructions;
+ std::vector<unsigned> PhysRegDefs;
+ for (auto *II : Instructions) {
+ for (unsigned i = 1; i < II->getNumOperands(); i++) {
+ MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ if (MO.getReg().isVirtual())
+ continue;
+
+ if (!MO.isDef())
+ continue;
+
+ PhysRegDefs.push_back(MO.getReg());
+ }
+ }
+
+ for (auto *II : Instructions) {
+ if (II->getNumOperands() == 0)
+ continue;
+ if (II->mayLoadOrStore())
+ continue;
+
+ MachineOperand &MO = II->getOperand(0);
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ if (!MO.isDef())
+ continue;
+
+ bool IsPseudoIdempotent = true;
+ for (unsigned i = 1; i < II->getNumOperands(); i++) {
+
+ if (II->getOperand(i).isImm()) {
+ continue;
+ }
+
+ if (II->getOperand(i).isReg()) {
+ if (!II->getOperand(i).getReg().isVirtual())
+ if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
+ continue;
+ }
+ }
+
+ IsPseudoIdempotent = false;
+ break;
+ }
+
+ if (IsPseudoIdempotent) {
+ PseudoIdempotentInstructions.push_back(II);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
+
+ MachineInstr *Def = II;
+ unsigned Distance = ~0U;
+ MachineInstr *UseToBringDefCloserTo = nullptr;
+ MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+ for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
+ MachineInstr *UseInst = UO.getParent();
+
+ const unsigned DefLoc = getInstrIdx(*Def);
+ const unsigned UseLoc = getInstrIdx(*UseInst);
+ const unsigned Delta = (UseLoc - DefLoc);
+
+ if (UseInst->getParent() != Def->getParent())
+ continue;
+ if (DefLoc >= UseLoc)
+ continue;
+
+ if (Delta < Distance) {
+ Distance = Delta;
+ UseToBringDefCloserTo = UseInst;
+ MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
+ }
+ }
+
+ const auto BBE = MBB->instr_end();
+ MachineBasicBlock::iterator DefI = BBE;
+ MachineBasicBlock::iterator UseI = BBE;
+
+ for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
+
+ if (DefI != BBE && UseI != BBE)
+ break;
+
+ if (&*BBI == Def) {
+ DefI = BBI;
+ continue;
+ }
+
+ if (&*BBI == UseToBringDefCloserTo) {
+ UseI = BBI;
+ continue;
+ }
+ }
+
+ if (DefI == BBE || UseI == BBE)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Splicing ";
+ DefI->dump();
+ dbgs() << " right before: ";
+ UseI->dump();
+ });
+
+ MultiUsers[UseToBringDefCloserTo].push_back(Def);
+ Changed = true;
+ MBB->splice(UseI, MBB, DefI);
+ }
+
+ // Sort the defs for users of multiple defs lexicographically.
+ for (const auto &E : MultiUserLookup) {
+
+ auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
+ return &MI == E.second;
+ });
+
+ if (UseI == MBB->instr_end())
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
+ Changed |= rescheduleLexographically(
+ MultiUsers[E.second], MBB,
+ [&]() -> MachineBasicBlock::iterator { return UseI; });
+ }
+
+ PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
+ LLVM_DEBUG(
+ dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
+ Changed |= rescheduleLexographically(
+ PseudoIdempotentInstructions, MBB,
+ [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
+
+ return Changed;
+}
+
+static bool propagateLocalCopies(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ std::vector<MachineInstr *> Copies;
+ for (MachineInstr &MI : MBB->instrs()) {
+ if (MI.isCopy())
+ Copies.push_back(&MI);
+ }
+
+ for (MachineInstr *MI : Copies) {
+
+ if (!MI->getOperand(0).isReg())
+ continue;
+ if (!MI->getOperand(1).isReg())
+ continue;
+
+ const Register Dst = MI->getOperand(0).getReg();
+ const Register Src = MI->getOperand(1).getReg();
+
+ if (!Dst.isVirtual())
+ continue;
+ if (!Src.isVirtual())
+ continue;
+ // Not folding COPY instructions if regbankselect has not set the RCs.
+ // Why are we only considering Register Classes? Because the verifier
+ // sometimes gets upset if the register classes don't match even if the
+ // types do. A future patch might add COPY folding for matching types in
+ // pre-registerbankselect code.
+ if (!MRI.getRegClassOrNull(Dst))
+ continue;
+ if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
+ continue;
+
+ std::vector<MachineOperand *> Uses;
+ for (MachineOperand &MO : MRI.use_operands(Dst))
+ Uses.push_back(&MO);
+ for (auto *MO : Uses)
+ MO->setReg(Src);
+
+ Changed = true;
+ MI->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+static bool doDefKillClear(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ for (auto &MI : *MBB) {
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.isDef() && MO.isKill()) {
+ Changed = true;
+ MO.setIsKill(false);
+ }
+
+ if (MO.isDef() && MO.isDead()) {
+ Changed = true;
+ MO.setIsDead(false);
+ }
+ }
+ }
+
+ return Changed;
+}
+
+static bool runOnBasicBlock(MachineBasicBlock *MBB,
+ unsigned BasicBlockNum, VRegRenamer &Renamer) {
+ LLVM_DEBUG({
+ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
+ dbgs() << "\n\n================================================\n\n";
+ });
+
+ bool Changed = false;
+
+ LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
+
+ LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
+ MBB->dump(););
+ Changed |= propagateLocalCopies(MBB);
+ LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
+
+ LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
+ unsigned IdempotentInstCount = 0;
+ Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
+ LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
+
+ Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
+
+ // TODO: Consider dropping this. Dropping kill defs is probably not
+ // semantically sound.
+ Changed |= doDefKillClear(MBB);
+
+ LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
+ dbgs() << "\n";);
+ LLVM_DEBUG(
+ dbgs() << "\n\n================================================\n\n");
+ return Changed;
+}
+
+bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
+
+ static unsigned functionNum = 0;
+ if (CanonicalizeFunctionNumber != ~0U) {
+ if (CanonicalizeFunctionNumber != functionNum++)
+ return false;
+ LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
+ << "\n";);
+ }
+
+ // We need a valid vreg to create a vreg type for skipping all those
+ // stray vreg numbers, so that we reach aligned/canonical vreg values.
+ std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
+
+ LLVM_DEBUG(
+ dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
+ dbgs() << "\n\n================================================\n\n";
+ dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
+ for (auto MBB
+ : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
+ << "\n\n================================================\n\n";);
+
+ unsigned BBNum = 0;
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VRegRenamer Renamer(MRI);
+ for (auto *MBB : RPOList)
+ Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
new file mode 100644
index 000000000000..8d17cceeb3cd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -0,0 +1,202 @@
+//===-------- MIRFSDiscriminator.cpp: Flow Sensitive Discriminator --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of a machine pass that adds the flow
+// sensitive discriminator to the instruction debug information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRFSDiscriminator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace sampleprofutil;
+
+#define DEBUG_TYPE "mirfs-discriminators"
+
+// TODO(xur): Remove this option and related code once we make true as the
+// default.
+cl::opt<bool> ImprovedFSDiscriminator(
+ "improved-fs-discriminator", cl::Hidden, cl::init(false),
+ cl::desc("New FS discriminators encoding (incompatible with the original "
+ "encoding)"));
+
+char MIRAddFSDiscriminators::ID = 0;
+
+INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
+ "Add MIR Flow Sensitive Discriminators",
+ /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID;
+
+FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) {
+ return new MIRAddFSDiscriminators(P);
+}
+
+// TODO(xur): Remove this once we switch to ImprovedFSDiscriminator.
+// Compute a hash value using debug line number, and the line numbers from the
+// inline stack.
+static uint64_t getCallStackHashV0(const MachineBasicBlock &BB,
+ const MachineInstr &MI,
+ const DILocation *DIL) {
+ auto updateHash = [](const StringRef &Str) -> uint64_t {
+ if (Str.empty())
+ return 0;
+ return MD5Hash(Str);
+ };
+ uint64_t Ret = updateHash(std::to_string(DIL->getLine()));
+ Ret ^= updateHash(BB.getName());
+ Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName());
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ Ret ^= updateHash(std::to_string(DIL->getLine()));
+ Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName());
+ }
+ return Ret;
+}
+
+static uint64_t getCallStackHash(const DILocation *DIL) {
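+ // Combine hashes in the usual boost::hash_combine style: XOR the seed with
+ // the value's hash plus the golden-ratio constant 0x9e3779b9 and shifted
+ // copies of the seed, folded over the inlined-at chain below.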
+ auto hashCombine = [](const uint64_t Seed, const uint64_t Val) {
+ std::hash<uint64_t> Hasher;
+ return Seed ^ (Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2));
+ };
+ uint64_t Ret = 0;
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ Ret = hashCombine(Ret, xxh3_64bits(ArrayRef<uint8_t>(DIL->getLine())));
+ Ret = hashCombine(Ret, xxh3_64bits(DIL->getSubprogramLinkageName()));
+ }
+ return Ret;
+}
+
+// Traverse the CFG and assign FS discriminators. If two instructions
+// have the same lineno and discriminator but reside in different BBs,
+// the latter instruction will get a new discriminator value. The new
+// discriminator keeps the existing discriminator value but sets new bits
+// between LowBit and HighBit.
+bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
+ if (!EnableFSDiscriminator)
+ return false;
+
+ bool HasPseudoProbe = MF.getFunction().getParent()->getNamedMetadata(
+ PseudoProbeDescMetadataName);
+
+ if (!HasPseudoProbe && !MF.getFunction().shouldEmitDebugInfoForProfiling())
+ return false;
+
+ bool Changed = false;
+ using LocationDiscriminator =
+ std::tuple<StringRef, unsigned, unsigned, uint64_t>;
+ using BBSet = DenseSet<const MachineBasicBlock *>;
+ using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
+ using LocationDiscriminatorCurrPassMap =
+ DenseMap<LocationDiscriminator, unsigned>;
+
+ LocationDiscriminatorBBMap LDBM;
+ LocationDiscriminatorCurrPassMap LDCM;
+
+ // Mask of discriminators before this pass.
+ // TODO(xur): simplify this once we switch to ImprovedFSDiscriminator.
+ unsigned LowBitTemp = LowBit;
+ assert(LowBit > 0 && "LowBit in FSDiscriminator cannot be 0");
+ if (ImprovedFSDiscriminator)
+ LowBitTemp -= 1;
+ unsigned BitMaskBefore = getN1Bits(LowBitTemp);
+ // Mask of discriminators including this pass.
+ unsigned BitMaskNow = getN1Bits(HighBit);
+ // Mask of discriminators for bits specific to this pass.
+ unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore;
+ unsigned NumNewD = 0;
+
+ LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: "
+ << MF.getFunction().getName() << " Highbit=" << HighBit
+ << "\n");
+
+ for (MachineBasicBlock &BB : MF) {
+ for (MachineInstr &I : BB) {
+ if (HasPseudoProbe) {
+ // Only assign discriminators to pseudo probe instructions. Call
+ // instructions are excluded since their dwarf discriminators are used
+ // for other purposes, i.e., storing probe ids.
+ if (!I.isPseudoProbe())
+ continue;
+ } else if (ImprovedFSDiscriminator && I.isMetaInstruction()) {
+ continue;
+ }
+ const DILocation *DIL = I.getDebugLoc().get();
+ if (!DIL)
+ continue;
+
+ // Use the id of pseudo probe to compute the discriminator.
+ unsigned LineNo =
+ I.isPseudoProbe() ? I.getOperand(1).getImm() : DIL->getLine();
+ if (LineNo == 0)
+ continue;
+ unsigned Discriminator = DIL->getDiscriminator();
+ // Clean up discriminators for pseudo probes at the first FS discriminator
+ // pass as their discriminators should not ever be used.
+ if ((Pass == FSDiscriminatorPass::Pass1) && I.isPseudoProbe()) {
+ Discriminator = 0;
+ I.setDebugLoc(DIL->cloneWithDiscriminator(0));
+ }
+ uint64_t CallStackHashVal = 0;
+ if (ImprovedFSDiscriminator)
+ CallStackHashVal = getCallStackHash(DIL);
+
+ LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator,
+ CallStackHashVal};
+ auto &BBMap = LDBM[LD];
+ auto R = BBMap.insert(&BB);
+ if (BBMap.size() == 1)
+ continue;
+
+ unsigned DiscriminatorCurrPass;
+ DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD];
+ DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit;
+ if (!ImprovedFSDiscriminator)
+ DiscriminatorCurrPass += getCallStackHashV0(BB, I, DIL);
+ DiscriminatorCurrPass &= BitMaskThisPass;
+ unsigned NewD = Discriminator | DiscriminatorCurrPass;
+ const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
+ << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " "
+ << I << "\n");
+ continue;
+ }
+
+ I.setDebugLoc(NewDIL);
+ NumNewD++;
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ": add FS discriminator, from "
+ << Discriminator << " -> " << NewD << "\n");
+ Changed = true;
+ }
+ }
+
+ if (Changed) {
+ createFSDiscriminatorVariable(MF.getFunction().getParent());
+ LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n");
+ (void) NumNewD;
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
new file mode 100644
index 000000000000..bc65700aba06
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -0,0 +1,75 @@
+//===----------------------- MIRNamer.cpp - MIR Namer ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to rename virtual register operands with the goal
+// of making it easier to author easy-to-read tests for MIR. This pass reuses
+// the vreg renamer used by MIRCanonicalizerPass.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-namer example.mir
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+namespace llvm {
+extern char &MIRNamerID;
+} // namespace llvm
+
+#define DEBUG_TYPE "mir-namer"
+
+namespace {
+
+class MIRNamer : public MachineFunctionPass {
+public:
+ static char ID;
+ MIRNamer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rename virtual register operands";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ bool Changed = false;
+
+ if (MF.empty())
+ return Changed;
+
+ VRegRenamer Renamer(MF.getRegInfo());
+
+ unsigned BBIndex = 0;
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ for (auto &MBB : RPOT)
+ Changed |= Renamer.renameVRegs(MBB, BBIndex++);
+
+ return Changed;
+ }
+};
+
+} // end anonymous namespace
+
+char MIRNamer::ID;
+
+char &llvm::MIRNamerID = MIRNamer::ID;
+
+INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false,
+ false)
+
+INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false,
+ false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
new file mode 100644
index 000000000000..a4c1ba340e46
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -0,0 +1,767 @@
+//===- MILexer.cpp - Machine instructions lexer implementation ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lexing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MILexer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cassert>
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+using ErrorCallbackType =
+ function_ref<void(StringRef::iterator Loc, const Twine &)>;
+
+/// This class provides a way to iterate and get characters from the source
+/// string.
+class Cursor {
+ const char *Ptr = nullptr;
+ const char *End = nullptr;
+
+public:
+ Cursor(std::nullopt_t) {}
+
+ explicit Cursor(StringRef Str) {
+ Ptr = Str.data();
+ End = Ptr + Str.size();
+ }
+
+ bool isEOF() const { return Ptr == End; }
+
+ char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
+
+ void advance(unsigned I = 1) { Ptr += I; }
+
+ StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
+
+ StringRef upto(Cursor C) const {
+ assert(C.Ptr >= Ptr && C.Ptr <= End);
+ return StringRef(Ptr, C.Ptr - Ptr);
+ }
+
+ StringRef::iterator location() const { return Ptr; }
+
+ operator bool() const { return Ptr != nullptr; }
+};
+
+} // end anonymous namespace
+
+MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
+ this->Kind = Kind;
+ this->Range = Range;
+ return *this;
+}
+
+MIToken &MIToken::setStringValue(StringRef StrVal) {
+ StringValue = StrVal;
+ return *this;
+}
+
+MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
+ StringValueStorage = std::move(StrVal);
+ StringValue = StringValueStorage;
+ return *this;
+}
+
+MIToken &MIToken::setIntegerValue(APSInt IntVal) {
+ this->IntVal = std::move(IntVal);
+ return *this;
+}
+
+/// Skip the leading whitespace characters and return the updated cursor.
+static Cursor skipWhitespace(Cursor C) {
+ while (isblank(C.peek()))
+ C.advance();
+ return C;
+}
+
+static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; }
+
+/// Skip a line comment and return the updated cursor.
+static Cursor skipComment(Cursor C) {
+ if (C.peek() != ';')
+ return C;
+ while (!isNewlineChar(C.peek()) && !C.isEOF())
+ C.advance();
+ return C;
+}
+
+/// Machine operands can have comments, enclosed between /* and */.
+/// This eats up all tokens, including /* and */.
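+/// For example (illustrative operand text), given "$eax /* clobbered */" the
+/// lexer consumes the "/* clobbered */" portion here.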
+static Cursor skipMachineOperandComment(Cursor C) {
+ if (C.peek() != '/' || C.peek(1) != '*')
+ return C;
+
+ while (C.peek() != '*' || C.peek(1) != '/')
+ C.advance();
+
+ C.advance();
+ C.advance();
+ return C;
+}
+
+/// Return true if the given character satisfies the following regular
+/// expression: [-a-zA-Z$._0-9]
+static bool isIdentifierChar(char C) {
+ return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' ||
+ C == '$';
+}
+
+/// Unescapes the given string value.
+///
+/// Expects the string value to be quoted.
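+///
+/// For example (illustrative), the quoted value "a\\b" unescapes to a\b, and
+/// "\41" decodes the two hex digits to the single character 'A'.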
+static std::string unescapeQuotedString(StringRef Value) {
+ assert(Value.front() == '"' && Value.back() == '"');
+ Cursor C = Cursor(Value.substr(1, Value.size() - 2));
+
+ std::string Str;
+ Str.reserve(C.remaining().size());
+ while (!C.isEOF()) {
+ char Char = C.peek();
+ if (Char == '\\') {
+ if (C.peek(1) == '\\') {
+ // Two '\' become one
+ Str += '\\';
+ C.advance(2);
+ continue;
+ }
+ if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
+ Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
+ C.advance(3);
+ continue;
+ }
+ }
+ Str += Char;
+ C.advance();
+ }
+ return Str;
+}
+
+/// Lex a string constant using the following regular expression: \"[^\"]*\"
+static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
+ assert(C.peek() == '"');
+ for (C.advance(); C.peek() != '"'; C.advance()) {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '\"'");
+ return std::nullopt;
+ }
+ }
+ C.advance();
+ return C;
+}
+
+static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
+ unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
+ auto Range = C;
+ C.advance(PrefixLength);
+ if (C.peek() == '"') {
+ if (Cursor R = lexStringConstant(C, ErrorCallback)) {
+ StringRef String = Range.upto(R);
+ Token.reset(Type, String)
+ .setOwnedStringValue(
+ unescapeQuotedString(String.drop_front(PrefixLength)));
+ return R;
+ }
+ Token.reset(MIToken::Error, Range.remaining());
+ return Range;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ Token.reset(Type, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(PrefixLength));
+ return C;
+}
+
+static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("_", MIToken::underscore)
+ .Case("implicit", MIToken::kw_implicit)
+ .Case("implicit-def", MIToken::kw_implicit_define)
+ .Case("def", MIToken::kw_def)
+ .Case("dead", MIToken::kw_dead)
+ .Case("killed", MIToken::kw_killed)
+ .Case("undef", MIToken::kw_undef)
+ .Case("internal", MIToken::kw_internal)
+ .Case("early-clobber", MIToken::kw_early_clobber)
+ .Case("debug-use", MIToken::kw_debug_use)
+ .Case("renamable", MIToken::kw_renamable)
+ .Case("tied-def", MIToken::kw_tied_def)
+ .Case("frame-setup", MIToken::kw_frame_setup)
+ .Case("frame-destroy", MIToken::kw_frame_destroy)
+ .Case("nnan", MIToken::kw_nnan)
+ .Case("ninf", MIToken::kw_ninf)
+ .Case("nsz", MIToken::kw_nsz)
+ .Case("arcp", MIToken::kw_arcp)
+ .Case("contract", MIToken::kw_contract)
+ .Case("afn", MIToken::kw_afn)
+ .Case("reassoc", MIToken::kw_reassoc)
+ .Case("nuw", MIToken::kw_nuw)
+ .Case("nsw", MIToken::kw_nsw)
+ .Case("exact", MIToken::kw_exact)
+ .Case("nofpexcept", MIToken::kw_nofpexcept)
+ .Case("unpredictable", MIToken::kw_unpredictable)
+ .Case("debug-location", MIToken::kw_debug_location)
+ .Case("debug-instr-number", MIToken::kw_debug_instr_number)
+ .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
+ .Case("same_value", MIToken::kw_cfi_same_value)
+ .Case("offset", MIToken::kw_cfi_offset)
+ .Case("rel_offset", MIToken::kw_cfi_rel_offset)
+ .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
+ .Case("escape", MIToken::kw_cfi_escape)
+ .Case("def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa)
+ .Case("remember_state", MIToken::kw_cfi_remember_state)
+ .Case("restore", MIToken::kw_cfi_restore)
+ .Case("restore_state", MIToken::kw_cfi_restore_state)
+ .Case("undefined", MIToken::kw_cfi_undefined)
+ .Case("register", MIToken::kw_cfi_register)
+ .Case("window_save", MIToken::kw_cfi_window_save)
+ .Case("negate_ra_sign_state",
+ MIToken::kw_cfi_aarch64_negate_ra_sign_state)
+ .Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("intrinsic", MIToken::kw_intrinsic)
+ .Case("target-index", MIToken::kw_target_index)
+ .Case("half", MIToken::kw_half)
+ .Case("float", MIToken::kw_float)
+ .Case("double", MIToken::kw_double)
+ .Case("x86_fp80", MIToken::kw_x86_fp80)
+ .Case("fp128", MIToken::kw_fp128)
+ .Case("ppc_fp128", MIToken::kw_ppc_fp128)
+ .Case("target-flags", MIToken::kw_target_flags)
+ .Case("volatile", MIToken::kw_volatile)
+ .Case("non-temporal", MIToken::kw_non_temporal)
+ .Case("dereferenceable", MIToken::kw_dereferenceable)
+ .Case("invariant", MIToken::kw_invariant)
+ .Case("align", MIToken::kw_align)
+ .Case("basealign", MIToken::kw_basealign)
+ .Case("addrspace", MIToken::kw_addrspace)
+ .Case("stack", MIToken::kw_stack)
+ .Case("got", MIToken::kw_got)
+ .Case("jump-table", MIToken::kw_jump_table)
+ .Case("constant-pool", MIToken::kw_constant_pool)
+ .Case("call-entry", MIToken::kw_call_entry)
+ .Case("custom", MIToken::kw_custom)
+ .Case("liveout", MIToken::kw_liveout)
+ .Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("inlineasm-br-indirect-target",
+ MIToken::kw_inlineasm_br_indirect_target)
+ .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
+ .Case("liveins", MIToken::kw_liveins)
+ .Case("successors", MIToken::kw_successors)
+ .Case("floatpred", MIToken::kw_floatpred)
+ .Case("intpred", MIToken::kw_intpred)
+ .Case("shufflemask", MIToken::kw_shufflemask)
+ .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
+ .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
+ .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
+ .Case("pcsections", MIToken::kw_pcsections)
+ .Case("cfi-type", MIToken::kw_cfi_type)
+ .Case("bbsections", MIToken::kw_bbsections)
+ .Case("bb_id", MIToken::kw_bb_id)
+ .Case("unknown-size", MIToken::kw_unknown_size)
+ .Case("unknown-address", MIToken::kw_unknown_address)
+ .Case("distinct", MIToken::kw_distinct)
+ .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken)
+ .Case("machine-block-address-taken",
+ MIToken::kw_machine_block_address_taken)
+ .Default(MIToken::Identifier);
+}
+
+static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
+ if (!isalpha(C.peek()) && C.peek() != '_')
+ return std::nullopt;
+ auto Range = C;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ auto Identifier = Range.upto(C);
+ Token.reset(getIdentifierKind(Identifier), Identifier)
+ .setStringValue(Identifier);
+ return C;
+}
+
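+/// Lex either a machine basic block reference such as "%bb.4" or a basic
+/// block label such as "bb.4.entry" (the numeric id is required; the
+/// ".<irname>" suffix is optional).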
+static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ bool IsReference = C.remaining().startswith("%bb.");
+ if (!IsReference && !C.remaining().startswith("bb."))
+ return std::nullopt;
+ auto Range = C;
+ unsigned PrefixLength = IsReference ? 4 : 3;
+ C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
+ if (!isdigit(C.peek())) {
+ Token.reset(MIToken::Error, C.remaining());
+ ErrorCallback(C.location(), "expected a number after '%bb.'");
+ return C;
+ }
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ StringRef Number = NumberRange.upto(C);
+ unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
+ // TODO: The format bb.<id>.<irname> is supported only when it's not a
+ // reference. Once we deprecate the format where the irname shows up, we
+ // should only lex forward if it is a reference.
+ if (C.peek() == '.') {
+ C.advance(); // Skip '.'
+ ++StringOffset;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ }
+ Token.reset(IsReference ? MIToken::MachineBasicBlock
+ : MIToken::MachineBasicBlockLabel,
+ Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
+ return C;
+}
+
+static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return std::nullopt;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return std::nullopt;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ StringRef Number = NumberRange.upto(C);
+ unsigned StringOffset = Rule.size() + Number.size();
+ if (C.peek() == '.') {
+ C.advance();
+ ++StringOffset;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ }
+ Token.reset(Kind, Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
+ return C;
+}
+
+static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
+}
+
+static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
+}
+
+static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
+}
+
+static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
+}
+
+static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%subreg.";
+ if (!C.remaining().startswith(Rule))
+ return std::nullopt;
+ return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
+ ErrorCallback);
+}
+
+static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%ir-block.";
+ if (!C.remaining().startswith(Rule))
+ return std::nullopt;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
+ return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
+}
+
+static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%ir.";
+ if (!C.remaining().startswith(Rule))
+ return std::nullopt;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
+ return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
+}
+
+static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '"')
+ return std::nullopt;
+ return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
+ ErrorCallback);
+}
+
+static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
+ auto Range = C;
+ C.advance(); // Skip '%'
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::VirtualRegister, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+/// Returns true for a character allowed in a register name.
+static bool isRegisterChar(char C) {
+ return isIdentifierChar(C) && C != '.';
+}
+
+static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
+ Cursor Range = C;
+ C.advance(); // Skip '%'
+ while (isRegisterChar(C.peek()))
+ C.advance();
+ Token.reset(MIToken::NamedVirtualRegister, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
+ return C;
+}
+
+static Cursor maybeLexRegister(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '%' && C.peek() != '$')
+ return std::nullopt;
+
+ if (C.peek() == '%') {
+ if (isdigit(C.peek(1)))
+ return lexVirtualRegister(C, Token);
+
+ if (isRegisterChar(C.peek(1)))
+ return lexNamedVirtualRegister(C, Token);
+
+ return std::nullopt;
+ }
+
+ assert(C.peek() == '$');
+ auto Range = C;
+ C.advance(); // Skip '$'
+ while (isRegisterChar(C.peek()))
+ C.advance();
+ Token.reset(MIToken::NamedRegister, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'
+ return C;
+}
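+// For illustration, the register lexers above map an input such as "%0" to
+// VirtualRegister, "%mydef" to NamedVirtualRegister, and "$eax" to
+// NamedRegister; the sample names are arbitrary and only show the prefixes.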
+
+static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '@')
+ return std::nullopt;
+ if (!isdigit(C.peek(1)))
+ return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
+ ErrorCallback);
+ auto Range = C;
+ C.advance(1); // Skip the '@'
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::GlobalValue, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '&')
+ return std::nullopt;
+ return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
+ ErrorCallback);
+}
+
+static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "<mcsymbol ";
+ if (!C.remaining().startswith(Rule))
+ return std::nullopt;
+ auto Start = C;
+ C.advance(Rule.size());
+
+ // Try a simple unquoted name.
+ if (C.peek() != '"') {
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef String = Start.upto(C).drop_front(Rule.size());
+ if (C.peek() != '>') {
+ ErrorCallback(C.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ C.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
+ return C;
+ }
+
+ // Otherwise lex out a quoted name.
+ Cursor R = lexStringConstant(C, ErrorCallback);
+ if (!R) {
+ ErrorCallback(C.location(),
+ "unable to parse quoted string from opening quote");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ StringRef String = Start.upto(R).drop_front(Rule.size());
+ if (R.peek() != '>') {
+ ErrorCallback(R.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ R.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(R))
+ .setOwnedStringValue(unescapeQuotedString(String));
+ return R;
+}
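+// For illustration, maybeLexMCSymbol accepts both an unquoted form such as
+// "<mcsymbol foo>" and a quoted form such as <mcsymbol "a b">, where the
+// quoted string is unescaped before being stored; the sample names are
+// arbitrary.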
+
+static bool isValidHexFloatingPointPrefix(char C) {
+ return C == 'H' || C == 'K' || C == 'L' || C == 'M' || C == 'R';
+}
+
+static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
+ C.advance();
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(C.peek()))
+ C.advance();
+ if ((C.peek() == 'e' || C.peek() == 'E') &&
+ (isdigit(C.peek(1)) ||
+ ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
+ C.advance(2);
+ while (isdigit(C.peek()))
+ C.advance();
+ }
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
+ if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
+ return std::nullopt;
+ Cursor Range = C;
+ C.advance(2);
+ unsigned PrefLen = 2;
+ if (isValidHexFloatingPointPrefix(C.peek())) {
+ C.advance();
+ PrefLen++;
+ }
+ while (isxdigit(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ if (StrVal.size() <= PrefLen)
+ return std::nullopt;
+ if (PrefLen == 2)
+ Token.reset(MIToken::HexLiteral, Range.upto(C));
+ else // It must be 3, which means that there was a floating-point prefix.
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
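+// For illustration: "0x1f" lexes as a HexLiteral, while a literal with one of
+// the prefixes above, e.g. "0xH3C00" (assuming the LLVM IR convention for
+// half-precision hex constants), lexes as a FloatingPointLiteral.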
+
+static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
+ if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
+ return std::nullopt;
+ auto Range = C;
+ C.advance();
+ while (isdigit(C.peek()))
+ C.advance();
+ if (C.peek() == '.')
+ return lexFloatingPointLiteral(Range, C, Token);
+ StringRef StrVal = Range.upto(C);
+ Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
+ return C;
+}
+
+static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("!tbaa", MIToken::md_tbaa)
+ .Case("!alias.scope", MIToken::md_alias_scope)
+ .Case("!noalias", MIToken::md_noalias)
+ .Case("!range", MIToken::md_range)
+ .Case("!DIExpression", MIToken::md_diexpr)
+ .Case("!DILocation", MIToken::md_dilocation)
+ .Default(MIToken::Error);
+}
+
+static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '!')
+ return std::nullopt;
+ auto Range = C;
+ C.advance(1);
+ if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
+ Token.reset(MIToken::exclaim, Range.upto(C));
+ return C;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ Token.reset(getMetadataKeywordKind(StrVal), StrVal);
+ if (Token.isError())
+ ErrorCallback(Token.location(),
+ "use of unknown metadata keyword '" + StrVal + "'");
+ return C;
+}
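+// For illustration: "!0" lexes as a bare exclaim token (the digits are lexed
+// separately), "!tbaa" lexes as the md_tbaa keyword, and an unknown name such
+// as "!foo" is reported as an error; the examples are arbitrary.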
+
+static MIToken::TokenKind symbolToken(char C) {
+ switch (C) {
+ case ',':
+ return MIToken::comma;
+ case '.':
+ return MIToken::dot;
+ case '=':
+ return MIToken::equal;
+ case ':':
+ return MIToken::colon;
+ case '(':
+ return MIToken::lparen;
+ case ')':
+ return MIToken::rparen;
+ case '{':
+ return MIToken::lbrace;
+ case '}':
+ return MIToken::rbrace;
+ case '+':
+ return MIToken::plus;
+ case '-':
+ return MIToken::minus;
+ case '<':
+ return MIToken::less;
+ case '>':
+ return MIToken::greater;
+ default:
+ return MIToken::Error;
+ }
+}
+
+static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
+ MIToken::TokenKind Kind;
+ unsigned Length = 1;
+ if (C.peek() == ':' && C.peek(1) == ':') {
+ Kind = MIToken::coloncolon;
+ Length = 2;
+ } else
+ Kind = symbolToken(C.peek());
+ if (Kind == MIToken::Error)
+ return std::nullopt;
+ auto Range = C;
+ C.advance(Length);
+ Token.reset(Kind, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
+ if (!isNewlineChar(C.peek()))
+ return std::nullopt;
+ auto Range = C;
+ C.advance();
+ Token.reset(MIToken::Newline, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '`')
+ return std::nullopt;
+ auto Range = C;
+ C.advance();
+ auto StrRange = C;
+ while (C.peek() != '`') {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '`'");
+ Token.reset(MIToken::Error, Range.remaining());
+ return C;
+ }
+ C.advance();
+ }
+ StringRef Value = StrRange.upto(C);
+ C.advance();
+ Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
+ return C;
+}
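+// For illustration, a backtick-quoted operand such as `i64 0` (an arbitrary
+// example) lexes as a QuotedIRValue whose string value is the text between
+// the backticks.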
+
+StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ auto C = skipComment(skipWhitespace(Cursor(Source)));
+ if (C.isEOF()) {
+ Token.reset(MIToken::Eof, C.remaining());
+ return C.remaining();
+ }
+
+ C = skipMachineOperandComment(C);
+
+ if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIdentifier(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexJumpTableIndex(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexFixedStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexConstantPoolItem(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexRegister(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexNumericalLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexSymbol(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexNewline(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback))
+ return R.remaining();
+
+ Token.reset(MIToken::Error, C.remaining());
+ ErrorCallback(C.location(),
+ Twine("unexpected character '") + Twine(C.peek()) + "'");
+ return C.remaining();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
new file mode 100644
index 000000000000..7149c29d6ba7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -0,0 +1,253 @@
+//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that lexes the machine instruction source
+// string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace llvm {
+
+class Twine;
+
+/// A token produced by the machine instruction lexer.
+struct MIToken {
+ enum TokenKind {
+ // Markers
+ Eof,
+ Error,
+ Newline,
+
+ // Tokens with no info.
+ comma,
+ equal,
+ underscore,
+ colon,
+ coloncolon,
+ dot,
+ exclaim,
+ lparen,
+ rparen,
+ lbrace,
+ rbrace,
+ plus,
+ minus,
+ less,
+ greater,
+
+ // Keywords
+ kw_implicit,
+ kw_implicit_define,
+ kw_def,
+ kw_dead,
+ kw_dereferenceable,
+ kw_killed,
+ kw_undef,
+ kw_internal,
+ kw_early_clobber,
+ kw_debug_use,
+ kw_renamable,
+ kw_tied_def,
+ kw_frame_setup,
+ kw_frame_destroy,
+ kw_nnan,
+ kw_ninf,
+ kw_nsz,
+ kw_arcp,
+ kw_contract,
+ kw_afn,
+ kw_reassoc,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
+ kw_nofpexcept,
+ kw_unpredictable,
+ kw_debug_location,
+ kw_debug_instr_number,
+ kw_dbg_instr_ref,
+ kw_cfi_same_value,
+ kw_cfi_offset,
+ kw_cfi_rel_offset,
+ kw_cfi_def_cfa_register,
+ kw_cfi_def_cfa_offset,
+ kw_cfi_adjust_cfa_offset,
+ kw_cfi_escape,
+ kw_cfi_def_cfa,
+ kw_cfi_llvm_def_aspace_cfa,
+ kw_cfi_register,
+ kw_cfi_remember_state,
+ kw_cfi_restore,
+ kw_cfi_restore_state,
+ kw_cfi_undefined,
+ kw_cfi_window_save,
+ kw_cfi_aarch64_negate_ra_sign_state,
+ kw_blockaddress,
+ kw_intrinsic,
+ kw_target_index,
+ kw_half,
+ kw_float,
+ kw_double,
+ kw_x86_fp80,
+ kw_fp128,
+ kw_ppc_fp128,
+ kw_target_flags,
+ kw_volatile,
+ kw_non_temporal,
+ kw_invariant,
+ kw_align,
+ kw_basealign,
+ kw_addrspace,
+ kw_stack,
+ kw_got,
+ kw_jump_table,
+ kw_constant_pool,
+ kw_call_entry,
+ kw_custom,
+ kw_liveout,
+ kw_landing_pad,
+ kw_inlineasm_br_indirect_target,
+ kw_ehfunclet_entry,
+ kw_liveins,
+ kw_successors,
+ kw_floatpred,
+ kw_intpred,
+ kw_shufflemask,
+ kw_pre_instr_symbol,
+ kw_post_instr_symbol,
+ kw_heap_alloc_marker,
+ kw_pcsections,
+ kw_cfi_type,
+ kw_bbsections,
+ kw_bb_id,
+ kw_unknown_size,
+ kw_unknown_address,
+ kw_ir_block_address_taken,
+ kw_machine_block_address_taken,
+
+ // Metadata types.
+ kw_distinct,
+
+ // Named metadata keywords
+ md_tbaa,
+ md_alias_scope,
+ md_noalias,
+ md_range,
+ md_diexpr,
+ md_dilocation,
+
+ // Identifier tokens
+ Identifier,
+ NamedRegister,
+ NamedVirtualRegister,
+ MachineBasicBlockLabel,
+ MachineBasicBlock,
+ StackObject,
+ FixedStackObject,
+ NamedGlobalValue,
+ GlobalValue,
+ ExternalSymbol,
+ MCSymbol,
+
+ // Other tokens
+ IntegerLiteral,
+ FloatingPointLiteral,
+ HexLiteral,
+ VectorLiteral,
+ VirtualRegister,
+ ConstantPoolItem,
+ JumpTableIndex,
+ NamedIRBlock,
+ IRBlock,
+ NamedIRValue,
+ IRValue,
+ QuotedIRValue, // `<constant value>`
+ SubRegisterIndex,
+ StringConstant
+ };
+
+private:
+ TokenKind Kind = Error;
+ StringRef Range;
+ StringRef StringValue;
+ std::string StringValueStorage;
+ APSInt IntVal;
+
+public:
+ MIToken() = default;
+
+ MIToken &reset(TokenKind Kind, StringRef Range);
+
+ MIToken &setStringValue(StringRef StrVal);
+ MIToken &setOwnedStringValue(std::string StrVal);
+ MIToken &setIntegerValue(APSInt IntVal);
+
+ TokenKind kind() const { return Kind; }
+
+ bool isError() const { return Kind == Error; }
+
+ bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
+
+ bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
+
+ bool isRegister() const {
+ return Kind == NamedRegister || Kind == underscore ||
+ Kind == NamedVirtualRegister || Kind == VirtualRegister;
+ }
+
+ bool isRegisterFlag() const {
+ return Kind == kw_implicit || Kind == kw_implicit_define ||
+ Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
+ Kind == kw_undef || Kind == kw_internal ||
+ Kind == kw_early_clobber || Kind == kw_debug_use ||
+ Kind == kw_renamable;
+ }
+
+ bool isMemoryOperandFlag() const {
+ return Kind == kw_volatile || Kind == kw_non_temporal ||
+ Kind == kw_dereferenceable || Kind == kw_invariant ||
+ Kind == StringConstant;
+ }
+
+ bool is(TokenKind K) const { return Kind == K; }
+
+ bool isNot(TokenKind K) const { return Kind != K; }
+
+ StringRef::iterator location() const { return Range.begin(); }
+
+ StringRef range() const { return Range; }
+
+ /// Return the token's string value.
+ StringRef stringValue() const { return StringValue; }
+
+ const APSInt &integerValue() const { return IntVal; }
+
+ bool hasIntegerValue() const {
+ return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
+ Kind == MachineBasicBlockLabel || Kind == StackObject ||
+ Kind == FixedStackObject || Kind == GlobalValue ||
+ Kind == VirtualRegister || Kind == ConstantPoolItem ||
+ Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
+ }
+};
+
+/// Consume a single machine instruction token in the given source and return
+/// the remaining source string.
+StringRef lexMIToken(
+ StringRef Source, MIToken &Token,
+ function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
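+
+// A minimal usage sketch (an illustration, not a prescribed pattern): callers
+// typically feed the returned remainder back in until Eof or Error is hit:
+//
+//   MIToken Tok;
+//   StringRef Rest = Source; // 'Source' stands for any MI string being read
+//   do {
+//     Rest = lexMIToken(Rest, Tok,
+//                       [](StringRef::iterator Loc, const Twine &Msg) {
+//                         // error handling is left to the caller
+//                       });
+//   } while (!Tok.isErrorOrEOF());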
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
new file mode 100644
index 000000000000..bfd9286ff59c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -0,0 +1,3620 @@
+//===- MIParser.cpp - Machine instructions parser implementation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the parsing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRParser/MIParser.h"
+#include "MILexer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MIRFormatter.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+void PerTargetMIParsingState::setTarget(
+ const TargetSubtargetInfo &NewSubtarget) {
+
+  // If the subtarget changed, conservatively assume everything is invalid.
+ if (&Subtarget == &NewSubtarget)
+ return;
+
+ Names2InstrOpCodes.clear();
+ Names2Regs.clear();
+ Names2RegMasks.clear();
+ Names2SubRegIndices.clear();
+ Names2TargetIndices.clear();
+ Names2DirectTargetFlags.clear();
+ Names2BitmaskTargetFlags.clear();
+ Names2MMOTargetFlags.clear();
+
+ initNames2RegClasses();
+ initNames2RegBanks();
+}
+
+void PerTargetMIParsingState::initNames2Regs() {
+ if (!Names2Regs.empty())
+ return;
+
+  // The '%noreg' register is register 0.
+ Names2Regs.insert(std::make_pair("noreg", 0));
+ const auto *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+
+ for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+ bool WasInserted =
+ Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+ .second;
+ (void)WasInserted;
+ assert(WasInserted && "Expected registers to be unique case-insensitively");
+ }
+}
+
+bool PerTargetMIParsingState::getRegisterByName(StringRef RegName,
+ Register &Reg) {
+ initNames2Regs();
+ auto RegInfo = Names2Regs.find(RegName);
+ if (RegInfo == Names2Regs.end())
+ return true;
+ Reg = RegInfo->getValue();
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2InstrOpCodes() {
+ if (!Names2InstrOpCodes.empty())
+ return;
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
+ Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
+}
+
+bool PerTargetMIParsingState::parseInstrName(StringRef InstrName,
+ unsigned &OpCode) {
+ initNames2InstrOpCodes();
+ auto InstrInfo = Names2InstrOpCodes.find(InstrName);
+ if (InstrInfo == Names2InstrOpCodes.end())
+ return true;
+ OpCode = InstrInfo->getValue();
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2RegMasks() {
+ if (!Names2RegMasks.empty())
+ return;
+ const auto *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
+ ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
+ assert(RegMasks.size() == RegMaskNames.size());
+ for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
+ Names2RegMasks.insert(
+ std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
+}
+
+const uint32_t *PerTargetMIParsingState::getRegMask(StringRef Identifier) {
+ initNames2RegMasks();
+ auto RegMaskInfo = Names2RegMasks.find(Identifier);
+ if (RegMaskInfo == Names2RegMasks.end())
+ return nullptr;
+ return RegMaskInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2SubRegIndices() {
+ if (!Names2SubRegIndices.empty())
+ return;
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
+ Names2SubRegIndices.insert(
+ std::make_pair(TRI->getSubRegIndexName(I), I));
+}
+
+unsigned PerTargetMIParsingState::getSubRegIndex(StringRef Name) {
+ initNames2SubRegIndices();
+ auto SubRegInfo = Names2SubRegIndices.find(Name);
+ if (SubRegInfo == Names2SubRegIndices.end())
+ return 0;
+ return SubRegInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2TargetIndices() {
+ if (!Names2TargetIndices.empty())
+ return;
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices)
+ Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getTargetIndex(StringRef Name, int &Index) {
+ initNames2TargetIndices();
+ auto IndexInfo = Names2TargetIndices.find(Name);
+ if (IndexInfo == Names2TargetIndices.end())
+ return true;
+ Index = IndexInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2DirectTargetFlags() {
+ if (!Names2DirectTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2DirectTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getDirectTargetFlag(StringRef Name,
+ unsigned &Flag) {
+ initNames2DirectTargetFlags();
+ auto FlagInfo = Names2DirectTargetFlags.find(Name);
+ if (FlagInfo == Names2DirectTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2BitmaskTargetFlags() {
+ if (!Names2BitmaskTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2BitmaskTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getBitmaskTargetFlag(StringRef Name,
+ unsigned &Flag) {
+ initNames2BitmaskTargetFlags();
+ auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+ if (FlagInfo == Names2BitmaskTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2MMOTargetFlags() {
+ if (!Names2MMOTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2MMOTargetFlags.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getMMOTargetFlag(StringRef Name,
+ MachineMemOperand::Flags &Flag) {
+ initNames2MMOTargetFlags();
+ auto FlagInfo = Names2MMOTargetFlags.find(Name);
+ if (FlagInfo == Names2MMOTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2RegClasses() {
+ if (!Names2RegClasses.empty())
+ return;
+
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
+ const auto *RC = TRI->getRegClass(I);
+ Names2RegClasses.insert(
+ std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
+ }
+}
+
+void PerTargetMIParsingState::initNames2RegBanks() {
+ if (!Names2RegBanks.empty())
+ return;
+
+ const RegisterBankInfo *RBI = Subtarget.getRegBankInfo();
+ // If the target does not support GlobalISel, we may not have a
+ // register bank info.
+ if (!RBI)
+ return;
+
+ for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
+ const auto &RegBank = RBI->getRegBank(I);
+ Names2RegBanks.insert(
+ std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
+ }
+}
+
+const TargetRegisterClass *
+PerTargetMIParsingState::getRegClass(StringRef Name) {
+ auto RegClassInfo = Names2RegClasses.find(Name);
+ if (RegClassInfo == Names2RegClasses.end())
+ return nullptr;
+ return RegClassInfo->getValue();
+}
+
+const RegisterBank *PerTargetMIParsingState::getRegBank(StringRef Name) {
+ auto RegBankInfo = Names2RegBanks.find(Name);
+ if (RegBankInfo == Names2RegBanks.end())
+ return nullptr;
+ return RegBankInfo->getValue();
+}
+
+PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
+ SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &T)
+ : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) {
+}
+
+VRegInfo &PerFunctionMIParsingState::getVRegInfo(Register Num) {
+ auto I = VRegInfos.insert(std::make_pair(Num, nullptr));
+ if (I.second) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VRegInfo *Info = new (Allocator) VRegInfo;
+ Info->VReg = MRI.createIncompleteVirtualRegister();
+ I.first->second = Info;
+ }
+ return *I.first->second;
+}
+
+VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) {
+ assert(RegName != "" && "Expected named reg.");
+
+ auto I = VRegInfosNamed.insert(std::make_pair(RegName.str(), nullptr));
+ if (I.second) {
+ VRegInfo *Info = new (Allocator) VRegInfo;
+ Info->VReg = MF.getRegInfo().createIncompleteVirtualRegister(RegName);
+ I.first->second = Info;
+ }
+ return *I.first->second;
+}
+
+static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ int Slot = MST.getLocalSlot(V);
+ if (Slot == -1)
+ return;
+ Slots2Values.insert(std::make_pair(unsigned(Slot), V));
+}
+
+/// Creates the mapping from slot numbers to the function's unnamed IR values.
+static void initSlots2Values(const Function &F,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &Arg : F.args())
+ mapValueToSlot(&Arg, MST, Slots2Values);
+ for (const auto &BB : F) {
+ mapValueToSlot(&BB, MST, Slots2Values);
+ for (const auto &I : BB)
+ mapValueToSlot(&I, MST, Slots2Values);
+ }
+}
+
+const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) {
+ if (Slots2Values.empty())
+ initSlots2Values(MF.getFunction(), Slots2Values);
+ return Slots2Values.lookup(Slot);
+}
+
+namespace {
+
+/// A wrapper struct around the 'MachineOperand' struct that includes a source
+/// range and other attributes.
+struct ParsedMachineOperand {
+ MachineOperand Operand;
+ StringRef::iterator Begin;
+ StringRef::iterator End;
+ std::optional<unsigned> TiedDefIdx;
+
+ ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin,
+ StringRef::iterator End,
+ std::optional<unsigned> &TiedDefIdx)
+ : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) {
+ if (TiedDefIdx)
+ assert(Operand.isReg() && Operand.isUse() &&
+ "Only used register operands can be tied");
+ }
+};
+
+class MIParser {
+ MachineFunction &MF;
+ SMDiagnostic &Error;
+ StringRef Source, CurrentSource;
+ SMRange SourceRange;
+ MIToken Token;
+ PerFunctionMIParsingState &PFS;
+  /// Maps from slot numbers to the function's unnamed basic blocks.
+ DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
+
+public:
+ MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source);
+ MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source, SMRange SourceRange);
+
+ /// \p SkipChar gives the number of characters to skip before looking
+ /// for the next token.
+ void lex(unsigned SkipChar = 0);
+
+ /// Report an error at the current location with the given message.
+ ///
+  /// This function always returns true.
+ bool error(const Twine &Msg);
+
+ /// Report an error at the given location with the given message.
+ ///
+  /// This function always returns true.
+ bool error(StringRef::iterator Loc, const Twine &Msg);
+
+ bool
+ parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlocks();
+ bool parse(MachineInstr *&MI);
+ bool parseStandaloneMBB(MachineBasicBlock *&MBB);
+ bool parseStandaloneNamedRegister(Register &Reg);
+ bool parseStandaloneVirtualRegister(VRegInfo *&Info);
+ bool parseStandaloneRegister(Register &Reg);
+ bool parseStandaloneStackObject(int &FI);
+ bool parseStandaloneMDNode(MDNode *&Node);
+ bool parseMachineMetadata();
+ bool parseMDTuple(MDNode *&MD, bool IsDistinct);
+ bool parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts);
+ bool parseMetadata(Metadata *&MD);
+
+ bool
+ parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlock(MachineBasicBlock &MBB,
+ MachineBasicBlock *&AddFalthroughFrom);
+ bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
+ bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
+
+ bool parseNamedRegister(Register &Reg);
+ bool parseVirtualRegister(VRegInfo *&Info);
+ bool parseNamedVirtualRegister(VRegInfo *&Info);
+ bool parseRegister(Register &Reg, VRegInfo *&VRegInfo);
+ bool parseRegisterFlag(unsigned &Flags);
+ bool parseRegisterClassOrBank(VRegInfo &RegInfo);
+ bool parseSubRegisterIndex(unsigned &SubReg);
+ bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
+ bool parseRegisterOperand(MachineOperand &Dest,
+ std::optional<unsigned> &TiedDefIdx,
+ bool IsDef = false);
+ bool parseImmediateOperand(MachineOperand &Dest);
+ bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C);
+ bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
+ bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty);
+ bool parseTypedImmediateOperand(MachineOperand &Dest);
+ bool parseFPImmediateOperand(MachineOperand &Dest);
+ bool parseMBBReference(MachineBasicBlock *&MBB);
+ bool parseMBBOperand(MachineOperand &Dest);
+ bool parseStackFrameIndex(int &FI);
+ bool parseStackObjectOperand(MachineOperand &Dest);
+ bool parseFixedStackFrameIndex(int &FI);
+ bool parseFixedStackObjectOperand(MachineOperand &Dest);
+ bool parseGlobalValue(GlobalValue *&GV);
+ bool parseGlobalAddressOperand(MachineOperand &Dest);
+ bool parseConstantPoolIndexOperand(MachineOperand &Dest);
+ bool parseSubRegisterIndexOperand(MachineOperand &Dest);
+ bool parseJumpTableIndexOperand(MachineOperand &Dest);
+ bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMCSymbolOperand(MachineOperand &Dest);
+ [[nodiscard]] bool parseMDNode(MDNode *&Node);
+ bool parseDIExpression(MDNode *&Expr);
+ bool parseDILocation(MDNode *&Expr);
+ bool parseMetadataOperand(MachineOperand &Dest);
+ bool parseCFIOffset(int &Offset);
+ bool parseCFIRegister(Register &Reg);
+ bool parseCFIAddressSpace(unsigned &AddressSpace);
+ bool parseCFIEscapeValues(std::string& Values);
+ bool parseCFIOperand(MachineOperand &Dest);
+ bool parseIRBlock(BasicBlock *&BB, const Function &F);
+ bool parseBlockAddressOperand(MachineOperand &Dest);
+ bool parseIntrinsicOperand(MachineOperand &Dest);
+ bool parsePredicateOperand(MachineOperand &Dest);
+ bool parseShuffleMaskOperand(MachineOperand &Dest);
+ bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseDbgInstrRefOperand(MachineOperand &Dest);
+ bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
+ bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
+ bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest,
+ std::optional<unsigned> &TiedDefIdx);
+ bool parseMachineOperandAndTargetFlags(const unsigned OpCode,
+ const unsigned OpIdx,
+ MachineOperand &Dest,
+ std::optional<unsigned> &TiedDefIdx);
+ bool parseOffset(int64_t &Offset);
+ bool parseIRBlockAddressTaken(BasicBlock *&BB);
+ bool parseAlignment(uint64_t &Alignment);
+ bool parseAddrspace(unsigned &Addrspace);
+ bool parseSectionID(std::optional<MBBSectionID> &SID);
+ bool parseBBID(std::optional<unsigned> &BBID);
+ bool parseOperandsOffset(MachineOperand &Op);
+ bool parseIRValue(const Value *&V);
+ bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
+ bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
+ bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
+ bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
+ bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
+ bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
+ bool parseHeapAllocMarker(MDNode *&Node);
+ bool parsePCSections(MDNode *&Node);
+
+ bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest, const MIRFormatter &MF);
+
+private:
+ /// Convert the integer literal in the current token into an unsigned integer.
+ ///
+ /// Return true if an error occurred.
+ bool getUnsigned(unsigned &Result);
+
+  /// Convert the integer literal in the current token into a uint64.
+ ///
+ /// Return true if an error occurred.
+ bool getUint64(uint64_t &Result);
+
+ /// Convert the hexadecimal literal in the current token into an unsigned
+ /// APInt with a minimum bitwidth required to represent the value.
+ ///
+ /// Return true if the literal does not represent an integer value.
+ bool getHexUint(APInt &Result);
+
+ /// If the current token is of the given kind, consume it and return false.
+ /// Otherwise report an error and return true.
+ bool expectAndConsume(MIToken::TokenKind TokenKind);
+
+ /// If the current token is of the given kind, consume it and return true.
+ /// Otherwise return false.
+ bool consumeIfPresent(MIToken::TokenKind TokenKind);
+
+ bool parseInstruction(unsigned &OpCode, unsigned &Flags);
+
+ bool assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands);
+
+ bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+ const MCInstrDesc &MCID);
+
+ const BasicBlock *getIRBlock(unsigned Slot);
+ const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
+
+ /// Get or create an MCSymbol for a given name.
+ MCSymbol *getOrCreateMCSymbol(StringRef Name);
+
+ /// parseStringConstant
+ /// ::= StringConstant
+ bool parseStringConstant(std::string &Result);
+
+ /// Map the location in the MI string to the corresponding location specified
+ /// in `SourceRange`.
+ SMLoc mapSMLoc(StringRef::iterator Loc);
+};
+
+} // end anonymous namespace
+
+MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source)
+ : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS)
+{}
+
+MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source, SMRange SourceRange)
+ : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source),
+ SourceRange(SourceRange), PFS(PFS) {}
+
+void MIParser::lex(unsigned SkipChar) {
+ CurrentSource = lexMIToken(
+ CurrentSource.slice(SkipChar, StringRef::npos), Token,
+ [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
+}
+
+bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
+
+bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
+ const SourceMgr &SM = *PFS.SM;
+ assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+ const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID());
+ if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) {
+ // Create an ordinary diagnostic when the source manager's buffer is the
+ // source string.
+ Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
+ return true;
+ }
+ // Create a diagnostic for a YAML string literal.
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
+ Loc - Source.data(), SourceMgr::DK_Error, Msg.str(),
+ Source, std::nullopt, std::nullopt);
+ return true;
+}
+
+SMLoc MIParser::mapSMLoc(StringRef::iterator Loc) {
+ assert(SourceRange.isValid() && "Invalid source range");
+ assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+ return SMLoc::getFromPointer(SourceRange.Start.getPointer() +
+ (Loc - Source.data()));
+}
+
+typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
+static const char *toString(MIToken::TokenKind TokenKind) {
+ switch (TokenKind) {
+ case MIToken::comma:
+ return "','";
+ case MIToken::equal:
+ return "'='";
+ case MIToken::colon:
+ return "':'";
+ case MIToken::lparen:
+ return "'('";
+ case MIToken::rparen:
+ return "')'";
+ default:
+ return "<unknown token>";
+ }
+}
+
+bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return error(Twine("expected ") + toString(TokenKind));
+ lex();
+ return false;
+}
+
+bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return false;
+ lex();
+ return true;
+}
+
+// Parse Machine Basic Block Section ID.
+bool MIParser::parseSectionID(std::optional<MBBSectionID> &SID) {
+ assert(Token.is(MIToken::kw_bbsections));
+ lex();
+ if (Token.is(MIToken::IntegerLiteral)) {
+ unsigned Value = 0;
+ if (getUnsigned(Value))
+ return error("Unknown Section ID");
+ SID = MBBSectionID{Value};
+ } else {
+ const StringRef &S = Token.stringValue();
+ if (S == "Exception")
+ SID = MBBSectionID::ExceptionSectionID;
+ else if (S == "Cold")
+ SID = MBBSectionID::ColdSectionID;
+ else
+ return error("Unknown Section ID");
+ }
+ lex();
+ return false;
+}
+
+// Parse Machine Basic Block ID.
+bool MIParser::parseBBID(std::optional<unsigned> &BBID) {
+ assert(Token.is(MIToken::kw_bb_id));
+ lex();
+ unsigned Value = 0;
+ if (getUnsigned(Value))
+ return error("Unknown BB ID");
+ BBID = Value;
+ lex();
+ return false;
+}
+
+bool MIParser::parseBasicBlockDefinition(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ auto Loc = Token.location();
+ auto Name = Token.stringValue();
+ lex();
+ bool MachineBlockAddressTaken = false;
+ BasicBlock *AddressTakenIRBlock = nullptr;
+ bool IsLandingPad = false;
+ bool IsInlineAsmBrIndirectTarget = false;
+ bool IsEHFuncletEntry = false;
+ std::optional<MBBSectionID> SectionID;
+ uint64_t Alignment = 0;
+ std::optional<unsigned> BBID;
+ BasicBlock *BB = nullptr;
+ if (consumeIfPresent(MIToken::lparen)) {
+ do {
+      // TODO: Report an error when duplicate attributes are specified.
+ switch (Token.kind()) {
+ case MIToken::kw_machine_block_address_taken:
+ MachineBlockAddressTaken = true;
+ lex();
+ break;
+ case MIToken::kw_ir_block_address_taken:
+ if (parseIRBlockAddressTaken(AddressTakenIRBlock))
+ return true;
+ break;
+ case MIToken::kw_landing_pad:
+ IsLandingPad = true;
+ lex();
+ break;
+ case MIToken::kw_inlineasm_br_indirect_target:
+ IsInlineAsmBrIndirectTarget = true;
+ lex();
+ break;
+ case MIToken::kw_ehfunclet_entry:
+ IsEHFuncletEntry = true;
+ lex();
+ break;
+ case MIToken::kw_align:
+ if (parseAlignment(Alignment))
+ return true;
+ break;
+ case MIToken::IRBlock:
+ case MIToken::NamedIRBlock:
+      // TODO: Report an error when both a name and an IR block are given.
+ if (parseIRBlock(BB, MF.getFunction()))
+ return true;
+ lex();
+ break;
+ case MIToken::kw_bbsections:
+ if (parseSectionID(SectionID))
+ return true;
+ break;
+ case MIToken::kw_bb_id:
+ if (parseBBID(BBID))
+ return true;
+ break;
+ default:
+ break;
+ }
+ } while (consumeIfPresent(MIToken::comma));
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ if (expectAndConsume(MIToken::colon))
+ return true;
+
+ if (!Name.empty()) {
+ BB = dyn_cast_or_null<BasicBlock>(
+ MF.getFunction().getValueSymbolTable()->lookup(Name));
+ if (!BB)
+ return error(Loc, Twine("basic block '") + Name +
+ "' is not defined in the function '" +
+ MF.getName() + "'");
+ }
+ auto *MBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(MF.end(), MBB);
+ bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second;
+ if (!WasInserted)
+ return error(Loc, Twine("redefinition of machine basic block with id #") +
+ Twine(ID));
+ if (Alignment)
+ MBB->setAlignment(Align(Alignment));
+ if (MachineBlockAddressTaken)
+ MBB->setMachineBlockAddressTaken();
+ if (AddressTakenIRBlock)
+ MBB->setAddressTakenIRBlock(AddressTakenIRBlock);
+ MBB->setIsEHPad(IsLandingPad);
+ MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
+ MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
+ if (SectionID) {
+ MBB->setSectionID(*SectionID);
+ MF.setBBSectionsType(BasicBlockSection::List);
+ }
+ if (BBID.has_value()) {
+    // BBSectionsType is set to `List` if any basic block has a `SectionID`.
+ // Here, we set it to `Labels` if it hasn't been set above.
+ if (!MF.hasBBSections())
+ MF.setBBSectionsType(BasicBlockSection::Labels);
+ MBB->setBBID(BBID.value());
+ }
+ return false;
+}
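+// For illustration, parseBasicBlockDefinition accepts definitions such as
+//   bb.1.if.then (landing-pad, align 8):
+// where the optional parenthesized list carries block attributes and
+// "if.then" names the corresponding IR block; the sample label is arbitrary.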
+
+bool MIParser::parseBasicBlockDefinitions(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ if (Token.isNot(MIToken::MachineBasicBlockLabel))
+ return error("expected a basic block definition before instructions");
+ unsigned BraceDepth = 0;
+ do {
+ if (parseBasicBlockDefinition(MBBSlots))
+ return true;
+ bool IsAfterNewline = false;
+ // Skip until the next machine basic block.
+ while (true) {
+ if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) ||
+ Token.isErrorOrEOF())
+ break;
+ else if (Token.is(MIToken::MachineBasicBlockLabel))
+ return error("basic block definition should be located at the start of "
+ "the line");
+ else if (consumeIfPresent(MIToken::Newline)) {
+ IsAfterNewline = true;
+ continue;
+ }
+ IsAfterNewline = false;
+ if (Token.is(MIToken::lbrace))
+ ++BraceDepth;
+ if (Token.is(MIToken::rbrace)) {
+ if (!BraceDepth)
+ return error("extraneous closing brace ('}')");
+ --BraceDepth;
+ }
+ lex();
+ }
+ // Verify that we closed all of the '{' at the end of a file or a block.
+ if (!Token.isError() && BraceDepth)
+ return error("expected '}'"); // FIXME: Report a note that shows '{'.
+ } while (!Token.isErrorOrEOF());
+ return Token.isError();
+}
+
+bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_liveins));
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of liveins.
+ return false;
+ do {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ Register Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ LaneBitmask Mask = LaneBitmask::getAll();
+ if (consumeIfPresent(MIToken::colon)) {
+ // Parse lane mask.
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
+ return error("expected a lane mask");
+ static_assert(sizeof(LaneBitmask::Type) == sizeof(uint64_t),
+ "Use correct get-function for lane mask");
+ LaneBitmask::Type V;
+ if (getUint64(V))
+ return error("invalid lane mask value");
+ Mask = LaneBitmask(V);
+ lex();
+ }
+ MBB.addLiveIn(Reg, Mask);
+ } while (consumeIfPresent(MIToken::comma));
+ return false;
+}
+
+bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_successors));
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of successors.
+ return false;
+ do {
+ if (Token.isNot(MIToken::MachineBasicBlock))
+ return error("expected a machine basic block reference");
+ MachineBasicBlock *SuccMBB = nullptr;
+ if (parseMBBReference(SuccMBB))
+ return true;
+ lex();
+ unsigned Weight = 0;
+ if (consumeIfPresent(MIToken::lparen)) {
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
+ return error("expected an integer literal after '('");
+ if (getUnsigned(Weight))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight));
+ } while (consumeIfPresent(MIToken::comma));
+ MBB.normalizeSuccProbs();
+ return false;
+}
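+// For illustration, parseBasicBlockSuccessors handles lines such as
+//   successors: %bb.2(0x30000000), %bb.3(0x50000000)
+// where the optional parenthesized value is a raw branch probability; the
+// numbers here are arbitrary.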
+
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
+ MachineBasicBlock *&AddFalthroughFrom) {
+ // Skip the definition.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ lex();
+ if (consumeIfPresent(MIToken::lparen)) {
+ while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF())
+ lex();
+ consumeIfPresent(MIToken::rparen);
+ }
+ consumeIfPresent(MIToken::colon);
+
+ // Parse the liveins and successors.
+  // N.B.: Multiple lists of successors and liveins are allowed and they're
+ // merged into one.
+ // Example:
+ // liveins: $edi
+ // liveins: $esi
+ //
+ // is equivalent to
+ // liveins: $edi, $esi
+ bool ExplicitSuccessors = false;
+ while (true) {
+ if (Token.is(MIToken::kw_successors)) {
+ if (parseBasicBlockSuccessors(MBB))
+ return true;
+ ExplicitSuccessors = true;
+ } else if (Token.is(MIToken::kw_liveins)) {
+ if (parseBasicBlockLiveins(MBB))
+ return true;
+ } else if (consumeIfPresent(MIToken::Newline)) {
+ continue;
+ } else
+ break;
+ if (!Token.isNewlineOrEOF())
+ return error("expected line break at the end of a list");
+ lex();
+ }
+
+ // Parse the instructions.
+ bool IsInBundle = false;
+ MachineInstr *PrevMI = nullptr;
+ while (!Token.is(MIToken::MachineBasicBlockLabel) &&
+ !Token.is(MIToken::Eof)) {
+ if (consumeIfPresent(MIToken::Newline))
+ continue;
+ if (consumeIfPresent(MIToken::rbrace)) {
+ // The first parsing pass should verify that all closing '}' have an
+ // opening '{'.
+ assert(IsInBundle);
+ IsInBundle = false;
+ continue;
+ }
+ MachineInstr *MI = nullptr;
+ if (parse(MI))
+ return true;
+ MBB.insert(MBB.end(), MI);
+ if (IsInBundle) {
+ PrevMI->setFlag(MachineInstr::BundledSucc);
+ MI->setFlag(MachineInstr::BundledPred);
+ }
+ PrevMI = MI;
+ if (Token.is(MIToken::lbrace)) {
+ if (IsInBundle)
+ return error("nested instruction bundles are not allowed");
+ lex();
+ // This instruction is the start of the bundle.
+ MI->setFlag(MachineInstr::BundledSucc);
+ IsInBundle = true;
+ if (!Token.is(MIToken::Newline))
+ // The next instruction can be on the same line.
+ continue;
+ }
+ assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
+ lex();
+ }
+
+ // Construct successor list by searching for basic block machine operands.
+ if (!ExplicitSuccessors) {
+ SmallVector<MachineBasicBlock*,4> Successors;
+ bool IsFallthrough;
+ guessSuccessors(MBB, Successors, IsFallthrough);
+ for (MachineBasicBlock *Succ : Successors)
+ MBB.addSuccessor(Succ);
+
+ if (IsFallthrough) {
+ AddFalthroughFrom = &MBB;
+ } else {
+ MBB.normalizeSuccProbs();
+ }
+ }
+
+ return false;
+}
+
+bool MIParser::parseBasicBlocks() {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+  // The first parsing pass should have verified that this token is an MBB
+  // label in the 'parseBasicBlockDefinitions' method.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ MachineBasicBlock *AddFalthroughFrom = nullptr;
+ do {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB))
+ return true;
+ if (AddFalthroughFrom) {
+ if (!AddFalthroughFrom->isSuccessor(MBB))
+ AddFalthroughFrom->addSuccessor(MBB);
+ AddFalthroughFrom->normalizeSuccProbs();
+ AddFalthroughFrom = nullptr;
+ }
+ if (parseBasicBlock(*MBB, AddFalthroughFrom))
+ return true;
+ // The method 'parseBasicBlock' should parse the whole block until the next
+ // block or the end of file.
+ assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof));
+ } while (Token.isNot(MIToken::Eof));
+ return false;
+}
+
+bool MIParser::parse(MachineInstr *&MI) {
+ // Parse any register operands before '='
+ MachineOperand MO = MachineOperand::CreateImm(0);
+ SmallVector<ParsedMachineOperand, 8> Operands;
+ while (Token.isRegister() || Token.isRegisterFlag()) {
+ auto Loc = Token.location();
+ std::optional<unsigned> TiedDefIdx;
+ if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
+ return true;
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+ if (!Operands.empty() && expectAndConsume(MIToken::equal))
+ return true;
+
+ unsigned OpCode, Flags = 0;
+ if (Token.isError() || parseInstruction(OpCode, Flags))
+ return true;
+
+ // Parse the remaining machine operands.
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
+ Token.isNot(MIToken::kw_post_instr_symbol) &&
+ Token.isNot(MIToken::kw_heap_alloc_marker) &&
+ Token.isNot(MIToken::kw_pcsections) &&
+ Token.isNot(MIToken::kw_cfi_type) &&
+ Token.isNot(MIToken::kw_debug_location) &&
+ Token.isNot(MIToken::kw_debug_instr_number) &&
+ Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
+ auto Loc = Token.location();
+ std::optional<unsigned> TiedDefIdx;
+ if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
+ return true;
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ break;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ }
+
+ MCSymbol *PreInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_pre_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PreInstrSymbol))
+ return true;
+ MCSymbol *PostInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_post_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PostInstrSymbol))
+ return true;
+ MDNode *HeapAllocMarker = nullptr;
+ if (Token.is(MIToken::kw_heap_alloc_marker))
+ if (parseHeapAllocMarker(HeapAllocMarker))
+ return true;
+ MDNode *PCSections = nullptr;
+ if (Token.is(MIToken::kw_pcsections))
+ if (parsePCSections(PCSections))
+ return true;
+
+ unsigned CFIType = 0;
+ if (Token.is(MIToken::kw_cfi_type)) {
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'cfi-type'");
+ // getUnsigned is sufficient for 32-bit integers.
+ if (getUnsigned(CFIType))
+ return true;
+ lex();
+ // Lex past trailing comma if present.
+ if (Token.is(MIToken::comma))
+ lex();
+ }
+
+ unsigned InstrNum = 0;
+ if (Token.is(MIToken::kw_debug_instr_number)) {
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'debug-instr-number'");
+ if (getUnsigned(InstrNum))
+ return true;
+ lex();
+ // Lex past trailing comma if present.
+ if (Token.is(MIToken::comma))
+ lex();
+ }
+
+ DebugLoc DebugLocation;
+ if (Token.is(MIToken::kw_debug_location)) {
+ lex();
+ MDNode *Node = nullptr;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
+ } else
+ return error("expected a metadata node after 'debug-location'");
+ if (!isa<DILocation>(Node))
+ return error("referenced metadata is not a DILocation");
+ DebugLocation = DebugLoc(Node);
+ }
+
+ // Parse the machine memory operands.
+ SmallVector<MachineMemOperand *, 2> MemOperands;
+ if (Token.is(MIToken::coloncolon)) {
+ lex();
+ while (!Token.isNewlineOrEOF()) {
+ MachineMemOperand *MemOp = nullptr;
+ if (parseMachineMemoryOperand(MemOp))
+ return true;
+ MemOperands.push_back(MemOp);
+ if (Token.isNewlineOrEOF())
+ break;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine memory operand");
+ lex();
+ }
+ }
+
+ const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
+ if (!MCID.isVariadic()) {
+ // FIXME: Move the implicit operand verification to the machine verifier.
+ if (verifyImplicitOperands(Operands, MCID))
+ return true;
+ }
+
+ MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
+ MI->setFlags(Flags);
+
+ unsigned NumExplicitOps = 0;
+ for (const auto &Operand : Operands) {
+ bool IsImplicitOp = Operand.Operand.isReg() && Operand.Operand.isImplicit();
+ if (!IsImplicitOp) {
+ if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() &&
+ !Operand.Operand.isValidExcessOperand())
+ return error(Operand.Begin, "too many operands for instruction");
+
+ ++NumExplicitOps;
+ }
+
+ MI->addOperand(MF, Operand.Operand);
+ }
+
+ if (assignRegisterTies(*MI, Operands))
+ return true;
+ if (PreInstrSymbol)
+ MI->setPreInstrSymbol(MF, PreInstrSymbol);
+ if (PostInstrSymbol)
+ MI->setPostInstrSymbol(MF, PostInstrSymbol);
+ if (HeapAllocMarker)
+ MI->setHeapAllocMarker(MF, HeapAllocMarker);
+ if (PCSections)
+ MI->setPCSections(MF, PCSections);
+ if (CFIType)
+ MI->setCFIType(MF, CFIType);
+ if (!MemOperands.empty())
+ MI->setMemRefs(MF, MemOperands);
+ if (InstrNum)
+ MI->setDebugInstrNum(InstrNum);
+ return false;
+}
+
+bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
+ lex();
+ if (Token.isNot(MIToken::MachineBasicBlock))
+ return error("expected a machine basic block reference");
+ if (parseMBBReference(MBB))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error(
+ "expected end of string after the machine basic block reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneNamedRegister(Register &Reg) {
+ lex();
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) {
+ lex();
+ if (Token.isNot(MIToken::VirtualRegister))
+ return error("expected a virtual register");
+ if (parseVirtualRegister(Info))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneRegister(Register &Reg) {
+ lex();
+ if (Token.isNot(MIToken::NamedRegister) &&
+ Token.isNot(MIToken::VirtualRegister))
+ return error("expected either a named or virtual register");
+
+ VRegInfo *Info;
+ if (parseRegister(Reg, Info))
+ return true;
+
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneStackObject(int &FI) {
+ lex();
+ if (Token.isNot(MIToken::StackObject))
+ return error("expected a stack object");
+ if (parseStackFrameIndex(FI))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the stack object reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
+ lex();
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_diexpr)) {
+ if (parseDIExpression(Node))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
+ } else
+ return error("expected a metadata node");
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the metadata node");
+ return false;
+}
+
+bool MIParser::parseMachineMetadata() {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::equal))
+ return true;
+ bool IsDistinct = Token.is(MIToken::kw_distinct);
+ if (IsDistinct)
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+ lex();
+
+ MDNode *MD;
+ if (parseMDTuple(MD, IsDistinct))
+ return true;
+
+ auto FI = PFS.MachineForwardRefMDNodes.find(ID);
+ if (FI != PFS.MachineForwardRefMDNodes.end()) {
+ FI->second.first->replaceAllUsesWith(MD);
+ PFS.MachineForwardRefMDNodes.erase(FI);
+
+ assert(PFS.MachineMetadataNodes[ID] == MD && "Tracking VH didn't work");
+ } else {
+ if (PFS.MachineMetadataNodes.count(ID))
+ return error("Metadata id is already used");
+ PFS.MachineMetadataNodes[ID].reset(MD);
+ }
+
+ return false;
+}
+
+bool MIParser::parseMDTuple(MDNode *&MD, bool IsDistinct) {
+ SmallVector<Metadata *, 16> Elts;
+ if (parseMDNodeVector(Elts))
+ return true;
+ MD = (IsDistinct ? MDTuple::getDistinct
+ : MDTuple::get)(MF.getFunction().getContext(), Elts);
+ return false;
+}
+
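+// ::= '{' (<metadata> (',' <metadata>)*)? '}'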
+bool MIParser::parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
+ if (Token.isNot(MIToken::lbrace))
+ return error("expected '{' here");
+ lex();
+
+ if (Token.is(MIToken::rbrace)) {
+ lex();
+ return false;
+ }
+
+ do {
+ Metadata *MD;
+ if (parseMetadata(MD))
+ return true;
+
+ Elts.push_back(MD);
+
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ } while (true);
+
+ if (Token.isNot(MIToken::rbrace))
+ return error("expected end of metadata node");
+ lex();
+
+ return false;
+}
+
+// ::= !42
+// ::= !"string"
+bool MIParser::parseMetadata(Metadata *&MD) {
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected '!' here");
+ lex();
+
+ if (Token.is(MIToken::StringConstant)) {
+ std::string Str;
+ if (parseStringConstant(Str))
+ return true;
+ MD = MDString::get(MF.getFunction().getContext(), Str);
+ return false;
+ }
+
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+
+ SMLoc Loc = mapSMLoc(Token.location());
+
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ lex();
+
+ auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo != PFS.IRSlots.MetadataNodes.end()) {
+ MD = NodeInfo->second.get();
+ return false;
+ }
+ // Check machine metadata.
+ NodeInfo = PFS.MachineMetadataNodes.find(ID);
+ if (NodeInfo != PFS.MachineMetadataNodes.end()) {
+ MD = NodeInfo->second.get();
+ return false;
+ }
+ // Forward reference.
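+  // Create a temporary node for now; parseMachineMetadata replaces all of its
+  // uses with the real node once the definition has been parsed.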
+ auto &FwdRef = PFS.MachineForwardRefMDNodes[ID];
+ FwdRef = std::make_pair(
+ MDTuple::getTemporary(MF.getFunction().getContext(), std::nullopt), Loc);
+ PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get());
+ MD = FwdRef.first.get();
+
+ return false;
+}
+
+static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
+ assert(MO.isImplicit());
+ return MO.isDef() ? "implicit-def" : "implicit";
+}
+
+static std::string getRegisterName(const TargetRegisterInfo *TRI,
+ Register Reg) {
+ assert(Reg.isPhysical() && "expected phys reg");
+ return StringRef(TRI->getName(Reg)).lower();
+}
+
+/// Return true if the parsed machine operands contain a given machine operand.
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ for (const auto &I : Operands) {
+ if (ImplicitOperand.isIdenticalTo(I.Operand))
+ return true;
+ }
+ return false;
+}
+
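+/// Check that the parsed operands contain every implicit def and use required
+/// by the instruction's MCInstrDesc; call instructions are not verified.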
+bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+ const MCInstrDesc &MCID) {
+ if (MCID.isCall())
+ // We can't verify call instructions as they can contain arbitrary implicit
+ // register and register mask operands.
+ return false;
+
+ // Gather all the expected implicit operands.
+ SmallVector<MachineOperand, 4> ImplicitOperands;
+ for (MCPhysReg ImpDef : MCID.implicit_defs())
+ ImplicitOperands.push_back(MachineOperand::CreateReg(ImpDef, true, true));
+ for (MCPhysReg ImpUse : MCID.implicit_uses())
+ ImplicitOperands.push_back(MachineOperand::CreateReg(ImpUse, false, true));
+
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ for (const auto &I : ImplicitOperands) {
+ if (isImplicitOperandIn(I, Operands))
+ continue;
+ return error(Operands.empty() ? Token.location() : Operands.back().End,
+ Twine("missing implicit register operand '") +
+ printImplicitRegisterFlag(I) + " $" +
+ getRegisterName(TRI, I.getReg()) + "'");
+ }
+ return false;
+}
+
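+/// Parse the optional machine instruction flags followed by the instruction
+/// name, resolving the name to a target opcode.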
+bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
+ // Allow frame and fast math flags for OPCODE
+ while (Token.is(MIToken::kw_frame_setup) ||
+ Token.is(MIToken::kw_frame_destroy) ||
+ Token.is(MIToken::kw_nnan) ||
+ Token.is(MIToken::kw_ninf) ||
+ Token.is(MIToken::kw_nsz) ||
+ Token.is(MIToken::kw_arcp) ||
+ Token.is(MIToken::kw_contract) ||
+ Token.is(MIToken::kw_afn) ||
+ Token.is(MIToken::kw_reassoc) ||
+ Token.is(MIToken::kw_nuw) ||
+ Token.is(MIToken::kw_nsw) ||
+ Token.is(MIToken::kw_exact) ||
+ Token.is(MIToken::kw_nofpexcept) ||
+ Token.is(MIToken::kw_unpredictable)) {
+ // Mine frame and fast math flags
+ if (Token.is(MIToken::kw_frame_setup))
+ Flags |= MachineInstr::FrameSetup;
+ if (Token.is(MIToken::kw_frame_destroy))
+ Flags |= MachineInstr::FrameDestroy;
+ if (Token.is(MIToken::kw_nnan))
+ Flags |= MachineInstr::FmNoNans;
+ if (Token.is(MIToken::kw_ninf))
+ Flags |= MachineInstr::FmNoInfs;
+ if (Token.is(MIToken::kw_nsz))
+ Flags |= MachineInstr::FmNsz;
+ if (Token.is(MIToken::kw_arcp))
+ Flags |= MachineInstr::FmArcp;
+ if (Token.is(MIToken::kw_contract))
+ Flags |= MachineInstr::FmContract;
+ if (Token.is(MIToken::kw_afn))
+ Flags |= MachineInstr::FmAfn;
+ if (Token.is(MIToken::kw_reassoc))
+ Flags |= MachineInstr::FmReassoc;
+ if (Token.is(MIToken::kw_nuw))
+ Flags |= MachineInstr::NoUWrap;
+ if (Token.is(MIToken::kw_nsw))
+ Flags |= MachineInstr::NoSWrap;
+ if (Token.is(MIToken::kw_exact))
+ Flags |= MachineInstr::IsExact;
+ if (Token.is(MIToken::kw_nofpexcept))
+ Flags |= MachineInstr::NoFPExcept;
+ if (Token.is(MIToken::kw_unpredictable))
+ Flags |= MachineInstr::Unpredictable;
+
+ lex();
+ }
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected a machine instruction");
+ StringRef InstrName = Token.stringValue();
+ if (PFS.Target.parseInstrName(InstrName, OpCode))
+ return error(Twine("unknown machine instruction name '") + InstrName + "'");
+ lex();
+ return false;
+}
+
+bool MIParser::parseNamedRegister(Register &Reg) {
+ assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
+ StringRef Name = Token.stringValue();
+ if (PFS.Target.getRegisterByName(Name, Reg))
+ return error(Twine("unknown register name '") + Name + "'");
+ return false;
+}
+
+bool MIParser::parseNamedVirtualRegister(VRegInfo *&Info) {
+ assert(Token.is(MIToken::NamedVirtualRegister) && "Expected NamedVReg token");
+ StringRef Name = Token.stringValue();
+ // TODO: Check that the VReg name is not the same as a physical register name.
+ // If it is, then print a warning (when warnings are implemented).
+ Info = &PFS.getVRegInfoNamed(Name);
+ return false;
+}
+
+bool MIParser::parseVirtualRegister(VRegInfo *&Info) {
+ if (Token.is(MIToken::NamedVirtualRegister))
+ return parseNamedVirtualRegister(Info);
+ assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ Info = &PFS.getVRegInfo(ID);
+ return false;
+}
+
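+// Parse a register reference: '_' (no register), a named physical register,
+// or a (named) virtual register.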
+bool MIParser::parseRegister(Register &Reg, VRegInfo *&Info) {
+ switch (Token.kind()) {
+ case MIToken::underscore:
+ Reg = 0;
+ return false;
+ case MIToken::NamedRegister:
+ return parseNamedRegister(Reg);
+ case MIToken::NamedVirtualRegister:
+ case MIToken::VirtualRegister:
+ if (parseVirtualRegister(Info))
+ return true;
+ Reg = Info->VReg;
+ return false;
+ // TODO: Parse other register kinds.
+ default:
+ llvm_unreachable("The current token should be a register");
+ }
+}
+
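+/// Parse a register class or register bank specification for a virtual
+/// register, diagnosing conflicts with any previously recorded class or bank.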
+bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
+ if (Token.isNot(MIToken::Identifier) && Token.isNot(MIToken::underscore))
+ return error("expected '_', register class, or register bank name");
+ StringRef::iterator Loc = Token.location();
+ StringRef Name = Token.stringValue();
+
+ // Was it a register class?
+ const TargetRegisterClass *RC = PFS.Target.getRegClass(Name);
+ if (RC) {
+ lex();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::NORMAL:
+ RegInfo.Kind = VRegInfo::NORMAL;
+ if (RegInfo.Explicit && RegInfo.D.RC != RC) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ return error(Loc, Twine("conflicting register classes, previously: ") +
+ Twine(TRI.getRegClassName(RegInfo.D.RC)));
+ }
+ RegInfo.D.RC = RC;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ return error(Loc, "register class specification on generic register");
+ }
+ llvm_unreachable("Unexpected register kind");
+ }
+
+ // Should be a register bank or a generic register.
+ const RegisterBank *RegBank = nullptr;
+ if (Name != "_") {
+ RegBank = PFS.Target.getRegBank(Name);
+ if (!RegBank)
+ return error(Loc, "expected '_', register class, or register bank name");
+ }
+
+ lex();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ RegInfo.Kind = RegBank ? VRegInfo::REGBANK : VRegInfo::GENERIC;
+ if (RegInfo.Explicit && RegInfo.D.RegBank != RegBank)
+ return error(Loc, "conflicting generic register banks");
+ RegInfo.D.RegBank = RegBank;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::NORMAL:
+ return error(Loc, "register bank specification on normal register");
+ }
+ llvm_unreachable("Unexpected register kind");
+}
+
+bool MIParser::parseRegisterFlag(unsigned &Flags) {
+ const unsigned OldFlags = Flags;
+ switch (Token.kind()) {
+ case MIToken::kw_implicit:
+ Flags |= RegState::Implicit;
+ break;
+ case MIToken::kw_implicit_define:
+ Flags |= RegState::ImplicitDefine;
+ break;
+ case MIToken::kw_def:
+ Flags |= RegState::Define;
+ break;
+ case MIToken::kw_dead:
+ Flags |= RegState::Dead;
+ break;
+ case MIToken::kw_killed:
+ Flags |= RegState::Kill;
+ break;
+ case MIToken::kw_undef:
+ Flags |= RegState::Undef;
+ break;
+ case MIToken::kw_internal:
+ Flags |= RegState::InternalRead;
+ break;
+ case MIToken::kw_early_clobber:
+ Flags |= RegState::EarlyClobber;
+ break;
+ case MIToken::kw_debug_use:
+ Flags |= RegState::Debug;
+ break;
+ case MIToken::kw_renamable:
+ Flags |= RegState::Renamable;
+ break;
+ default:
+ llvm_unreachable("The current token should be a register flag");
+ }
+ if (OldFlags == Flags)
+    // If the flags weren't modified, the same flag was specified more than
+    // once.
+ return error("duplicate '" + Token.stringValue() + "' register flag");
+ lex();
+ return false;
+}
+
+bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
+ assert(Token.is(MIToken::dot));
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected a subregister index after '.'");
+ auto Name = Token.stringValue();
+ SubReg = PFS.Target.getSubRegIndex(Name);
+ if (!SubReg)
+ return error(Twine("use of unknown subregister index '") + Name + "'");
+ lex();
+ return false;
+}
+
+bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
+ if (!consumeIfPresent(MIToken::kw_tied_def))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'tied-def'");
+ if (getUnsigned(TiedDefIdx))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ return false;
+}
+
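+/// Tie each use operand that carried a 'tied-def' index to the corresponding
+/// def operand, rejecting out-of-range, non-def, or already-tied targets.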
+bool MIParser::assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+ if (!Operands[I].TiedDefIdx)
+ continue;
+ // The parser ensures that this operand is a register use, so we just have
+ // to check the tied-def operand.
+ unsigned DefIdx = *Operands[I].TiedDefIdx;
+ if (DefIdx >= E)
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '" +
+ Twine(DefIdx) + "'; instruction has only ") +
+ Twine(E) + " operands");
+ const auto &DefOperand = Operands[DefIdx].Operand;
+ if (!DefOperand.isReg() || !DefOperand.isDef())
+ // FIXME: add note with the def operand.
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '") +
+ Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) +
+ " isn't a defined register");
+ // Check that the tied-def operand wasn't tied elsewhere.
+ for (const auto &TiedPair : TiedRegisterPairs) {
+ if (TiedPair.first == DefIdx)
+ return error(Operands[I].Begin,
+ Twine("the tied-def operand #") + Twine(DefIdx) +
+ " is already tied with another register operand");
+ }
+ TiedRegisterPairs.push_back(std::make_pair(DefIdx, I));
+ }
+ // FIXME: Verify that for non INLINEASM instructions, the def and use tied
+ // indices must be less than tied max.
+ for (const auto &TiedPair : TiedRegisterPairs)
+ MI.tieOperands(TiedPair.first, TiedPair.second);
+ return false;
+}
+
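+/// Parse a register operand: optional register flags, the register itself,
+/// then an optional subregister index ('.'), register class or bank (':'),
+/// and a parenthesized tied-def index or low-level type.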
+bool MIParser::parseRegisterOperand(MachineOperand &Dest,
+ std::optional<unsigned> &TiedDefIdx,
+ bool IsDef) {
+ unsigned Flags = IsDef ? RegState::Define : 0;
+ while (Token.isRegisterFlag()) {
+ if (parseRegisterFlag(Flags))
+ return true;
+ }
+ if (!Token.isRegister())
+ return error("expected a register after register flags");
+ Register Reg;
+ VRegInfo *RegInfo;
+ if (parseRegister(Reg, RegInfo))
+ return true;
+ lex();
+ unsigned SubReg = 0;
+ if (Token.is(MIToken::dot)) {
+ if (parseSubRegisterIndex(SubReg))
+ return true;
+ if (!Reg.isVirtual())
+ return error("subregister index expects a virtual register");
+ }
+ if (Token.is(MIToken::colon)) {
+ if (!Reg.isVirtual())
+ return error("register class specification expects a virtual register");
+ lex();
+ if (parseRegisterClassOrBank(*RegInfo))
+ return true;
+ }
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if ((Flags & RegState::Define) == 0) {
+ if (consumeIfPresent(MIToken::lparen)) {
+ unsigned Idx;
+ if (!parseRegisterTiedDefIndex(Idx))
+ TiedDefIdx = Idx;
+ else {
+ // Try a redundant low-level type.
+ LLT Ty;
+ if (parseLowLevelType(Token.location(), Ty))
+ return error("expected tied-def or low-level type after '('");
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
+ return error("inconsistent type for generic virtual register");
+
+ MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
+ MRI.setType(Reg, Ty);
+ }
+ }
+ } else if (consumeIfPresent(MIToken::lparen)) {
+    // Virtual registers may have a type with GlobalISel.
+ if (!Reg.isVirtual())
+ return error("unexpected type on physical register");
+
+ LLT Ty;
+ if (parseLowLevelType(Token.location(), Ty))
+ return true;
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
+ return error("inconsistent type for generic virtual register");
+
+ MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
+ MRI.setType(Reg, Ty);
+ } else if (Reg.isVirtual()) {
+ // Generic virtual registers must have a type.
+ // If we end up here this means the type hasn't been specified and
+ // this is bad!
+ if (RegInfo->Kind == VRegInfo::GENERIC ||
+ RegInfo->Kind == VRegInfo::REGBANK)
+ return error("generic virtual registers must have a type");
+ }
+
+ if (Flags & RegState::Define) {
+ if (Flags & RegState::Kill)
+ return error("cannot have a killed def operand");
+ } else {
+ if (Flags & RegState::Dead)
+ return error("cannot have a dead use operand");
+ }
+
+ Dest = MachineOperand::CreateReg(
+ Reg, Flags & RegState::Define, Flags & RegState::Implicit,
+ Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
+ Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
+ Flags & RegState::InternalRead, Flags & RegState::Renamable);
+
+ return false;
+}
+
+bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::IntegerLiteral));
+ const APSInt &Int = Token.integerValue();
+ if (auto SImm = Int.trySExtValue(); Int.isSigned() && SImm.has_value())
+ Dest = MachineOperand::CreateImm(*SImm);
+ else if (auto UImm = Int.tryZExtValue(); !Int.isSigned() && UImm.has_value())
+ Dest = MachineOperand::CreateImm(*UImm);
+ else
+ return error("integer literal is too large to be an immediate operand");
+ lex();
+ return false;
+}
+
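+/// Parse a target-specific immediate mnemonic that starts with '.', using the
+/// target's MIRFormatter to decode it into an immediate operand.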
+bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
+ const unsigned OpIdx,
+ MachineOperand &Dest,
+ const MIRFormatter &MF) {
+ assert(Token.is(MIToken::dot));
+ auto Loc = Token.location(); // record start position
+ size_t Len = 1; // for "."
+ lex();
+
+  // Handle the case where the mnemonic starts with a number.
+ if (Token.is(MIToken::IntegerLiteral)) {
+ Len += Token.range().size();
+ lex();
+ }
+
+ StringRef Src;
+ if (Token.is(MIToken::comma))
+ Src = StringRef(Loc, Len);
+ else {
+ assert(Token.is(MIToken::Identifier));
+ Src = StringRef(Loc, Len + Token.stringValue().size());
+ }
+ int64_t Val;
+ if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
+ [this](StringRef::iterator Loc, const Twine &Msg)
+ -> bool { return error(Loc, Msg); }))
+ return true;
+
+ Dest = MachineOperand::CreateImm(Val);
+ if (!Token.is(MIToken::comma))
+ lex();
+ return false;
+}
+
+static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ PerFunctionMIParsingState &PFS, const Constant *&C,
+ ErrorCallbackType ErrCB) {
+ auto Source = StringValue.str(); // The source has to be null terminated.
+ SMDiagnostic Err;
+ C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(),
+ &PFS.IRSlots);
+ if (!C)
+ return ErrCB(Loc + Err.getColumnNo(), Err.getMessage());
+ return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C) {
+ return ::parseIRConstant(
+ Loc, StringValue, PFS, C,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
+ if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
+ return true;
+ lex();
+ return false;
+}
+
+// See LLT implementation for bit size limits.
+static bool verifyScalarSize(uint64_t Size) {
+ return Size != 0 && isUInt<16>(Size);
+}
+
+static bool verifyVectorElementCount(uint64_t NumElts) {
+ return NumElts != 0 && isUInt<16>(NumElts);
+}
+
+static bool verifyAddrSpace(uint64_t AddrSpace) {
+ return isUInt<24>(AddrSpace);
+}
+
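+// Parse a GlobalISel low-level type.
+// ::= sN | pA | '<' M 'x' sN '>' | '<' M 'x' pA '>'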
+bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
+ if (Token.range().front() == 's' || Token.range().front() == 'p') {
+ StringRef SizeStr = Token.range().drop_front();
+ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
+ return error("expected integers after 's'/'p' type character");
+ }
+
+ if (Token.range().front() == 's') {
+ auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+
+ Ty = LLT::scalar(ScalarSize);
+ lex();
+ return false;
+ } else if (Token.range().front() == 'p') {
+ const DataLayout &DL = MF.getDataLayout();
+ uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyAddrSpace(AS))
+ return error("invalid address space number");
+
+ Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+ lex();
+ return false;
+ }
+
+ // Now we're looking for a vector.
+ if (Token.isNot(MIToken::less))
+ return error(Loc,
+ "expected sN, pA, <M x sN>, or <M x pA> for GlobalISel type");
+ lex();
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ uint64_t NumElements = Token.integerValue().getZExtValue();
+ if (!verifyVectorElementCount(NumElements))
+ return error("invalid number of vector elements");
+
+ lex();
+
+ if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ lex();
+
+ if (Token.range().front() != 's' && Token.range().front() != 'p')
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ StringRef SizeStr = Token.range().drop_front();
+ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
+ return error("expected integers after 's'/'p' type character");
+
+ if (Token.range().front() == 's') {
+ auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+ Ty = LLT::scalar(ScalarSize);
+ } else if (Token.range().front() == 'p') {
+ const DataLayout &DL = MF.getDataLayout();
+ uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyAddrSpace(AS))
+ return error("invalid address space number");
+
+ Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+ } else
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ lex();
+
+ if (Token.isNot(MIToken::greater))
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ lex();
+
+ Ty = LLT::fixed_vector(NumElements, Ty);
+ return false;
+}
+
+bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::Identifier));
+ StringRef TypeStr = Token.range();
+ if (TypeStr.front() != 'i' && TypeStr.front() != 's' &&
+ TypeStr.front() != 'p')
+ return error(
+ "a typed immediate operand should start with one of 'i', 's', or 'p'");
+ StringRef SizeStr = Token.range().drop_front();
+ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
+ return error("expected integers after 'i'/'s'/'p' type character");
+
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral)) {
+ if (Token.isNot(MIToken::Identifier) ||
+ !(Token.range() == "true" || Token.range() == "false"))
+ return error("expected an integer literal");
+ }
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C));
+ return false;
+}
+
+bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::FloatingPointLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
+ return error("expected a floating point literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C));
+ return false;
+}
+
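+// Convert a hexadecimal literal token into an APInt. Returns true if the
+// literal is not a plain hex integer, e.g. a floating point literal with a
+// special prefix.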
+static bool getHexUint(const MIToken &Token, APInt &Result) {
+ assert(Token.is(MIToken::HexLiteral));
+ StringRef S = Token.range();
+ assert(S[0] == '0' && tolower(S[1]) == 'x');
+ // This could be a floating point literal with a special prefix.
+ if (!isxdigit(S[2]))
+ return true;
+ StringRef V = S.substr(2);
+ APInt A(V.size()*4, V, 16);
+
+ // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
+ // sure it isn't the case before constructing result.
+ unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
+ Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
+ return false;
+}
+
+static bool getUnsigned(const MIToken &Token, unsigned &Result,
+ ErrorCallbackType ErrCB) {
+ if (Token.hasIntegerValue()) {
+ const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
+ uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
+ if (Val64 == Limit)
+ return ErrCB(Token.location(), "expected 32-bit integer (too large)");
+ Result = Val64;
+ return false;
+ }
+ if (Token.is(MIToken::HexLiteral)) {
+ APInt A;
+ if (getHexUint(Token, A))
+ return true;
+ if (A.getBitWidth() > 32)
+ return ErrCB(Token.location(), "expected 32-bit integer (too large)");
+ Result = A.getZExtValue();
+ return false;
+ }
+ return true;
+}
+
+bool MIParser::getUnsigned(unsigned &Result) {
+ return ::getUnsigned(
+ Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
+bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
+ assert(Token.is(MIToken::MachineBasicBlock) ||
+ Token.is(MIToken::MachineBasicBlockLabel));
+ unsigned Number;
+ if (getUnsigned(Number))
+ return true;
+ auto MBBInfo = PFS.MBBSlots.find(Number);
+ if (MBBInfo == PFS.MBBSlots.end())
+ return error(Twine("use of undefined machine basic block #") +
+ Twine(Number));
+ MBB = MBBInfo->second;
+ // TODO: Only parse the name if it's a MachineBasicBlockLabel. Deprecate once
+ // we drop the <irname> from the bb.<id>.<irname> format.
+ if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName())
+ return error(Twine("the name of machine basic block #") + Twine(Number) +
+ " isn't '" + Token.stringValue() + "'");
+ return false;
+}
+
+bool MIParser::parseMBBOperand(MachineOperand &Dest) {
+ MachineBasicBlock *MBB;
+ if (parseMBBReference(MBB))
+ return true;
+ Dest = MachineOperand::CreateMBB(MBB);
+ lex();
+ return false;
+}
+
+bool MIParser::parseStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::StackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.StackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.StackObjectSlots.end())
+ return error(Twine("use of undefined stack object '%stack.") + Twine(ID) +
+ "'");
+ StringRef Name;
+ if (const auto *Alloca =
+ MF.getFrameInfo().getObjectAllocation(ObjectInfo->second))
+ Name = Alloca->getName();
+ if (!Token.stringValue().empty() && Token.stringValue() != Name)
+ return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
+ "' isn't '" + Token.stringValue() + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseFixedStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::FixedStackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.FixedStackObjectSlots.end())
+ return error(Twine("use of undefined fixed stack object '%fixed-stack.") +
+ Twine(ID) + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+static bool parseGlobalValue(const MIToken &Token,
+ PerFunctionMIParsingState &PFS, GlobalValue *&GV,
+ ErrorCallbackType ErrCB) {
+ switch (Token.kind()) {
+ case MIToken::NamedGlobalValue: {
+ const Module *M = PFS.MF.getFunction().getParent();
+ GV = M->getNamedValue(Token.stringValue());
+ if (!GV)
+ return ErrCB(Token.location(), Twine("use of undefined global value '") +
+ Token.range() + "'");
+ break;
+ }
+ case MIToken::GlobalValue: {
+ unsigned GVIdx;
+ if (getUnsigned(Token, GVIdx, ErrCB))
+ return true;
+ if (GVIdx >= PFS.IRSlots.GlobalValues.size())
+ return ErrCB(Token.location(), Twine("use of undefined global value '@") +
+ Twine(GVIdx) + "'");
+ GV = PFS.IRSlots.GlobalValues[GVIdx];
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be a global value");
+ }
+ return false;
+}
+
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
+ return ::parseGlobalValue(
+ Token, PFS, GV,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ lex();
+ Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ConstantPoolItem));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ConstantInfo = PFS.ConstantPoolSlots.find(ID);
+ if (ConstantInfo == PFS.ConstantPoolSlots.end())
+ return error("use of undefined constant '%const." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::JumpTableIndex));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID);
+ if (JumpTableEntryInfo == PFS.JumpTableSlots.end())
+ return error("use of undefined jump table '%jump-table." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second);
+ return false;
+}
+
+bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ExternalSymbol));
+ const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateES(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::MCSymbol));
+ MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateMCSymbol(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::SubRegisterIndex));
+ StringRef Name = Token.stringValue();
+ unsigned SubRegIndex = PFS.Target.getSubRegIndex(Token.stringValue());
+ if (SubRegIndex == 0)
+ return error(Twine("unknown subregister index '") + Name + "'");
+ lex();
+ Dest = MachineOperand::CreateImm(SubRegIndex);
+ return false;
+}
+
+bool MIParser::parseMDNode(MDNode *&Node) {
+ assert(Token.is(MIToken::exclaim));
+
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) {
+ NodeInfo = PFS.MachineMetadataNodes.find(ID);
+ if (NodeInfo == PFS.MachineMetadataNodes.end())
+ return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+ }
+ lex();
+ Node = NodeInfo->second.get();
+ return false;
+}
+
+bool MIParser::parseDIExpression(MDNode *&Expr) {
+ assert(Token.is(MIToken::md_diexpr));
+ lex();
+
+ // FIXME: Share this parsing with the IL parser.
+ SmallVector<uint64_t, 8> Elements;
+
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ if (Token.isNot(MIToken::rparen)) {
+ do {
+ if (Token.is(MIToken::Identifier)) {
+ if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) {
+ lex();
+ Elements.push_back(Op);
+ continue;
+ }
+ if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) {
+ lex();
+ Elements.push_back(Enc);
+ continue;
+ }
+ return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
+ }
+
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+
+ auto &U = Token.integerValue();
+ if (U.ugt(UINT64_MAX))
+ return error("element too large, limit is " + Twine(UINT64_MAX));
+ Elements.push_back(U.getZExtValue());
+ lex();
+
+ } while (consumeIfPresent(MIToken::comma));
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ Expr = DIExpression::get(MF.getFunction().getContext(), Elements);
+ return false;
+}
+
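+// Parse an inline '!DILocation(...)' node; 'line' and 'scope' are required,
+// while 'column', 'inlinedAt', and 'isImplicitCode' are optional.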
+bool MIParser::parseDILocation(MDNode *&Loc) {
+ assert(Token.is(MIToken::md_dilocation));
+ lex();
+
+ bool HaveLine = false;
+ unsigned Line = 0;
+ unsigned Column = 0;
+ MDNode *Scope = nullptr;
+ MDNode *InlinedAt = nullptr;
+ bool ImplicitCode = false;
+
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ if (Token.isNot(MIToken::rparen)) {
+ do {
+ if (Token.is(MIToken::Identifier)) {
+ if (Token.stringValue() == "line") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Line = Token.integerValue().getZExtValue();
+ HaveLine = true;
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "column") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Column = Token.integerValue().getZExtValue();
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "scope") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (parseMDNode(Scope))
+ return error("expected metadata node");
+ if (!isa<DIScope>(Scope))
+ return error("expected DIScope node");
+ continue;
+ }
+ if (Token.stringValue() == "inlinedAt") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(InlinedAt))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(InlinedAt))
+ return true;
+ } else
+ return error("expected metadata node");
+ if (!isa<DILocation>(InlinedAt))
+ return error("expected DILocation node");
+ continue;
+ }
+ if (Token.stringValue() == "isImplicitCode") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (!Token.is(MIToken::Identifier))
+ return error("expected true/false");
+ // As far as I can see, we don't have any existing need for parsing
+ // true/false in MIR yet. Do it ad-hoc until there's something else
+ // that needs it.
+ if (Token.stringValue() == "true")
+ ImplicitCode = true;
+ else if (Token.stringValue() == "false")
+ ImplicitCode = false;
+ else
+ return error("expected true/false");
+ lex();
+ continue;
+ }
+ }
+ return error(Twine("invalid DILocation argument '") +
+ Token.stringValue() + "'");
+ } while (consumeIfPresent(MIToken::comma));
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (!HaveLine)
+ return error("DILocation requires line number");
+ if (!Scope)
+ return error("DILocation requires a scope");
+
+ Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope,
+ InlinedAt, ImplicitCode);
+ return false;
+}
+
+bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
+ MDNode *Node = nullptr;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_diexpr)) {
+ if (parseDIExpression(Node))
+ return true;
+ }
+ Dest = MachineOperand::CreateMetadata(Node);
+ return false;
+}
+
+bool MIParser::parseCFIOffset(int &Offset) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected a cfi offset");
+ if (Token.integerValue().getSignificantBits() > 32)
+ return error("expected a 32 bit integer (the cfi offset is too large)");
+ Offset = (int)Token.integerValue().getExtValue();
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIRegister(Register &Reg) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a cfi register");
+ Register LLVMReg;
+ if (parseNamedRegister(LLVMReg))
+ return true;
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true);
+ if (DwarfReg < 0)
+ return error("invalid DWARF register");
+ Reg = (unsigned)DwarfReg;
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIAddressSpace(unsigned &AddressSpace) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected a cfi address space literal");
+ if (Token.integerValue().isSigned())
+ return error("expected an unsigned integer (cfi address space)");
+ AddressSpace = Token.integerValue().getZExtValue();
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIEscapeValues(std::string &Values) {
+ do {
+ if (Token.isNot(MIToken::HexLiteral))
+ return error("expected a hexadecimal literal");
+ unsigned Value;
+ if (getUnsigned(Value))
+ return true;
+ if (Value > UINT8_MAX)
+ return error("expected a 8-bit integer (too large)");
+ Values.push_back(static_cast<uint8_t>(Value));
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+ return false;
+}
+
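+/// Parse a CFI operand, registering the corresponding MCCFIInstruction with
+/// the machine function and producing a CFI index operand.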
+bool MIParser::parseCFIOperand(MachineOperand &Dest) {
+ auto Kind = Token.kind();
+ lex();
+ int Offset;
+ Register Reg;
+ unsigned AddressSpace;
+ unsigned CFIIndex;
+ switch (Kind) {
+ case MIToken::kw_cfi_same_value:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_offset:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+ break;
+ case MIToken::kw_cfi_rel_offset:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createRelOffset(nullptr, Reg, Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa_register:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_def_cfa_offset:
+ if (parseCFIOffset(Offset))
+ return true;
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ break;
+ case MIToken::kw_cfi_adjust_cfa_offset:
+ if (parseCFIOffset(Offset))
+ return true;
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, Offset));
+ break;
+ case MIToken::kw_cfi_llvm_def_aspace_cfa:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset) || expectAndConsume(MIToken::comma) ||
+ parseCFIAddressSpace(AddressSpace))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMDefAspaceCfa(
+ nullptr, Reg, Offset, AddressSpace, SMLoc()));
+ break;
+ case MIToken::kw_cfi_remember_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+ break;
+ case MIToken::kw_cfi_restore:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_restore_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+ break;
+ case MIToken::kw_cfi_undefined:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_register: {
+ Register Reg2;
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIRegister(Reg2))
+ return true;
+
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRegister(nullptr, Reg, Reg2));
+ break;
+ }
+ case MIToken::kw_cfi_window_save:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
+ break;
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ break;
+ case MIToken::kw_cfi_escape: {
+ std::string Values;
+ if (parseCFIEscapeValues(Values))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, Values));
+ break;
+ }
+ default:
+ // TODO: Parse the other CFI operands.
+ llvm_unreachable("The current token should be a cfi operand");
+ }
+ Dest = MachineOperand::CreateCFIIndex(CFIIndex);
+ return false;
+}
+
+bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRBlock: {
+ BB = dyn_cast_or_null<BasicBlock>(
+ F.getValueSymbolTable()->lookup(Token.stringValue()));
+ if (!BB)
+ return error(Twine("use of undefined IR block '") + Token.range() + "'");
+ break;
+ }
+ case MIToken::IRBlock: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F));
+ if (!BB)
+ return error(Twine("use of undefined IR block '%ir-block.") +
+ Twine(SlotNumber) + "'");
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR block reference");
+ }
+ return false;
+}
+
+bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_blockaddress));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected a global value");
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ auto *F = dyn_cast<Function>(GV);
+ if (!F)
+ return error("expected an IR function reference");
+ lex();
+ if (expectAndConsume(MIToken::comma))
+ return true;
+ BasicBlock *BB = nullptr;
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+ return error("expected an IR block reference");
+ if (parseIRBlock(BB, *F))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_intrinsic));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax intrinsic(@llvm.whatever)");
+
+ if (Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected syntax intrinsic(@llvm.whatever)");
+
+ std::string Name = std::string(Token.stringValue());
+ lex();
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected ')' to terminate intrinsic name");
+
+ // Find out what intrinsic we're dealing with, first try the global namespace
+ // and then the target's private intrinsics if that fails.
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
+ Intrinsic::ID ID = Function::lookupIntrinsicID(Name);
+ if (ID == Intrinsic::not_intrinsic && TII)
+ ID = static_cast<Intrinsic::ID>(TII->lookupName(Name));
+
+ if (ID == Intrinsic::not_intrinsic)
+ return error("unknown intrinsic name");
+ Dest = MachineOperand::CreateIntrinsicID(ID);
+
+ return false;
+}
+
+bool MIParser::parsePredicateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_intpred) || Token.is(MIToken::kw_floatpred));
+ bool IsFloat = Token.is(MIToken::kw_floatpred);
+ lex();
+
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax intpred(whatever) or floatpred(whatever");
+
+ if (Token.isNot(MIToken::Identifier))
+ return error("whatever");
+
+ CmpInst::Predicate Pred;
+ if (IsFloat) {
+ Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue())
+ .Case("false", CmpInst::FCMP_FALSE)
+ .Case("oeq", CmpInst::FCMP_OEQ)
+ .Case("ogt", CmpInst::FCMP_OGT)
+ .Case("oge", CmpInst::FCMP_OGE)
+ .Case("olt", CmpInst::FCMP_OLT)
+ .Case("ole", CmpInst::FCMP_OLE)
+ .Case("one", CmpInst::FCMP_ONE)
+ .Case("ord", CmpInst::FCMP_ORD)
+ .Case("uno", CmpInst::FCMP_UNO)
+ .Case("ueq", CmpInst::FCMP_UEQ)
+ .Case("ugt", CmpInst::FCMP_UGT)
+ .Case("uge", CmpInst::FCMP_UGE)
+ .Case("ult", CmpInst::FCMP_ULT)
+ .Case("ule", CmpInst::FCMP_ULE)
+ .Case("une", CmpInst::FCMP_UNE)
+ .Case("true", CmpInst::FCMP_TRUE)
+ .Default(CmpInst::BAD_FCMP_PREDICATE);
+ if (!CmpInst::isFPPredicate(Pred))
+ return error("invalid floating-point predicate");
+ } else {
+ Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue())
+ .Case("eq", CmpInst::ICMP_EQ)
+ .Case("ne", CmpInst::ICMP_NE)
+ .Case("sgt", CmpInst::ICMP_SGT)
+ .Case("sge", CmpInst::ICMP_SGE)
+ .Case("slt", CmpInst::ICMP_SLT)
+ .Case("sle", CmpInst::ICMP_SLE)
+ .Case("ugt", CmpInst::ICMP_UGT)
+ .Case("uge", CmpInst::ICMP_UGE)
+ .Case("ult", CmpInst::ICMP_ULT)
+ .Case("ule", CmpInst::ICMP_ULE)
+ .Default(CmpInst::BAD_ICMP_PREDICATE);
+ if (!CmpInst::isIntPredicate(Pred))
+ return error("invalid integer predicate");
+ }
+
+ lex();
+ Dest = MachineOperand::CreatePredicate(Pred);
+ if (expectAndConsume(MIToken::rparen))
+ return error("predicate should be terminated by ')'.");
+
+ return false;
+}
+
+bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_shufflemask));
+
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax shufflemask(<integer or undef>, ...)");
+
+ SmallVector<int, 32> ShufMask;
+ do {
+ if (Token.is(MIToken::kw_undef)) {
+ ShufMask.push_back(-1);
+ } else if (Token.is(MIToken::IntegerLiteral)) {
+ const APSInt &Int = Token.integerValue();
+ ShufMask.push_back(Int.getExtValue());
+ } else
+ return error("expected integer constant");
+
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("shufflemask should be terminated by ')'.");
+
+ ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask);
+ Dest = MachineOperand::CreateShuffleMask(MaskAlloc);
+ return false;
+}
+
+bool MIParser::parseDbgInstrRefOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_dbg_instr_ref));
+
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)");
+
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isNegative())
+ return error("expected unsigned integer for instruction index");
+ uint64_t InstrIdx = Token.integerValue().getZExtValue();
+ assert(InstrIdx <= std::numeric_limits<unsigned>::max() &&
+ "Instruction reference's instruction index is too large");
+ lex();
+
+ if (expectAndConsume(MIToken::comma))
+ return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)");
+
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isNegative())
+ return error("expected unsigned integer for operand index");
+ uint64_t OpIdx = Token.integerValue().getZExtValue();
+ assert(OpIdx <= std::numeric_limits<unsigned>::max() &&
+ "Instruction reference's operand index is too large");
+ lex();
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)");
+
+ Dest = MachineOperand::CreateDbgInstrRef(InstrIdx, OpIdx);
+ return false;
+}
+
+bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_target_index));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target index");
+ int Index = 0;
+ if (PFS.Target.getTargetIndex(Token.stringValue(), Index))
+ return error("use of undefined target index '" + Token.stringValue() + "'");
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask");
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ uint32_t *Mask = MF.allocateRegMask();
+ do {
+ if (Token.isNot(MIToken::rparen)) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ Register Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ }
+
+ // TODO: Report an error if the same register is used more than once.
+ } while (consumeIfPresent(MIToken::comma));
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegMask(Mask);
+ return false;
+}
+
+bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_liveout));
+ uint32_t *Mask = MF.allocateRegMask();
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ Register Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegLiveOut(Mask);
+ return false;
+}
+
+bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest,
+ std::optional<unsigned> &TiedDefIdx) {
+ switch (Token.kind()) {
+ case MIToken::kw_implicit:
+ case MIToken::kw_implicit_define:
+ case MIToken::kw_def:
+ case MIToken::kw_dead:
+ case MIToken::kw_killed:
+ case MIToken::kw_undef:
+ case MIToken::kw_internal:
+ case MIToken::kw_early_clobber:
+ case MIToken::kw_debug_use:
+ case MIToken::kw_renamable:
+ case MIToken::underscore:
+ case MIToken::NamedRegister:
+ case MIToken::VirtualRegister:
+ case MIToken::NamedVirtualRegister:
+ return parseRegisterOperand(Dest, TiedDefIdx);
+ case MIToken::IntegerLiteral:
+ return parseImmediateOperand(Dest);
+ case MIToken::kw_half:
+ case MIToken::kw_float:
+ case MIToken::kw_double:
+ case MIToken::kw_x86_fp80:
+ case MIToken::kw_fp128:
+ case MIToken::kw_ppc_fp128:
+ return parseFPImmediateOperand(Dest);
+ case MIToken::MachineBasicBlock:
+ return parseMBBOperand(Dest);
+ case MIToken::StackObject:
+ return parseStackObjectOperand(Dest);
+ case MIToken::FixedStackObject:
+ return parseFixedStackObjectOperand(Dest);
+ case MIToken::GlobalValue:
+ case MIToken::NamedGlobalValue:
+ return parseGlobalAddressOperand(Dest);
+ case MIToken::ConstantPoolItem:
+ return parseConstantPoolIndexOperand(Dest);
+ case MIToken::JumpTableIndex:
+ return parseJumpTableIndexOperand(Dest);
+ case MIToken::ExternalSymbol:
+ return parseExternalSymbolOperand(Dest);
+ case MIToken::MCSymbol:
+ return parseMCSymbolOperand(Dest);
+ case MIToken::SubRegisterIndex:
+ return parseSubRegisterIndexOperand(Dest);
+ case MIToken::md_diexpr:
+ case MIToken::exclaim:
+ return parseMetadataOperand(Dest);
+ case MIToken::kw_cfi_same_value:
+ case MIToken::kw_cfi_offset:
+ case MIToken::kw_cfi_rel_offset:
+ case MIToken::kw_cfi_def_cfa_register:
+ case MIToken::kw_cfi_def_cfa_offset:
+ case MIToken::kw_cfi_adjust_cfa_offset:
+ case MIToken::kw_cfi_escape:
+ case MIToken::kw_cfi_def_cfa:
+ case MIToken::kw_cfi_llvm_def_aspace_cfa:
+ case MIToken::kw_cfi_register:
+ case MIToken::kw_cfi_remember_state:
+ case MIToken::kw_cfi_restore:
+ case MIToken::kw_cfi_restore_state:
+ case MIToken::kw_cfi_undefined:
+ case MIToken::kw_cfi_window_save:
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+ return parseCFIOperand(Dest);
+ case MIToken::kw_blockaddress:
+ return parseBlockAddressOperand(Dest);
+ case MIToken::kw_intrinsic:
+ return parseIntrinsicOperand(Dest);
+ case MIToken::kw_target_index:
+ return parseTargetIndexOperand(Dest);
+ case MIToken::kw_liveout:
+ return parseLiveoutRegisterMaskOperand(Dest);
+ case MIToken::kw_floatpred:
+ case MIToken::kw_intpred:
+ return parsePredicateOperand(Dest);
+ case MIToken::kw_shufflemask:
+ return parseShuffleMaskOperand(Dest);
+ case MIToken::kw_dbg_instr_ref:
+ return parseDbgInstrRefOperand(Dest);
+ case MIToken::Error:
+ return true;
+ case MIToken::Identifier:
+ if (const auto *RegMask = PFS.Target.getRegMask(Token.stringValue())) {
+ Dest = MachineOperand::CreateRegMask(RegMask);
+ lex();
+ break;
+ } else if (Token.stringValue() == "CustomRegMask") {
+ return parseCustomRegisterMaskOperand(Dest);
+ } else
+ return parseTypedImmediateOperand(Dest);
+ case MIToken::dot: {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ if (const auto *Formatter = TII->getMIRFormatter()) {
+ return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter);
+ }
+ [[fallthrough]];
+ }
+ default:
+ // FIXME: Parse the MCSymbol machine operand.
+ return error("expected a machine operand");
+ }
+ return false;
+}
+
+bool MIParser::parseMachineOperandAndTargetFlags(
+ const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest,
+ std::optional<unsigned> &TiedDefIdx) {
+ unsigned TF = 0;
+ bool HasTargetFlags = false;
+ if (Token.is(MIToken::kw_target_flags)) {
+ HasTargetFlags = true;
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ if (PFS.Target.getDirectTargetFlag(Token.stringValue(), TF)) {
+ if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ }
+ lex();
+ while (Token.is(MIToken::comma)) {
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ unsigned BitFlag = 0;
+ if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ // TODO: Report an error when using a duplicate bit target flag.
+ TF |= BitFlag;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ auto Loc = Token.location();
+ if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx))
+ return true;
+ if (!HasTargetFlags)
+ return false;
+ if (Dest.isReg())
+ return error(Loc, "register operands can't have target flags");
+ Dest.setTargetFlags(TF);
+ return false;
+}
+
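+// Parse an optional offset of the form '+ <imm>' or '- <imm>'.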
+bool MIParser::parseOffset(int64_t &Offset) {
+ if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus))
+ return false;
+ StringRef Sign = Token.range();
+ bool IsNegative = Token.is(MIToken::minus);
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '" + Sign + "'");
+ if (Token.integerValue().getSignificantBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Offset = Token.integerValue().getExtValue();
+ if (IsNegative)
+ Offset = -Offset;
+ lex();
+ return false;
+}
+
+bool MIParser::parseIRBlockAddressTaken(BasicBlock *&BB) {
+ assert(Token.is(MIToken::kw_ir_block_address_taken));
+ lex();
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+ return error("expected basic block after 'ir_block_address_taken'");
+
+ if (parseIRBlock(BB, MF.getFunction()))
+ return true;
+
+ lex();
+ return false;
+}
+
+bool MIParser::parseAlignment(uint64_t &Alignment) {
+ assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected an integer literal after 'align'");
+ if (getUint64(Alignment))
+ return true;
+ lex();
+
+ if (!isPowerOf2_64(Alignment))
+ return error("expected a power-of-2 literal after 'align'");
+
+ return false;
+}
+
+bool MIParser::parseAddrspace(unsigned &Addrspace) {
+ assert(Token.is(MIToken::kw_addrspace));
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected an integer literal after 'addrspace'");
+ if (getUnsigned(Addrspace))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseOperandsOffset(MachineOperand &Op) {
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Op.setOffset(Offset);
+ return false;
+}
+
+static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS,
+ const Value *&V, ErrorCallbackType ErrCB) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRValue: {
+ V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
+ break;
+ }
+ case MIToken::IRValue: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(Token, SlotNumber, ErrCB))
+ return true;
+ V = PFS.getIRValue(SlotNumber);
+ break;
+ }
+ case MIToken::NamedGlobalValue:
+ case MIToken::GlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(Token, PFS, GV, ErrCB))
+ return true;
+ V = GV;
+ break;
+ }
+ case MIToken::QuotedIRValue: {
+ const Constant *C = nullptr;
+ if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB))
+ return true;
+ V = C;
+ break;
+ }
+ case MIToken::kw_unknown_address:
+ V = nullptr;
+ return false;
+ default:
+ llvm_unreachable("The current token should be an IR block reference");
+ }
+ if (!V)
+    return ErrCB(Token.location(),
+                 Twine("use of undefined IR value '") + Token.range() + "'");
+ return false;
+}
+
+bool MIParser::parseIRValue(const Value *&V) {
+ return ::parseIRValue(
+ Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
+bool MIParser::getUint64(uint64_t &Result) {
+ if (Token.hasIntegerValue()) {
+ if (Token.integerValue().getActiveBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = Token.integerValue().getZExtValue();
+ return false;
+ }
+ if (Token.is(MIToken::HexLiteral)) {
+ APInt A;
+ if (getHexUint(A))
+ return true;
+ if (A.getBitWidth() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = A.getZExtValue();
+ return false;
+ }
+ return true;
+}
+
+bool MIParser::getHexUint(APInt &Result) {
+ return ::getHexUint(Token, Result);
+}
+
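+/// Parse a single memory operand flag, including target-specific MMO flags
+/// given as string constants, rejecting duplicate flags.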
+bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
+ const auto OldFlags = Flags;
+ switch (Token.kind()) {
+ case MIToken::kw_volatile:
+ Flags |= MachineMemOperand::MOVolatile;
+ break;
+ case MIToken::kw_non_temporal:
+ Flags |= MachineMemOperand::MONonTemporal;
+ break;
+ case MIToken::kw_dereferenceable:
+ Flags |= MachineMemOperand::MODereferenceable;
+ break;
+ case MIToken::kw_invariant:
+ Flags |= MachineMemOperand::MOInvariant;
+ break;
+ case MIToken::StringConstant: {
+ MachineMemOperand::Flags TF;
+ if (PFS.Target.getMMOTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target MMO flag '" + Token.stringValue() +
+ "'");
+ Flags |= TF;
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be a memory operand flag");
+ }
+ if (OldFlags == Flags)
+ // We know that the same flag is specified more than once when the flags
+ // weren't modified.
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag");
+ lex();
+ return false;
+}
+
+bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
+ switch (Token.kind()) {
+ case MIToken::kw_stack:
+ PSV = MF.getPSVManager().getStack();
+ break;
+ case MIToken::kw_got:
+ PSV = MF.getPSVManager().getGOT();
+ break;
+ case MIToken::kw_jump_table:
+ PSV = MF.getPSVManager().getJumpTable();
+ break;
+ case MIToken::kw_constant_pool:
+ PSV = MF.getPSVManager().getConstantPool();
+ break;
+ case MIToken::FixedStackObject: {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
+ case MIToken::StackObject: {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
+ case MIToken::kw_call_entry:
+ lex();
+ switch (Token.kind()) {
+ case MIToken::GlobalValue:
+ case MIToken::NamedGlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ PSV = MF.getPSVManager().getGlobalValueCallEntry(GV);
+ break;
+ }
+ case MIToken::ExternalSymbol:
+ PSV = MF.getPSVManager().getExternalSymbolCallEntry(
+ MF.createExternalSymbolName(Token.stringValue()));
+ break;
+ default:
+ return error(
+ "expected a global value or an external symbol after 'call-entry'");
+ }
+ break;
+ case MIToken::kw_custom: {
+ lex();
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ if (const auto *Formatter = TII->getMIRFormatter()) {
+ if (Formatter->parseCustomPseudoSourceValue(
+ Token.stringValue(), MF, PFS, PSV,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ }))
+ return true;
+ } else
+ return error("unable to parse target custom pseudo source value");
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be pseudo source value");
+ }
+ lex();
+ return false;
+}
+
+bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
+ if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
+ Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
+ Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
+ Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) {
+ const PseudoSourceValue *PSV = nullptr;
+ if (parseMemoryPseudoSourceValue(PSV))
+ return true;
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(PSV, Offset);
+ return false;
+ }
+ if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) &&
+ Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue) &&
+ Token.isNot(MIToken::QuotedIRValue) &&
+ Token.isNot(MIToken::kw_unknown_address))
+ return error("expected an IR value reference");
+ const Value *V = nullptr;
+ if (parseIRValue(V))
+ return true;
+ if (V && !V->getType()->isPointerTy())
+ return error("expected a pointer IR value");
+ lex();
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(V, Offset);
+ return false;
+}
+
+bool MIParser::parseOptionalScope(LLVMContext &Context,
+ SyncScope::ID &SSID) {
+ SSID = SyncScope::System;
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") {
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected '(' in syncscope");
+
+ std::string SSN;
+ if (parseStringConstant(SSN))
+ return true;
+
+ SSID = Context.getOrInsertSyncScopeID(SSN);
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected ')' in syncscope");
+ }
+
+ return false;
+}
+
+bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
+ Order = AtomicOrdering::NotAtomic;
+ if (Token.isNot(MIToken::Identifier))
+ return false;
+
+ Order = StringSwitch<AtomicOrdering>(Token.stringValue())
+ .Case("unordered", AtomicOrdering::Unordered)
+ .Case("monotonic", AtomicOrdering::Monotonic)
+ .Case("acquire", AtomicOrdering::Acquire)
+ .Case("release", AtomicOrdering::Release)
+ .Case("acq_rel", AtomicOrdering::AcquireRelease)
+ .Case("seq_cst", AtomicOrdering::SequentiallyConsistent)
+ .Default(AtomicOrdering::NotAtomic);
+
+ if (Order != AtomicOrdering::NotAtomic) {
+ lex();
+ return false;
+ }
+
+ return error("expected an atomic scope, ordering or a size specification");
+}
+
+bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+ while (Token.isMemoryOperandFlag()) {
+ if (parseMemoryOperandFlag(Flags))
+ return true;
+ }
+ if (Token.isNot(MIToken::Identifier) ||
+ (Token.stringValue() != "load" && Token.stringValue() != "store"))
+ return error("expected 'load' or 'store' memory operation");
+ if (Token.stringValue() == "load")
+ Flags |= MachineMemOperand::MOLoad;
+ else
+ Flags |= MachineMemOperand::MOStore;
+ lex();
+
+ // Optional 'store' for operands that both load and store.
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "store") {
+ Flags |= MachineMemOperand::MOStore;
+ lex();
+ }
+
+ // Optional synchronization scope.
+ SyncScope::ID SSID;
+ if (parseOptionalScope(MF.getFunction().getContext(), SSID))
+ return true;
+
+ // Up to two atomic orderings (cmpxchg provides guarantees on failure).
+ AtomicOrdering Order, FailureOrder;
+ if (parseOptionalAtomicOrdering(Order))
+ return true;
+
+ if (parseOptionalAtomicOrdering(FailureOrder))
+ return true;
+
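+ // The memory size is given either as a byte count, as 'unknown-size', or as
+ // a low-level type wrapped in parentheses, e.g. '(s64)'.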
+ LLT MemoryType;
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::kw_unknown_size) &&
+ Token.isNot(MIToken::lparen))
+ return error("expected memory LLT, the size integer literal or 'unknown-size' after "
+ "memory operation");
+
+ uint64_t Size = MemoryLocation::UnknownSize;
+ if (Token.is(MIToken::IntegerLiteral)) {
+ if (getUint64(Size))
+ return true;
+
+ // Convert from bytes to bits for storage.
+ MemoryType = LLT::scalar(8 * Size);
+ lex();
+ } else if (Token.is(MIToken::kw_unknown_size)) {
+ Size = MemoryLocation::UnknownSize;
+ lex();
+ } else {
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (parseLowLevelType(Token.location(), MemoryType))
+ return true;
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ Size = MemoryType.getSizeInBytes();
+ }
+
+ MachinePointerInfo Ptr = MachinePointerInfo();
+ if (Token.is(MIToken::Identifier)) {
+ const char *Word =
+ ((Flags & MachineMemOperand::MOLoad) &&
+ (Flags & MachineMemOperand::MOStore))
+ ? "on"
+ : Flags & MachineMemOperand::MOLoad ? "from" : "into";
+ if (Token.stringValue() != Word)
+ return error(Twine("expected '") + Word + "'");
+ lex();
+
+ if (parseMachinePointerInfo(Ptr))
+ return true;
+ }
+ uint64_t BaseAlignment =
+ (Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1);
+ AAMDNodes AAInfo;
+ MDNode *Range = nullptr;
+ while (consumeIfPresent(MIToken::comma)) {
+ switch (Token.kind()) {
+ case MIToken::kw_align: {
+ // align is printed if it is different than size.
+ uint64_t Alignment;
+ if (parseAlignment(Alignment))
+ return true;
+ if (Ptr.Offset & (Alignment - 1)) {
+ // MachineMemOperand::getAlign never returns a value greater than the
+ // alignment of offset, so this just guards against hand-written MIR
+ // that specifies a large "align" value when it should probably use
+ // "basealign" instead.
+ return error("specified alignment is more aligned than offset");
+ }
+ BaseAlignment = Alignment;
+ break;
+ }
+ case MIToken::kw_basealign:
+ // basealign is printed if it is different than align.
+ if (parseAlignment(BaseAlignment))
+ return true;
+ break;
+ case MIToken::kw_addrspace:
+ if (parseAddrspace(Ptr.AddrSpace))
+ return true;
+ break;
+ case MIToken::md_tbaa:
+ lex();
+ if (parseMDNode(AAInfo.TBAA))
+ return true;
+ break;
+ case MIToken::md_alias_scope:
+ lex();
+ if (parseMDNode(AAInfo.Scope))
+ return true;
+ break;
+ case MIToken::md_noalias:
+ lex();
+ if (parseMDNode(AAInfo.NoAlias))
+ return true;
+ break;
+ case MIToken::md_range:
+ lex();
+ if (parseMDNode(Range))
+ return true;
+ break;
+ // TODO: Report an error on duplicate metadata nodes.
+ default:
+ return error("expected 'align' or '!tbaa' or '!alias.scope' or "
+ "'!noalias' or '!range'");
+ }
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MF.getMachineMemOperand(Ptr, Flags, MemoryType, Align(BaseAlignment),
+ AAInfo, Range, SSID, Order, FailureOrder);
+ return false;
+}
+
+bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
+ assert((Token.is(MIToken::kw_pre_instr_symbol) ||
+ Token.is(MIToken::kw_post_instr_symbol)) &&
+ "Invalid token for a pre- post-instruction symbol!");
+ lex();
+ if (Token.isNot(MIToken::MCSymbol))
+ return error("expected a symbol after 'pre-instr-symbol'");
+ Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
+bool MIParser::parseHeapAllocMarker(MDNode *&Node) {
+ assert(Token.is(MIToken::kw_heap_alloc_marker) &&
+ "Invalid token for a heap alloc marker!");
+ lex();
+ if (parseMDNode(Node))
+ return true;
+ if (!Node)
+ return error("expected a MDNode after 'heap-alloc-marker'");
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
+bool MIParser::parsePCSections(MDNode *&Node) {
+ assert(Token.is(MIToken::kw_pcsections) &&
+ "Invalid token for a PC sections!");
+ lex();
+ if (parseMDNode(Node))
+ return true;
+ if (!Node)
+ return error("expected a MDNode after 'pcsections'");
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
+static void initSlots2BasicBlocks(
+ const Function &F,
+ DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &BB : F) {
+ if (BB.hasName())
+ continue;
+ int Slot = MST.getLocalSlot(&BB);
+ if (Slot == -1)
+ continue;
+ Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB));
+ }
+}
+
+static const BasicBlock *getIRBlockFromSlot(
+ unsigned Slot,
+ const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ return Slots2BasicBlocks.lookup(Slot);
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
+ if (Slots2BasicBlocks.empty())
+ initSlots2BasicBlocks(MF.getFunction(), Slots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, Slots2BasicBlocks);
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
+ if (&F == &MF.getFunction())
+ return getIRBlock(Slot);
+ DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks;
+ initSlots2BasicBlocks(F, CustomSlots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
+}
+
+MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
+ // FIXME: Currently we can't recognize temporary or local symbols and call all
+ // of the appropriate forms to create them. However, this handles basic cases
+ // well, as most of the special aspects are recognized by a prefix on their
+ // name, and the input names should already be unique. For test cases, keeping
+ // the symbol name out of the symbol table isn't terribly important.
+ return MF.getContext().getOrCreateSymbol(Name);
+}
+
+bool MIParser::parseStringConstant(std::string &Result) {
+ if (Token.isNot(MIToken::StringConstant))
+ return error("expected string constant");
+ Result = std::string(Token.stringValue());
+ lex();
+ return false;
+}
+
+bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+ StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseBasicBlocks();
+}
+
+bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB);
+}
+
+bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS,
+ Register &Reg, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg);
+}
+
+bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS,
+ Register &Reg, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg);
+}
+
+bool llvm::parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+ VRegInfo *&Info, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Info);
+}
+
+bool llvm::parseStackObjectReference(PerFunctionMIParsingState &PFS,
+ int &FI, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI);
+}
+
+bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
+ MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
+}
+
+bool llvm::parseMachineMetadata(PerFunctionMIParsingState &PFS, StringRef Src,
+ SMRange SrcRange, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src, SrcRange).parseMachineMetadata();
+}
+
+bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const Value *&V,
+ ErrorCallbackType ErrorCallback) {
+ MIToken Token;
+ Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) {
+ ErrorCallback(Loc, Msg);
+ });
+ V = nullptr;
+
+ return ::parseIRValue(Token, PFS, V, ErrorCallback);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
new file mode 100644
index 000000000000..b2e570c5e67e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -0,0 +1,1133 @@
+//===- MIRParser.cpp - MIR serialization format parser implementation -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that parses the optional LLVM IR and machine
+// functions that are stored in MIR files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Target/TargetMachine.h"
+#include <memory>
+
+using namespace llvm;
+
+namespace llvm {
+class MDNode;
+class RegisterBank;
+
+/// This class implements the parsing of LLVM IR that's embedded inside a MIR
+/// file.
+class MIRParserImpl {
+ SourceMgr SM;
+ LLVMContext &Context;
+ yaml::Input In;
+ StringRef Filename;
+ SlotMapping IRSlots;
+ std::unique_ptr<PerTargetMIParsingState> Target;
+
+ /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are
+ /// created and inserted into the given module when this is true.
+ bool NoLLVMIR = false;
+ /// True when a well-formed MIR file does not contain any MIR/machine function
+ /// parts.
+ bool NoMIRDocuments = false;
+
+ std::function<void(Function &)> ProcessIRFunction;
+
+public:
+ MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
+ LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction);
+
+ void reportDiagnostic(const SMDiagnostic &Diag);
+
+ /// Report an error with the given message at unknown location.
+ ///
+ /// Always returns true.
+ bool error(const Twine &Message);
+
+ /// Report an error with the given message at the given location.
+ ///
+ /// Always returns true.
+ bool error(SMLoc Loc, const Twine &Message);
+
+ /// Report a given error with the location translated from the location in an
+ /// embedded string literal to a location in the MIR file.
+ ///
+ /// Always returns true.
+ bool error(const SMDiagnostic &Error, SMRange SourceRange);
+
+ /// Try to parse the optional LLVM module and the machine functions in the MIR
+ /// file.
+ ///
+ /// Return null if an error occurred.
+ std::unique_ptr<Module>
+ parseIRModule(DataLayoutCallbackTy DataLayoutCallback);
+
+ /// Create an empty function with the given name.
+ Function *createDummyFunction(StringRef Name, Module &M);
+
+ bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
+
+ /// Parse the machine function in the current YAML document.
+ ///
+ /// Return true if an error occurred.
+ bool parseMachineFunction(Module &M, MachineModuleInfo &MMI);
+
+ /// Initialize the machine function to the state that's described in the MIR
+ /// file.
+ ///
+ /// Return true if an error occurred.
+ bool initializeMachineFunction(const yaml::MachineFunction &YamlMF,
+ MachineFunction &MF);
+
+ bool parseRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ bool setupRegisterInfo(const PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeCallSiteInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource,
+ bool IsRestored, int FrameIdx);
+
+ struct VarExprLoc {
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ };
+
+ std::optional<VarExprLoc> parseVarExprLoc(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &VarStr,
+ const yaml::StringValue &ExprStr,
+ const yaml::StringValue &LocStr);
+ template <typename T>
+ bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
+ const T &Object,
+ int FrameIdx);
+
+ bool initializeConstantPool(PerFunctionMIParsingState &PFS,
+ MachineConstantPool &ConstantPool,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineJumpTable &YamlJTI);
+
+ bool parseMachineMetadataNodes(PerFunctionMIParsingState &PFS,
+ MachineFunction &MF,
+ const yaml::MachineFunction &YMF);
+
+private:
+ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node,
+ const yaml::StringValue &Source);
+
+ bool parseMBBReference(PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source);
+
+ bool parseMachineMetadata(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &Source);
+
+ /// Return a MIR diagnostic converted from an MI string diagnostic.
+ SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
+
+ /// Return a MIR diagnostic converted from a diagnostic located in a YAML
+ /// block scalar string.
+ SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
+
+ void computeFunctionProperties(MachineFunction &MF);
+
+ void setupDebugValueTracking(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF);
+};
+
+} // end namespace llvm
+
+static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
+ reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
+}
+
+MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
+ StringRef Filename, LLVMContext &Context,
+ std::function<void(Function &)> Callback)
+ : Context(Context),
+ In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))
+ ->getBuffer(),
+ nullptr, handleYAMLDiag, this),
+ Filename(Filename), ProcessIRFunction(Callback) {
+ In.setContext(&In);
+}
+
+bool MIRParserImpl::error(const Twine &Message) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error, SMDiagnostic(Filename, SourceMgr::DK_Error, Message.str())));
+ return true;
+}
+
+bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message)));
+ return true;
+}
+
+bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) {
+ assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error");
+ reportDiagnostic(diagFromMIStringDiag(Error, SourceRange));
+ return true;
+}
+
+void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
+ DiagnosticSeverity Kind;
+ switch (Diag.getKind()) {
+ case SourceMgr::DK_Error:
+ Kind = DS_Error;
+ break;
+ case SourceMgr::DK_Warning:
+ Kind = DS_Warning;
+ break;
+ case SourceMgr::DK_Note:
+ Kind = DS_Note;
+ break;
+ case SourceMgr::DK_Remark:
+ llvm_unreachable("remark unexpected");
+ break;
+ }
+ Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
+}
+
+std::unique_ptr<Module>
+MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
+ if (!In.setCurrentDocument()) {
+ if (In.error())
+ return nullptr;
+ // Create an empty module when the MIR file is empty.
+ NoMIRDocuments = true;
+ auto M = std::make_unique<Module>(Filename, Context);
+ if (auto LayoutOverride =
+ DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr()))
+ M->setDataLayout(*LayoutOverride);
+ return M;
+ }
+
+ std::unique_ptr<Module> M;
+ // Parse the block scalar manually so that we can return a unique pointer
+ // without having to go through YAML traits.
+ if (const auto *BSN =
+ dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
+ SMDiagnostic Error;
+ M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
+ Context, &IRSlots, DataLayoutCallback);
+ if (!M) {
+ reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
+ return nullptr;
+ }
+ In.nextDocument();
+ if (!In.setCurrentDocument())
+ NoMIRDocuments = true;
+ } else {
+ // Create a new, empty module.
+ M = std::make_unique<Module>(Filename, Context);
+ if (auto LayoutOverride =
+ DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr()))
+ M->setDataLayout(*LayoutOverride);
+ NoLLVMIR = true;
+ }
+ return M;
+}
+
+bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+ if (NoMIRDocuments)
+ return false;
+
+ // Parse the machine functions.
+ do {
+ if (parseMachineFunction(M, MMI))
+ return true;
+ In.nextDocument();
+ } while (In.setCurrentDocument());
+
+ return false;
+}
+
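+// Build a trivial 'void()' IR function containing only an 'unreachable' so
+// that machine functions parsed from an IR-less MIR file still have an IR
+// anchor to hang off of.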
+Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
+ auto &Context = M.getContext();
+ Function *F =
+ Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
+ Function::ExternalLinkage, Name, M);
+ BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
+ new UnreachableInst(Context, BB);
+
+ if (ProcessIRFunction)
+ ProcessIRFunction(*F);
+
+ return F;
+}
+
+bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
+ // Parse the yaml.
+ yaml::MachineFunction YamlMF;
+ yaml::EmptyContext Ctx;
+
+ const LLVMTargetMachine &TM = MMI.getTarget();
+ YamlMF.MachineFuncInfo = std::unique_ptr<yaml::MachineFunctionInfo>(
+ TM.createDefaultFuncInfoYAML());
+
+ yaml::yamlize(In, YamlMF, false, Ctx);
+ if (In.error())
+ return true;
+
+ // Search for the corresponding IR function.
+ StringRef FunctionName = YamlMF.Name;
+ Function *F = M.getFunction(FunctionName);
+ if (!F) {
+ if (NoLLVMIR) {
+ F = createDummyFunction(FunctionName, M);
+ } else {
+ return error(Twine("function '") + FunctionName +
+ "' isn't defined in the provided LLVM IR");
+ }
+ }
+ if (MMI.getMachineFunction(*F) != nullptr)
+ return error(Twine("redefinition of machine function '") + FunctionName +
+ "'");
+
+ // Create the MachineFunction.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ if (initializeMachineFunction(YamlMF, MF))
+ return true;
+
+ return false;
+}
+
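+/// Return true if the machine function is still in SSA form, i.e. every
+/// virtual register has at most one definition and no subregister
+/// definitions.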
+static bool isSSA(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
+ return false;
+
+ // Subregister defs are invalid in SSA.
+ const MachineOperand *RegDef = MRI.getOneDef(Reg);
+ if (RegDef && RegDef->getSubReg() != 0)
+ return false;
+ }
+ return true;
+}
+
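+// Infer the function properties (NoPHIs, TiedOpsRewritten, IsSSA, NoVRegs)
+// from the parsed machine code rather than requiring them to be spelled out
+// in the MIR file.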
+void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
+ MachineFunctionProperties &Properties = MF.getProperties();
+
+ bool HasPHI = false;
+ bool HasInlineAsm = false;
+ bool AllTiedOpsRewritten = true, HasTiedOps = false;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isPHI())
+ HasPHI = true;
+ if (MI.isInlineAsm())
+ HasInlineAsm = true;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ unsigned DefIdx;
+ if (MO.isUse() && MI.isRegTiedToDefOperand(I, &DefIdx)) {
+ HasTiedOps = true;
+ if (MO.getReg() != MI.getOperand(DefIdx).getReg())
+ AllTiedOpsRewritten = false;
+ }
+ }
+ }
+ }
+ if (!HasPHI)
+ Properties.set(MachineFunctionProperties::Property::NoPHIs);
+ MF.setHasInlineAsm(HasInlineAsm);
+
+ if (HasTiedOps && AllTiedOpsRewritten)
+ Properties.set(MachineFunctionProperties::Property::TiedOpsRewritten);
+
+ if (isSSA(MF))
+ Properties.set(MachineFunctionProperties::Property::IsSSA);
+ else
+ Properties.reset(MachineFunctionProperties::Property::IsSSA);
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.getNumVirtRegs() == 0)
+ Properties.set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+bool MIRParserImpl::initializeCallSiteInfo(
+ PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ SMDiagnostic Error;
+ const LLVMTargetMachine &TM = MF.getTarget();
+ for (auto &YamlCSInfo : YamlMF.CallSitesInfo) {
+ yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
+ if (MILoc.BlockNum >= MF.size())
+ return error(Twine(MF.getName()) +
+ Twine(" call instruction block out of range.") +
+ " Unable to reference bb:" + Twine(MILoc.BlockNum));
+ auto CallB = std::next(MF.begin(), MILoc.BlockNum);
+ if (MILoc.Offset >= CallB->size())
+ return error(Twine(MF.getName()) +
+ Twine(" call instruction offset out of range.") +
+ " Unable to reference instruction at bb: " +
+ Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset));
+ auto CallI = std::next(CallB->instr_begin(), MILoc.Offset);
+ if (!CallI->isCall(MachineInstr::IgnoreBundle))
+ return error(Twine(MF.getName()) +
+ Twine(" call site info should reference call "
+ "instruction. Instruction at bb:") +
+ Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset) +
+ " is not a call instruction");
+ MachineFunction::CallSiteInfo CSInfo;
+ for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) {
+ Register Reg;
+ if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error))
+ return error(Error, ArgRegPair.Reg.SourceRange);
+ CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
+ }
+
+ if (TM.Options.EmitCallSiteInfo)
+ MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
+ }
+
+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
+ return error(Twine("Call site info provided but not used"));
+ return false;
+}
+
+void MIRParserImpl::setupDebugValueTracking(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ // Compute the value of the "next instruction number" field.
+ unsigned MaxInstrNum = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ MaxInstrNum = std::max((unsigned)MI.peekDebugInstrNum(), MaxInstrNum);
+ MF.setDebugInstrNumberingCount(MaxInstrNum);
+
+ // Load any substitutions.
+ for (const auto &Sub : YamlMF.DebugValueSubstitutions) {
+ MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp},
+ {Sub.DstInst, Sub.DstOp}, Sub.Subreg);
+ }
+
+ // Flag for whether we're supposed to be using DBG_INSTR_REF.
+ MF.setUseDebugInstrRef(YamlMF.UseDebugInstrRef);
+}
+
+bool
+MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
+ MachineFunction &MF) {
+ // TODO: Recreate the machine function.
+ if (Target) {
+ // Avoid clearing state if we're using the same subtarget again.
+ Target->setTarget(MF.getSubtarget());
+ } else {
+ Target.reset(new PerTargetMIParsingState(MF.getSubtarget()));
+ }
+
+ MF.setAlignment(YamlMF.Alignment.valueOrOne());
+ MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ MF.setHasWinCFI(YamlMF.HasWinCFI);
+
+ MF.setCallsEHReturn(YamlMF.CallsEHReturn);
+ MF.setCallsUnwindInit(YamlMF.CallsUnwindInit);
+ MF.setHasEHCatchret(YamlMF.HasEHCatchret);
+ MF.setHasEHScopes(YamlMF.HasEHScopes);
+ MF.setHasEHFunclets(YamlMF.HasEHFunclets);
+ MF.setIsOutlined(YamlMF.IsOutlined);
+
+ if (YamlMF.Legalized)
+ MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
+ if (YamlMF.RegBankSelected)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::RegBankSelected);
+ if (YamlMF.Selected)
+ MF.getProperties().set(MachineFunctionProperties::Property::Selected);
+ if (YamlMF.FailedISel)
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (YamlMF.FailsVerification)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
+ if (YamlMF.TracksDebugUserValues)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
+
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
+ if (parseRegisterInfo(PFS, YamlMF))
+ return true;
+ if (!YamlMF.Constants.empty()) {
+ auto *ConstantPool = MF.getConstantPool();
+ assert(ConstantPool && "Constant pool must be created");
+ if (initializeConstantPool(PFS, *ConstantPool, YamlMF))
+ return true;
+ }
+ if (!YamlMF.MachineMetadataNodes.empty() &&
+ parseMachineMetadataNodes(PFS, MF, YamlMF))
+ return true;
+
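+ // The function body is parsed in two passes over the same block string:
+ // first the basic block definitions (so later MBB references resolve), and
+ // then, further below, the machine instructions themselves.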
+ StringRef BlockStr = YamlMF.Body.Value.Value;
+ SMDiagnostic Error;
+ SourceMgr BlockSM;
+ BlockSM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(BlockStr, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ PFS.SM = &BlockSM;
+ if (parseMachineBasicBlockDefinitions(PFS, BlockStr, Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
+ }
+ // Check Basic Block Section Flags.
+ if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) {
+ MF.setBBSectionsType(BasicBlockSection::Labels);
+ } else if (MF.hasBBSections()) {
+ MF.assignBeginEndSections();
+ }
+ PFS.SM = &SM;
+
+ // Initialize the frame information after creating all the MBBs so that the
+ // MBB references in the frame information can be resolved.
+ if (initializeFrameInfo(PFS, YamlMF))
+ return true;
+ // Initialize the jump table after creating all the MBBs so that the MBB
+ // references can be resolved.
+ if (!YamlMF.JumpTableInfo.Entries.empty() &&
+ initializeJumpTableInfo(PFS, YamlMF.JumpTableInfo))
+ return true;
+ // Parse the machine instructions after creating all of the MBBs so that the
+ // parser can resolve the MBB references.
+ StringRef InsnStr = YamlMF.Body.Value.Value;
+ SourceMgr InsnSM;
+ InsnSM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(InsnStr, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ PFS.SM = &InsnSM;
+ if (parseMachineInstructions(PFS, InsnStr, Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
+ }
+ PFS.SM = &SM;
+
+ if (setupRegisterInfo(PFS, YamlMF))
+ return true;
+
+ if (YamlMF.MachineFuncInfo) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ // Note this is called after the initial constructor of the
+ // MachineFunctionInfo based on the MachineFunction, which may depend on the
+ // IR.
+
+ SMRange SrcRange;
+ if (TM.parseMachineFunctionInfo(*YamlMF.MachineFuncInfo, PFS, Error,
+ SrcRange)) {
+ return error(Error, SrcRange);
+ }
+ }
+
+ // Set the reserved registers after parsing MachineFuncInfo. The target may
+ // have been recording information used to select the reserved registers
+ // there.
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.freezeReservedRegs(MF);
+
+ computeFunctionProperties(MF);
+
+ if (initializeCallSiteInfo(PFS, YamlMF))
+ return true;
+
+ setupDebugValueTracking(MF, PFS, YamlMF);
+
+ MF.getSubtarget().mirFileLoaded(MF);
+
+ MF.verify();
+ return false;
+}
+
+bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ assert(RegInfo.tracksLiveness());
+ if (!YamlMF.TracksRegLiveness)
+ RegInfo.invalidateLiveness();
+
+ SMDiagnostic Error;
+ // Parse the virtual register information.
+ for (const auto &VReg : YamlMF.VirtualRegisters) {
+ VRegInfo &Info = PFS.getVRegInfo(VReg.ID.Value);
+ if (Info.Explicit)
+ return error(VReg.ID.SourceRange.Start,
+ Twine("redefinition of virtual register '%") +
+ Twine(VReg.ID.Value) + "'");
+ Info.Explicit = true;
+
+ if (StringRef(VReg.Class.Value).equals("_")) {
+ Info.Kind = VRegInfo::GENERIC;
+ Info.D.RegBank = nullptr;
+ } else {
+ const auto *RC = Target->getRegClass(VReg.Class.Value);
+ if (RC) {
+ Info.Kind = VRegInfo::NORMAL;
+ Info.D.RC = RC;
+ } else {
+ const RegisterBank *RegBank = Target->getRegBank(VReg.Class.Value);
+ if (!RegBank)
+ return error(
+ VReg.Class.SourceRange.Start,
+ Twine("use of undefined register class or register bank '") +
+ VReg.Class.Value + "'");
+ Info.Kind = VRegInfo::REGBANK;
+ Info.D.RegBank = RegBank;
+ }
+ }
+
+ if (!VReg.PreferredRegister.Value.empty()) {
+ if (Info.Kind != VRegInfo::NORMAL)
+ return error(VReg.Class.SourceRange.Start,
+ Twine("preferred register can only be set for normal vregs"));
+
+ if (parseRegisterReference(PFS, Info.PreferredReg,
+ VReg.PreferredRegister.Value, Error))
+ return error(Error, VReg.PreferredRegister.SourceRange);
+ }
+ }
+
+ // Parse the liveins.
+ for (const auto &LiveIn : YamlMF.LiveIns) {
+ Register Reg;
+ if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error))
+ return error(Error, LiveIn.Register.SourceRange);
+ Register VReg;
+ if (!LiveIn.VirtualRegister.Value.empty()) {
+ VRegInfo *Info;
+ if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value,
+ Error))
+ return error(Error, LiveIn.VirtualRegister.SourceRange);
+ VReg = Info->VReg;
+ }
+ RegInfo.addLiveIn(Reg, VReg);
+ }
+
+ // Parse the callee saved registers (Registers that will
+ // be saved for the caller).
+ if (YamlMF.CalleeSavedRegisters) {
+ SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
+ for (const auto &RegSource : *YamlMF.CalleeSavedRegisters) {
+ Register Reg;
+ if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ RegInfo.setCalleeSavedRegs(CalleeSavedRegisters);
+ }
+
+ return false;
+}
+
+bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ bool Error = false;
+ // Create VRegs
+ auto populateVRegInfo = [&](const VRegInfo &Info, Twine Name) {
+ Register Reg = Info.VReg;
+ switch (Info.Kind) {
+ case VRegInfo::UNKNOWN:
+ error(Twine("Cannot determine class/bank of virtual register ") +
+ Name + " in function '" + MF.getName() + "'");
+ Error = true;
+ break;
+ case VRegInfo::NORMAL:
+ if (!Info.D.RC->isAllocatable()) {
+ error(Twine("Cannot use non-allocatable class '") +
+ TRI->getRegClassName(Info.D.RC) + "' for virtual register " +
+ Name + " in function '" + MF.getName() + "'");
+ Error = true;
+ break;
+ }
+
+ MRI.setRegClass(Reg, Info.D.RC);
+ if (Info.PreferredReg != 0)
+ MRI.setSimpleHint(Reg, Info.PreferredReg);
+ break;
+ case VRegInfo::GENERIC:
+ break;
+ case VRegInfo::REGBANK:
+ MRI.setRegBank(Reg, *Info.D.RegBank);
+ break;
+ }
+ };
+
+ for (const auto &P : PFS.VRegInfosNamed) {
+ const VRegInfo &Info = *P.second;
+ populateVRegInfo(Info, Twine(P.first()));
+ }
+
+ for (auto P : PFS.VRegInfos) {
+ const VRegInfo &Info = *P.second;
+ populateVRegInfo(Info, Twine(P.first));
+ }
+
+ // Compute MachineRegisterInfo::UsedPhysRegMask
+ for (const MachineBasicBlock &MBB : MF) {
+ // Make sure MRI knows about registers clobbered by unwinder.
+ if (MBB.isEHPad())
+ if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF))
+ MRI.addPhysRegsUsedFromRegMask(RegMask);
+
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
+ }
+ }
+ }
+
+ return Error;
+}
+
+bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const Function &F = MF.getFunction();
+ const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
+ MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
+ MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
+ MFI.setHasStackMap(YamlMFI.HasStackMap);
+ MFI.setHasPatchPoint(YamlMFI.HasPatchPoint);
+ MFI.setStackSize(YamlMFI.StackSize);
+ MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment);
+ if (YamlMFI.MaxAlignment)
+ MFI.ensureMaxAlignment(Align(YamlMFI.MaxAlignment));
+ MFI.setAdjustsStack(YamlMFI.AdjustsStack);
+ MFI.setHasCalls(YamlMFI.HasCalls);
+ if (YamlMFI.MaxCallFrameSize != ~0u)
+ MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(YamlMFI.CVBytesOfCalleeSavedRegisters);
+ MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
+ MFI.setHasVAStart(YamlMFI.HasVAStart);
+ MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+ MFI.setHasTailCall(YamlMFI.HasTailCall);
+ MFI.setLocalFrameSize(YamlMFI.LocalFrameSize);
+ if (!YamlMFI.SavePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint))
+ return true;
+ MFI.setSavePoint(MBB);
+ }
+ if (!YamlMFI.RestorePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(PFS, MBB, YamlMFI.RestorePoint))
+ return true;
+ MFI.setRestorePoint(MBB);
+ }
+
+ std::vector<CalleeSavedInfo> CSIInfo;
+ // Initialize the fixed frame objects.
+ for (const auto &Object : YamlMF.FixedStackObjects) {
+ int ObjectIdx;
+ if (Object.Type != yaml::FixedMachineStackObject::SpillSlot)
+ ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset,
+ Object.IsImmutable, Object.IsAliased);
+ else
+ ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
+
+ if (!TFI->isSupportedStackID(Object.StackID))
+ return error(Object.ID.SourceRange.Start,
+ Twine("StackID is not supported by target"));
+ MFI.setStackID(ObjectIdx, Object.StackID);
+ MFI.setObjectAlignment(ObjectIdx, Object.Alignment.valueOrOne());
+ if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
+ ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of fixed stack object '%fixed-stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
+ Object.CalleeSavedRestored, ObjectIdx))
+ return true;
+ if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
+ return true;
+ }
+
+ for (const auto &Object : YamlMF.EntryValueObjects) {
+ SMDiagnostic Error;
+ Register Reg;
+ if (parseNamedRegisterReference(PFS, Reg, Object.EntryValueRegister.Value,
+ Error))
+ return error(Error, Object.EntryValueRegister.SourceRange);
+ if (!Reg.isPhysical())
+ return error(Object.EntryValueRegister.SourceRange.Start,
+ "Expected physical register for entry value field");
+ std::optional<VarExprLoc> MaybeInfo = parseVarExprLoc(
+ PFS, Object.DebugVar, Object.DebugExpr, Object.DebugLoc);
+ if (!MaybeInfo)
+ return true;
+ if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc)
+ PFS.MF.setVariableDbgInfo(MaybeInfo->DIVar, MaybeInfo->DIExpr,
+ Reg.asMCReg(), MaybeInfo->DILoc);
+ }
+
+ // Initialize the ordinary frame objects.
+ for (const auto &Object : YamlMF.StackObjects) {
+ int ObjectIdx;
+ const AllocaInst *Alloca = nullptr;
+ const yaml::StringValue &Name = Object.Name;
+ if (!Name.Value.empty()) {
+ Alloca = dyn_cast_or_null<AllocaInst>(
+ F.getValueSymbolTable()->lookup(Name.Value));
+ if (!Alloca)
+ return error(Name.SourceRange.Start,
+ "alloca instruction named '" + Name.Value +
+ "' isn't defined in the function '" + F.getName() +
+ "'");
+ }
+ if (!TFI->isSupportedStackID(Object.StackID))
+ return error(Object.ID.SourceRange.Start,
+ Twine("StackID is not supported by target"));
+ if (Object.Type == yaml::MachineStackObject::VariableSized)
+ ObjectIdx =
+ MFI.CreateVariableSizedObject(Object.Alignment.valueOrOne(), Alloca);
+ else
+ ObjectIdx = MFI.CreateStackObject(
+ Object.Size, Object.Alignment.valueOrOne(),
+ Object.Type == yaml::MachineStackObject::SpillSlot, Alloca,
+ Object.StackID);
+ MFI.setObjectOffset(ObjectIdx, Object.Offset);
+
+ if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of stack object '%stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
+ Object.CalleeSavedRestored, ObjectIdx))
+ return true;
+ if (Object.LocalOffset)
+ MFI.mapLocalFrameObject(ObjectIdx, *Object.LocalOffset);
+ if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
+ return true;
+ }
+ MFI.setCalleeSavedInfo(CSIInfo);
+ if (!CSIInfo.empty())
+ MFI.setCalleeSavedInfoValid(true);
+
+ // Initialize the various stack object references after initializing the
+ // stack objects.
+ if (!YamlMFI.StackProtector.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(PFS, FI, YamlMFI.StackProtector.Value, Error))
+ return error(Error, YamlMFI.StackProtector.SourceRange);
+ MFI.setStackProtectorIndex(FI);
+ }
+
+ if (!YamlMFI.FunctionContext.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(PFS, FI, YamlMFI.FunctionContext.Value, Error))
+ return error(Error, YamlMFI.FunctionContext.SourceRange);
+ MFI.setFunctionContextIndex(FI);
+ }
+
+ return false;
+}
+
+bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) {
+ if (RegisterSource.Value.empty())
+ return false;
+ Register Reg;
+ SMDiagnostic Error;
+ if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error))
+ return error(Error, RegisterSource.SourceRange);
+ CalleeSavedInfo CSI(Reg, FrameIdx);
+ CSI.setRestored(IsRestored);
+ CSIInfo.push_back(CSI);
+ return false;
+}
+
+/// Verify that given node is of a certain type. Return true on error.
+template <typename T>
+static bool typecheckMDNode(T *&Result, MDNode *Node,
+ const yaml::StringValue &Source,
+ StringRef TypeString, MIRParserImpl &Parser) {
+ if (!Node)
+ return false;
+ Result = dyn_cast<T>(Node);
+ if (!Result)
+ return Parser.error(Source.SourceRange.Start,
+ "expected a reference to a '" + TypeString +
+ "' metadata node");
+ return false;
+}
+
+std::optional<MIRParserImpl::VarExprLoc> MIRParserImpl::parseVarExprLoc(
+ PerFunctionMIParsingState &PFS, const yaml::StringValue &VarStr,
+ const yaml::StringValue &ExprStr, const yaml::StringValue &LocStr) {
+ MDNode *Var = nullptr;
+ MDNode *Expr = nullptr;
+ MDNode *Loc = nullptr;
+ if (parseMDNode(PFS, Var, VarStr) || parseMDNode(PFS, Expr, ExprStr) ||
+ parseMDNode(PFS, Loc, LocStr))
+ return std::nullopt;
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ if (typecheckMDNode(DIVar, Var, VarStr, "DILocalVariable", *this) ||
+ typecheckMDNode(DIExpr, Expr, ExprStr, "DIExpression", *this) ||
+ typecheckMDNode(DILoc, Loc, LocStr, "DILocation", *this))
+ return std::nullopt;
+ return VarExprLoc{DIVar, DIExpr, DILoc};
+}
+
+template <typename T>
+bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
+ const T &Object, int FrameIdx) {
+ std::optional<VarExprLoc> MaybeInfo =
+ parseVarExprLoc(PFS, Object.DebugVar, Object.DebugExpr, Object.DebugLoc);
+ if (!MaybeInfo)
+ return true;
+ // Debug information can only be attached to stack objects; Fixed stack
+ // objects aren't supported.
+ if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc)
+ PFS.MF.setVariableDbgInfo(MaybeInfo->DIVar, MaybeInfo->DIExpr, FrameIdx,
+ MaybeInfo->DILoc);
+ return false;
+}
+
+bool MIRParserImpl::parseMDNode(PerFunctionMIParsingState &PFS,
+ MDNode *&Node, const yaml::StringValue &Source) {
+ if (Source.Value.empty())
+ return false;
+ SMDiagnostic Error;
+ if (llvm::parseMDNode(PFS, Node, Source.Value, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
+ MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) {
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots;
+ const MachineFunction &MF = PFS.MF;
+ const auto &M = *MF.getFunction().getParent();
+ SMDiagnostic Error;
+ for (const auto &YamlConstant : YamlMF.Constants) {
+ if (YamlConstant.IsTargetSpecific)
+ // FIXME: Support target-specific constant pools
+ return error(YamlConstant.Value.SourceRange.Start,
+ "Can't parse target-specific constant pool entries yet");
+ const Constant *Value = dyn_cast_or_null<Constant>(
+ parseConstantValue(YamlConstant.Value.Value, Error, M));
+ if (!Value)
+ return error(Error, YamlConstant.Value.SourceRange);
+ const Align PrefTypeAlign =
+ M.getDataLayout().getPrefTypeAlign(Value->getType());
+ const Align Alignment = YamlConstant.Alignment.value_or(PrefTypeAlign);
+ unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
+ if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
+ .second)
+ return error(YamlConstant.ID.SourceRange.Start,
+ Twine("redefinition of constant pool item '%const.") +
+ Twine(YamlConstant.ID.Value) + "'");
+ }
+ return false;
+}
+
+bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineJumpTable &YamlJTI) {
+ MachineJumpTableInfo *JTI = PFS.MF.getOrCreateJumpTableInfo(YamlJTI.Kind);
+ for (const auto &Entry : YamlJTI.Entries) {
+ std::vector<MachineBasicBlock *> Blocks;
+ for (const auto &MBBSource : Entry.Blocks) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(PFS, MBB, MBBSource.Value))
+ return true;
+ Blocks.push_back(MBB);
+ }
+ unsigned Index = JTI->createJumpTableIndex(Blocks);
+ if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index))
+ .second)
+ return error(Entry.ID.SourceRange.Start,
+ Twine("redefinition of jump table entry '%jump-table.") +
+ Twine(Entry.ID.Value) + "'");
+ }
+ return false;
+}
+
+bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source) {
+ SMDiagnostic Error;
+ if (llvm::parseMBBReference(PFS, MBB, Source.Value, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &Source) {
+ SMDiagnostic Error;
+ if (llvm::parseMachineMetadata(PFS, Source.Value, Source.SourceRange, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::parseMachineMetadataNodes(
+ PerFunctionMIParsingState &PFS, MachineFunction &MF,
+ const yaml::MachineFunction &YMF) {
+ for (const auto &MDS : YMF.MachineMetadataNodes) {
+ if (parseMachineMetadata(PFS, MDS))
+ return true;
+ }
+ // Report missing definitions from forward referenced nodes.
+ if (!PFS.MachineForwardRefMDNodes.empty())
+ return error(PFS.MachineForwardRefMDNodes.begin()->second.second,
+ "use of undefined metadata '!" +
+ Twine(PFS.MachineForwardRefMDNodes.begin()->first) + "'");
+ return false;
+}
+
+SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
+ assert(SourceRange.isValid() && "Invalid source range");
+ SMLoc Loc = SourceRange.Start;
+ bool HasQuote = Loc.getPointer() < SourceRange.End.getPointer() &&
+ *Loc.getPointer() == '\'';
+ // Translate the location of the error from the location in the MI string to
+ // the corresponding location in the MIR file.
+ Loc = Loc.getFromPointer(Loc.getPointer() + Error.getColumnNo() +
+ (HasQuote ? 1 : 0));
+
+ // TODO: Translate any source ranges as well.
+ return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), std::nullopt,
+ Error.getFixIts());
+}
+
+SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
+ assert(SourceRange.isValid());
+
+ // Translate the location of the error from the location in the LLVM IR string
+ // to the corresponding location in the MIR file.
+ auto LineAndColumn = SM.getLineAndColumn(SourceRange.Start);
+ unsigned Line = LineAndColumn.first + Error.getLineNo() - 1;
+ unsigned Column = Error.getColumnNo();
+ StringRef LineStr = Error.getLineContents();
+ SMLoc Loc = Error.getLoc();
+
+ // Get the full line and adjust the column number by taking the indentation of
+ // LLVM IR into account.
+ for (line_iterator L(*SM.getMemoryBuffer(SM.getMainFileID()), false), E;
+ L != E; ++L) {
+ if (L.line_number() == Line) {
+ LineStr = *L;
+ Loc = SMLoc::getFromPointer(LineStr.data());
+ auto Indent = LineStr.find(Error.getLineContents());
+ if (Indent != StringRef::npos)
+ Column += Indent;
+ break;
+ }
+ }
+
+ return SMDiagnostic(SM, Loc, Filename, Line, Column, Error.getKind(),
+ Error.getMessage(), LineStr, Error.getRanges(),
+ Error.getFixIts());
+}
+
+MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
+ : Impl(std::move(Impl)) {}
+
+MIRParser::~MIRParser() = default;
+
+std::unique_ptr<Module>
+MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
+ return Impl->parseIRModule(DataLayoutCallback);
+}
+
+bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+ return Impl->parseMachineFunctions(M, MMI);
+}
+
+std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(
+ StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction) {
+ auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
+ if (std::error_code EC = FileOrErr.getError()) {
+ Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + EC.message());
+ return nullptr;
+ }
+ return createMIRParser(std::move(FileOrErr.get()), Context,
+ ProcessIRFunction);
+}
+
+std::unique_ptr<MIRParser>
+llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
+ LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction) {
+ auto Filename = Contents->getBufferIdentifier();
+ if (Context.shouldDiscardValueNames()) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error,
+ SMDiagnostic(
+ Filename, SourceMgr::DK_Error,
+ "Can't read MIR with a Context that discards named Values")));
+ return nullptr;
+ }
+ return std::make_unique<MIRParser>(std::make_unique<MIRParserImpl>(
+ std::move(Contents), Filename, Context, ProcessIRFunction));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
new file mode 100644
index 000000000000..b91d9c4727fc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -0,0 +1,989 @@
+//===- MIRPrinter.cpp - MIR serialization format printer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that prints out the LLVM IR and machine
+// functions using the MIR serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleSlotTracker.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<bool> SimplifyMIR(
+ "simplify-mir", cl::Hidden,
+ cl::desc("Leave out unnecessary information when printing MIR"));
+
+static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true),
+ cl::desc("Print MIR debug-locations"));
+
+namespace {
+
+/// This structure describes how to print out stack object references.
+struct FrameIndexOperand {
+ std::string Name;
+ unsigned ID;
+ bool IsFixed;
+
+ FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed)
+ : Name(Name.str()), ID(ID), IsFixed(IsFixed) {}
+
+ /// Return an ordinary stack object reference.
+ static FrameIndexOperand create(StringRef Name, unsigned ID) {
+ return FrameIndexOperand(Name, ID, /*IsFixed=*/false);
+ }
+
+ /// Return a fixed stack object reference.
+ static FrameIndexOperand createFixed(unsigned ID) {
+ return FrameIndexOperand("", ID, /*IsFixed=*/true);
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+/// This class prints out the machine functions using the MIR serialization
+/// format.
+class MIRPrinter {
+ raw_ostream &OS;
+ DenseMap<const uint32_t *, unsigned> RegisterMaskIds;
+ /// Maps from stack object indices to operand indices which will be used when
+ /// printing frame index machine operands.
+ DenseMap<int, FrameIndexOperand> StackObjectOperandMapping;
+
+public:
+ MIRPrinter(raw_ostream &OS) : OS(OS) {}
+
+ void print(const MachineFunction &MF);
+
+ void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI);
+ void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI);
+ void convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool);
+ void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI);
+ void convertStackObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF, ModuleSlotTracker &MST);
+ void convertEntryValueObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST);
+ void convertCallSiteObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST);
+ void convertMachineMetadataNodes(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ MachineModuleSlotTracker &MST);
+
+private:
+ void initRegisterMaskIds(const MachineFunction &MF);
+};
+
+/// This class prints out the machine instructions using the MIR serialization
+/// format.
+class MIPrinter {
+ raw_ostream &OS;
+ ModuleSlotTracker &MST;
+ const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
+ /// Synchronization scope names registered with LLVMContext.
+ SmallVector<StringRef, 8> SSNs;
+
+ bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const;
+ bool canPredictSuccessors(const MachineBasicBlock &MBB) const;
+
+public:
+ MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
+ const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds,
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping)
+ : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds),
+ StackObjectOperandMapping(StackObjectOperandMapping) {}
+
+ void print(const MachineBasicBlock &MBB);
+
+ void print(const MachineInstr &MI);
+ void printStackObjectReference(int FrameIndex);
+ void print(const MachineInstr &MI, unsigned OpIdx,
+ const TargetRegisterInfo *TRI, const TargetInstrInfo *TII,
+ bool ShouldPrintRegisterTies, LLT TypeToPrint,
+ bool PrintDef = true);
+};
+
+} // end namespace llvm
+
+namespace llvm {
+namespace yaml {
+
+/// This struct serializes the LLVM IR module.
+template <> struct BlockScalarTraits<Module> {
+ static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
+ Mod.print(OS, nullptr);
+ }
+
+ static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
+ llvm_unreachable("LLVM Module is supposed to be parsed separately");
+ return "";
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+static void printRegMIR(unsigned Reg, yaml::StringValue &Dest,
+ const TargetRegisterInfo *TRI) {
+ raw_string_ostream OS(Dest.Value);
+ OS << printReg(Reg, TRI);
+}
+
+void MIRPrinter::print(const MachineFunction &MF) {
+ initRegisterMaskIds(MF);
+
+ yaml::MachineFunction YamlMF;
+ YamlMF.Name = MF.getName();
+ YamlMF.Alignment = MF.getAlignment();
+ YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.HasWinCFI = MF.hasWinCFI();
+
+ YamlMF.CallsEHReturn = MF.callsEHReturn();
+ YamlMF.CallsUnwindInit = MF.callsUnwindInit();
+ YamlMF.HasEHCatchret = MF.hasEHCatchret();
+ YamlMF.HasEHScopes = MF.hasEHScopes();
+ YamlMF.HasEHFunclets = MF.hasEHFunclets();
+ YamlMF.IsOutlined = MF.isOutlined();
+ YamlMF.UseDebugInstrRef = MF.useDebugInstrRef();
+
+ YamlMF.Legalized = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ YamlMF.RegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ YamlMF.Selected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
+ YamlMF.FailedISel = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel);
+ YamlMF.FailsVerification = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification);
+ YamlMF.TracksDebugUserValues = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
+
+ convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
+ MachineModuleSlotTracker MST(&MF);
+ MST.incorporateFunction(MF.getFunction());
+ convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
+ convertStackObjects(YamlMF, MF, MST);
+ convertEntryValueObjects(YamlMF, MF, MST);
+ convertCallSiteObjects(YamlMF, MF, MST);
+ for (const auto &Sub : MF.DebugValueSubstitutions) {
+ const auto &SubSrc = Sub.Src;
+ const auto &SubDest = Sub.Dest;
+ YamlMF.DebugValueSubstitutions.push_back({SubSrc.first, SubSrc.second,
+ SubDest.first,
+ SubDest.second,
+ Sub.Subreg});
+ }
+ if (const auto *ConstantPool = MF.getConstantPool())
+ convert(YamlMF, *ConstantPool);
+ if (const auto *JumpTableInfo = MF.getJumpTableInfo())
+ convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+
+ const TargetMachine &TM = MF.getTarget();
+ YamlMF.MachineFuncInfo =
+ std::unique_ptr<yaml::MachineFunctionInfo>(TM.convertFuncInfoToYAML(MF));
+
+ raw_string_ostream StrOS(YamlMF.Body.Value.Value);
+ bool IsNewlineNeeded = false;
+ for (const auto &MBB : MF) {
+ if (IsNewlineNeeded)
+ StrOS << "\n";
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .print(MBB);
+ IsNewlineNeeded = true;
+ }
+ StrOS.flush();
+  // Convert the machine metadata collected while printing the machine
+  // function.
+ convertMachineMetadataNodes(YamlMF, MF, MST);
+
+ yaml::Output Out(OS);
+ if (!SimplifyMIR)
+ Out.setWriteDefaultValues(true);
+ Out << YamlMF;
+}
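+
+// For orientation (an illustrative sketch, not normative output), the YAML
+// document emitted above has roughly this shape; the exact set of fields
+// depends on the function and on -simplify-mir:
+//   ---
+//   name:            foo
+//   tracksRegLiveness: true
+//   body:             |
+//     bb.0.entry:
+//       ...
+//   ...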
+
+static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ assert(RegMask && "Can't print an empty register mask");
+ OS << StringRef("CustomRegMask(");
+
+ bool IsRegInRegMaskFound = false;
+ for (int I = 0, E = TRI->getNumRegs(); I < E; I++) {
+    // Check whether the register is set in the regmask.
+ if (RegMask[I / 32] & (1u << (I % 32))) {
+ if (IsRegInRegMaskFound)
+ OS << ',';
+ OS << printReg(I, TRI);
+ IsRegInRegMaskFound = true;
+ }
+ }
+
+ OS << ')';
+}
+
+static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest,
+ const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
+ raw_string_ostream OS(Dest.Value);
+ OS << printRegClassOrBank(Reg, RegInfo, TRI);
+}
+
+template <typename T>
+static void
+printStackObjectDbgInfo(const MachineFunction::VariableDbgInfo &DebugVar,
+ T &Object, ModuleSlotTracker &MST) {
+ std::array<std::string *, 3> Outputs{{&Object.DebugVar.Value,
+ &Object.DebugExpr.Value,
+ &Object.DebugLoc.Value}};
+ std::array<const Metadata *, 3> Metas{{DebugVar.Var,
+ DebugVar.Expr,
+ DebugVar.Loc}};
+ for (unsigned i = 0; i < 3; ++i) {
+ raw_string_ostream StrOS(*Outputs[i]);
+ Metas[i]->printAsOperand(StrOS, MST);
+ }
+}
+
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+ const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
+ MF.TracksRegLiveness = RegInfo.tracksLiveness();
+
+ // Print the virtual register definitions.
+ for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ yaml::VirtualRegisterDefinition VReg;
+ VReg.ID = I;
+ if (RegInfo.getVRegName(Reg) != "")
+ continue;
+ ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
+ Register PreferredReg = RegInfo.getSimpleHint(Reg);
+ if (PreferredReg)
+ printRegMIR(PreferredReg, VReg.PreferredRegister, TRI);
+ MF.VirtualRegisters.push_back(VReg);
+ }
+
+ // Print the live ins.
+ for (std::pair<unsigned, unsigned> LI : RegInfo.liveins()) {
+ yaml::MachineFunctionLiveIn LiveIn;
+ printRegMIR(LI.first, LiveIn.Register, TRI);
+ if (LI.second)
+ printRegMIR(LI.second, LiveIn.VirtualRegister, TRI);
+ MF.LiveIns.push_back(LiveIn);
+ }
+
+  // Print the callee saved registers.
+ if (RegInfo.isUpdatedCSRsInitialized()) {
+ const MCPhysReg *CalleeSavedRegs = RegInfo.getCalleeSavedRegs();
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) {
+ yaml::FlowStringValue Reg;
+ printRegMIR(*I, Reg, TRI);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
+ }
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI) {
+ YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
+ YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
+ YamlMFI.HasStackMap = MFI.hasStackMap();
+ YamlMFI.HasPatchPoint = MFI.hasPatchPoint();
+ YamlMFI.StackSize = MFI.getStackSize();
+ YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment();
+ YamlMFI.MaxAlignment = MFI.getMaxAlign().value();
+ YamlMFI.AdjustsStack = MFI.adjustsStack();
+ YamlMFI.HasCalls = MFI.hasCalls();
+ YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
+ ? MFI.getMaxCallFrameSize() : ~0u;
+ YamlMFI.CVBytesOfCalleeSavedRegisters =
+ MFI.getCVBytesOfCalleeSavedRegisters();
+ YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
+ YamlMFI.HasVAStart = MFI.hasVAStart();
+ YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+ YamlMFI.HasTailCall = MFI.hasTailCall();
+ YamlMFI.LocalFrameSize = MFI.getLocalFrameSize();
+ if (MFI.getSavePoint()) {
+ raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
+ StrOS << printMBBReference(*MFI.getSavePoint());
+ }
+ if (MFI.getRestorePoint()) {
+ raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
+ StrOS << printMBBReference(*MFI.getRestorePoint());
+ }
+}
+
+void MIRPrinter::convertEntryValueObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (const MachineFunction::VariableDbgInfo &DebugVar :
+ MF.getEntryValueVariableDbgInfo()) {
+ yaml::EntryValueObject &Obj = YMF.EntryValueObjects.emplace_back();
+ printStackObjectDbgInfo(DebugVar, Obj, MST);
+ MCRegister EntryValReg = DebugVar.getEntryValueRegister();
+ printRegMIR(EntryValReg, Obj.EntryValueRegister, TRI);
+ }
+}
+
+void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Process fixed stack objects.
+ assert(YMF.FixedStackObjects.empty());
+ SmallVector<int, 32> FixedStackObjectsIdx;
+ const int BeginIdx = MFI.getObjectIndexBegin();
+ if (BeginIdx < 0)
+ FixedStackObjectsIdx.reserve(-BeginIdx);
+
+ unsigned ID = 0;
+ for (int I = BeginIdx; I < 0; ++I, ++ID) {
+    FixedStackObjectsIdx.push_back(-1); // Fill index for a possibly dead object.
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ yaml::FixedMachineStackObject YamlObject;
+ YamlObject.ID = ID;
+ YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
+ ? yaml::FixedMachineStackObject::SpillSlot
+ : yaml::FixedMachineStackObject::DefaultType;
+ YamlObject.Offset = MFI.getObjectOffset(I);
+ YamlObject.Size = MFI.getObjectSize(I);
+ YamlObject.Alignment = MFI.getObjectAlign(I);
+ YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
+ YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
+ YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
+    // Save the ID's position in the FixedStackObjects storage vector.
+ FixedStackObjectsIdx[ID] = YMF.FixedStackObjects.size();
+ YMF.FixedStackObjects.push_back(YamlObject);
+ StackObjectOperandMapping.insert(
+ std::make_pair(I, FrameIndexOperand::createFixed(ID)));
+ }
+
+ // Process ordinary stack objects.
+ assert(YMF.StackObjects.empty());
+ SmallVector<unsigned, 32> StackObjectsIdx;
+ const int EndIdx = MFI.getObjectIndexEnd();
+ if (EndIdx > 0)
+ StackObjectsIdx.reserve(EndIdx);
+ ID = 0;
+ for (int I = 0; I < EndIdx; ++I, ++ID) {
+    StackObjectsIdx.push_back(-1); // Fill index for a possibly dead object.
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ yaml::MachineStackObject YamlObject;
+ YamlObject.ID = ID;
+ if (const auto *Alloca = MFI.getObjectAllocation(I))
+ YamlObject.Name.Value = std::string(
+ Alloca->hasName() ? Alloca->getName() : "");
+ YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
+ ? yaml::MachineStackObject::SpillSlot
+ : MFI.isVariableSizedObjectIndex(I)
+ ? yaml::MachineStackObject::VariableSized
+ : yaml::MachineStackObject::DefaultType;
+ YamlObject.Offset = MFI.getObjectOffset(I);
+ YamlObject.Size = MFI.getObjectSize(I);
+ YamlObject.Alignment = MFI.getObjectAlign(I);
+ YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
+
+    // Save the ID's position in the StackObjects storage vector.
+ StackObjectsIdx[ID] = YMF.StackObjects.size();
+ YMF.StackObjects.push_back(YamlObject);
+ StackObjectOperandMapping.insert(std::make_pair(
+ I, FrameIndexOperand::create(YamlObject.Name.Value, ID)));
+ }
+
+ for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+ const int FrameIdx = CSInfo.getFrameIdx();
+ if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(FrameIdx))
+ continue;
+
+ yaml::StringValue Reg;
+ printRegMIR(CSInfo.getReg(), Reg, TRI);
+ if (!CSInfo.isSpilledToReg()) {
+ assert(FrameIdx >= MFI.getObjectIndexBegin() &&
+ FrameIdx < MFI.getObjectIndexEnd() &&
+ "Invalid stack object index");
+ if (FrameIdx < 0) { // Negative index means fixed objects.
+ auto &Object =
+ YMF.FixedStackObjects
+ [FixedStackObjectsIdx[FrameIdx + MFI.getNumFixedObjects()]];
+ Object.CalleeSavedRegister = Reg;
+ Object.CalleeSavedRestored = CSInfo.isRestored();
+ } else {
+ auto &Object = YMF.StackObjects[StackObjectsIdx[FrameIdx]];
+ Object.CalleeSavedRegister = Reg;
+ Object.CalleeSavedRestored = CSInfo.isRestored();
+ }
+ }
+ }
+ for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
+ auto LocalObject = MFI.getLocalFrameObjectMap(I);
+ assert(LocalObject.first >= 0 && "Expected a locally mapped stack object");
+ YMF.StackObjects[StackObjectsIdx[LocalObject.first]].LocalOffset =
+ LocalObject.second;
+ }
+
+ // Print the stack object references in the frame information class after
+ // converting the stack objects.
+ if (MFI.hasStackProtectorIndex()) {
+ raw_string_ostream StrOS(YMF.FrameInfo.StackProtector.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getStackProtectorIndex());
+ }
+
+ if (MFI.hasFunctionContextIndex()) {
+ raw_string_ostream StrOS(YMF.FrameInfo.FunctionContext.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getFunctionContextIndex());
+ }
+
+ // Print the debug variable information.
+ for (const MachineFunction::VariableDbgInfo &DebugVar :
+ MF.getInStackSlotVariableDbgInfo()) {
+ int Idx = DebugVar.getStackSlot();
+ assert(Idx >= MFI.getObjectIndexBegin() && Idx < MFI.getObjectIndexEnd() &&
+ "Invalid stack object index");
+ if (Idx < 0) { // Negative index means fixed objects.
+ auto &Object =
+ YMF.FixedStackObjects[FixedStackObjectsIdx[Idx +
+ MFI.getNumFixedObjects()]];
+ printStackObjectDbgInfo(DebugVar, Object, MST);
+ } else {
+ auto &Object = YMF.StackObjects[StackObjectsIdx[Idx]];
+ printStackObjectDbgInfo(DebugVar, Object, MST);
+ }
+ }
+}
+
+void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ for (auto CSInfo : MF.getCallSitesInfo()) {
+ yaml::CallSiteInfo YmlCS;
+ yaml::CallSiteInfo::MachineInstrLoc CallLocation;
+
+ // Prepare instruction position.
+ MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator();
+ CallLocation.BlockNum = CallI->getParent()->getNumber();
+ // Get call instruction offset from the beginning of block.
+ CallLocation.Offset =
+ std::distance(CallI->getParent()->instr_begin(), CallI);
+ YmlCS.CallLocation = CallLocation;
+    // Construct call arguments and their forwarding register info.
+ for (auto ArgReg : CSInfo.second) {
+ yaml::CallSiteInfo::ArgRegPair YmlArgReg;
+ YmlArgReg.ArgNo = ArgReg.ArgNo;
+ printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI);
+ YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg);
+ }
+ YMF.CallSitesInfo.push_back(YmlCS);
+ }
+
+ // Sort call info by position of call instructions.
+ llvm::sort(YMF.CallSitesInfo.begin(), YMF.CallSitesInfo.end(),
+ [](yaml::CallSiteInfo A, yaml::CallSiteInfo B) {
+ if (A.CallLocation.BlockNum == B.CallLocation.BlockNum)
+ return A.CallLocation.Offset < B.CallLocation.Offset;
+ return A.CallLocation.BlockNum < B.CallLocation.BlockNum;
+ });
+}
+
+void MIRPrinter::convertMachineMetadataNodes(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ MachineModuleSlotTracker &MST) {
+ MachineModuleSlotTracker::MachineMDNodeListType MDList;
+ MST.collectMachineMDNodes(MDList);
+ for (auto &MD : MDList) {
+ std::string NS;
+ raw_string_ostream StrOS(NS);
+ MD.second->print(StrOS, MST, MF.getFunction().getParent());
+ YMF.MachineMetadataNodes.push_back(StrOS.str());
+ }
+}
+
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool) {
+ unsigned ID = 0;
+ for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
+ std::string Str;
+ raw_string_ostream StrOS(Str);
+ if (Constant.isMachineConstantPoolEntry()) {
+ Constant.Val.MachineCPVal->print(StrOS);
+ } else {
+ Constant.Val.ConstVal->printAsOperand(StrOS);
+ }
+
+ yaml::MachineConstantPoolValue YamlConstant;
+ YamlConstant.ID = ID++;
+ YamlConstant.Value = StrOS.str();
+ YamlConstant.Alignment = Constant.getAlign();
+ YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry();
+
+ MF.Constants.push_back(YamlConstant);
+ }
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI) {
+ YamlJTI.Kind = JTI.getEntryKind();
+ unsigned ID = 0;
+ for (const auto &Table : JTI.getJumpTables()) {
+ std::string Str;
+ yaml::MachineJumpTable::Entry Entry;
+ Entry.ID = ID++;
+ for (const auto *MBB : Table.MBBs) {
+ raw_string_ostream StrOS(Str);
+ StrOS << printMBBReference(*MBB);
+ Entry.Blocks.push_back(StrOS.str());
+ Str.clear();
+ }
+ YamlJTI.Entries.push_back(Entry);
+ }
+}
+
+void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned I = 0;
+ for (const uint32_t *Mask : TRI->getRegMasks())
+ RegisterMaskIds.insert(std::make_pair(Mask, I++));
+}
+
+void llvm::guessSuccessors(const MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineBasicBlock*> &Result,
+ bool &IsFallthrough) {
+ SmallPtrSet<MachineBasicBlock*,8> Seen;
+
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isPHI())
+ continue;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isMBB())
+ continue;
+ MachineBasicBlock *Succ = MO.getMBB();
+ auto RP = Seen.insert(Succ);
+ if (RP.second)
+ Result.push_back(Succ);
+ }
+ }
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ IsFallthrough = I == MBB.end() || !I->isBarrier();
+}
+
+bool
+MIPrinter::canPredictBranchProbabilities(const MachineBasicBlock &MBB) const {
+ if (MBB.succ_size() <= 1)
+ return true;
+ if (!MBB.hasSuccessorProbabilities())
+ return true;
+
+ SmallVector<BranchProbability,8> Normalized(MBB.Probs.begin(),
+ MBB.Probs.end());
+ BranchProbability::normalizeProbabilities(Normalized.begin(),
+ Normalized.end());
+ SmallVector<BranchProbability,8> Equal(Normalized.size());
+ BranchProbability::normalizeProbabilities(Equal.begin(), Equal.end());
+
+ return std::equal(Normalized.begin(), Normalized.end(), Equal.begin());
+}
+
+bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const {
+ SmallVector<MachineBasicBlock*,8> GuessedSuccs;
+ bool GuessedFallthrough;
+ guessSuccessors(MBB, GuessedSuccs, GuessedFallthrough);
+ if (GuessedFallthrough) {
+ const MachineFunction &MF = *MBB.getParent();
+ MachineFunction::const_iterator NextI = std::next(MBB.getIterator());
+ if (NextI != MF.end()) {
+ MachineBasicBlock *Next = const_cast<MachineBasicBlock*>(&*NextI);
+ if (!is_contained(GuessedSuccs, Next))
+ GuessedSuccs.push_back(Next);
+ }
+ }
+ if (GuessedSuccs.size() != MBB.succ_size())
+ return false;
+ return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin());
+}
+
+void MIPrinter::print(const MachineBasicBlock &MBB) {
+ assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+ MBB.printName(OS,
+ MachineBasicBlock::PrintNameIr |
+ MachineBasicBlock::PrintNameAttributes,
+ &MST);
+ OS << ":\n";
+
+ bool HasLineAttributes = false;
+ // Print the successors
+ bool canPredictProbs = canPredictBranchProbabilities(MBB);
+ // Even if the list of successors is empty, if we cannot guess it,
+ // we need to print it to tell the parser that the list is empty.
+  // This is needed because MIR models unreachable code as empty blocks
+  // with an empty successor list. If the parser saw such a block without
+  // an explicit successor list, it would guess that the code falls
+  // through.
+ if ((!MBB.succ_empty() && !SimplifyMIR) || !canPredictProbs ||
+ !canPredictSuccessors(MBB)) {
+ OS.indent(2) << "successors: ";
+ for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
+ if (I != MBB.succ_begin())
+ OS << ", ";
+ OS << printMBBReference(**I);
+ if (!SimplifyMIR || !canPredictProbs)
+ OS << '('
+ << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator())
+ << ')';
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ // Print the live in registers.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ if (!MBB.livein_empty()) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ OS.indent(2) << "liveins: ";
+ bool First = true;
+ for (const auto &LI : MBB.liveins_dbg()) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << printReg(LI.PhysReg, &TRI);
+ if (!LI.LaneMask.all())
+ OS << ":0x" << PrintLaneMask(LI.LaneMask);
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ if (HasLineAttributes)
+ OS << "\n";
+ bool IsInBundle = false;
+ for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (IsInBundle && !MI.isInsideBundle()) {
+ OS.indent(2) << "}\n";
+ IsInBundle = false;
+ }
+ OS.indent(IsInBundle ? 4 : 2);
+ print(MI);
+ if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+ OS << " {";
+ IsInBundle = true;
+ }
+ OS << "\n";
+ }
+ if (IsInBundle)
+ OS.indent(2) << "}\n";
+}
+
+void MIPrinter::print(const MachineInstr &MI) {
+ const auto *MF = MI.getMF();
+ const auto &MRI = MF->getRegInfo();
+ const auto &SubTarget = MF->getSubtarget();
+ const auto *TRI = SubTarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ const auto *TII = SubTarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ if (MI.isCFIInstruction())
+ assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+
+ SmallBitVector PrintedTypes(8);
+ bool ShouldPrintRegisterTies = MI.hasComplexRegisterTies();
+ unsigned I = 0, E = MI.getNumOperands();
+ for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
+ !MI.getOperand(I).isImplicit();
+ ++I) {
+ if (I)
+ OS << ", ";
+ print(MI, I, TRI, TII, ShouldPrintRegisterTies,
+ MI.getTypeToPrint(I, PrintedTypes, MRI),
+ /*PrintDef=*/false);
+ }
+
+ if (I)
+ OS << " = ";
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ OS << "frame-setup ";
+ if (MI.getFlag(MachineInstr::FrameDestroy))
+ OS << "frame-destroy ";
+ if (MI.getFlag(MachineInstr::FmNoNans))
+ OS << "nnan ";
+ if (MI.getFlag(MachineInstr::FmNoInfs))
+ OS << "ninf ";
+ if (MI.getFlag(MachineInstr::FmNsz))
+ OS << "nsz ";
+ if (MI.getFlag(MachineInstr::FmArcp))
+ OS << "arcp ";
+ if (MI.getFlag(MachineInstr::FmContract))
+ OS << "contract ";
+ if (MI.getFlag(MachineInstr::FmAfn))
+ OS << "afn ";
+ if (MI.getFlag(MachineInstr::FmReassoc))
+ OS << "reassoc ";
+ if (MI.getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (MI.getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (MI.getFlag(MachineInstr::IsExact))
+ OS << "exact ";
+ if (MI.getFlag(MachineInstr::NoFPExcept))
+ OS << "nofpexcept ";
+ if (MI.getFlag(MachineInstr::NoMerge))
+ OS << "nomerge ";
+ if (MI.getFlag(MachineInstr::Unpredictable))
+ OS << "unpredictable ";
+
+ OS << TII->getName(MI.getOpcode());
+ if (I < E)
+ OS << ' ';
+
+ bool NeedComma = false;
+ for (; I < E; ++I) {
+ if (NeedComma)
+ OS << ", ";
+ print(MI, I, TRI, TII, ShouldPrintRegisterTies,
+ MI.getTypeToPrint(I, PrintedTypes, MRI));
+ NeedComma = true;
+ }
+
+  // Print any optional symbols attached to this instruction as if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ NeedComma = true;
+ }
+ if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ NeedComma = true;
+ }
+ if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " heap-alloc-marker ";
+ HeapAllocMarker->printAsOperand(OS, MST);
+ NeedComma = true;
+ }
+ if (MDNode *PCSections = MI.getPCSections()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " pcsections ";
+ PCSections->printAsOperand(OS, MST);
+ NeedComma = true;
+ }
+ if (uint32_t CFIType = MI.getCFIType()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " cfi-type " << CFIType;
+ NeedComma = true;
+ }
+
+ if (auto Num = MI.peekDebugInstrNum()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-instr-number " << Num;
+ NeedComma = true;
+ }
+
+ if (PrintLocations) {
+ if (const DebugLoc &DL = MI.getDebugLoc()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-location ";
+ DL->printAsOperand(OS, MST);
+ }
+ }
+
+ if (!MI.memoperands_empty()) {
+ OS << " :: ";
+ const LLVMContext &Context = MF->getFunction().getContext();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ bool NeedComma = false;
+ for (const auto *Op : MI.memoperands()) {
+ if (NeedComma)
+ OS << ", ";
+ Op->print(OS, MST, SSNs, Context, &MFI, TII);
+ NeedComma = true;
+ }
+ }
+}
+
+void MIPrinter::printStackObjectReference(int FrameIndex) {
+ auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
+ assert(ObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid frame index");
+ const FrameIndexOperand &Operand = ObjectInfo->second;
+ MachineOperand::printStackObjectReference(OS, Operand.ID, Operand.IsFixed,
+ Operand.Name);
+}
+
+static std::string formatOperandComment(std::string Comment) {
+ if (Comment.empty())
+ return Comment;
+ return std::string(" /* " + Comment + " */");
+}
+
+void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ bool ShouldPrintRegisterTies, LLT TypeToPrint,
+ bool PrintDef) {
+ const MachineOperand &Op = MI.getOperand(OpIdx);
+ std::string MOComment = TII->createMIROperandComment(MI, Op, OpIdx, TRI);
+
+ switch (Op.getType()) {
+ case MachineOperand::MO_Immediate:
+ if (MI.isOperandSubregIdx(OpIdx)) {
+ MachineOperand::printTargetFlags(OS, Op);
+ MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI);
+ break;
+ }
+ [[fallthrough]];
+ case MachineOperand::MO_Register:
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate:
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_RegisterLiveOut:
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ case MachineOperand::MO_CFIIndex:
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_DbgInstrRef:
+ case MachineOperand::MO_ShuffleMask: {
+ unsigned TiedOperandIdx = 0;
+ if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
+ TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
+ const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
+ Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);
+ OS << formatOperandComment(MOComment);
+ break;
+ }
+ case MachineOperand::MO_FrameIndex:
+ printStackObjectReference(Op.getIndex());
+ break;
+ case MachineOperand::MO_RegisterMask: {
+ auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
+ if (RegMaskInfo != RegisterMaskIds.end())
+ OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower();
+ else
+ printCustomRegMask(Op.getRegMask(), OS, TRI);
+ break;
+ }
+ }
+}
+
+void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
+ ModuleSlotTracker &MST) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
+ MachineOperand::printIRSlotNumber(OS, Slot);
+}
+
+void llvm::printMIR(raw_ostream &OS, const Module &M) {
+ yaml::Output Out(OS);
+ Out << const_cast<Module &>(M);
+}
+
+void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
+ MIRPrinter Printer(OS);
+ Printer.print(MF);
+}
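+
+// Usage sketch (illustrative; dumpFunctionMIR is a hypothetical helper, not
+// part of this file): a pass that already has a MachineFunction can emit its
+// textual MIR directly, while printMIR(OS, M) above prints only the embedded
+// LLVM IR module:
+//
+//   static void dumpFunctionMIR(const MachineFunction &MF) {
+//     printMIR(llvm::errs(), MF);
+//   }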
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
new file mode 100644
index 000000000000..1b5a9ade0871
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -0,0 +1,70 @@
+//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints out the LLVM module using the MIR
+// serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// This pass prints out the LLVM IR module and its machine functions to an
+/// output stream using the MIR serialization format.
+struct MIRPrintingPass : public MachineFunctionPass {
+ static char ID;
+ raw_ostream &OS;
+ std::string MachineFunctions;
+
+ MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {}
+ MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {}
+
+ StringRef getPassName() const override { return "MIR Printing Pass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ std::string Str;
+ raw_string_ostream StrOS(Str);
+ printMIR(StrOS, MF);
+ MachineFunctions.append(StrOS.str());
+ return false;
+ }
+
+ bool doFinalization(Module &M) override {
+ printMIR(OS, M);
+ OS << MachineFunctions;
+ return false;
+ }
+};
+
+char MIRPrintingPass::ID = 0;
+
+} // end anonymous namespace
+
+char &llvm::MIRPrintingPassID = MIRPrintingPass::ID;
+INITIALIZE_PASS(MIRPrintingPass, "mir-printer", "MIR Printer", false, false)
+
+namespace llvm {
+
+MachineFunctionPass *createPrintMIRPass(raw_ostream &OS) {
+ return new MIRPrintingPass(OS);
+}
+
+} // end namespace llvm
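+
+// Note (an informal sketch, not upstream documentation): this pass produces a
+// complete MIR document: doFinalization() prints the embedded IR module first,
+// followed by the machine functions accumulated by runOnMachineFunction().
+// A driver holding a raw_ostream OS could schedule it with something like:
+//   PM.add(createPrintMIRPass(OS)); // PM: a legacy pass manager running codegen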
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
new file mode 100644
index 000000000000..96f8589e682d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -0,0 +1,406 @@
+//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MIRSampleProfile loader, mainly
+// for flow sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRSampleProfile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include <optional>
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::sampleprofutil;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "fs-profile-loader"
+
+static cl::opt<bool> ShowFSBranchProb(
+ "show-fs-branchprob", cl::Hidden, cl::init(false),
+ cl::desc("Print setting flow sensitive branch probabilities"));
+static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
+ "fs-profile-debug-prob-diff-threshold", cl::init(10),
+    cl::desc("Only show debug message if the branch probability is greater than "
+ "this value (in percentage)."));
+
+static cl::opt<unsigned> FSProfileDebugBWThreshold(
+    "fs-profile-debug-bw-threshold", cl::init(10000),
+    cl::desc("Only show debug message if the source branch weight is greater "
+             "than this value."));
+
+static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI before MIR loader"));
+static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI after MIR loader"));
+
+extern cl::opt<bool> ImprovedFSDiscriminator;
+char MIRProfileLoaderPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
+ "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
+
+FunctionPass *
+llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile,
+ FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS));
+}
+
+namespace llvm {
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi={none | fraction | integer | count}
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+std::optional<PseudoProbe> extractProbe(const MachineInstr &MI) {
+ if (MI.isPseudoProbe()) {
+ PseudoProbe Probe;
+ Probe.Id = MI.getOperand(1).getImm();
+ Probe.Type = MI.getOperand(2).getImm();
+ Probe.Attr = MI.getOperand(3).getImm();
+ Probe.Factor = 1;
+ DILocation *DebugLoc = MI.getDebugLoc();
+ Probe.Discriminator = DebugLoc ? DebugLoc->getDiscriminator() : 0;
+ return Probe;
+ }
+
+ // Ignore callsite probes since they do not have FS discriminators.
+ return std::nullopt;
+}
+
+namespace afdo_detail {
+template <> struct IRTraits<MachineBasicBlock> {
+ using InstructionT = MachineInstr;
+ using BasicBlockT = MachineBasicBlock;
+ using FunctionT = MachineFunction;
+ using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
+ using LoopT = MachineLoop;
+ using LoopInfoPtrT = MachineLoopInfo *;
+ using DominatorTreePtrT = MachineDominatorTree *;
+ using PostDominatorTreePtrT = MachinePostDominatorTree *;
+ using PostDominatorTreeT = MachinePostDominatorTree;
+ using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
+ using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
+ using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
+ static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
+ return GraphTraits<const MachineFunction *>::getEntryNode(F);
+ }
+ static PredRangeT getPredecessors(MachineBasicBlock *BB) {
+ return BB->predecessors();
+ }
+ static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
+ return BB->successors();
+ }
+};
+} // namespace afdo_detail
+
+class MIRProfileLoader final
+ : public SampleProfileLoaderBaseImpl<MachineFunction> {
+public:
+ void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
+ MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
+ MachineOptimizationRemarkEmitter *MORE) {
+ DT = MDT;
+ PDT = MPDT;
+ LI = MLI;
+ BFI = MBFI;
+ ORE = MORE;
+ }
+ void setFSPass(FSDiscriminatorPass Pass) {
+ P = Pass;
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+ assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+ }
+
+ MIRProfileLoader(StringRef Name, StringRef RemapName,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
+ std::move(FS)) {}
+
+ void setBranchProbs(MachineFunction &F);
+ bool runOnFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool isValid() const { return ProfileIsValid; }
+
+protected:
+ friend class SampleCoverageTracker;
+
+  /// Holds the basic block frequency information.
+ MachineBlockFrequencyInfo *BFI;
+
+  /// The sequence number with which this pass is invoked, starting from 1.
+ FSDiscriminatorPass P;
+
+ // LowBit in the FS discriminator used by this instance. Note the number is
+  // 0-based. The base discriminator uses bit 0 to bit 11.
+ unsigned LowBit;
+  // HighBit in the FS discriminator used by this instance. Note the number
+ // is 0-based.
+ unsigned HighBit;
+
+ bool ProfileIsValid = true;
+ ErrorOr<uint64_t> getInstWeight(const MachineInstr &MI) override {
+ if (FunctionSamples::ProfileIsProbeBased)
+ return getProbeWeight(MI);
+ if (ImprovedFSDiscriminator && MI.isMetaInstruction())
+ return std::error_code();
+ return getInstWeightImpl(MI);
+ }
+};
+
+template <>
+void SampleProfileLoaderBaseImpl<MachineFunction>::computeDominanceAndLoopInfo(
+ MachineFunction &F) {}
+
+void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+ for (auto &BI : F) {
+ MachineBasicBlock *BB = &BI;
+ if (BB->succ_size() < 2)
+ continue;
+ const MachineBasicBlock *EC = EquivalenceClass[BB];
+ uint64_t BBWeight = BlockWeights[EC];
+ uint64_t SumEdgeWeight = 0;
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ Edge E = std::make_pair(BB, Succ);
+ SumEdgeWeight += EdgeWeights[E];
+ }
+
+ if (BBWeight != SumEdgeWeight) {
+      LLVM_DEBUG(dbgs() << "BBWeight is not equal to SumEdgeWeight: BBWeight="
+ << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+ << "\n");
+ BBWeight = SumEdgeWeight;
+ }
+ if (BBWeight == 0) {
+ LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ continue;
+ }
+
+#ifndef NDEBUG
+ uint64_t BBWeightOrig = BBWeight;
+#endif
+ uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+ uint32_t Factor = 1;
+ if (BBWeight > MaxWeight) {
+ Factor = BBWeight / MaxWeight + 1;
+ BBWeight /= Factor;
+ LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+ }
+
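+    // For example (illustrative numbers): a BBWeight of 6,000,000,000 exceeds
+    // UINT32_MAX, giving Factor = 2; BBWeight becomes 3,000,000,000 and every
+    // EdgeWeight in the loop below is halved as well, approximately preserving
+    // the EdgeWeight/BBWeight ratios that feed BranchProbability(EdgeWeight,
+    // BBWeight).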
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ MachineBasicBlock *Succ = *SI;
+ Edge E = std::make_pair(BB, Succ);
+ uint64_t EdgeWeight = EdgeWeights[E];
+ EdgeWeight /= Factor;
+
+      assert(BBWeight >= EdgeWeight &&
+             "EdgeWeight is larger than BBWeight -- should not happen.\n");
+
+ BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
+ BranchProbability NewProb(EdgeWeight, BBWeight);
+ if (OldProb == NewProb)
+ continue;
+ BB->setSuccProbability(SI, NewProb);
+#ifndef NDEBUG
+ if (!ShowFSBranchProb)
+ continue;
+ bool Show = false;
+ BranchProbability Diff;
+ if (OldProb > NewProb)
+ Diff = OldProb - NewProb;
+ else
+ Diff = NewProb - OldProb;
+ Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
+ Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
+
+ auto DIL = BB->findBranchDebugLoc();
+ auto SuccDIL = Succ->findBranchDebugLoc();
+ if (Show) {
+ dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+ << Succ->getNumber() << "): ";
+ if (DIL)
+ dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn();
+ if (SuccDIL)
+ dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
+ << ":" << SuccDIL->getColumn();
+ dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb
+ << "\n";
+ }
+#endif
+ }
+ }
+}
+
+bool MIRProfileLoader::doInitialization(Module &M) {
+ auto &Ctx = M.getContext();
+
+ auto ReaderOrErr = sampleprof::SampleProfileReader::create(
+ Filename, Ctx, *FS, P, RemappingFilename);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+
+ Reader = std::move(ReaderOrErr.get());
+ Reader->setModule(&M);
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+
+ // Load pseudo probe descriptors for probe-based function samples.
+ if (Reader->profileIsProbeBased()) {
+ ProbeManager = std::make_unique<PseudoProbeManager>(M);
+ if (!ProbeManager->moduleIsProbed(M)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+ // Do not load non-FS profiles. A line or probe can get a zero-valued
+  // discriminator in a certain pass, which could result in accidentally loading
+ // the corresponding base counter in the non-FS profile, while a non-zero
+ // discriminator would end up getting zero samples. This could in turn undo
+ // the sample distribution effort done by previous BFI maintenance and the
+ // probe distribution factor work for pseudo probes.
+ if (!Reader->profileIsFS())
+ return false;
+
+ Function &Func = MF.getFunction();
+ clearFunctionData(false);
+ Samples = Reader->getSamplesFor(Func);
+ if (!Samples || Samples->empty())
+ return false;
+
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (!ProbeManager->profileIsValid(MF.getFunction(), *Samples))
+ return false;
+ } else {
+ if (getFunctionLoc(MF) == 0)
+ return false;
+ }
+
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
+
+ // Set the new BPI, BFI.
+ setBranchProbs(MF);
+
+ return Changed;
+}
+
+} // namespace llvm
+
+MIRProfileLoaderPass::MIRProfileLoaderPass(
+ std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) {
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+
+ auto VFS = FS ? std::move(FS) : vfs::getRealFileSystem();
+ MIRSampleLoader = std::make_unique<MIRProfileLoader>(
+ FileName, RemappingFileName, std::move(VFS));
+ assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+}
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+ if (!MIRSampleLoader->isValid())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+ << MF.getFunction().getName() << "\n");
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MIRSampleLoader->setInitVals(
+ &getAnalysis<MachineDominatorTree>(),
+ &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+ MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+ MF.RenumberBlocks();
+ if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
+ }
+
+ bool Changed = MIRSampleLoader->runOnFunction(MF);
+ if (Changed)
+ MBFI->calculate(MF, *MBFI->getMBPI(), *&getAnalysis<MachineLoopInfo>());
+
+ if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+ }
+
+ return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+ << "\n");
+
+ MIRSampleLoader->setFSPass(P);
+ return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
new file mode 100644
index 000000000000..812d57984e6c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -0,0 +1,174 @@
+//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mir-vregnamer-utils"
+
+static cl::opt<bool>
+ UseStableNamerHash("mir-vreg-namer-use-stable-hash", cl::init(false),
+ cl::Hidden,
+ cl::desc("Use Stable Hashing for MIR VReg Renaming"));
+
+using VRegRenameMap = std::map<unsigned, unsigned>;
+
+bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) {
+ bool Changed = false;
+
+ for (const auto &E : VRM) {
+ Changed = Changed || !MRI.reg_empty(E.first);
+ MRI.replaceRegWith(E.first, E.second);
+ }
+
+ return Changed;
+}
+
+VRegRenameMap
+VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
+
+ StringMap<unsigned> VRegNameCollisionMap;
+
+ auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) {
+ if (!VRegNameCollisionMap.contains(Reg.getName()))
+ VRegNameCollisionMap[Reg.getName()] = 0;
+ const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()];
+ return Reg.getName() + "__" + std::to_string(Counter);
+ };
+
+ VRegRenameMap VRM;
+ for (const auto &VReg : VRegs) {
+ const unsigned Reg = VReg.getReg();
+ VRM[Reg] = createVirtualRegisterWithLowerName(Reg, GetUniqueVRegName(VReg));
+ }
+ return VRM;
+}
+
+std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
+ std::string S;
+ raw_string_ostream OS(S);
+
+ if (UseStableNamerHash) {
+ auto Hash = stableHashValue(MI, /* HashVRegs */ true,
+ /* HashConstantPoolIndices */ true,
+ /* HashMemOperands */ true);
+ assert(Hash && "Expected non-zero Hash");
+ OS << format_hex_no_prefix(Hash, 16, true);
+ return OS.str();
+ }
+
+  // Get a hashable artifact from a given MachineOperand (i.e., an unsigned).
+ auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned {
+ switch (MO.getType()) {
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getCImm()->getZExtValue());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(
+ MO.getType(), MO.getTargetFlags(),
+ MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ case MachineOperand::MO_Register:
+ if (MO.getReg().isVirtual())
+ return MRI.getVRegDef(MO.getReg())->getOpcode();
+ return MO.getReg();
+ case MachineOperand::MO_Immediate:
+ return MO.getImm();
+ case MachineOperand::MO_TargetIndex:
+ return MO.getOffset() | (MO.getTargetFlags() << 16);
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ return llvm::hash_value(MO);
+
+    // We could explicitly handle all the types of the MachineOperand here,
+    // but we can just return a common number until we find a compelling
+    // test case where this is bad. The only side effect here is contributing
+    // to a hash collision, but there's enough information (opcodes, other
+    // registers, etc.) that this will likely not be a problem.
+
+ // TODO: Handle the following Index/ID/Predicate cases. They can
+ // be hashed on in a stable manner.
+ case MachineOperand::MO_CFIIndex:
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+
+    // In the cases below we haven't found a way to produce an artifact that
+    // will result in a stable hash, in most cases because they are pointers.
+    // We want stable hashes because we want the hash to be the same run to
+    // run.
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ case MachineOperand::MO_ShuffleMask:
+ case MachineOperand::MO_DbgInstrRef:
+ return 0;
+ }
+ llvm_unreachable("Unexpected MachineOperandType.");
+ };
+
+ SmallVector<unsigned, 16> MIOperands = {MI.getOpcode(), MI.getFlags()};
+ llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO);
+
+ for (const auto *Op : MI.memoperands()) {
+ MIOperands.push_back((unsigned)Op->getSize());
+ MIOperands.push_back((unsigned)Op->getFlags());
+ MIOperands.push_back((unsigned)Op->getOffset());
+ MIOperands.push_back((unsigned)Op->getSuccessOrdering());
+ MIOperands.push_back((unsigned)Op->getAddrSpace());
+ MIOperands.push_back((unsigned)Op->getSyncScopeID());
+ MIOperands.push_back((unsigned)Op->getBaseAlign().value());
+ MIOperands.push_back((unsigned)Op->getFailureOrdering());
+ }
+
+ auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end());
+ OS << format_hex_no_prefix(HashMI, 16, true);
+ return OS.str();
+}
+
+unsigned VRegRenamer::createVirtualRegister(unsigned VReg) {
+ assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers");
+ std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg));
+ return createVirtualRegisterWithLowerName(VReg, Name);
+}
+
+bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) {
+ std::vector<NamedVReg> VRegs;
+ std::string Prefix = "bb" + std::to_string(CurrentBBNumber) + "_";
+ for (MachineInstr &Candidate : *MBB) {
+ // Don't rename stores/branches.
+ if (Candidate.mayStore() || Candidate.isBranch())
+ continue;
+ if (!Candidate.getNumOperands())
+ continue;
+ // Look for instructions that define VRegs in operand 0.
+ MachineOperand &MO = Candidate.getOperand(0);
+    // Skip non-register operands and instructions that define physical regs.
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ VRegs.push_back(
+ NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate)));
+ }
+
+ return VRegs.size() ? doVRegRenaming(getVRegRenameMap(VRegs)) : false;
+}
+
+unsigned VRegRenamer::createVirtualRegisterWithLowerName(unsigned VReg,
+ StringRef Name) {
+ std::string LowerName = Name.lower();
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg);
+ return RC ? MRI.createVirtualRegister(RC, LowerName)
+ : MRI.createGenericVirtualRegister(MRI.getType(VReg), LowerName);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
new file mode 100644
index 000000000000..a059bc5333c6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
@@ -0,0 +1,97 @@
+
+//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of these utilities is to abstract out parts of the MIRCanon pass
+// that are responsible for renaming virtual registers with the purpose of
+// sharing code with a MIRVRegNamer pass that could be the analog of the
+// opt -instnamer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
+#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
+
+#include "llvm/CodeGen/Register.h"
+#include <map>
+#include <vector>
+#include <string>
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class StringRef;
+
+/// VRegRenamer - This class is used for renaming vregs in a machine basic
+/// block according to the semantics of the instruction.
+class VRegRenamer {
+ class NamedVReg {
+ Register Reg;
+ std::string Name;
+
+ public:
+ NamedVReg(Register Reg, std::string Name = "") : Reg(Reg), Name(Name) {}
+ NamedVReg(std::string Name = "") : Reg(~0U), Name(Name) {}
+
+ const std::string &getName() const { return Name; }
+
+ Register getReg() const { return Reg; }
+ };
+
+ MachineRegisterInfo &MRI;
+
+ unsigned CurrentBBNumber = 0;
+
+  /// Given an instruction, construct a hash of its operands
+  /// along with its opcode.
+ /// When dealing with virtual registers, just hash the opcode of
+ /// the instruction defining that vreg.
+ /// Handle immediates, registers (physical and virtual) explicitly,
+ /// and return a common value for the other cases.
+  /// Instructions will be named in the following scheme:
+  /// bb<block_no>_hash_<collision_count>.
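+  ///
+  /// As a concrete illustration (derived from this implementation, not a
+  /// normative format): an instruction in basic block 1 whose operand hash
+  /// prints as 202a55ad32905572 produces the candidate name
+  /// "bb1_202a55ad32905572"; getVRegRenameMap() then appends "__<N>" to
+  /// disambiguate collisions, so the final vreg is named e.g.
+  /// %bb1_202a55ad32905572__1.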
+ std::string getInstructionOpcodeHash(MachineInstr &MI);
+
+ /// For all the VRegs that are candidates for renaming,
+ /// return a mapping from old vregs to new vregs with names.
+ std::map<unsigned, unsigned>
+ getVRegRenameMap(const std::vector<NamedVReg> &VRegs);
+
+ /// Perform replacing of registers based on the <old,new> vreg map.
+ bool doVRegRenaming(const std::map<unsigned, unsigned> &VRegRenameMap);
+
+ /// createVirtualRegister - Given an existing vreg, create a named vreg to
+ /// take its place. The name is determined by calling
+ /// getInstructionOpcodeHash.
+ unsigned createVirtualRegister(unsigned VReg);
+
+ /// Create a vreg with name and return it.
+ unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name);
+
+ /// Linearly traverse the MachineBasicBlock and rename each instruction's
+ /// vreg definition based on the semantics of the instruction.
+  /// Names are of the form bb<BBNum>_hash_[0-9]+.
+ bool renameInstsInMBB(MachineBasicBlock *MBB);
+
+public:
+ VRegRenamer() = delete;
+ VRegRenamer(MachineRegisterInfo &MRI) : MRI(MRI) {}
+
+  /// Rename the vreg definitions in \p MBB, using \p BBNum (taken from the
+  /// BB traversal order) as the prefix for the new vreg names.
+ bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum) {
+ CurrentBBNumber = BBNum;
+ return renameInstsInMBB(MBB);
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRYamlMapping.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRYamlMapping.cpp
new file mode 100644
index 000000000000..b1a538cad8a0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRYamlMapping.cpp
@@ -0,0 +1,43 @@
+//===- MIRYamlMapping.cpp - Describe mapping between MIR and YAML ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mapping between various MIR data structures and
+// their corresponding YAML representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::yaml;
+
+FrameIndex::FrameIndex(int FI, const llvm::MachineFrameInfo &MFI) {
+ IsFixed = MFI.isFixedObjectIndex(FI);
+ if (IsFixed)
+ FI -= MFI.getObjectIndexBegin();
+ this->FI = FI;
+}
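+
+// Round-trip sketch (hypothetical frame layout): with two fixed objects,
+// getObjectIndexBegin() is -2, so fixed frame index -2 is encoded with
+// IsFixed == true and FI == 0; getFI() below adds getObjectIndexBegin() back
+// to recover -2. Non-fixed indices round-trip unchanged, subject to the
+// bounds checks.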
+
+// Returns the frame index, converting it back from its YAML encoding and
+// validating that it refers to an existing (fixed or non-fixed) object.
+Expected<int> FrameIndex::getFI(const llvm::MachineFrameInfo &MFI) const {
+ int FI = this->FI;
+ if (IsFixed) {
+ if (unsigned(FI) >= MFI.getNumFixedObjects())
+ return make_error<StringError>(
+ formatv("invalid fixed frame index {0}", FI).str(),
+ inconvertibleErrorCode());
+ FI += MFI.getObjectIndexBegin();
+ }
+ if (unsigned(FI + MFI.getNumFixedObjects()) >= MFI.getNumObjects())
+ return make_error<StringError>(formatv("invalid frame index {0}", FI).str(),
+ inconvertibleErrorCode());
+ return FI;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
new file mode 100644
index 000000000000..7b3746fde503
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -0,0 +1,1164 @@
+//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the ML eviction advisor and reward injection pass
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocGreedy.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
+#endif
+#include "MLRegallocEvictAdvisor.h"
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <array>
+#include <bitset>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ml-regalloc"
+
+// Generated header in release (AOT) mode
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
+#include "RegallocEvictModel.h"
+using CompiledModelType = RegallocEvictModel;
+#else
+using CompiledModelType = NoopSavedModelImpl;
+#endif
+
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "regalloc-evict-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <regalloc-evict-interactive-channel-base>.in, while the "
+ "outgoing name should be "
+ "<regalloc-evict-interactive-channel-base>.out"));
+
+// Options that only make sense in development mode
+#ifdef LLVM_HAVE_TFLITE
+#include "RegAllocScore.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
+static cl::opt<std::string> TrainingLog(
+ "regalloc-training-log", cl::Hidden,
+ cl::desc("Training log for the register allocator eviction model"));
+
+static cl::opt<std::string> ModelUnderTraining(
+ "regalloc-model", cl::Hidden,
+ cl::desc("The model being trained for register allocation eviction"));
+
+static cl::opt<bool> EnableDevelopmentFeatures(
+ "regalloc-enable-development-features", cl::Hidden,
+ cl::desc("Whether or not to enable features under development for the ML "
+ "regalloc advisor"));
+
+#else
+static const bool EnableDevelopmentFeatures = false;
+#endif // #ifdef LLVM_HAVE_TFLITE
+
+/// The score injection pass.
+/// This pass calculates the score for a function and inserts it in the log, but
+/// this happens only in development mode. It's a no-op otherwise.
+namespace llvm {
+extern cl::opt<unsigned> EvictInterferenceCutoff;
+
+class RegAllocScoring : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RegAllocScoring() : MachineFunctionPass(ID) {
+ initializeRegAllocScoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ ~RegAllocScoring() override = default;
+
+ StringRef getPassName() const override {
+ return "Register Allocation Pass Scoring";
+ }
+
+ /// RegAllocReward analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
+ AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// Performs this pass
+ bool runOnMachineFunction(MachineFunction &) override;
+};
+
+char RegAllocScoring::ID = 0;
+FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }
+
+} // namespace llvm
+
+INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
+ "Register Allocation Scoring Pass", false, false)
+
+// ===================================
+// Common ML Advisor declarations
+// ===================================
+namespace {
+// The model can only accept a specified number of opcodes and will error if
+// fed an opcode it hasn't seen before. This constant sets the current cutoff.
+static const int OpcodeValueCutoff = 17716;
+
+// Most features are laid out per live range (see the features table below),
+// so we'll reuse this shape vector when defining them.
+static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
+
+// --------------
+// Features table
+// --------------
+// For each interfering live range (incl. the candidate) we collect a number of
+// features. However, because the features are of different types (and because
+// of ML best practices), we organize the tensors per feature, not per
+// candidate. Each such tensor has a scalar value corresponding to the
+// interfering live range at that position, in the order given by
+// AllocationOrder. The last position corresponds to the virt reg seeking
+// allocation. The exception to all that is the progression feature, which is
+// just a scalar (see its documentation for details).
+// Note on naming: the "_by_max" features are normalized using the largest
+// value of that tensor, as observed in the current decision making stage
+// (i.e. for the current call to the advisor's tryFindEvictionCandidate).
+//
+// The feature list format: type, name, shape, documentation.
+// Note: we can really just use int64 and float, hence the modeling of some
+// bools as int64 values.
+#define RA_EVICT_FEATURES_LIST(M) \
+ M(int64_t, mask, PerLiveRangeShape, \
+ "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \
+ "it " \
+ "can't be evicted)") \
+ M(int64_t, is_free, PerLiveRangeShape, \
+ "boolean values, 1 if this phys reg is actually free (no interferences)") \
+ M(float, nr_urgent, PerLiveRangeShape, \
+ "number of 'urgent' intervals, normalized. Urgent are those that are OK " \
+ "to break cascades") \
+ M(float, nr_broken_hints, PerLiveRangeShape, \
+ "if this position were evicted, how many broken hints would there be") \
+ M(int64_t, is_hint, PerLiveRangeShape, \
+ "is this a preferred phys reg for the candidate") \
+ M(int64_t, is_local, PerLiveRangeShape, \
+ "is this live range local to a basic block") \
+ M(float, nr_rematerializable, PerLiveRangeShape, \
+ "nr rematerializable ranges") \
+ M(float, nr_defs_and_uses, PerLiveRangeShape, \
+ "bb freq - weighed nr defs and uses") \
+ M(float, weighed_reads_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of reads, normalized") \
+ M(float, weighed_writes_by_max, PerLiveRangeShape, \
+ "bb feq - weighed nr of writes, normalized") \
+ M(float, weighed_read_writes_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of uses that are both read and writes, normalized") \
+ M(float, weighed_indvars_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of uses that are indvars, normalized") \
+ M(float, hint_weights_by_max, PerLiveRangeShape, \
+ "bb freq - weighed nr of uses that are hints, normalized") \
+ M(float, start_bb_freq_by_max, PerLiveRangeShape, \
+ "the freq in the start block, normalized") \
+ M(float, end_bb_freq_by_max, PerLiveRangeShape, \
+ "freq of end block, normalized") \
+ M(float, hottest_bb_freq_by_max, PerLiveRangeShape, \
+ "hottest BB freq, normalized") \
+ M(float, liverange_size, PerLiveRangeShape, \
+ "size (instr index diff) of the LR") \
+ M(float, use_def_density, PerLiveRangeShape, \
+ "the max weight, as computed by the manual heuristic") \
+ M(int64_t, max_stage, PerLiveRangeShape, \
+ "largest stage of an interval in this LR") \
+ M(int64_t, min_stage, PerLiveRangeShape, \
+ "lowest stage of an interval in this LR") \
+ M(float, progress, {1}, "ratio of current queue size to initial size")
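+
+// For illustration, RA_EVICT_FEATURES_LIST(_DECL_FEATURES) (see below) turns
+// each M(...) entry into a TensorSpec, e.g. the first entry becomes
+// TensorSpec::createSpec<int64_t>("mask", PerLiveRangeShape); the same list
+// also drives the FeatureIDs enum and resetInputs().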
+
+#ifdef LLVM_HAVE_TFLITE
+#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \
+ M(int64_t, instructions, InstructionsShape, \
+ "Opcodes of the instructions covered by the eviction problem")
+
+#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \
+ M(int64_t, instructions_mapping, InstructionsMappingShape, \
+ "A binary matrix mapping LRs to instruction opcodes") \
+ M(float, mbb_frequencies, MBBFrequencyShape, \
+ "A vector of machine basic block frequencies") \
+ M(int64_t, mbb_mapping, InstructionsShape, \
+ "A vector of indicies mapping instructions to MBBs")
+#else
+#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
+#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
+#endif
+
+// The model learns to pick one of the mask == 1 interferences. This is the
+// name of the output tensor. The contract with the model is that the output
+// is guaranteed to be a mask == 1 position. Using a macro here to avoid
+// 'not used' warnings (and to keep conditional compilation to a minimum).
+#define DecisionName "index_to_evict"
+static const TensorSpec DecisionSpec =
+ TensorSpec::createSpec<int64_t>(DecisionName, {1});
+
+// Named features index.
+enum FeatureIDs {
+#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
+#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),
+ RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
+#ifdef LLVM_HAVE_TFLITE
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount,
+#else
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX)
+#endif // #ifdef LLVM_HAVE_TFLITE
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount
+#undef _FEATURE_IDX
+#undef _FEATURE_IDX_SIMPLE
+};
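+
+// Sketch of the expansion above: _FEATURE_IDX yields "mask, is_free, ...,
+// progress,", so FeatureIDs::mask == 0 and FeatureCount is the number of base
+// features; the development-only features, when compiled in, are appended
+// starting at FeatureCount.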
+
+// The ML advisor will typically have a sparse input to the evaluator, because
+// various phys regs won't be available. It's easier (maintenance-wise) to
+// bulk-reset the state of the evaluator each time we are about to use it
+// again.
+template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
+ size_t Ret = sizeof(T);
+ for (const auto V : Shape)
+ Ret *= V;
+ return Ret;
+}
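+
+// Example (assuming the typical sizeof(float) == 4):
+// getTotalSize<float>(PerLiveRangeShape) is 4 * 1 * NumberOfInterferences,
+// i.e. 4 * 33 = 132 bytes per per-live-range tensor, which is what
+// resetInputs() below memsets to zero.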
+
+void resetInputs(MLModelRunner &Runner) {
+#define _RESET(TYPE, NAME, SHAPE, __) \
+ std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \
+ getTotalSize<TYPE>(SHAPE));
+ RA_EVICT_FEATURES_LIST(_RESET)
+ if (EnableDevelopmentFeatures) {
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET)
+#undef _RESET
+ }
+}
+
+// Per-live interval components that get aggregated into the feature values
+// that will be passed to the evaluator.
+struct LIFeatureComponents {
+ double R = 0;
+ double W = 0;
+ double RW = 0;
+ double IndVarUpdates = 0;
+ double HintWeights = 0.0;
+ int64_t NrDefsAndUses = 0;
+ float HottestBlockFreq = 0.0;
+ bool IsRemat = false;
+};
+
+using CandidateRegList =
+ std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
+using FeaturesListNormalizer =
+ llvm::SmallVector<float, FeatureIDs::FeatureCount>;
+
+/// The ML evictor (commonalities between release and development mode)
+class MLEvictAdvisor : public RegAllocEvictionAdvisor {
+public:
+ MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
+ const MachineLoopInfo &Loops);
+
+protected:
+ const RegAllocEvictionAdvisor &getDefaultAdvisor() const {
+ return static_cast<const RegAllocEvictionAdvisor &>(DefaultAdvisor);
+ }
+
+ // The assumption is that if the Runner could not be constructed, we already
+ // emitted an error, and we shouldn't be asking for it here.
+ const MLModelRunner &getRunner() const { return *Runner; }
+
+ /// This just calls Evaluate on the Runner, but in the development mode
+ /// case, if we're just capturing the log of the default advisor, it needs
+ /// to call the latter instead, so we need to pass all the necessary
+ /// parameters for it. In the development case, it will also log.
+ virtual int64_t
+ tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const;
+
+ /// Load the features of the given VirtReg (allocated or not) at column Pos,
+ /// returning false instead if the interference at that position can't be
+ /// evicted.
+ bool
+ loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
+ bool IsHint, const SmallVirtRegSet &FixedRegisters,
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
+
+private:
+ static float getInitialQueueSize(const MachineFunction &MF);
+
+ MCRegister tryFindEvictionCandidate(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const override;
+
+ void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
+
+ // Point-in-time: we didn't learn this, so we always delegate to the
+ // default.
+ bool canEvictHintInterference(
+ const LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const override {
+ return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
+ FixedRegisters);
+ }
+
+ const LIFeatureComponents &
+ getLIFeatureComponents(const LiveInterval &LI) const;
+
+ // Hold on to a default advisor for:
+ // 1) the implementation of canEvictHintInterference, because we didn't
+ // learn that nuance yet; 2) for bootstrapping (logging) in the development
+ // mode case.
+ const DefaultEvictionAdvisor DefaultAdvisor;
+ MLModelRunner *const Runner;
+ const MachineBlockFrequencyInfo &MBFI;
+ const MachineLoopInfo &Loops;
+
+ // Indices of those features we don't want to normalize.
+ // This could be static and shared, but its initialization is non-trivial.
+ std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
+ const float InitialQSize;
+
+ using RegID = unsigned;
+ mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;
+};
+
+#define _DECL_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(#name, shape),
+
+// ===================================
+// Release (AOT) - specifics
+// ===================================
+class ReleaseModeEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ ReleaseModeEvictionAdvisorAnalysis()
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {
+ if (EnableDevelopmentFeatures) {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(
+ _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
+ } else {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ }
+ }
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Release;
+ }
+
+private:
+ std::vector<TensorSpec> InputFeatures;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ if (!Runner) {
+ if (InteractiveChannelBaseName.empty())
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ else
+ Runner = std::make_unique<InteractiveModelRunner>(
+ MF.getFunction().getContext(), InputFeatures, DecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
+ return std::make_unique<MLEvictAdvisor>(
+ MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<MachineLoopInfo>());
+ }
+ std::unique_ptr<MLModelRunner> Runner;
+};
+
+// ===================================
+// Development mode-specifics
+// ===================================
+//
+// Features we log
+#ifdef LLVM_HAVE_TFLITE
+static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
+
+// Features we bind to the model. The tensor names have a prefix, and we also
+// need to include some tensors that the training algo expects to be present.
+// TODO: can we just get rid of these?
+#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
+
+class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
+public:
+ DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner,
+ const MachineBlockFrequencyInfo &MBFI,
+ const MachineLoopInfo &Loops, Logger *Log)
+ : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}
+
+private:
+ int64_t tryFindEvictionCandidatePosition(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const override;
+
+ Logger *const Log;
+};
+
+class DevelopmentModeEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ DevelopmentModeEvictionAdvisorAnalysis()
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
+ if (EnableDevelopmentFeatures) {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(
+ _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
+ TrainingInputFeatures = {
+ RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})};
+ } else {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ TrainingInputFeatures = {
+ RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})};
+ }
+ }
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Development;
+ }
+
+ void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward) override {
+ if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
+ return;
+ // The function pass manager would run all the function passes for a
+ // function, so we assume the last context belongs to this function. If
+ // this invariant ever changes, we can implement context switching at that
+ // time. At this point, it'd be an error.
+ if (Log->currentContext() != MF.getName()) {
+ MF.getFunction().getContext().emitError(
+ "The training log context shouldn't have had changed.");
+ }
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(GetReward());
+ }
+
+private:
+ std::vector<TensorSpec> InputFeatures;
+ std::vector<TensorSpec> TrainingInputFeatures;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ bool doInitialization(Module &M) override {
+ LLVMContext &Ctx = M.getContext();
+ if (ModelUnderTraining.empty() && TrainingLog.empty()) {
+ Ctx.emitError("Regalloc development mode should be requested with at "
+ "least logging enabled and/or a training model");
+ return false;
+ }
+ if (ModelUnderTraining.empty())
+ Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
+ else
+ Runner = ModelUnderTrainingRunner::createAndEnsureValid(
+ Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
+ if (!Runner) {
+ Ctx.emitError("Regalloc: could not set up the model runner");
+ return false;
+ }
+ if (TrainingLog.empty())
+ return false;
+ std::error_code EC;
+ auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
+ if (EC) {
+ M.getContext().emitError(EC.message() + ":" + TrainingLog);
+ return false;
+ }
+ std::vector<TensorSpec> LFS = InputFeatures;
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
+ append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
+ // We always log the output; in particular, if we're not evaluating, we
+ // don't have an output spec json file. That's why we handle the
+ // 'normal' output separately.
+ LFS.push_back(DecisionSpec);
+
+ Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
+ /*IncludeReward*/ true);
+ return false;
+ }
+
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ if (!Runner)
+ return nullptr;
+ if (Log)
+ Log->switchContext(MF.getName());
+ return std::make_unique<DevelopmentModeEvictAdvisor>(
+ MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<MachineLoopInfo>(), Log.get());
+ }
+
+ std::unique_ptr<MLModelRunner> Runner;
+ std::unique_ptr<Logger> Log;
+};
+
+#endif // #ifdef LLVM_HAVE_TFLITE
+} // namespace
+
+float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
+ auto &MRI = MF.getRegInfo();
+ float Ret = 0.0;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ ++Ret;
+ }
+ return Ret;
+}
+
+MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner,
+ const MachineBlockFrequencyInfo &MBFI,
+ const MachineLoopInfo &Loops)
+ : RegAllocEvictionAdvisor(MF, RA), DefaultAdvisor(MF, RA),
+ Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
+ InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
+ assert(this->Runner);
+ Runner->switchContext(MF.getName());
+ DoNotNormalize.set(FeatureIDs::mask);
+ DoNotNormalize.set(FeatureIDs::is_free);
+ DoNotNormalize.set(FeatureIDs::is_hint);
+ DoNotNormalize.set(FeatureIDs::is_local);
+ DoNotNormalize.set(FeatureIDs::min_stage);
+ DoNotNormalize.set(FeatureIDs::max_stage);
+ DoNotNormalize.set(FeatureIDs::progress);
+}
+
+int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
+ const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
+ const SmallVirtRegSet &) const {
+ int64_t Ret = Runner->evaluate<int64_t>();
+ assert(Ret >= 0);
+ assert(Ret <= CandidateVirtRegPos);
+ return Ret;
+}
+
+bool MLEvictAdvisor::loadInterferenceFeatures(
+ const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ const SmallVirtRegSet &FixedRegisters,
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
+ // leave unavailable
+ return false;
+ }
+
+ const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
+ int64_t LocalIntfs = 0;
+ float NrUrgent = 0.0f;
+
+ // The cascade tracking is the same as in the default advisor
+ unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
+
+ SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
+ // Different from the default heuristic, we don't make any assumptions
+ // about what exceeding the interference cutoff in the query may mean.
+ const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
+ if (IFIntervals.empty() && InterferingIntervals.empty())
+ continue;
+ if (IFIntervals.size() >= EvictInterferenceCutoff)
+ return false;
+ InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
+ for (const LiveInterval *Intf : reverse(IFIntervals)) {
+ assert(Intf->reg().isVirtual() &&
+ "Only expecting virtual register interference from query");
+ // This is the same set of legality checks as in the default case: don't
+ // try to evict fixed regs or 'done' ones. Also don't break cascades,
+ // except in the urgent case, with the same nuances used in the default
+ // heuristic.
+ // We could try sharing this between the advisors, but it may end up
+ // more complex than it is right now.
+ if (FixedRegisters.count(Intf->reg()))
+ return false;
+ if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
+ return false;
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ ++NrUrgent;
+ }
+
+ LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg)));
+ }
+ }
+ // OK, so if we made it this far, this LR is an eviction candidate, load its
+ // features.
+ extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
+ NrUrgent, LRPosInfo);
+ return true;
+}
+
+MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
+ if (!MaybeOrderLimit)
+ return MCRegister::NoRegister;
+ unsigned OrderLimit = *MaybeOrderLimit;
+
+ // The heuristic sets initial costs such that, if CostPerUseLimit is
+ // max<uint8_t>, then any of the costs of the legally-evictable intervals
+ // would be lower. When that happens, one of those will be selected.
+ // Therefore, we allow the candidate to be selected, unless the candidate is
+ // unspillable, in which case it would be incorrect not to find a register
+ // for it.
+ const bool MustFindEviction =
+ (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));
+ // Number of available candidates - if 0, no need to continue.
+ size_t Available = 0;
+ // Make sure we don't have leftover partial state from an attempt where we
+ // had no available candidates and bailed out early.
+ resetInputs(*Runner);
+
+ // Track the index->register mapping because AllocationOrder doesn't do that
+ // and we'd have to scan it.
+ // Also track their mask, to write asserts/debug.
+ CandidateRegList Regs;
+ Regs.fill({0, false});
+
+ // Track the largest value of features seen during this eviction session. We
+ // only normalize (some of) the float features, but it's just simpler to
+ // dimension 'Largest' to all the features, especially since we have the
+ // 'DoNotNormalize' list.
+ FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0);
+
+ // Same overall idea as in the default eviction policy - we visit the values
+ // of AllocationOrder one at a time. If a value is not legally available, we
+ // mask off the corresponding feature column (== do nothing, because we
+ // already reset all the features to 0). Use Pos to capture the column we
+ // load features at - in AllocationOrder order.
+ size_t Pos = 0;
+ SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo;
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I, ++Pos) {
+ MCRegister PhysReg = *I;
+ assert(!Regs[Pos].second);
+ assert(PhysReg);
+ if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
+ continue;
+ }
+ if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
+ Largest, Pos, LRPosInfo)) {
+ ++Available;
+ Regs[Pos] = std::make_pair(PhysReg, true);
+ }
+ }
+ if (Available == 0) {
+ // Nothing to decide, nothing to learn.
+ assert(!MustFindEviction);
+ return MCRegister::NoRegister;
+ }
+ const size_t ValidPosLimit = Pos;
+ // If we must find eviction, the candidate should be masked out of the
+ // decision making process.
+ Regs[CandidateVirtRegPos].second = !MustFindEviction;
+ if (!MustFindEviction)
+ extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
+ CandidateVirtRegPos, /*IsHint*/ 0,
+ /*LocalIntfsCount*/ 0,
+ /*NrUrgent*/ 0.0, LRPosInfo);
+ assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
+ "nothing to allocate initially.");
+#ifdef LLVM_HAVE_TFLITE
+ if (EnableDevelopmentFeatures) {
+ extractInstructionFeatures(
+ LRPosInfo, Runner,
+ [this](SlotIndex InputIndex) -> int {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ if (!CurrentMachineInstruction) {
+ return -1;
+ }
+ return CurrentMachineInstruction->getOpcode();
+ },
+ [this](SlotIndex InputIndex) -> float {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ return MBFI.getBlockFreqRelativeToEntryBlock(
+ CurrentMachineInstruction->getParent());
+ },
+ [this](SlotIndex InputIndex) -> MachineBasicBlock * {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ return CurrentMachineInstruction->getParent();
+ },
+ FeatureIDs::instructions, FeatureIDs::instructions_mapping,
+ FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
+ LIS->getSlotIndexes()->getLastIndex());
+ }
+#endif // #ifdef LLVM_HAVE_TFLITE
+ // Normalize the features.
+ for (auto &V : Largest)
+ V = V ? V : 1.0;
+ for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount;
+ ++FeatureIndex) {
+ if (DoNotNormalize.test(FeatureIndex))
+ continue;
+ for (size_t Pos = 0; Pos < NumberOfInterferences; ++Pos) {
+ Runner->getTensor<float>(FeatureIndex)[Pos] /= Largest[FeatureIndex];
+ }
+ }
+ *Runner->getTensor<float>(FeatureIDs::progress) =
+ static_cast<float>(RA.getQueueSize()) / InitialQSize;
+
+ // Get a decision.
+ size_t CandidatePos = tryFindEvictionCandidatePosition(
+ VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
+ // The contract with the ML side is that CandidatePos is mask == 1 (i.e.
+ // Regs[CandidatePos].second)
+ assert(Regs[CandidatePos].second);
+ if (CandidatePos == CandidateVirtRegPos) {
+ assert(!MustFindEviction);
+ return MCRegister::NoRegister;
+ }
+ assert(CandidatePos < ValidPosLimit);
+ (void)ValidPosLimit;
+ return Regs[CandidatePos].first;
+}
+
+const LIFeatureComponents &
+MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
+ RegID ID = LI.reg().id();
+ LIFeatureComponents Empty;
+ auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
+ LIFeatureComponents &Ret = I.first->getSecond();
+ if (!I.second)
+ return Ret;
+
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ I = MRI->reg_instr_nodbg_begin(LI.reg()),
+ E = MRI->reg_instr_nodbg_end();
+ I != E;) {
+ MachineInstr *MI = &*(I++);
+
+ ++Ret.NrDefsAndUses;
+ if (!Visited.insert(MI).second)
+ continue;
+
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
+ continue;
+
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+
+ float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
+ Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq);
+
+ Ret.R += (Reads && !Writes) * Freq;
+ Ret.W += (!Reads && Writes) * Freq;
+ Ret.RW += (Reads && Writes) * Freq;
+
+ auto *MBB = MI->getParent();
+ auto *Loop = Loops.getLoopFor(MBB);
+ bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
+
+ if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
+ Ret.IndVarUpdates += Freq;
+
+ if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
+ Ret.HintWeights += Freq;
+ }
+ Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
+ LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
+ return Ret;
+}
+
+// Overall, this currently mimics what we do for weight calculation, but instead
+// of accumulating the various features, we keep them separate.
+void MLEvictAdvisor::extractFeatures(
+ const SmallVectorImpl<const LiveInterval *> &Intervals,
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
+ int64_t LocalIntfsCount, float NrUrgent,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
+ int64_t NrDefsAndUses = 0;
+ int64_t NrBrokenHints = 0;
+ double R = 0.0;
+ double W = 0.0;
+ double RW = 0.0;
+ double IndVarUpdates = 0.0;
+ double HintWeights = 0.0;
+ float StartBBFreq = 0.0;
+ float EndBBFreq = 0.0;
+ float HottestBlockFreq = 0.0;
+ int32_t NrRematerializable = 0;
+ float TotalWeight = 0.0;
+
+ SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex();
+ SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex();
+ int64_t MaxStage = 0;
+ int64_t MinStage =
+ Intervals.empty() ? 0 : std::numeric_limits<int64_t>::max();
+
+ for (const auto *L : Intervals) {
+ const LiveInterval &LI = *L;
+ MaxStage = std::max<int64_t>(
+ MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
+ MinStage = std::min<int64_t>(
+ MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
+
+ TotalWeight = std::max(TotalWeight, LI.weight());
+
+ if (LI.beginIndex() < StartSI)
+ StartSI = LI.beginIndex();
+
+ if (LI.endIndex() > EndSI)
+ EndSI = LI.endIndex();
+ const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
+ NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
+
+ NrDefsAndUses += LIFC.NrDefsAndUses;
+ HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
+ R += LIFC.R;
+ W += LIFC.W;
+ RW += LIFC.RW;
+
+ IndVarUpdates += LIFC.IndVarUpdates;
+
+ HintWeights += LIFC.HintWeights;
+ NrRematerializable += LIFC.IsRemat;
+
+ if (EnableDevelopmentFeatures) {
+ for (auto CurrentSegment : LI) {
+ LRPosInfo.push_back(
+ LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos});
+ }
+ }
+ }
+ size_t Size = 0;
+ if (!Intervals.empty()) {
+ StartBBFreq =
+ MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(StartSI));
+ if (EndSI >= LIS->getSlotIndexes()->getLastIndex())
+ EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex();
+ EndBBFreq =
+ MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(EndSI));
+ Size = StartSI.distance(EndSI);
+ }
+ // Set the features at the column 'Pos'.
+#define SET(ID, TYPE, VAL) \
+ do { \
+ Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL); \
+ if (!DoNotNormalize.test(FeatureIDs::ID)) \
+ Largest[FeatureIDs::ID] = \
+ std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL)); \
+ } while (false)
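+ // For instance, SET(mask, int64_t, 1) writes 1 into column Pos of the
+ // 'mask' tensor and, because 'mask' is in DoNotNormalize, leaves Largest
+ // untouched; SET(nr_urgent, float, NrUrgent) also records the running max
+ // in Largest[nr_urgent] for later normalization.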
+ SET(mask, int64_t, 1);
+ SET(is_free, int64_t, Intervals.empty());
+ SET(nr_urgent, float, NrUrgent);
+ SET(nr_broken_hints, float, NrBrokenHints);
+ SET(is_hint, int64_t, IsHint);
+ SET(is_local, int64_t, LocalIntfsCount);
+ SET(nr_rematerializable, float, NrRematerializable);
+ SET(nr_defs_and_uses, float, NrDefsAndUses);
+ SET(weighed_reads_by_max, float, R);
+ SET(weighed_writes_by_max, float, W);
+ SET(weighed_read_writes_by_max, float, RW);
+ SET(weighed_indvars_by_max, float, IndVarUpdates);
+ SET(hint_weights_by_max, float, HintWeights);
+ SET(start_bb_freq_by_max, float, StartBBFreq);
+ SET(end_bb_freq_by_max, float, EndBBFreq);
+ SET(hottest_bb_freq_by_max, float, HottestBlockFreq);
+ SET(liverange_size, float, Size);
+ SET(use_def_density, float, TotalWeight);
+ SET(max_stage, int64_t, MaxStage);
+ SET(min_stage, int64_t, MinStage);
+#undef SET
+}
+
+void extractInstructionFeatures(
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
+ function_ref<int(SlotIndex)> GetOpcode,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
+ const int InstructionsIndex, const int InstructionsMappingIndex,
+ const int MBBFreqIndex, const int MBBMappingIndex,
+ const SlotIndex LastIndex) {
+ // This function extracts instruction-based features relevant to the
+ // eviction problem currently being solved. It ends up extracting four
+ // tensors.
+ // 1 - A vector of size max instruction count. It contains the opcodes of
+ // the instructions spanned by all the intervals in the current instance of
+ // the eviction problem.
+ // 2 - A binary mapping matrix of size (LR count * max instruction count)
+ // which maps where the LRs are live to the actual opcodes for which they
+ // are live.
+ // 3 - A vector of size max supported MBB count storing MBB frequencies,
+ // encompassing all of the MBBs covered by the eviction problem.
+ // 4 - A vector of size max instruction count of indices to members of the
+ // MBB frequency vector, mapping each instruction to its associated MBB.
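+ // As a small illustration (hypothetical indices): if the LR at Pos 2 is
+ // live over instructions 5..7 of the problem, entries
+ // [2 * ModelMaxSupportedInstructionCount + 5 .. + 7] of the mapping matrix
+ // are set to 1, and instructions[5..7] hold those instructions' opcodes
+ // (or 0 if an opcode exceeds OpcodeValueCutoff).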
+
+ // Start off by sorting the segments based on the beginning slot index.
+ std::sort(
+ LRPosInfo.begin(), LRPosInfo.end(),
+ [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
+ size_t InstructionIndex = 0;
+ size_t CurrentSegmentIndex = 0;
+ SlotIndex CurrentIndex = LRPosInfo[0].Begin;
+ std::map<MachineBasicBlock *, size_t> VisitedMBBs;
+ size_t CurrentMBBIndex = 0;
+ // This loop processes all the segments sequentially by starting at the
+ // beginning slot index of the first segment, iterating through all the slot
+ // indices before the end slot index of that segment (while checking for
+ // overlaps with segments that start at greater slot indices). After hitting
+ // that end index, the current segment being processed gets bumped; this
+ // continues until all segments are processed or the max instruction count
+ // is hit, at which point everything is just truncated.
+ while (true) {
+ // If the index that we are currently at is within the current segment and
+ // we haven't hit the max instruction count, continue processing the current
+ // segment.
+ while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
+ InstructionIndex < ModelMaxSupportedInstructionCount) {
+ int CurrentOpcode = GetOpcode(CurrentIndex);
+ // If the current machine instruction is null, skip it
+ if (CurrentOpcode == -1) {
+ // If we're currently at the last index in the SlotIndex analysis,
+ // we can't go any further, so return from the function
+ if (CurrentIndex >= LastIndex) {
+ return;
+ }
+ CurrentIndex = CurrentIndex.getNextIndex();
+ continue;
+ }
+ MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex);
+ if (VisitedMBBs.count(CurrentMBBReference) == 0) {
+ VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
+ ++CurrentMBBIndex;
+ }
+ extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs,
+ GetMBBFreq, CurrentMBBReference, RegallocRunner,
+ MBBFreqIndex, MBBMappingIndex);
+ // Current code assumes we're not going to get any disjointed segments
+ assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
+ RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
+ CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
+ // set value in the binary mapping matrix for the current instruction
+ auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
+ RegallocRunner->getTensor<int64_t>(
+ InstructionsMappingIndex)[CurrentSegmentPosition *
+ ModelMaxSupportedInstructionCount +
+ InstructionIndex] = 1;
+ // All of the segments are sorted based on the beginning slot index, but
+ // this doesn't mean that the beginning slot index of the next segment is
+ // after the end segment of the one being currently processed. This while
+ // loop checks for overlapping segments and modifies the portion of the
+ // column in the mapping matrix for the currently processed instruction
+ // for the LR it is checking. Also make sure that the beginning of the
+ // current segment we're checking for overlap in is less than the current
+ // index, otherwise we're done checking overlaps.
+ size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
+ while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
+ LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
+ auto OverlapCurrentSegmentPosition =
+ LRPosInfo[OverlapCheckCurrentSegment].Pos;
+ if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
+ RegallocRunner->getTensor<int64_t>(
+ InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
+ ModelMaxSupportedInstructionCount +
+ InstructionIndex] = 1;
+ }
+ ++OverlapCheckCurrentSegment;
+ }
+ ++InstructionIndex;
+ if (CurrentIndex >= LastIndex) {
+ return;
+ }
+ CurrentIndex = CurrentIndex.getNextIndex();
+ }
+ // if we've just finished processing through the last segment or if we've
+ // hit the maximum number of instructions, break out of the loop.
+ if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
+ InstructionIndex >= ModelMaxSupportedInstructionCount) {
+ break;
+ }
+ // If the segments are not overlapping, we need to move to the beginning
+ // index of the next segment to avoid having instructions not attached to
+ // any register.
+ if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
+ LRPosInfo[CurrentSegmentIndex].End) {
+ CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
+ }
+ ++CurrentSegmentIndex;
+ }
+}
+
+void extractMBBFrequency(const SlotIndex CurrentIndex,
+ const size_t CurrentInstructionIndex,
+ std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ MachineBasicBlock *CurrentMBBReference,
+ MLModelRunner *RegallocRunner, const int MBBFreqIndex,
+ const int MBBMappingIndex) {
+ size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference];
+ float CurrentMBBFreq = GetMBBFreq(CurrentIndex);
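+ // E.g. (hypothetical): if this MBB is the 4th distinct block encountered
+ // (index 3) and fits within ModelMaxSupportedMBBCount, its frequency is
+ // written to mbb_frequencies[3] and mbb_mapping[CurrentInstructionIndex] is
+ // set to 3.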
+ if (CurrentMBBIndex < ModelMaxSupportedMBBCount) {
+ RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
+ CurrentMBBFreq;
+ RegallocRunner->getTensor<int64_t>(
+ MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
+ }
+}
+
+// Development mode-specific implementations
+#ifdef LLVM_HAVE_TFLITE
+
+RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
+ return new DevelopmentModeEvictionAdvisorAnalysis();
+}
+
+int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const {
+ int64_t Ret = 0;
+ if (isa<ModelUnderTrainingRunner>(getRunner())) {
+ Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
+ VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
+ } else {
+ MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
+ VirtReg, Order, CostPerUseLimit, FixedRegisters);
+ // Find the index of the selected PhysReg. We need it for logging;
+ // otherwise this is wasted cycles (but so would be starting development
+ // mode without a model or logging).
+ if (!PhysReg)
+ Ret = CandidateVirtRegPos;
+ else
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit);
+ I != E; ++I, ++Ret)
+ if (*I == PhysReg)
+ break;
+ }
+ if (TrainingLog.empty())
+ return Ret;
+ // TODO(mtrofin): when we support optional rewards, this can go away. In the
+ // meantime, we log the "pretend" reward (0) for the previous observation
+ // before starting a new one.
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(0.0);
+
+ Log->startObservation();
+ size_t CurrentFeature = 0;
+ size_t FeatureCount = EnableDevelopmentFeatures
+ ? FeatureIDs::FeaturesWithDevelopmentCount
+ : FeatureIDs::FeatureCount;
+ for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
+ Log->logTensorValue(CurrentFeature,
+ reinterpret_cast<const char *>(
+ getRunner().getTensorUntyped(CurrentFeature)));
+ }
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
+ for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
+ ++I, ++CurrentFeature)
+ Log->logTensorValue(
+ CurrentFeature,
+ reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
+ // The output is right after the features and the extra outputs
+ Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
+ Log->endObservation();
+ return Ret;
+}
+
+bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
+ std::optional<float> CachedReward;
+ auto GetReward = [&]() {
+ if (!CachedReward)
+ CachedReward = static_cast<float>(
+ calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>())
+ .getScore());
+ return *CachedReward;
+ };
+
+ getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF,
+ GetReward);
+ getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF,
+ GetReward);
+ return false;
+}
+#endif // #ifdef LLVM_HAVE_TFLITE
+
+RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
+ return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
+ !InteractiveChannelBaseName.empty()
+ ? new ReleaseModeEvictionAdvisorAnalysis()
+ : nullptr;
+}
+
+// In all cases except development mode, we don't need scoring.
+#if !defined(LLVM_HAVE_TFLITE)
+bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; }
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
new file mode 100644
index 000000000000..e36a41154096
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
@@ -0,0 +1,93 @@
+//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Function declarations of utilities related to feature extraction for unit
+// testing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+using namespace llvm;
+
+// LRStartEndInfo contains the start and end of a specific live range as
+// slot indices, as well as the index of the physical register it is
+// assigned to (or 1 above the phys reg count if it's the candidate).
+// Used when extracting per-instruction features in the context of a
+// specific eviction problem.
+struct LRStartEndInfo {
+ SlotIndex Begin;
+ SlotIndex End;
+ size_t Pos = 0;
+};
+
+void extractInstructionFeatures(
+ llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
+ MLModelRunner *RegallocRunner, function_ref<int(SlotIndex)> GetOpcode,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
+ const int InstructionsIndex, const int InstructionsMappingIndex,
+ const int MBBFreqIndex, const int MBBMappingIndex,
+ const SlotIndex LastIndex);
+
+void extractMBBFrequency(const SlotIndex CurrentIndex,
+ const size_t CurrentInstructionIndex,
+ std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ MachineBasicBlock *CurrentMBBReference,
+ MLModelRunner *RegallocRunner, const int MBBFreqIndex,
+ const int MBBMappingIndex);
+
+// This is the maximum number of interfering ranges. That's the number of
+// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
+// For X86, that's 32.
+// TODO: find a way to get this, statically, in a programmatic way.
+static const int64_t MaxInterferences = 32;
+
+// Logically, we can think of the feature set given to the evaluator as a 2D
+// matrix. The rows are the features (see next). The columns correspond to the
+// interferences. We treat the candidate virt reg as an 'interference', too, as
+// its feature set is the same as that of the interfering ranges. So we'll have
+// MaxInterferences + 1 columns and by convention, we will use the last column
+// for the virt reg seeking allocation.
+static const int64_t CandidateVirtRegPos = MaxInterferences;
+static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
+
+// The number of instructions that a specific live range might have is variable,
+// but we're passing in a single matrix of instructions, and TensorFlow saved
+// models only support a fixed input size, so we have to cap the number of
+// instructions that can be passed along. The specific value was derived from
+// experimentation such that the majority of eviction problems would be
+// completely covered.
+static const int ModelMaxSupportedInstructionCount = 300;
+
+// When extracting per-instruction features, the advisor will currently create
+// a vector of size ModelMaxSupportedInstructionCount to hold the opcodes of the
+// instructions relevant to the eviction problem, and a NumberOfInterferences *
+// ModelMaxSupportedInstructionCount matrix that maps LRs to the instructions
+// that they span.
+static const std::vector<int64_t> InstructionsShape{
+ 1, ModelMaxSupportedInstructionCount};
+static const std::vector<int64_t> InstructionsMappingShape{
+ 1, NumberOfInterferences, ModelMaxSupportedInstructionCount};
+
+// When extracting mappings between MBBs and individual instructions, we create
+// a vector of MBB frequencies, currently of size 100, which was a value
+// determined through experimentation to encompass the vast majority of eviction
+// problems. The actual mapping is the same shape as the instruction opcodes
+// vector.
+static const int64_t ModelMaxSupportedMBBCount = 100;
+static const std::vector<int64_t> MBBFrequencyShape{1,
+ ModelMaxSupportedMBBCount};
+
+#endif // LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
new file mode 100644
index 000000000000..422781593a9c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
@@ -0,0 +1,357 @@
+//===- MLRegAllocPriorityAdvisor.cpp - ML priority advisor-----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the ML priority advisor and reward injection pass
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "RegAllocGreedy.h"
+#include "RegAllocPriorityAdvisor.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+
+#if defined(LLVM_HAVE_TFLITE)
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
+#endif
+
+using namespace llvm;
+
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "regalloc-priority-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <regalloc-priority-interactive-channel-base>.in, while "
+ "the outgoing name should be "
+ "<regalloc-priority-interactive-channel-base>.out"));
+
+using CompiledModelType = NoopSavedModelImpl;
+
+// Options that only make sense in development mode
+#ifdef LLVM_HAVE_TFLITE
+#include "RegAllocScore.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
+static cl::opt<std::string> TrainingLog(
+ "regalloc-priority-training-log", cl::Hidden,
+ cl::desc("Training log for the register allocator priority model"));
+
+static cl::opt<std::string> ModelUnderTraining(
+ "regalloc-priority-model", cl::Hidden,
+ cl::desc("The model being trained for register allocation priority"));
+
+#endif // #ifdef LLVM_HAVE_TFLITE
+
+namespace llvm {
+
+static const std::vector<int64_t> PerLiveRangeShape{1};
+
+#define RA_PRIORITY_FEATURES_LIST(M) \
+ M(int64_t, li_size, PerLiveRangeShape, "size") \
+ M(int64_t, stage, PerLiveRangeShape, "stage") \
+ M(float, weight, PerLiveRangeShape, "weight")
+
+#define DecisionName "priority"
+static const TensorSpec DecisionSpec =
+ TensorSpec::createSpec<float>(DecisionName, {1});
+
+// Named features index.
+enum FeatureIDs {
+#define _FEATURE_IDX(_, name, __, ___) name,
+ RA_PRIORITY_FEATURES_LIST(_FEATURE_IDX)
+#undef _FEATURE_IDX
+ FeatureCount
+};
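+
+// With the expansion above, li_size == 0, stage == 1, weight == 2, and
+// FeatureCount == 3; the same list also produces InputFeatures below via
+// _DECL_FEATURES.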
+
+class MLPriorityAdvisor : public RegAllocPriorityAdvisor {
+public:
+ MLPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes, MLModelRunner *Runner);
+
+protected:
+ const RegAllocPriorityAdvisor &getDefaultAdvisor() const {
+ return static_cast<const RegAllocPriorityAdvisor &>(DefaultAdvisor);
+ }
+
+ // The assumption is that if the Runner could not be constructed, we already
+ // emitted an error, and we shouldn't be asking for it here.
+ const MLModelRunner &getRunner() const { return *Runner; }
+ float getPriorityImpl(const LiveInterval &LI) const;
+ unsigned getPriority(const LiveInterval &LI) const override;
+
+private:
+ const DefaultPriorityAdvisor DefaultAdvisor;
+ MLModelRunner *const Runner;
+};
+
+#define _DECL_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+ {RA_PRIORITY_FEATURES_LIST(_DECL_FEATURES)},
+};
+#undef _DECL_FEATURES
+
+// ===================================
+// Release (AOT) - specifics
+// ===================================
+class ReleaseModePriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ ReleaseModePriorityAdvisorAnalysis()
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Release) {}
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Release;
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<SlotIndexes>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
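+ // The model runner is created lazily on the first request and then reused
+ // for every subsequent function handled by this pass instance.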
+ if (!Runner) {
+ if (InteractiveChannelBaseName.empty())
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ else
+ Runner = std::make_unique<InteractiveModelRunner>(
+ MF.getFunction().getContext(), InputFeatures, DecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
+ return std::make_unique<MLPriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexes>(), Runner.get());
+ }
+ std::unique_ptr<MLModelRunner> Runner;
+};
+
+// ===================================
+// Development mode-specifics
+// ===================================
+//
+// Features we log
+#ifdef LLVM_HAVE_TFLITE
+static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
+
+#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
+
+static const std::vector<TensorSpec> TrainingInputFeatures{
+ {RA_PRIORITY_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})}};
+#undef _DECL_TRAIN_FEATURES
+
+class DevelopmentModePriorityAdvisor : public MLPriorityAdvisor {
+public:
+ DevelopmentModePriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes,
+ MLModelRunner *Runner, Logger *Log)
+ : MLPriorityAdvisor(MF, RA, Indexes, Runner), Log(Log) {}
+
+private:
+ unsigned getPriority(const LiveInterval &LI) const override;
+ Logger *const Log;
+};
+
+class DevelopmentModePriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ DevelopmentModePriorityAdvisorAnalysis()
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Development) {}
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Development;
+ }
+
+ void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward) override {
+ if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
+ return;
+ // The function pass manager runs all the function passes for a function
+ // before moving on, so we assume the last context belongs to this function.
+ // If this invariant ever changes, we can implement context switching at
+ // that time; until then, a changed context is an error.
+ if (Log->currentContext() != MF.getName()) {
+ MF.getFunction().getContext().emitError(
+ "The training log context shouldn't have had changed.");
+ }
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(GetReward());
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<SlotIndexes>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ // Save all the logs (when requested).
+ bool doInitialization(Module &M) override {
+ LLVMContext &Ctx = M.getContext();
+ if (ModelUnderTraining.empty() && TrainingLog.empty()) {
+ Ctx.emitError("Regalloc development mode should be requested with at "
+ "least logging enabled and/or a training model");
+ return false;
+ }
+ if (ModelUnderTraining.empty())
+ Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
+ else
+ Runner = ModelUnderTrainingRunner::createAndEnsureValid(
+ Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
+ if (!Runner) {
+ Ctx.emitError("Regalloc: could not set up the model runner");
+ return false;
+ }
+ if (TrainingLog.empty())
+ return false;
+ std::error_code EC;
+ auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
+ if (EC) {
+ M.getContext().emitError(EC.message() + ":" + TrainingLog);
+ return false;
+ }
+ std::vector<TensorSpec> LFS = InputFeatures;
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
+ append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
+ // We always log the output; in particular, if we're not evaluating, we
+ // don't have an output spec json file. That's why we handle the
+ // 'normal' output separately.
+ LFS.push_back(DecisionSpec);
+
+ Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
+ /*IncludeReward*/ true);
+ return false;
+ }
+
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ if (!Runner)
+ return nullptr;
+ if (Log) {
+ Log->switchContext(MF.getName());
+ }
+
+ return std::make_unique<DevelopmentModePriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexes>(), Runner.get(), Log.get());
+ }
+
+ std::unique_ptr<MLModelRunner> Runner;
+ std::unique_ptr<Logger> Log;
+};
+#endif //#ifdef LLVM_HAVE_TFLITE
+
+} // namespace llvm
+
+RegAllocPriorityAdvisorAnalysis *llvm::createReleaseModePriorityAdvisor() {
+ return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
+ !InteractiveChannelBaseName.empty()
+ ? new ReleaseModePriorityAdvisorAnalysis()
+ : nullptr;
+}
+
+MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF,
+ const RAGreedy &RA,
+ SlotIndexes *const Indexes,
+ MLModelRunner *Runner)
+ : RegAllocPriorityAdvisor(MF, RA, Indexes), DefaultAdvisor(MF, RA, Indexes),
+ Runner(std::move(Runner)) {
+ assert(this->Runner);
+ Runner->switchContext(MF.getName());
+}
+
+float MLPriorityAdvisor::getPriorityImpl(const LiveInterval &LI) const {
+ const unsigned Size = LI.getSize();
+ LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
+
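+ // Tensor slots 0, 1 and 2 line up with FeatureIDs::li_size, stage and
+ // weight declared from RA_PRIORITY_FEATURES_LIST above.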
+ *Runner->getTensor<int64_t>(0) = static_cast<int64_t>(Size);
+ *Runner->getTensor<int64_t>(1) = static_cast<int64_t>(Stage);
+ *Runner->getTensor<float>(2) = static_cast<float>(LI.weight());
+
+ return Runner->evaluate<float>();
+}
+
+unsigned MLPriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ return static_cast<unsigned>(getPriorityImpl(LI));
+}
+
+#ifdef LLVM_HAVE_TFLITE
+RegAllocPriorityAdvisorAnalysis *llvm::createDevelopmentModePriorityAdvisor() {
+ return new DevelopmentModePriorityAdvisorAnalysis();
+}
+
+unsigned
+DevelopmentModePriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ double Prio = 0;
+
+ if (isa<ModelUnderTrainingRunner>(getRunner())) {
+ Prio = MLPriorityAdvisor::getPriorityImpl(LI);
+ } else {
+ Prio = getDefaultAdvisor().getPriority(LI);
+ }
+
+ if (TrainingLog.empty())
+ return Prio;
+
+ // TODO(mtrofin): when we support optional rewards, this can go away. In the
+ // meantime, we log the "pretend" reward (0) for the previous observation
+ // before starting a new one.
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(0.0);
+
+ Log->startObservation();
+ size_t CurrentFeature = 0;
+ for (; CurrentFeature < InputFeatures.size(); ++CurrentFeature) {
+ Log->logTensorValue(CurrentFeature,
+ reinterpret_cast<const char *>(
+ getRunner().getTensorUntyped(CurrentFeature)));
+ }
+
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner())) {
+ for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
+ ++I, ++CurrentFeature)
+ Log->logTensorValue(
+ CurrentFeature,
+ reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
+ }
+
+ float Ret = static_cast<float>(Prio);
+ Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
+ Log->endObservation();
+
+ return static_cast<unsigned>(Prio);
+}
+
+#endif // #ifdef LLVM_HAVE_TFLITE
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 000000000000..231544494c32
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,1740 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+#define DEBUG_TYPE "codegen"
+
+static cl::opt<bool> PrintSlotIndexes(
+ "print-slotindexes",
+ cl::desc("When printing machine IR, annotate instructions and blocks with "
+ "SlotIndexes when available"),
+ cl::init(true), cl::Hidden);
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
+ : BB(B), Number(-1), xParent(&MF) {
+ Insts.Parent = this;
+ if (B)
+ IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
+}
+
+MachineBasicBlock::~MachineBasicBlock() = default;
+
+/// Return the MCSymbol for this basic block.
+MCSymbol *MachineBasicBlock::getSymbol() const {
+ if (!CachedMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+
+ // We emit a non-temporary symbol -- with a descriptive name -- if it begins
+ // a section (with basic block sections). Otherwise we fall back to using a
+ // temporary label.
+ if (MF->hasBBSections() && isBeginSection()) {
+ SmallString<5> Suffix;
+ if (SectionID == MBBSectionID::ColdSectionID) {
+ Suffix += ".cold";
+ } else if (SectionID == MBBSectionID::ExceptionSectionID) {
+ Suffix += ".eh";
+ } else {
+ // For symbols that represent basic block sections, we add ".__part." to
+ // allow tools like symbolizers to know that this represents a part of
+ // the original function.
+ Suffix = (Suffix + Twine(".__part.") + Twine(SectionID.Number)).str();
+ }
+ CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix);
+ } else {
+ const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+ }
+ return CachedMCSymbol;
+}
+
+MCSymbol *MachineBasicBlock::getEHCatchretSymbol() const {
+ if (!CachedEHCatchretMCSymbol) {
+ const MachineFunction *MF = getParent();
+ SmallString<128> SymbolName;
+ raw_svector_ostream(SymbolName)
+ << "$ehgcr_" << MF->getFunctionNumber() << '_' << getNumber();
+ CachedEHCatchretMCSymbol = MF->getContext().getOrCreateSymbol(SymbolName);
+ }
+ return CachedEHCatchretMCSymbol;
+}
+
+MCSymbol *MachineBasicBlock::getEndSymbol() const {
+ if (!CachedEndMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ CachedEndMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB_END" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+ return CachedEndMCSymbol;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+Printable llvm::printMBBReference(const MachineBasicBlock &MBB) {
+ return Printable([&MBB](raw_ostream &OS) { return MBB.printAsOperand(OS); });
+}
+
+/// When an MBB is added to an MF, we need to update the parent pointer of the
+/// MBB, the MBB numbering, and any instructions in the MBB to be on the right
+/// operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
+ MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineInstr &MI : N->instrs())
+ MI.addRegOperandsToUseLists(RegInfo);
+}
+
+void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
+ MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+}
+
+/// When we add an instruction to a basic block list, we update its parent
+/// pointer and add its operands from reg use/def lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(!N->getParent() && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->addRegOperandsToUseLists(MF->getRegInfo());
+ MF->handleInsertion(*N);
+}
+
+/// When we remove an instruction from a basic block list, we update its parent
+/// pointer and remove its operands from reg use/def lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ if (MachineFunction *MF = N->getMF()) {
+ MF->handleRemoval(*N);
+ N->removeRegOperandsFromUseLists(MF->getRegInfo());
+ }
+
+ N->setParent(nullptr);
+}
+
+/// When moving a range of instructions from one MBB list to another, we need to
+/// update the parent pointers and the use/def lists.
+void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
+ instr_iterator First,
+ instr_iterator Last) {
+ assert(Parent->getParent() == FromList.Parent->getParent() &&
+ "cannot transfer MachineInstrs between MachineFunctions");
+
+ // If it's within the same BB, there's nothing to do.
+ if (this == &FromList)
+ return;
+
+ assert(Parent != FromList.Parent && "Two lists have the same parent?");
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; First != Last; ++First)
+ First->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->deleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
+ ++I;
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi MI cannot be inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isPosition() ||
+ TII->isBasicBlockPrologue(*I)))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
+ bool SkipPseudoOp) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
+ (SkipPseudoOp && I->isPseudoProbe()) ||
+ TII->isBasicBlockPrologue(*I)))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label / non-debug "
+ "instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugInstr()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugInstr()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminatorForward() {
+ return find_if(instrs(), [](auto &II) { return II.isTerminator(); });
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::getFirstNonDebugInstr(bool SkipPseudoOp) {
+ // Skip over begin-of-block dbg_value instructions.
+ return skipDebugInstructionsForward(begin(), end(), SkipPseudoOp);
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugInstr() || I->isInsideBundle())
+ continue;
+ if (SkipPseudoOp && I->isPseudoProbe())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+bool MachineBasicBlock::hasEHPadSuccessor() const {
+ for (const MachineBasicBlock *Succ : successors())
+ if (Succ->isEHPad())
+ return true;
+ return false;
+}
+
+bool MachineBasicBlock::isEntryBlock() const {
+ return getParent()->begin() == getIterator();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+#endif
+
+bool MachineBasicBlock::mayHaveInlineAsmBr() const {
+ for (const MachineBasicBlock *Succ : successors()) {
+ if (Succ->isInlineAsmBrIndirectTarget())
+ return true;
+ }
+ return false;
+}
+
+bool MachineBasicBlock::isLegalToHoistInto() const {
+ if (isReturnBlock() || hasEHPadSuccessor() || mayHaveInlineAsmBr())
+ return false;
+ return true;
+}
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return StringRef("", 0);
+}
+
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += ("BB" + Twine(getNumber())).str();
+ return Name;
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes,
+ bool IsStandalone) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+ const Function &F = MF->getFunction();
+ const Module *M = F.getParent();
+ ModuleSlotTracker MST(M);
+ MST.incorporateFunction(F);
+ print(OS, MST, Indexes, IsStandalone);
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ const SlotIndexes *Indexes,
+ bool IsStandalone) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Indexes && PrintSlotIndexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
+ printName(OS, PrintNameIr | PrintNameAttributes, &MST);
+ OS << ":\n";
+
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo();
+ bool HasLineAttributes = false;
+
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty() && IsStandalone) {
+ if (Indexes) OS << '\t';
+ // Don't indent(2), align with previous line attributes.
+ OS << "; predecessors: ";
+ ListSeparator LS;
+ for (auto *Pred : predecessors())
+ OS << LS << printMBBReference(*Pred);
+ OS << '\n';
+ HasLineAttributes = true;
+ }
+
+ if (!succ_empty()) {
+ if (Indexes) OS << '\t';
+ // Print the successors
+ OS.indent(2) << "successors: ";
+ ListSeparator LS;
+ for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
+ OS << LS << printMBBReference(**I);
+ if (!Probs.empty())
+ OS << '('
+ << format("0x%08" PRIx32, getSuccProbability(I).getNumerator())
+ << ')';
+ }
+ if (!Probs.empty() && IsStandalone) {
+ // Print human readable probabilities as comments.
+ OS << "; ";
+ ListSeparator LS;
+ for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
+ const BranchProbability &BP = getSuccProbability(I);
+ OS << LS << printMBBReference(**I) << '('
+ << format("%.2f%%",
+ rint(((double)BP.getNumerator() / BP.getDenominator()) *
+ 100.0 * 100.0) /
+ 100.0)
+ << ')';
+ }
+ }
+
+ OS << '\n';
+ HasLineAttributes = true;
+ }
+
+ if (!livein_empty() && MRI.tracksLiveness()) {
+ if (Indexes) OS << '\t';
+ OS.indent(2) << "liveins: ";
+
+ ListSeparator LS;
+ for (const auto &LI : liveins()) {
+ OS << LS << printReg(LI.PhysReg, TRI);
+ if (!LI.LaneMask.all())
+ OS << ":0x" << PrintLaneMask(LI.LaneMask);
+ }
+ HasLineAttributes = true;
+ }
+
+ if (HasLineAttributes)
+ OS << '\n';
+
+ bool IsInBundle = false;
+ for (const MachineInstr &MI : instrs()) {
+ if (Indexes && PrintSlotIndexes) {
+ if (Indexes->hasIndex(MI))
+ OS << Indexes->getInstructionIndex(MI);
+ OS << '\t';
+ }
+
+ if (IsInBundle && !MI.isInsideBundle()) {
+ OS.indent(2) << "}\n";
+ IsInBundle = false;
+ }
+
+ OS.indent(IsInBundle ? 4 : 2);
+ MI.print(OS, MST, IsStandalone, /*SkipOpers=*/false, /*SkipDebugLoc=*/false,
+ /*AddNewLine=*/false, &TII);
+
+ if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+ OS << " {";
+ IsInBundle = true;
+ }
+ OS << '\n';
+ }
+
+ if (IsInBundle)
+ OS.indent(2) << "}\n";
+
+ if (IrrLoopHeaderWeight && IsStandalone) {
+ if (Indexes) OS << '\t';
+ OS.indent(2) << "; Irreducible loop header weight: " << *IrrLoopHeaderWeight
+ << '\n';
+ }
+}
+
+/// Print the basic block's name as:
+///
+/// bb.{number}[.{ir-name}] [(attributes...)]
+///
+/// The {ir-name} is only printed when the \ref PrintNameIr flag is passed
+/// (which is the default). If the IR block has no name, it is identified
+/// numerically using the attribute syntax as "(%ir-block.{ir-slot})".
+///
+/// When the \ref PrintNameAttributes flag is passed, additional attributes
+/// of the block are printed when set.
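+/// For example (illustrative): a landing-pad block numbered 4 whose IR block
+/// is named "cleanup" would print as "bb.4.cleanup (landing-pad)".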
+///
+/// \param printNameFlags Combination of \ref PrintNameFlag flags indicating
+/// the parts to print.
+/// \param moduleSlotTracker Optional ModuleSlotTracker. This method will
+/// incorporate its own tracker when necessary to
+/// determine the block's IR name.
+void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
+ ModuleSlotTracker *moduleSlotTracker) const {
+ os << "bb." << getNumber();
+ bool hasAttributes = false;
+
+ auto PrintBBRef = [&](const BasicBlock *bb) {
+ os << "%ir-block.";
+ if (bb->hasName()) {
+ os << bb->getName();
+ } else {
+ int slot = -1;
+
+ if (moduleSlotTracker) {
+ slot = moduleSlotTracker->getLocalSlot(bb);
+ } else if (bb->getParent()) {
+ ModuleSlotTracker tmpTracker(bb->getModule(), false);
+ tmpTracker.incorporateFunction(*bb->getParent());
+ slot = tmpTracker.getLocalSlot(bb);
+ }
+
+ if (slot == -1)
+ os << "<ir-block badref>";
+ else
+ os << slot;
+ }
+ };
+
+ if (printNameFlags & PrintNameIr) {
+ if (const auto *bb = getBasicBlock()) {
+ if (bb->hasName()) {
+ os << '.' << bb->getName();
+ } else {
+ hasAttributes = true;
+ os << " (";
+ PrintBBRef(bb);
+ }
+ }
+ }
+
+ if (printNameFlags & PrintNameAttributes) {
+ if (isMachineBlockAddressTaken()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "machine-block-address-taken";
+ hasAttributes = true;
+ }
+ if (isIRBlockAddressTaken()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "ir-block-address-taken ";
+ PrintBBRef(getAddressTakenIRBlock());
+ hasAttributes = true;
+ }
+ if (isEHPad()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "landing-pad";
+ hasAttributes = true;
+ }
+ if (isInlineAsmBrIndirectTarget()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "inlineasm-br-indirect-target";
+ hasAttributes = true;
+ }
+ if (isEHFuncletEntry()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "ehfunclet-entry";
+ hasAttributes = true;
+ }
+ if (getAlignment() != Align(1)) {
+ os << (hasAttributes ? ", " : " (");
+ os << "align " << getAlignment().value();
+ hasAttributes = true;
+ }
+ if (getSectionID() != MBBSectionID(0)) {
+ os << (hasAttributes ? ", " : " (");
+ os << "bbsections ";
+ switch (getSectionID().Type) {
+ case MBBSectionID::SectionType::Exception:
+ os << "Exception";
+ break;
+ case MBBSectionID::SectionType::Cold:
+ os << "Cold";
+ break;
+ default:
+ os << getSectionID().Number;
+ }
+ hasAttributes = true;
+ }
+ if (getBBID().has_value()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "bb_id " << *getBBID();
+ hasAttributes = true;
+ }
+ }
+
+ if (hasAttributes)
+ os << ')';
+}
+
+void MachineBasicBlock::printAsOperand(raw_ostream &OS,
+ bool /*PrintType*/) const {
+ OS << '%';
+ printName(OS, 0);
+}
+
+void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
+ LiveInVector::iterator I = find_if(
+ LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ if (I == LiveIns.end())
+ return;
+
+ I->LaneMask &= ~LaneMask;
+ if (I->LaneMask.none())
+ LiveIns.erase(I);
+}
+
+MachineBasicBlock::livein_iterator
+MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
+ // Get non-const version of iterator.
+ LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin());
+ return LiveIns.erase(LI);
+}
+
+bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
+ livein_iterator I = find_if(
+ LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ return I != livein_end() && (I->LaneMask & LaneMask).any();
+}
+
+void MachineBasicBlock::sortUniqueLiveIns() {
+ llvm::sort(LiveIns,
+ [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+ return LI0.PhysReg < LI1.PhysReg;
+ });
+ // Liveins are sorted by physreg now, so we can merge their lanemasks.
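+ // For example (illustrative): two entries for the same physreg with lane
+ // masks 0x1 and 0x2 collapse into a single entry with lane mask 0x3.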
+ LiveInVector::const_iterator I = LiveIns.begin();
+ LiveInVector::const_iterator J;
+ LiveInVector::iterator Out = LiveIns.begin();
+ for (; I != LiveIns.end(); ++Out, I = J) {
+ MCRegister PhysReg = I->PhysReg;
+ LaneBitmask LaneMask = I->LaneMask;
+ for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
+ LaneMask |= J->LaneMask;
+ Out->PhysReg = PhysReg;
+ Out->LaneMask = LaneMask;
+ }
+ LiveIns.erase(Out, LiveIns.end());
+}
+
+Register
+MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
+ assert(getParent() && "MBB must be inserted in function");
+ assert(Register::isPhysicalRegister(PhysReg) && "Expected physreg");
+ assert(RC && "Register class is required");
+ assert((isEHPad() || this == &getParent()->front()) &&
+ "Only the entry block and landing pads can have physreg live ins");
+
+ bool LiveIn = isLiveIn(PhysReg);
+ iterator I = SkipPHIsAndLabels(begin()), E = end();
+ MachineRegisterInfo &MRI = getParent()->getRegInfo();
+ const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo();
+
+ // Look for an existing copy.
+ if (LiveIn)
+ for (;I != E && I->isCopy(); ++I)
+ if (I->getOperand(1).getReg() == PhysReg) {
+ Register VirtReg = I->getOperand(0).getReg();
+ if (!MRI.constrainRegClass(VirtReg, RC))
+ llvm_unreachable("Incompatible live-in register class.");
+ return VirtReg;
+ }
+
+ // No luck, create a virtual register.
+ Register VirtReg = MRI.createVirtualRegister(RC);
+ BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg)
+ .addReg(PhysReg, RegState::Kill);
+ if (!LiveIn)
+ addLiveIn(PhysReg);
+ return VirtReg;
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter->getIterator(), getIterator());
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ getParent()->splice(++NewBefore->getIterator(), getIterator());
+}
+
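+// Return the jump table index used by the block's first terminator, or -1 if
+// the terminator is not an indirect branch through a jump table (or the block
+// has no terminator at all).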
+static int findJumpTableIndex(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator TerminatorI = MBB.getFirstTerminator();
+ if (TerminatorI == MBB.end())
+ return -1;
+ const MachineInstr &Terminator = *TerminatorI;
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ return TII->getJumpTableIndex(Terminator);
+}
+
+void MachineBasicBlock::updateTerminator(
+ MachineBasicBlock *PreviousLayoutSuccessor) {
+ LLVM_DEBUG(dbgs() << "Updating terminators on " << printMBBReference(*this)
+ << "\n");
+
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty())
+ return;
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc DL = findBranchDebugLoc();
+ bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now its
+ // layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->removeBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough, or the end of the block is
+ // unreachable.
+
+ // Unfortunately, whether the end of the block is unreachable is not
+ // immediately obvious; we must fall back to checking the successor list,
+ // and assuming that if the passed-in block is in the successor list and
+ // not an EHPad, it must be the intended target.
+ if (!PreviousLayoutSuccessor || !isSuccessor(PreviousLayoutSuccessor) ||
+ PreviousLayoutSuccessor->isEHPad())
+ return;
+
+ // If the unconditional successor block is not the current layout
+ // successor, insert a branch to jump to it.
+ if (!isLayoutSuccessor(PreviousLayoutSuccessor))
+ TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
+ }
+ return;
+ }
+
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->reverseBranchCondition(Cond))
+ return;
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, FBB, nullptr, Cond, DL);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
+ }
+ return;
+ }
+
+ // We now know we're going to fallthrough to PreviousLayoutSuccessor.
+ assert(PreviousLayoutSuccessor);
+ assert(!PreviousLayoutSuccessor->isEHPad());
+ assert(isSuccessor(PreviousLayoutSuccessor));
+
+ if (PreviousLayoutSuccessor == TBB) {
+ // We had a fallthrough to the same basic block as the conditional jump
+ // targets. Remove the conditional jump, leaving an unconditional
+ // fallthrough or an unconditional jump.
+ TII->removeBranch(*this);
+ if (!isLayoutSuccessor(TBB)) {
+ Cond.clear();
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
+ }
+ return;
+ }
+
+ // The block has a fallthrough conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->reverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
+ return;
+ }
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
+ } else if (!isLayoutSuccessor(PreviousLayoutSuccessor)) {
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL);
+ }
+}
+
+void MachineBasicBlock::validateSuccProbs() const {
+#ifndef NDEBUG
+ int64_t Sum = 0;
+ for (auto Prob : Probs)
+ Sum += Prob.getNumerator();
+ // Due to precision issues, we assume that the sum of probabilities is one if
+ // the difference between the sum of their numerators and the denominator is
+ // no greater than the number of successors.
+ assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <=
+ Probs.size() &&
+ "The sum of successors's probabilities exceeds one.");
+#endif // NDEBUG
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ,
+ BranchProbability Prob) {
+ // Probability list is either empty (if successor list isn't empty, this means
+ // disabled optimization) or has the same size as successor list.
+ if (!(Probs.empty() && !Successors.empty()))
+ Probs.push_back(Prob);
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
+ // We need to make sure probability list is either empty or has the same size
+ // of successor list. When this function is called, we can safely delete all
+ // probability in the list.
+ Probs.clear();
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New,
+ bool NormalizeSuccProbs) {
+ succ_iterator OldI = llvm::find(successors(), Old);
+ assert(OldI != succ_end() && "Old is not a successor of this block!");
+ assert(!llvm::is_contained(successors(), New) &&
+ "New is already a successor of this block!");
+
+ // Add a new successor with equal probability as the original one. Note
+ // that we directly copy the probability using the iterator rather than
+ // getting a potentially synthetic probability computed when unknown. This
+ // preserves the probabilities as-is and then we can renormalize them and
+ // query them effectively afterward.
+ addSuccessor(New, Probs.empty() ? BranchProbability::getUnknown()
+ : *getProbabilityIterator(OldI));
+ if (NormalizeSuccProbs)
+ normalizeSuccProbs();
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
+ bool NormalizeSuccProbs) {
+ succ_iterator I = find(Successors, Succ);
+ removeSuccessor(I, NormalizeSuccProbs);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) {
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If probability list is empty it means we don't use it (disabled
+ // optimization).
+ if (!Probs.empty()) {
+ probability_iterator WI = getProbabilityIterator(I);
+ Probs.erase(WI);
+ if (NormalizeSuccProbs)
+ normalizeSuccProbs();
+ }
+
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ if (Old == New)
+ return;
+
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
+ }
+ assert(OldI != E && "Old is not a successor of this block");
+
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ Old->removePredecessor(this);
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its probability instead of adding a duplicate edge.
+ if (!Probs.empty()) {
+ auto ProbIter = getProbabilityIterator(NewI);
+ if (!ProbIter->isUnknown())
+ *ProbIter += *getProbabilityIterator(OldI);
+ }
+ removeSuccessor(OldI);
+}
+
+void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+ succ_iterator I) {
+ if (!Orig->Probs.empty())
+ addSuccessor(*I, Orig->getSuccProbability(I));
+ else
+ addSuccessorWithoutProb(*I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
+ Predecessors.push_back(Pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) {
+ pred_iterator I = find(Predecessors, Pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
+ return;
+
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
+
+ // If probability list is empty it means we don't use it (disabled
+ // optimization).
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
+
+ FromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
+ return;
+
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
+ FromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ Succ->replacePhiUsesWith(FromMBB, this);
+ }
+ normalizeSuccProbs();
+}
+
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return is_contained(predecessors(), MBB);
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ return is_contained(successors(), MBB);
+}
+
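+// MBB is this block's layout successor iff it immediately follows this block
+// in the MachineFunction's block list, independent of the CFG successor list.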
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return std::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
+ return Successors.size() == 1 ? Successors[0] : nullptr;
+}
+
+MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
+ MachineFunction::iterator Fallthrough = getIterator();
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return nullptr;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(&*Fallthrough))
+ return nullptr;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+ if (TII->analyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicated check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return (empty() || !back().isBarrier() || TII->isPredicated(back()))
+ ? &*Fallthrough
+ : nullptr;
+ }
+
+ // If there is no branch, control always falls through.
+ if (!TBB) return &*Fallthrough;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough))
+ return &*Fallthrough;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return nullptr;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return (FBB == nullptr) ? &*Fallthrough : nullptr;
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ return getFallThrough() != nullptr;
+}
+
+MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI,
+ bool UpdateLiveIns,
+ LiveIntervals *LIS) {
+ MachineBasicBlock::iterator SplitPoint(&MI);
+ ++SplitPoint;
+
+ if (SplitPoint == end()) {
+ // Don't bother with a new block.
+ return this;
+ }
+
+ MachineFunction *MF = getParent();
+
+ LivePhysRegs LiveRegs;
+ if (UpdateLiveIns) {
+ // Make sure we add any physregs we define in the block as liveins to the
+ // new block.
+ MachineBasicBlock::iterator Prev(&MI);
+ LiveRegs.init(*MF->getSubtarget().getRegisterInfo());
+ LiveRegs.addLiveOuts(*this);
+ for (auto I = rbegin(), E = Prev.getReverse(); I != E; ++I)
+ LiveRegs.stepBackward(*I);
+ }
+
+ MachineBasicBlock *SplitBB = MF->CreateMachineBasicBlock(getBasicBlock());
+
+ MF->insert(++MachineFunction::iterator(this), SplitBB);
+ SplitBB->splice(SplitBB->begin(), this, SplitPoint, end());
+
+ SplitBB->transferSuccessorsAndUpdatePHIs(this);
+ addSuccessor(SplitBB);
+
+ if (UpdateLiveIns)
+ addLiveIns(*SplitBB, LiveRegs);
+
+ if (LIS)
+ LIS->insertMBBInMaps(SplitBB);
+
+ return SplitBB;
+}
+
+// Returns `true` if there are possibly other users of the jump table at
+// `JumpTableIndex` except for the ones in `IgnoreMBB`.
+static bool jumpTableHasOtherUses(const MachineFunction &MF,
+ const MachineBasicBlock &IgnoreMBB,
+ int JumpTableIndex) {
+ assert(JumpTableIndex >= 0 && "need valid index");
+ const MachineJumpTableInfo &MJTI = *MF.getJumpTableInfo();
+ const MachineJumpTableEntry &MJTE = MJTI.getJumpTables()[JumpTableIndex];
+ // Take any basic block from the table; every user of the jump table must
+ // show up in the predecessor list.
+ const MachineBasicBlock *MBB = nullptr;
+ for (MachineBasicBlock *B : MJTE.MBBs) {
+ if (B != nullptr) {
+ MBB = B;
+ break;
+ }
+ }
+ if (MBB == nullptr)
+ return true; // can't rule out other users if there isn't any block.
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Pred == &IgnoreMBB)
+ continue;
+ MachineBasicBlock *DummyT = nullptr;
+ MachineBasicBlock *DummyF = nullptr;
+ Cond.clear();
+ if (!TII.analyzeBranch(*Pred, DummyT, DummyF, Cond,
+ /*AllowModify=*/false)) {
+ // analyzable direct jump
+ continue;
+ }
+ int PredJTI = findJumpTableIndex(*Pred);
+ if (PredJTI >= 0) {
+ if (PredJTI == JumpTableIndex)
+ return true;
+ continue;
+ }
+ // Be conservative for unanalyzable jumps.
+ return true;
+ }
+ return false;
+}
+
+MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
+ MachineBasicBlock *Succ, Pass &P,
+ std::vector<SparseBitVector<>> *LiveInSets) {
+ if (!canSplitCriticalEdge(Succ))
+ return nullptr;
+
+ MachineFunction *MF = getParent();
+ MachineBasicBlock *PrevFallthrough = getNextNode();
+ DebugLoc DL; // FIXME: this is nowhere
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+
+ // Is there an indirect jump with jump table?
+ bool ChangedIndirectJump = false;
+ int JTI = findJumpTableIndex(*this);
+ if (JTI >= 0) {
+ MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
+ MJTI.ReplaceMBBInJumpTable(JTI, Succ, NMBB);
+ ChangedIndirectJump = true;
+ }
+
+ MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
+ LLVM_DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
+ << " -- " << printMBBReference(*NMBB) << " -- "
+ << printMBBReference(*Succ) << '\n');
+
+ LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
+ if (LIS)
+ LIS->insertMBBInMaps(NMBB);
+ else if (Indexes)
+ Indexes->insertMBBInMaps(NMBB);
+
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<Register, 4> KilledRegs;
+ if (LV)
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end())) {
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg() == 0 || !MO.isKill() || MO.isUndef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical() || LV->getVarInfo(Reg).removeKill(MI)) {
+ KilledRegs.push_back(Reg);
+ LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ SmallVector<Register, 4> UsedRegs;
+ if (LIS) {
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end())) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!is_contained(UsedRegs, Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end()))
+ Terminators.push_back(&MI);
+ }
+
+ // Since we replaced all uses of Succ with NMBB, that should also be treated
+ // as the fallthrough successor
+ if (Succ == PrevFallthrough)
+ PrevFallthrough = NMBB;
+
+ if (!ChangedIndirectJump)
+ updateTerminator(PrevFallthrough);
+
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end()))
+ NewTerminators.push_back(&MI);
+
+ for (MachineInstr *Terminator : Terminators) {
+ if (!is_contained(NewTerminators, Terminator))
+ Indexes->removeMachineInstrFromMaps(*Terminator);
+ }
+ }
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+ TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
+
+ if (Indexes) {
+ for (MachineInstr &MI : NMBB->instrs()) {
+ // Some instructions may have been moved to NMBB by updateTerminator(),
+ // so we first remove any instruction that already has an index.
+ if (Indexes->hasIndex(MI))
+ Indexes->removeMachineInstrFromMaps(MI);
+ Indexes->insertMachineInstrInMaps(MI);
+ }
+ }
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this.
+ Succ->replacePhiUsesWith(this, NMBB);
+
+ // Inherit live-ins from the successor
+ for (const auto &LI : Succ->liveins())
+ NMBB->addLiveIn(LI);
+
+ // Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ Register Reg = KilledRegs.pop_back_val();
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
+ continue;
+ if (Reg.isVirtual())
+ LV->getVarInfo(Reg).Kills.push_back(&*I);
+ LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
+ if (LiveInSets != nullptr)
+ LV->addNewBlock(NMBB, this, Succ, *LiveInSets);
+ else
+ LV->addNewBlock(NMBB, this, Succ);
+ }
+
+ if (LIS) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all
+ // live intervals will end prior to the beginning of the new split block. If
+ // the original block was not at the end of the function, all live intervals
+ // will extend to the end of the new split block.
+
+ bool isLastMBB =
+ std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<Register, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ Register Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI &&
+ "PHI sources should be live out of their predecessors.");
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeSegment(StartIndex, EndIndex);
+ }
+ }
+
+ // Update all intervals for registers whose uses may have been modified by
+ // updateTerminator().
+ LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+ }
+
+ if (MachineDominatorTree *MDT =
+ P.getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->recordSplitCriticalEdge(this, Succ, NMBB);
+
+ if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
+bool MachineBasicBlock::canSplitCriticalEdge(
+ const MachineBasicBlock *Succ) const {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isEHPad())
+ return false;
+
+ // Splitting the critical edge to a callbr's indirect block isn't advised.
+ // Don't do it in this generic function.
+ if (Succ->isInlineAsmBrIndirectTarget())
+ return false;
+
+ const MachineFunction *MF = getParent();
+ // Performance might be harmed on HW that implements branching using exec mask
+ // where both sides of the branches are always executed.
+ if (MF->getTarget().requiresStructuredCFG())
+ return false;
+
+ // Do we have an Indirect jump with a jumptable that we can rewrite?
+ int JTI = findJumpTableIndex(*this);
+ if (JTI >= 0 && !jumpTableHasOtherUses(*MF, *this, JTI))
+ return true;
+
+ // We may need to update this's terminator, but we can't do that if
+ // analyzeBranch fails.
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ // analyzeBranch should not modify this block, since we did not allow
+ // modification.
+ if (TII->analyzeBranch(*const_cast<MachineBasicBlock *>(this), TBB, FBB, Cond,
+ /*AllowModify*/ false))
+ return false;
+
+ // Avoid bugpoint weirdness: A block may end with a conditional branch but
+ // jump to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ LLVM_DEBUG(dbgs() << "Won't split critical edge after degenerate "
+ << printMBBReference(*this) << '\n');
+ return false;
+ }
+ return true;
+}
+
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+ // Removing the first instruction in a bundle.
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+ MI->unbundleFromSucc();
+ // Removing the last instruction in a bundle.
+ if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+ MI->unbundleFromPred();
+ // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+ // are already fine.
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+ unbundleSingleMI(&*I);
+ return Insts.erase(I);
+}
+
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+ unbundleSingleMI(MI);
+ MI->clearFlag(MachineInstr::BundledPred);
+ MI->clearFlag(MachineInstr::BundledSucc);
+ return Insts.remove(MI);
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ // Set the bundle flags when inserting inside a bundle.
+ if (I != instr_end() && I->isBundledWithPred()) {
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
+ }
+ return Insts.insert(I, MI);
+}
+
+/// This method unlinks 'this' from the containing function, and returns it, but
+/// does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+/// This method unlinks 'this' from the containing function, and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+/// Given a machine basic block that branched to 'Old', change the code and CFG
+/// so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
+ --I;
+ if (!I->isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ replaceSuccessor(Old, New);
+}
+
+void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ for (MachineInstr &MI : phis())
+ for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.getMBB() == Old)
+ MO.setMBB(New);
+ }
+}
+
+/// Find the next valid DebugLoc starting at MBBI, skipping any debug
+/// instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ MBBI = skipDebugInstructionsForward(MBBI, instr_end());
+ if (MBBI != instr_end())
+ return MBBI->getDebugLoc();
+ return {};
+}
+
+DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) {
+ if (MBBI == instr_rend())
+ return findDebugLoc(instr_begin());
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ MBBI = skipDebugInstructionsBackward(MBBI, instr_rbegin());
+ if (!MBBI->isDebugInstr())
+ return MBBI->getDebugLoc();
+ return {};
+}
+
+/// Find the previous valid DebugLoc preceding MBBI, skipping any debug
+/// instructions. Return UnknownLoc if there is none.
+DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
+ if (MBBI == instr_begin())
+ return {};
+ // Skip debug instructions, we don't want a DebugLoc from them.
+ MBBI = prev_nodbg(MBBI, instr_begin());
+ if (!MBBI->isDebugInstr())
+ return MBBI->getDebugLoc();
+ return {};
+}
+
+DebugLoc MachineBasicBlock::rfindPrevDebugLoc(reverse_instr_iterator MBBI) {
+ if (MBBI == instr_rend())
+ return {};
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ MBBI = next_nodbg(MBBI, instr_rend());
+ if (MBBI != instr_rend())
+ return MBBI->getDebugLoc();
+ return {};
+}
+
+/// Find and return the merged DebugLoc of the branch instructions of the block.
+/// Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findBranchDebugLoc() {
+ DebugLoc DL;
+ auto TI = getFirstTerminator();
+ while (TI != end() && !TI->isBranch())
+ ++TI;
+
+ if (TI != end()) {
+ DL = TI->getDebugLoc();
+ for (++TI ; TI != end() ; ++TI)
+ if (TI->isBranch())
+ DL = DILocation::getMergedLocation(DL, TI->getDebugLoc());
+ }
+ return DL;
+}
+
+/// Return probability of the edge from this block to MBB.
+BranchProbability
+MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
+ if (Probs.empty())
+ return BranchProbability(1, succ_size());
+
+ const auto &Prob = *getProbabilityIterator(Succ);
+ if (Prob.isUnknown()) {
+ // For unknown probabilities, collect the sum of all known ones, and evenly
+ // distribute the complement of the sum to each unknown probability.
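+ // For illustration (numbers are hypothetical): with four successors where
+ // two probabilities are known, 1/2 and 1/4, the known sum is 3/4, so each of
+ // the two unknown edges is reported as (1 - 3/4) / 2 = 1/8.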
+ unsigned KnownProbNum = 0;
+ auto Sum = BranchProbability::getZero();
+ for (const auto &P : Probs) {
+ if (!P.isUnknown()) {
+ Sum += P;
+ KnownProbNum++;
+ }
+ }
+ return Sum.getCompl() / (Probs.size() - KnownProbNum);
+ } else
+ return Prob;
+}
+
+/// Set successor probability of a given iterator.
+void MachineBasicBlock::setSuccProbability(succ_iterator I,
+ BranchProbability Prob) {
+ assert(!Prob.isUnknown());
+ if (Probs.empty())
+ return;
+ *getProbabilityIterator(I) = Prob;
+}
+
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::const_probability_iterator
+MachineBasicBlock::getProbabilityIterator(
+ MachineBasicBlock::const_succ_iterator I) const {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
+}
+
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::probability_iterator
+MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
+}
+
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+///
+/// Search is localised to a neighborhood of Neighborhood instructions before
+/// (searching for defs or kills) and Neighborhood instructions after
+/// (searching just for defs) MI.
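+///
+/// A minimal usage sketch (the register and iterator names below are
+/// illustrative, not defined in this file):
+///   MachineBasicBlock::LivenessQueryResult LQR =
+///       MBB.computeRegisterLiveness(TRI, SomePhysReg, InsertPt, /*Neighborhood=*/10);
+///   if (LQR == MachineBasicBlock::LQR_Dead) {
+///     // SomePhysReg may be clobbered just before InsertPt.
+///   }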
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+ MCRegister Reg, const_iterator Before,
+ unsigned Neighborhood) const {
+ unsigned N = Neighborhood;
+
+ // Try searching forwards from Before, looking for reads or defs.
+ const_iterator I(Before);
+ for (; I != end() && N > 0; ++I) {
+ if (I->isDebugOrPseudoInstr())
+ continue;
+
+ --N;
+
+ PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI);
+
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
+ return LQR_Dead;
+ }
+
+ // If we reached the end, it is safe to clobber Reg at the end of the block
+ // if no successor has it live in.
+ if (I == end()) {
+ for (MachineBasicBlock *S : successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LI : S->liveins()) {
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
+ return LQR_Live;
+ }
+ }
+
+ return LQR_Dead;
+ }
+
+
+ N = Neighborhood;
+
+ // Now search backwards from Before, looking for kills, reads or defs.
+ I = const_iterator(Before);
+ // If this is the first insn in the block, don't search backwards.
+ if (I != begin()) {
+ do {
+ --I;
+
+ if (I->isDebugOrPseudoInstr())
+ continue;
+
+ --N;
+
+ PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI);
+
+ // Defs happen after uses so they take precedence if both are present.
+
+ // Register is dead after a dead def of the full register.
+ if (Info.DeadDef)
+ return LQR_Dead;
+ // Register is (at least partially) live after a def.
+ if (Info.Defined) {
+ if (!Info.PartialDeadDef)
+ return LQR_Live;
+ // As soon as we see a partial definition (dead or not),
+ // we cannot tell if the value is partially live without
+ // tracking the lanemasks. We are not going to do this,
+ // so fall back on the rest of the analysis.
+ break;
+ }
+ // Register is dead after a full kill or clobber and no def.
+ if (Info.Killed || Info.Clobbered)
+ return LQR_Dead;
+ // Register must be live if we read it.
+ if (Info.Read)
+ return LQR_Live;
+
+ } while (I != begin() && N > 0);
+ }
+
+ // If all the instructions before this in the block are debug instructions,
+ // skip over them.
+ while (I != begin() && std::prev(I)->isDebugOrPseudoInstr())
+ --I;
+
+ // Did we get to the start of the block?
+ if (I == begin()) {
+ // If so, the register's state is definitely defined by the live-in state.
+ for (const MachineBasicBlock::RegisterMaskPair &LI : liveins())
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
+ return LQR_Live;
+
+ return LQR_Dead;
+ }
+
+ // At this point we have no idea of the liveness of the register.
+ return LQR_Unknown;
+}
+
+const uint32_t *
+MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const {
+ // EH funclet entry does not preserve any registers.
+ return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr;
+}
+
+const uint32_t *
+MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
+ // If we see a return block with successors, this must be a funclet return,
+ // which does not preserve any registers. If there are no successors, we don't
+ // care what kind of return it is; putting a mask after it is a no-op.
+ return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr;
+}
+
+void MachineBasicBlock::clearLiveIns() {
+ LiveIns.clear();
+}
+
+MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
+ assert(getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness) &&
+ "Liveness information is accurate");
+ return LiveIns.begin();
+}
+
+MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
+ const MachineFunction &MF = *getParent();
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness) &&
+ "Liveness information is accurate");
+
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ MCPhysReg ExceptionPointer = 0, ExceptionSelector = 0;
+ if (MF.getFunction().hasPersonalityFn()) {
+ auto PersonalityFn = MF.getFunction().getPersonalityFn();
+ ExceptionPointer = TLI.getExceptionPointerRegister(PersonalityFn);
+ ExceptionSelector = TLI.getExceptionSelectorRegister(PersonalityFn);
+ }
+
+ return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false);
+}
+
+bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const {
+ unsigned Cntr = 0;
+ auto R = instructionsWithoutDebug(begin(), end());
+ for (auto I = R.begin(), E = R.end(); I != E; ++I) {
+ if (++Cntr > Limit)
+ return true;
+ }
+ return false;
+}
+
+unsigned MachineBasicBlock::getBBIDOrNumber() const {
+ uint8_t BBAddrMapVersion = getParent()->getContext().getBBAddrMapVersion();
+ return BBAddrMapVersion < 2 ? getNumber() : *getBBID();
+}
+
+const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
+const MBBSectionID
+ MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
new file mode 100644
index 000000000000..b1cbe525d7e6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -0,0 +1,291 @@
+//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+#include <optional>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-block-freq"
+
+namespace llvm {
+static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
+ "view-machine-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how machine block "
+ "frequencies propagate through the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count", "display a graph using the real "
+ "profile count if available.")));
+
+// Similar to the option above, but used to control BFI display only after the MBP pass.
+cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
+ "view-block-layout-with-bfi", cl::Hidden,
+ cl::desc(
+ "Pop up a window to show a dag displaying MBP layout and associated "
+ "block frequencies of the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count",
+ "display a graph using the real "
+ "profile count if available.")));
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+// Command line option to specify hot frequency threshold.
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
+extern cl::opt<unsigned> ViewHotFreqPercent;
+
+static cl::opt<bool> PrintMachineBlockFreq(
+ "print-machine-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print the machine block frequency info."));
+
+// Command line option to specify the name of the function for block frequency
+// dump. Defined in Analysis/BlockFrequencyInfo.cpp.
+extern cl::opt<std::string> PrintBlockFreqFuncName;
+} // namespace llvm
+
+static GVDAGType getGVDT() {
+ if (ViewBlockLayoutWithBFI != GVDT_None)
+ return ViewBlockLayoutWithBFI;
+
+ return ViewMachineBlockFreqPropagationDAG;
+}
+
+namespace llvm {
+
+template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
+ using NodeRef = const MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
+ using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
+
+ static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) {
+ return &G->getFunction()->front();
+ }
+
+ static ChildIteratorType child_begin(const NodeRef N) {
+ return N->succ_begin();
+ }
+
+ static ChildIteratorType child_end(const NodeRef N) { return N->succ_end(); }
+
+ static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) {
+ return nodes_iterator(G->getFunction()->begin());
+ }
+
+ static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) {
+ return nodes_iterator(G->getFunction()->end());
+ }
+};
+
+using MBFIDOTGraphTraitsBase =
+ BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
+ MachineBranchProbabilityInfo>;
+
+template <>
+struct DOTGraphTraits<MachineBlockFrequencyInfo *>
+ : public MBFIDOTGraphTraitsBase {
+ const MachineFunction *CurFunc = nullptr;
+ DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
+
+ explicit DOTGraphTraits(bool isSimple = false)
+ : MBFIDOTGraphTraitsBase(isSimple) {}
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineBlockFrequencyInfo *Graph) {
+ int layout_order = -1;
+ // Attach additional ordering information if 'isSimple' is false.
+ if (!isSimple()) {
+ const MachineFunction *F = Node->getParent();
+ if (!CurFunc || F != CurFunc) {
+ if (CurFunc)
+ LayoutOrderMap.clear();
+
+ CurFunc = F;
+ int O = 0;
+ for (auto MBI = F->begin(); MBI != F->end(); ++MBI, ++O) {
+ LayoutOrderMap[&*MBI] = O;
+ }
+ }
+ layout_order = LayoutOrderMap[Node];
+ }
+ return MBFIDOTGraphTraitsBase::getNodeLabel(Node, Graph, getGVDT(),
+ layout_order);
+ }
+
+ std::string getNodeAttributes(const MachineBasicBlock *Node,
+ const MachineBlockFrequencyInfo *Graph) {
+ return MBFIDOTGraphTraitsBase::getNodeAttributes(Node, Graph,
+ ViewHotFreqPercent);
+ }
+
+ std::string getEdgeAttributes(const MachineBasicBlock *Node, EdgeIter EI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return MBFIDOTGraphTraitsBase::getEdgeAttributes(
+ Node, EI, MBFI, MBFI->getMBPI(), ViewHotFreqPercent);
+ }
+};
+
+} // end namespace llvm
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE,
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE,
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequencyInfo::ID = 0;
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
+ : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
+ MachineFunction &F,
+ MachineBranchProbabilityInfo &MBPI,
+ MachineLoopInfo &MLI) : MachineFunctionPass(ID) {
+ calculate(F, MBPI, MLI);
+}
+
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
+
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void MachineBlockFrequencyInfo::calculate(
+ const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+ const MachineLoopInfo &MLI) {
+ if (!MBFI)
+ MBFI.reset(new ImplType);
+ MBFI->calculate(F, MBPI, MLI);
+ if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
+ view("MachineBlockFrequencyDAGS." + F.getName());
+ }
+ if (PrintMachineBlockFreq &&
+ (PrintBlockFreqFuncName.empty() ||
+ F.getName().equals(PrintBlockFreqFuncName))) {
+ MBFI->print(dbgs());
+ }
+}
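+
+// Illustrative invocation (the function and file names are hypothetical):
+//   llc -view-machine-block-freq-propagation-dags=fraction -view-bfi-func-name=foo foo.ll
+// pops up the DAG view for 'foo'; -print-machine-bfi dumps the textual form.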
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI =
+ getAnalysis<MachineBranchProbabilityInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ calculate(F, MBPI, MLI);
+ return false;
+}
+
+void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void MachineBlockFrequencyInfo::view(const Twine &Name, bool isSimple) const {
+ // This code is only for debugging.
+ ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), Name, isSimple);
+}
+
+BlockFrequency
+MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
+ return MBFI ? MBFI->getBlockFreq(MBB) : 0;
+}
+
+std::optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
+ const MachineBasicBlock *MBB) const {
+ if (!MBFI)
+ return std::nullopt;
+
+ const Function &F = MBFI->getFunction()->getFunction();
+ return MBFI->getBlockProfileCount(F, MBB);
+}
+
+std::optional<uint64_t>
+MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+ if (!MBFI)
+ return std::nullopt;
+
+ const Function &F = MBFI->getFunction()->getFunction();
+ return MBFI->getProfileCountFromFreq(F, Freq);
+}
+
+bool MachineBlockFrequencyInfo::isIrrLoopHeader(
+ const MachineBasicBlock *MBB) const {
+ assert(MBFI && "Expected analysis to be available");
+ return MBFI->isIrrLoopHeader(MBB);
+}
+
+void MachineBlockFrequencyInfo::onEdgeSplit(
+ const MachineBasicBlock &NewPredecessor,
+ const MachineBasicBlock &NewSuccessor,
+ const MachineBranchProbabilityInfo &MBPI) {
+ assert(MBFI && "Expected analysis to be available");
+ auto NewSuccFreq = MBFI->getBlockFreq(&NewPredecessor) *
+ MBPI.getEdgeProbability(&NewPredecessor, &NewSuccessor);
+
+ MBFI->setBlockFreq(&NewSuccessor, NewSuccFreq.getFrequency());
+}
+
+const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
+ return MBFI ? MBFI->getFunction() : nullptr;
+}
+
+const MachineBranchProbabilityInfo *MachineBlockFrequencyInfo::getMBPI() const {
+ return MBFI ? &MBFI->getBPI() : nullptr;
+}
+
+raw_ostream &
+MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS;
+}
+
+raw_ostream &
+MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS;
+}
+
+uint64_t MachineBlockFrequencyInfo::getEntryFreq() const {
+ return MBFI ? MBFI->getEntryFreq() : 0;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 000000000000..912e9ec993e3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,3701 @@
+//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BranchFolding.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/CodeLayout.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "block-placement"
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+static cl::opt<unsigned> AlignAllBlock(
+ "align-all-blocks",
+ cl::desc("Force the alignment of all blocks in the function in log2 format "
+ "(e.g 4 means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> AlignAllNonFallThruBlocks(
+ "align-all-nofallthru-blocks",
+ cl::desc("Force the alignment of all blocks that have no fall-through "
+ "predecessors (i.e. don't add nops that are executed). In log2 "
+ "format (e.g 4 means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> MaxBytesForAlignmentOverride(
+ "max-bytes-for-alignment",
+ cl::desc("Forces the maximum bytes allowed to be emitted when padding for "
+ "alignment"),
+ cl::init(0), cl::Hidden);
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned> ExitBlockBias(
+ "block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
+// Definition:
+// - Outlining: placement of a basic block outside the chain or hot path.
+
+static cl::opt<unsigned> LoopToColdBlockRatio(
+ "loop-to-cold-block-ratio",
+ cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
+ "(frequency of block) is greater than this ratio"),
+ cl::init(5), cl::Hidden);
+
+static cl::opt<bool> ForceLoopColdBlock(
+ "force-loop-cold-block",
+ cl::desc("Force outlining cold blocks from loops."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ PreciseRotationCost("precise-rotation-cost",
+ cl::desc("Model the cost of loop rotation more "
+ "precisely by using profile data."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ ForcePreciseRotationCost("force-precise-rotation-cost",
+ cl::desc("Force the use of precise cost "
+ "loop rotation strategy."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> MisfetchCost(
+ "misfetch-cost",
+ cl::desc("Cost that models the probabilistic risk of an instruction "
+ "misfetch due to a jump comparing to falling through, whose cost "
+ "is zero."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
+ cl::desc("Cost of jump instructions."),
+ cl::init(1), cl::Hidden);
+static cl::opt<bool>
+TailDupPlacement("tail-dup-placement",
+ cl::desc("Perform tail duplication during placement. "
+ "Creates more fallthrough opportunites in "
+ "outline branches."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+BranchFoldPlacement("branch-fold-placement",
+ cl::desc("Perform branch folding during placement. "
+ "Reduces code size."),
+ cl::init(true), cl::Hidden);
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDupPlacementThreshold(
+ "tail-dup-placement-threshold",
+ cl::desc("Instruction cutoff for tail duplication during layout. "
+ "Tail merging during layout is forced to have a threshold "
+ "that won't conflict."), cl::init(2),
+ cl::Hidden);
+
+// Heuristic for aggressive tail duplication.
+static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
+ "tail-dup-placement-aggressive-threshold",
+ cl::desc("Instruction cutoff for aggressive tail duplication during "
+ "layout. Used at -O3. Tail merging during layout is forced to "
+ "have a threshold that won't conflict."), cl::init(4),
+ cl::Hidden);
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDupPlacementPenalty(
+ "tail-dup-placement-penalty",
+ cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. "
+ "Copying can increase fallthrough, but it also increases icache "
+ "pressure. This parameter controls the penalty to account for that. "
+ "Percent as integer."),
+ cl::init(2),
+ cl::Hidden);
+
+// Heuristic for tail duplication if profile count is used in cost model.
+static cl::opt<unsigned> TailDupProfilePercentThreshold(
+ "tail-dup-profile-percent-threshold",
+ cl::desc("If profile count information is used in tail duplication cost "
+ "model, the gained fall through number from tail duplication "
+ "should be at least this percent of hot count."),
+ cl::init(50), cl::Hidden);
+
+// Heuristic for triangle chains.
+static cl::opt<unsigned> TriangleChainCount(
+ "triangle-chain-count",
+ cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
+ "triangle tail duplication heuristic to kick in. 0 to disable."),
+ cl::init(2),
+ cl::Hidden);
+
+// Use case: When block layout is visualized after MBP pass, the basic blocks
+// are labeled in layout order; meanwhile blocks could be numbered in a
+// different order. It's hard to map between the graph and pass output.
+// With this option on, the basic blocks are renumbered in function layout
+// order. For debugging only.
+static cl::opt<bool> RenumberBlocksBeforeView(
+ "renumber-blocks-before-view",
+ cl::desc(
+ "If true, basic blocks are re-numbered before MBP layout is printed "
+ "into a dot graph. Only used when a function is being printed."),
+ cl::init(false), cl::Hidden);
+
+namespace llvm {
+extern cl::opt<bool> EnableExtTspBlockPlacement;
+extern cl::opt<bool> ApplyExtTspWithoutProfile;
+extern cl::opt<unsigned> StaticLikelyProb;
+extern cl::opt<unsigned> ProfileLikelyProb;
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi=
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+} // namespace llvm
+
+namespace {
+
+class BlockChain;
+
+/// Type for our function-wide basic block -> block chain mapping.
+using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
+
+/// A chain of blocks which will be laid out contiguously.
+///
+/// This is the datastructure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
+///
+/// Chains can be built around a single basic block and can be merged to grow
+/// them. They participate in a block-to-chain mapping, which is updated
+/// automatically as chains are merged together.
+class BlockChain {
+ /// The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// Iterator over blocks within the chain.
+ using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;
+ using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator;
+
+ /// Beginning of blocks within the chain.
+ iterator begin() { return Blocks.begin(); }
+ const_iterator begin() const { return Blocks.begin(); }
+
+ /// End of blocks within the chain.
+ iterator end() { return Blocks.end(); }
+ const_iterator end() const { return Blocks.end(); }
+
+ bool remove(MachineBasicBlock* BB) {
+ for(iterator i = begin(); i != end(); ++i) {
+ if (*i == BB) {
+ Blocks.erase(i);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks, updating the edge list, and
+ /// updating the block -> chain mapping. It does not free or tear down the
+ /// old chain, but the old chain's block list is no longer valid.
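+ ///
+ /// For example (schematic): merging a chain [B, C] into a chain [A] yields
+ /// [A, B, C], and BlockToChain[B] and BlockToChain[C] are repointed at the
+ /// surviving chain; the old chain object is left behind but must no longer
+ /// be used.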
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB && "Can't merge a null block.");
+ assert(!Blocks.empty() && "Can't merge into an empty chain.");
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB] &&
+ "Passed chain is null, but BB has entry in BlockToChain.");
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (MachineBasicBlock *ChainBB : *Chain) {
+ Blocks.push_back(ChainBB);
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");
+ BlockToChain[ChainBB] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// Dump the blocks in this chain.
+ LLVM_DUMP_METHOD void dump() {
+ for (MachineBasicBlock *MBB : *this)
+ MBB->dump();
+ }
+#endif // NDEBUG
+
+ /// Count of predecessors of any block within the chain which have not
+ /// yet been scheduled. In general, we will delay scheduling this chain
+ /// until those predecessors are scheduled (or we find a sufficiently good
+ /// reason to override this heuristic.) Note that when forming loop chains,
+ /// blocks outside the loop are ignored and treated as if they were already
+ /// scheduled.
+ ///
+ /// Note: This field is reinitialized multiple times - once for each loop,
+ /// and then once for the function as a whole.
+ unsigned UnscheduledPredecessors = 0;
+};
+
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// A type for a block filter set.
+ using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
+
+ /// Pair struct containing basic block and taildup profitability
+ struct BlockAndTailDupResult {
+ MachineBasicBlock *BB = nullptr;
+ bool ShouldTailDup;
+ };
+
+ /// Triple struct containing edge weight and the edge.
+ struct WeightedEdge {
+ BlockFrequency Weight;
+ MachineBasicBlock *Src = nullptr;
+ MachineBasicBlock *Dest = nullptr;
+ };
+
+ /// Work lists of blocks that are ready to be laid out.
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+
+ /// Edges that have already been computed as optimal.
+ DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
+
+ /// Machine Function
+ MachineFunction *F = nullptr;
+
+ /// A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+
+ /// A handle to the function-wide block frequency pass.
+ std::unique_ptr<MBFIWrapper> MBFI;
+
+ /// A handle to the loop info.
+ MachineLoopInfo *MLI = nullptr;
+
+ /// Preferred loop exit.
+ /// Member variable for convenience. It may be removed by duplication deep
+ /// in the call stack.
+ MachineBasicBlock *PreferredLoopExit = nullptr;
+
+ /// A handle to the target's instruction info.
+ const TargetInstrInfo *TII = nullptr;
+
+ /// A handle to the target's lowering info.
+ const TargetLoweringBase *TLI = nullptr;
+
+ /// A handle to the post dominator tree.
+ MachinePostDominatorTree *MPDT = nullptr;
+
+ ProfileSummaryInfo *PSI = nullptr;
+
+ /// Duplicator used to duplicate tails during placement.
+ ///
+ /// Placement decisions can open up new tail duplication opportunities, but
+ /// since tail duplication affects placement decisions of later blocks, it
+ /// must be done inline.
+ TailDuplicator TailDup;
+
+ /// Partial tail duplication threshold.
+ BlockFrequency DupThreshold;
+
+ /// True: use block profile count to compute tail duplication cost.
+ /// False: use block frequency to compute tail duplication cost.
+ bool UseProfileCount = false;
+
+ /// Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily while processing the loop structure of
+ /// a function. To reduce malloc traffic, we allocate them using this
+ /// slab-like allocator, and destroy them after the pass completes. An
+ /// important guarantee is that this allocator produces stable pointers to
+ /// the chains.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
+
+#ifndef NDEBUG
+ /// The set of basic blocks that have terminators that cannot be fully
+ /// analyzed. These basic blocks cannot be re-ordered safely by
+ /// MachineBlockPlacement, and we must preserve physical layout of these
+ /// blocks and their successors through the pass.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
+#endif
+
+ /// Get block profile count or frequency according to UseProfileCount.
+ /// The return value is used to model tail duplication cost.
+ BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
+ if (UseProfileCount) {
+ auto Count = MBFI->getBlockProfileCount(BB);
+ if (Count)
+ return *Count;
+ else
+ return 0;
+ } else
+ return MBFI->getBlockFreq(BB);
+ }
+
+ /// Scale the DupThreshold according to basic block size.
+ BlockFrequency scaleThreshold(MachineBasicBlock *BB);
+ void initDupThreshold();
+
+ /// Decrease the UnscheduledPredecessors count for all blocks in chain, and
+ /// if the count goes to 0, add them to the appropriate work list.
+ void markChainSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
+
+ /// Decrease the UnscheduledPredecessors count for a single block, and
+ /// if the count goes to 0, add them to the appropriate work list.
+ void markBlockSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *BB,
+ const MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
+
+ BranchProbability
+ collectViableSuccessors(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
+ BlockFilterSet *BlockFilter);
+ void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
+ MachineBasicBlock *BB,
+ BlockFilterSet *BlockFilter);
+ bool repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ const MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt);
+ bool maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToLPred);
+ bool hasBetterLayoutPredecessor(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ BlockAndTailDupResult selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+
+ /// Add a basic block to the work list if it is appropriate.
+ ///
+ /// If the optional parameter BlockFilter is provided, only MBB
+ /// present in the set will be added to the worklist. If nullptr
+ /// is provided, no filtering occurs.
+ void fillWorkLists(const MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter);
+
+ void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
+ BlockFilterSet *BlockFilter = nullptr);
+ bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop);
+ bool hasViableTopFallthrough(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopTop(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq);
+ BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
+ void buildLoopChains(const MachineLoop &L);
+ void rotateLoop(
+ BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
+ BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void buildCFGChains();
+ void optimizeBranches();
+ void alignBlocks();
+ /// Returns true if a block should be tail-duplicated to increase fallthrough
+ /// opportunities.
+ bool shouldTailDuplicate(MachineBasicBlock *BB);
+ /// Check the edge frequencies to see if tail duplication will increase
+ /// fallthroughs.
+ bool isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability QProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
+ /// Check for a trellis layout.
+ bool isTrellis(const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
+ /// Get the best successor given a trellis layout.
+ BlockAndTailDupResult getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+
+ /// Get the best pair of non-conflicting edges.
+ static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+
+ /// Returns true if a block can tail duplicate into all unplaced
+ /// predecessors. Filters based on loop.
+ bool canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
+ /// Find chains of triangles to tail-duplicate where a global analysis works,
+ /// but a local analysis would not find them.
+ void precomputeTriangleChains();
+
+ /// Apply a post-processing step optimizing block placement.
+ void applyExtTsp();
+
+ /// Modify the existing block placement in the function and adjust all jumps.
+ void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);
+
+ /// Create a single CFG chain from the current block order.
+ void createCFGChainExtTsp();
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ bool allowTailDupPlacement() const {
+ assert(F);
+ return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ if (TailDupPlacement)
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char MachineBlockPlacement::ID = 0;
+
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockName(const MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << printMBBReference(*BB);
+ OS << " ('" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// Mark a chain's successors as having one fewer preds.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the appropriate worklist.
+void MachineBlockPlacement::markChainSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (MachineBasicBlock *MBB : Chain) {
+ markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
+ }
+}
+
+/// Mark a single block's successors as having one fewer preds.
+///
+/// Under normal circumstances, this is only called by markChainSuccessors,
+/// but if a block that was to be placed is completely tail-duplicated away,
+/// and was duplicated into the chain end, we need to redo markBlockSuccessors
+/// for just that block.
+void MachineBlockPlacement::markBlockSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *MBB,
+ const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+ auto *NewBB = *SuccChain.begin();
+ if (NewBB->isEHPad())
+ EHPadWorkList.push_back(NewBB);
+ else
+ BlockWorkList.push_back(NewBB);
+ }
+}
+
+/// This helper function collects the set of successors of block
+/// \p BB that are allowed to be its layout successors, and return
+/// the total branch probability of edges from \p BB to those
+/// blocks.
+BranchProbability MachineBlockPlacement::collectViableSuccessors(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors) {
+ // Adjust edge probabilities by excluding edges pointing to blocks that are
+ // either not in BlockFilter or are already in the current chain. Consider the
+ // following CFG:
+ //
+ // --->A
+ // | / \
+ // | B C
+ // | \ / \
+ // ----D E
+ //
+ // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+ // A->C is chosen as a fall-through, D won't be selected as a successor of C
+ // due to CFG constraint (the probability of C->D is not greater than
+ // HotProb to break topo-order). If we exclude E that is not in BlockFilter
+ // when calculating the probability of C->D, D will be selected and we
+ // will get A C D B as the layout of this loop.
+ auto AdjustedSumProb = BranchProbability::getOne();
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ bool SkipSucc = false;
+ if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
+ SkipSucc = true;
+ } else {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (SuccChain == &Chain) {
+ SkipSucc = true;
+ } else if (Succ != *SuccChain->begin()) {
+ LLVM_DEBUG(dbgs() << " " << getBlockName(Succ)
+ << " -> Mid chain!\n");
+ continue;
+ }
+ }
+ if (SkipSucc)
+ AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
+ else
+ Successors.push_back(Succ);
+ }
+
+ return AdjustedSumProb;
+}
+
+/// The helper function returns the branch probability that is adjusted
+/// or normalized over the new total \p AdjustedSumProb.
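+/// For example (illustrative numbers): an original edge probability of 3/10
+/// renormalized over an adjusted sum of 6/10 becomes 3/6 = 1/2.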
+static BranchProbability
+getAdjustedProbability(BranchProbability OrigProb,
+ BranchProbability AdjustedSumProb) {
+ BranchProbability SuccProb;
+ uint32_t SuccProbN = OrigProb.getNumerator();
+ uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+ if (SuccProbN >= SuccProbD)
+ SuccProb = BranchProbability::getOne();
+ else
+ SuccProb = BranchProbability(SuccProbN, SuccProbD);
+
+ return SuccProb;
+}
+
+/// Check if \p BB has exactly the successors in \p Successors.
+static bool
+hasSameSuccessors(MachineBasicBlock &BB,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {
+ if (BB.succ_size() != Successors.size())
+ return false;
+ // We don't want to count self-loops
+ if (Successors.count(&BB))
+ return false;
+ for (MachineBasicBlock *Succ : BB.successors())
+ if (!Successors.count(Succ))
+ return false;
+ return true;
+}
+
+/// Check if a block should be tail duplicated to increase fallthrough
+/// opportunities.
+/// \p BB Block to check.
+bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ bool IsSimple = TailDup.isSimpleBB(BB);
+
+ if (BB->succ_size() == 1)
+ return false;
+ return TailDup.shouldTailDuplicate(IsSimple, *BB);
+}
+
+/// Compare 2 BlockFrequency's with a small penalty for \p A.
+/// In order to be conservative, we apply an X% penalty to account for
+/// increased icache pressure and static heuristics. For small frequencies
+/// we use only the numerators to improve accuracy. For simplicity, we assume
+/// the penalty is less than 100%.
+/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
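+/// As a made-up illustration with the default 2% penalty: the gain A - B must
+/// satisfy (A - B) / (2/100) >= EntryFreq, i.e. A has to beat B by at least
+/// 2% of the entry frequency.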
+static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
+ uint64_t EntryFreq) {
+ BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
+ BlockFrequency Gain = A - B;
+ return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
+}
+
+/// Check the edge frequencies to see if tail duplication will increase
+/// fallthroughs. It only makes sense to call this function when
+/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
+/// always locally profitable if we would have picked \p Succ without
+/// considering duplication.
+bool MachineBlockPlacement::isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability QProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // We need to do a probability calculation to make sure this is profitable.
+ // First: does succ have a successor that post-dominates? This affects the
+ // calculation. The 2 relevant cases are:
+ // BB BB
+ // | \Qout | \Qout
+ // P| C |P C
+ // = C' = C'
+ // | /Qin | /Qin
+ // | / | /
+ // Succ Succ
+ // / \ | \ V
+ // U/ =V |U \
+ // / \ = D
+ // D E | /
+ // | /
+ // |/
+ // PDom
+ // '=' : Branch taken for that CFG edge
+ // In the second case, placing Succ while duplicating it into C prevents the
+ // fallthrough of Succ into either D or PDom, because they now have C as an
+ // unplaced predecessor.
+
+ // Start by figuring out which case we fall into
+ MachineBasicBlock *PDom = nullptr;
+ SmallVector<MachineBasicBlock *, 4> SuccSuccs;
+ // Only scan the relevant successors
+ auto AdjustedSuccSumProb =
+ collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
+ BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
+ auto BBFreq = MBFI->getBlockFreq(BB);
+ auto SuccFreq = MBFI->getBlockFreq(Succ);
+ BlockFrequency P = BBFreq * PProb;
+ BlockFrequency Qout = BBFreq * QProb;
+ uint64_t EntryFreq = MBFI->getEntryFreq();
+ // If there are no more successors, it is profitable to copy, as it strictly
+ // increases fallthrough.
+ if (SuccSuccs.size() == 0)
+ return greaterWithBias(P, Qout, EntryFreq);
+
+ auto BestSuccSucc = BranchProbability::getZero();
+ // Find the PDom or the best Succ if no PDom exists.
+ for (MachineBasicBlock *SuccSucc : SuccSuccs) {
+ auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
+ if (Prob > BestSuccSucc)
+ BestSuccSucc = Prob;
+ if (PDom == nullptr)
+ if (MPDT->dominates(SuccSucc, Succ)) {
+ PDom = SuccSucc;
+ break;
+ }
+ }
+ // For the comparisons, we need to know Succ's best incoming edge that isn't
+ // from BB.
+ auto SuccBestPred = BlockFrequency(0);
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ if (SuccPred == Succ || SuccPred == BB
+ || BlockToChain[SuccPred] == &Chain
+ || (BlockFilter && !BlockFilter->count(SuccPred)))
+ continue;
+ auto Freq = MBFI->getBlockFreq(SuccPred)
+ * MBPI->getEdgeProbability(SuccPred, Succ);
+ if (Freq > SuccBestPred)
+ SuccBestPred = Freq;
+ }
+ // Qin is Succ's best unplaced incoming edge that isn't BB
+ BlockFrequency Qin = SuccBestPred;
+ // If it doesn't have a post-dominating successor, here is the calculation:
+ // BB BB
+ // | \Qout | \
+ // P| C | =
+ // = C' | C
+ // | /Qin | |
+ // | / | C' (+Succ)
+ // Succ Succ /|
+ // / \ | \/ |
+ // U/ =V | == |
+ // / \ | / \|
+ // D E D E
+ // '=' : Branch taken for that CFG edge
+ // Cost in the first case is: P + V
+ // For this calculation, we always assume P > Qout. If Qout > P
+ // The result of this function will be ignored at the caller.
+ // Let F = SuccFreq - Qin
+ // Cost in the second case is: Qout + min(Qin, F) * U + max(Qin, F) * V
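+ // A worked example with made-up numbers: P = 90, Qout = 10, Qin = 30,
+ // SuccFreq = 100 (so F = 70), U = 0.6, V = 0.4. The first layout costs
+ // P + V = 90 + 40 = 130 taken branches; duplicating costs
+ // Qout + min(Qin, F) * U + max(Qin, F) * V = 10 + 18 + 28 = 56, so here
+ // duplication wins if the difference also clears greaterWithBias.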
+
+ if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
+ BranchProbability UProb = BestSuccSucc;
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency F = SuccFreq - Qin;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency QinU = std::min(Qin, F) * UProb;
+ BlockFrequency BaseCost = P + V;
+ BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
+ return greaterWithBias(BaseCost, DupCost, EntryFreq);
+ }
+ BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency U = SuccFreq * UProb;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency F = SuccFreq - Qin;
+ // If there is a post-dominating successor, here is the calculation:
+ // BB BB BB BB
+ // | \Qout | \ | \Qout | \
+ // |P C | = |P C | =
+ // = C' |P C = C' |P C
+ // | /Qin | | | /Qin | |
+ // | / | C' (+Succ) | / | C' (+Succ)
+ // Succ Succ /| Succ Succ /|
+ // | \ V | \/ | | \ V | \/ |
+ // |U \ |U /\ =? |U = |U /\ |
+ // = D = = =?| | D | = =|
+ // | / |/ D | / |/ D
+ // | / | / | = | /
+ // |/ | / |/ | =
+ // Dom Dom Dom Dom
+ // '=' : Branch taken for that CFG edge
+ // The cost for taken branches in the first case is P + U
+ // Let F = SuccFreq - Qin
+ // The cost in the second case (assuming independence), given the layout:
+ // BB, Succ, (C+Succ), D, Dom or the layout:
+ // BB, Succ, D, Dom, (C+Succ)
+ // is Qout + max(F, Qin) * U + min(F, Qin)
+ // compare P + U vs Qout + P * U + Qin.
+ //
+ // The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
+ //
+ // For the 3rd case, the cost is P + 2 * V
+ // For the 4th case, the cost is Qout + min(Qin, F) * U + max(Qin, F) * V + V
+ // We choose 4 over 3 when (P + V) > Qout + min(Qin, F) * U + max(Qin, F) * V
+ if (UProb > AdjustedSuccSumProb / 2 &&
+ !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
+ Chain, BlockFilter))
+ // Cases 3 & 4
+ return greaterWithBias(
+ (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
+ EntryFreq);
+ // Cases 1 & 2
+ return greaterWithBias((P + U),
+ (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
+ std::max(Qin, F) * UProb),
+ EntryFreq);
+}
+
+/// Check for a trellis layout. \p BB is the upper part of a trellis if its
+/// successors form the lower part of a trellis. A successor set S forms the
+/// lower part of a trellis if all of the predecessors of S are either in S or
+/// have all of S as successors. We ignore trellises where BB doesn't have 2
+/// successors because for fewer than 2, it's trivial, and for 3 or greater they
+/// are very uncommon and complex to compute optimally. Allowing edges within S
+/// is not strictly a trellis, but the same algorithm works, so we allow it.
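+/// As an illustrative shape: if BB branches to S1 and S2, and some other block
+/// P outside the current chain also branches to exactly {S1, S2}, then
+/// {S1, S2} form the lower part of a trellis rooted at BB and P.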
+bool MachineBlockPlacement::isTrellis(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // Technically BB could form a trellis with branching factor higher than 2.
+ // But that's extremely uncommon.
+ if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
+ return false;
+
+ SmallPtrSet<const MachineBasicBlock *, 2> Successors(BB->succ_begin(),
+ BB->succ_end());
+ // To avoid reviewing the same predecessors twice.
+ SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
+
+ for (MachineBasicBlock *Succ : ViableSuccs) {
+ int PredCount = 0;
+ for (auto *SuccPred : Succ->predecessors()) {
+ // Allow triangle successors, but don't count them.
+ if (Successors.count(SuccPred)) {
+ // Make sure that it is actually a triangle.
+ for (MachineBasicBlock *CheckSucc : SuccPred->successors())
+ if (!Successors.count(CheckSucc))
+ return false;
+ continue;
+ }
+ const BlockChain *PredChain = BlockToChain[SuccPred];
+ if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
+ PredChain == &Chain || PredChain == BlockToChain[Succ])
+ continue;
+ ++PredCount;
+ // Perform the successor check only once.
+ if (!SeenPreds.insert(SuccPred).second)
+ continue;
+ if (!hasSameSuccessors(*SuccPred, Successors))
+ return false;
+ }
+ // If one of the successors has only BB as a predecessor, it is not a
+ // trellis.
+ if (PredCount < 1)
+ return false;
+ }
+ return true;
+}
+
+/// Pick the highest total weight pair of edges that can both be laid out.
+/// The edges in \p Edges[0] are assumed to have a different destination than
+/// the edges in \p Edges[1]. Simple counting shows that the best pair is either
+/// the individual highest weight edges to the 2 different destinations, or in
+/// case of a conflict, one of them should be replaced with a 2nd best edge.
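+ ///
+ /// As a small worked example (hypothetical weights): if A->Succ1 = 90,
+ /// A->Succ2 = 80, B->Succ1 = 70 and B->Succ2 = 20, the two individually best
+ /// edges (A->Succ1 and A->Succ2) conflict on A, so we compare 90 + 20 = 110
+ /// against 80 + 70 = 150 and keep the pair {A->Succ2, B->Succ1}.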
+std::pair<MachineBlockPlacement::WeightedEdge,
+ MachineBlockPlacement::WeightedEdge>
+MachineBlockPlacement::getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge, 8>>
+ Edges) {
+ // Sort the edges, and then for each successor, find the best incoming
+ // predecessor. If the best incoming predecessors aren't the same,
+ // then that is clearly the best layout. If there is a conflict, one of the
+ // successors will have to fallthrough from the second best predecessor. We
+ // compare which combination is better overall.
+
+ // Sort for highest frequency.
+ auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
+
+ llvm::stable_sort(Edges[0], Cmp);
+ llvm::stable_sort(Edges[1], Cmp);
+ auto BestA = Edges[0].begin();
+ auto BestB = Edges[1].begin();
+ // Arrange for the correct answer to be in BestA and BestB
+ // If the 2 best edges don't conflict, the answer is already there.
+ if (BestA->Src == BestB->Src) {
+ // Compare the total fallthrough of (Best + Second Best) for both pairs
+ auto SecondBestA = std::next(BestA);
+ auto SecondBestB = std::next(BestB);
+ BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
+ BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
+ if (BestAScore < BestBScore)
+ BestA = SecondBestA;
+ else
+ BestB = SecondBestB;
+ }
+ // Arrange for the BB edge to be in BestA if it exists.
+ if (BestB->Src == BB)
+ std::swap(BestA, BestB);
+ return std::make_pair(*BestA, *BestB);
+}
+
+/// Get the best successor from \p BB based on \p BB being part of a trellis.
+/// We only handle trellises with 2 successors, so the algorithm is
+/// straightforward: Find the best pair of edges that don't conflict. We find
+/// the best incoming edge for each successor in the trellis. If those conflict,
+/// we consider which of them should be replaced with the second best.
+ /// The two best edges are computed via getBestNonConflictingEdges; if the
+ /// winning edge out of \p BB exists, its destination is returned as the
+ /// selected successor.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+
+ BlockAndTailDupResult Result = {nullptr, false};
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+
+ // We assume size 2 because it's common. For general n, we would have to do
+ // the Hungarian algorithm, but it's not worth the complexity because more
+ // than 2 successors is fairly uncommon, and a trellis even more so.
+ if (Successors.size() != 2 || ViableSuccs.size() != 2)
+ return Result;
+
+ // Collect the edge frequencies of all edges that form the trellis.
+ SmallVector<WeightedEdge, 8> Edges[2];
+ int SuccIndex = 0;
+ for (auto *Succ : ViableSuccs) {
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ // Skip any placed predecessors that are not BB
+ if (SuccPred != BB)
+ if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
+ BlockToChain[SuccPred] == &Chain ||
+ BlockToChain[SuccPred] == BlockToChain[Succ])
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
+ MBPI->getEdgeProbability(SuccPred, Succ);
+ Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
+ }
+ ++SuccIndex;
+ }
+
+ // Pick the best combination of 2 edges from all the edges in the trellis.
+ WeightedEdge BestA, BestB;
+ std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
+
+ if (BestA.Src != BB) {
+ // If we have a trellis, and BB doesn't have the best fallthrough edges,
+ // we shouldn't choose any successor. We've already looked and there's a
+ // better fallthrough edge for all the successors.
+ LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
+ return Result;
+ }
+
+ // Did we pick the triangle edge? If tail-duplication is profitable, do
+ // that instead. Otherwise merge the triangle edge now while we know it is
+ // optimal.
+ if (BestA.Dest == BestB.Src) {
+ // The edges are BB->Succ1->Succ2, and we're looking to see if BB->Succ2
+ // would be better.
+ MachineBasicBlock *Succ1 = BestA.Dest;
+ MachineBasicBlock *Succ2 = BestB.Dest;
+ // Check to see if tail-duplication would be profitable.
+ if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
+ canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
+ isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
+ Chain, BlockFilter)) {
+ LLVM_DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(Succ2)
+ << ", probability: " << Succ2Prob
+ << " (Tail Duplicate)\n");
+ Result.BB = Succ2;
+ Result.ShouldTailDup = true;
+ return Result;
+ }
+ }
+ // We have already computed the optimal edge for the other side of the
+ // trellis.
+ ComputedEdges[BestB.Src] = { BestB.Dest, false };
+
+ auto TrellisSucc = BestA.Dest;
+ LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(TrellisSucc)
+ << ", probability: " << SuccProb << " (Trellis)\n");
+ Result.BB = TrellisSucc;
+ return Result;
+}
+
+/// When the option allowTailDupPlacement() is on, this method checks if the
+/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
+ /// into all of its unplaced, unfiltered predecessors that are not BB.
+bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ if (!shouldTailDuplicate(Succ))
+ return false;
+
+ // The result of canTailDuplicate.
+ bool Duplicate = true;
+ // Number of possible duplications.
+ unsigned int NumDup = 0;
+
+ // For CFG checking.
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Make sure all unplaced and unfiltered predecessors can be
+ // tail-duplicated into.
+ // Skip any blocks that are already placed or not in this loop.
+ if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
+ || (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))
+ continue;
+ if (!TailDup.canTailDuplicate(Succ, Pred)) {
+ if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
+ // This will result in a trellis after tail duplication, so we don't
+ // need to copy Succ into this predecessor. In the presence
+ // of a trellis tail duplication can continue to be profitable.
+ // For example:
+ // A A
+ // |\ |\
+ // | \ | \
+ // | C | C+BB
+ // | / | |
+ // |/ | |
+ // BB => BB |
+ // |\ |\/|
+ // | \ |/\|
+ // | D | D
+ // | / | /
+ // |/ |/
+ // Succ Succ
+ //
+ // After BB was duplicated into C, the layout looks like the one on the
+ // right. BB and C now have the same successors. When considering
+ // whether Succ can be duplicated into all its unplaced predecessors, we
+ // ignore C.
+ // We can do this because C already has a profitable fallthrough, namely
+ // D. TODO(iteratee): ignore sufficiently cold predecessors for
+ // duplication and for this test.
+ //
+ // This allows trellises to be laid out in 2 separate chains
+ // (A,B,Succ,...) and later (C,D,...) This is a reasonable heuristic
+ // because it allows the creation of 2 fallthrough paths with links
+ // between them, and we correctly identify the best layout for these
+ // CFGs. We want to extend trellises that the user created in addition
+ // to trellises created by tail-duplication, so we just look for the
+ // CFG.
+ continue;
+ Duplicate = false;
+ continue;
+ }
+ NumDup++;
+ }
+
+ // No possible duplication in current filter set.
+ if (NumDup == 0)
+ return false;
+
+ // If profile information is available, findDuplicateCandidates can do more
+ // precise benefit analysis.
+ if (F->getFunction().hasProfileData())
+ return true;
+
+ // This is mainly for function exit BB.
+ // The integrated tail duplication is really designed for increasing
+ // fallthrough from predecessors of Succ to its successors. We may need
+ // another mechanism to handle different cases.
+ if (Succ->succ_empty())
+ return true;
+
+ // Plus the already placed predecessor.
+ NumDup++;
+
+ // If the duplication candidate has more unplaced predecessors than
+ // successors, the extra duplication can't bring more fallthrough.
+ //
+ // Pred1 Pred2 Pred3
+ // \ | /
+ // \ | /
+ // \ | /
+ // Dup
+ // / \
+ // / \
+ // Succ1 Succ2
+ //
+ // In this example Dup has 2 successors and 3 predecessors, duplication of Dup
+ // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2,
+ // but the duplication into Pred3 can't increase fallthrough.
+ //
+ // A small amount of extra duplication may not hurt too much, but we need a
+ // better heuristic to handle it.
+ if ((NumDup > Succ->succ_size()) || !Duplicate)
+ return false;
+
+ return true;
+}
+
+/// Find chains of triangles where we believe it would be profitable to
+/// tail-duplicate them all, but a local analysis would not find them.
+/// There are 3 ways this can be profitable:
+/// 1) The post-dominators marked 50% are actually taken 55% (This shrinks with
+/// longer chains)
+/// 2) The chains are statically correlated. Branch probabilities have a very
+/// U-shaped distribution.
+/// [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
+/// If the branches in a chain are likely to be from the same side of the
+/// distribution as their predecessor, but are independent at runtime, this
+/// transformation is profitable. (Because the cost of being wrong is a small
+/// fixed cost, unlike the standard triangle layout where the cost of being
+/// wrong scales with the # of triangles.)
+ /// 3) The chains are dynamically correlated. If the probability that a previous
+ /// branch was taken positively influences whether the next branch will be
+ /// taken, this transformation is likewise profitable.
+/// We believe that 2 and 3 are common enough to justify the small margin in 1.
+void MachineBlockPlacement::precomputeTriangleChains() {
+ struct TriangleChain {
+ std::vector<MachineBasicBlock *> Edges;
+
+ TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
+ : Edges({src, dst}) {}
+
+ void append(MachineBasicBlock *dst) {
+ assert(getKey()->isSuccessor(dst) &&
+ "Attempting to append a block that is not a successor.");
+ Edges.push_back(dst);
+ }
+
+ unsigned count() const { return Edges.size() - 1; }
+
+ MachineBasicBlock *getKey() const {
+ return Edges.back();
+ }
+ };
+
+ if (TriangleChainCount == 0)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");
+ // Map from last block to the chain that contains it. This allows us to extend
+ // chains as we find new triangles.
+ DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
+ for (MachineBasicBlock &BB : *F) {
+ // If BB doesn't have 2 successors, it doesn't start a triangle.
+ if (BB.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PDom = nullptr;
+ for (MachineBasicBlock *Succ : BB.successors()) {
+ if (!MPDT->dominates(Succ, &BB))
+ continue;
+ PDom = Succ;
+ break;
+ }
+ // If BB doesn't have a post-dominating successor, it doesn't form a
+ // triangle.
+ if (PDom == nullptr)
+ continue;
+ // If PDom has a hint that it is low probability, skip this triangle.
+ if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
+ continue;
+ // If PDom isn't eligible for duplication, this isn't the kind of triangle
+ // we're looking for.
+ if (!shouldTailDuplicate(PDom))
+ continue;
+ bool CanTailDuplicate = true;
+ // If PDom can't tail-duplicate into its non-BB predecessors, then this
+ // isn't the kind of triangle we're looking for.
+ for (MachineBasicBlock* Pred : PDom->predecessors()) {
+ if (Pred == &BB)
+ continue;
+ if (!TailDup.canTailDuplicate(PDom, Pred)) {
+ CanTailDuplicate = false;
+ break;
+ }
+ }
+ // If we can't tail-duplicate PDom to its predecessors, then skip this
+ // triangle.
+ if (!CanTailDuplicate)
+ continue;
+
+ // Now we have an interesting triangle. Insert it if it's not part of an
+ // existing chain.
+ // Note: This cannot be replaced with a call to insert() or emplace() because
+ // the find key is BB, but the insert/emplace key is PDom.
+ auto Found = TriangleChainMap.find(&BB);
+ // If it is, remove the chain from the map, grow it, and put it back in the
+ // map with the end as the new key.
+ if (Found != TriangleChainMap.end()) {
+ TriangleChain Chain = std::move(Found->second);
+ TriangleChainMap.erase(Found);
+ Chain.append(PDom);
+ TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
+ } else {
+ auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+ }
+ }
+
+ // Iterating over a DenseMap is safe here, because the only thing in the body
+ // of the loop is inserting into another DenseMap (ComputedEdges).
+ // ComputedEdges is never iterated, so this doesn't lead to non-determinism.
+ for (auto &ChainPair : TriangleChainMap) {
+ TriangleChain &Chain = ChainPair.second;
+ // Benchmarking has shown that due to branch correlation duplicating 2 or
+ // more triangles is profitable, despite the calculations assuming
+ // independence.
+ if (Chain.count() < TriangleChainCount)
+ continue;
+ MachineBasicBlock *dst = Chain.Edges.back();
+ Chain.Edges.pop_back();
+ for (MachineBasicBlock *src : reverse(Chain.Edges)) {
+ LLVM_DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->"
+ << getBlockName(dst)
+ << " as pre-computed based on triangles.\n");
+
+ auto InsertResult = ComputedEdges.insert({src, {dst, true}});
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+
+ dst = src;
+ }
+ }
+}
+
+// When profile is not present, return the StaticLikelyProb.
+// When profile is available, we need to handle the triangle-shape CFG.
+static BranchProbability getLayoutSuccessorProbThreshold(
+ const MachineBasicBlock *BB) {
+ if (!BB->getParent()->getFunction().hasProfileData())
+ return BranchProbability(StaticLikelyProb, 100);
+ if (BB->succ_size() == 2) {
+ const MachineBasicBlock *Succ1 = *BB->succ_begin();
+ const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
+ if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
+ /* See case 1 below for the cost analysis. For BB->Succ to
+ * be taken with smaller cost, the following needs to hold:
+ * Prob(BB->Succ) > 2 * Prob(BB->Pred)
+ * So the threshold T in the calculation below
+ * (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred)
+ * So T / (1 - T) = 2, Yielding T = 2/3
+ * Also adding user specified branch bias, we have
+ * T = (2/3)*(ProfileLikelyProb/50)
+ * = (2*ProfileLikelyProb)/150
+ */
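+ // For instance, a purely illustrative ProfileLikelyProb value of 60 would
+ // yield T = (2 * 60) / 150 = 80%, i.e. BranchProbability(120, 150).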
+ return BranchProbability(2 * ProfileLikelyProb, 150);
+ }
+ }
+ return BranchProbability(ProfileLikelyProb, 100);
+}
+
+/// Checks to see if the layout candidate block \p Succ has a better layout
+/// predecessor than \c BB. If yes, returns true.
+/// \p SuccProb: The probability adjusted for only remaining blocks.
+/// Only used for logging
+/// \p RealSuccProb: The un-adjusted probability.
+/// \p Chain: The chain that BB belongs to and Succ is being considered for.
+/// \p BlockFilter: if non-null, the set of blocks that make up the loop being
+/// considered
+bool MachineBlockPlacement::hasBetterLayoutPredecessor(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+
+ // There isn't a better layout when there are no unscheduled predecessors.
+ if (SuccChain.UnscheduledPredecessors == 0)
+ return false;
+
+ // There are two basic scenarios here:
+ // -------------------------------------
+ // Case 1: triangular shape CFG (if-then):
+ // BB
+ // | \
+ // | \
+ // | Pred
+ // | /
+ // Succ
+ // In this case, we are evaluating whether to select edge -> Succ, e.g.
+ // set Succ as the layout successor of BB. Picking Succ as BB's
+ // successor breaks the CFG constraints (FIXME: define these constraints).
+ // With this layout, the Pred block is forced to be outlined, so the overall
+ // cost will be the cost of the taken branch from BB to Pred, plus the cost
+ // of the backward taken branch from Pred to Succ, as well as the additional
+ // cost associated with the needed unconditional jump instruction from Pred
+ // to Succ.
+
+ // The cost of the topological order layout is the taken branch cost
+ // from BB to Succ, so to make BB->Succ a viable candidate, the following
+ // must hold:
+ // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
+ // < freq(BB->Succ) * taken_branch_cost.
+ // Ignoring unconditional jump cost, we get
+ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
+ // prob(BB->Succ) > 2 * prob(BB->Pred)
+ //
+ // When real profile data is available, we can precisely compute the
+ // probability threshold that is needed for edge BB->Succ to be considered.
+ // Without profile data, the heuristic requires the branch bias to be
+ // a lot larger to make sure the signal is very strong (e.g. 80% default).
+ // -----------------------------------------------------------------
+ // Case 2: diamond like CFG (if-then-else):
+ // S
+ // / \
+ // | \
+ // BB Pred
+ // \ /
+ // Succ
+ // ..
+ //
+ // The current block is BB and edge BB->Succ is now being evaluated.
+ // Note that edge S->BB was previously already selected because
+ // prob(S->BB) > prob(S->Pred).
+ // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
+ // choose Pred, we will have a topological ordering as shown on the left
+ // in the picture below. If we choose Succ, we have the solution as shown
+ // on the right:
+ //
+ // topo-order:
+ //
+ // S----- ---S
+ // | | | |
+ // ---BB | | BB
+ // | | | |
+ // | Pred-- | Succ--
+ // | | | |
+ // ---Succ ---Pred--
+ //
+ // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
+ // = freq(S->Pred) + freq(S->BB)
+ //
+ // If we have profile data (i.e, branch probabilities can be trusted), the
+ // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
+ // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
+ // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
+ // means the cost of topological order is greater.
+ // When profile data is not available, however, we need to be more
+ // conservative. If the branch prediction is wrong, breaking the topo-order
+ // will actually yield a layout with large cost. For this reason, we need
+ // a strongly biased branch at block S with Prob(S->BB) in order to select
+ // BB->Succ. This is equivalent to looking at the CFG backward with a backward
+ // edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected (without
+ // profile data).
+ // --------------------------------------------------------------------------
+ // Case 3: forked diamond
+ // S
+ // / \
+ // / \
+ // BB Pred
+ // | \ / |
+ // | \ / |
+ // | X |
+ // | / \ |
+ // | / \ |
+ // S1 S2
+ //
+ // The current block is BB and edge BB->S1 is now being evaluated.
+ // As above S->BB was already selected because
+ // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
+ //
+ // topo-order:
+ //
+ // S-------| ---S
+ // | | | |
+ // ---BB | | BB
+ // | | | |
+ // | Pred----| | S1----
+ // | | | |
+ // --(S1 or S2) ---Pred--
+ // |
+ // S2
+ //
+ // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
+ // + min(freq(Pred->S1), freq(Pred->S2))
+ // Non-topo-order cost:
+ // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
+ // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
+ // is 0. Then the non topo layout is better when
+ // freq(S->Pred) < freq(BB->S1).
+ // This is exactly what is checked below.
+ // Note there are other shapes that apply (Pred may not be a single block,
+ // but they all fit this general pattern.)
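+ // As a hypothetical numeric check of case 3: with freq(S->Pred) = 10,
+ // freq(BB->S1) = 30 and freq(BB->S2) = 20 (and the conservative assumption
+ // min(freq(Pred->S1), freq(Pred->S2)) = 0), the topo-order cost is
+ // 10 + 30 + 20 + 0 = 60 while the non-topo cost is 2 * 10 + 20 = 40, so the
+ // non-topo layout wins, consistent with freq(S->Pred) < freq(BB->S1).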
+ BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
+
+ // Make sure that a hot successor doesn't have a globally more
+ // important predecessor.
+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
+ bool BadCFGConflict = false;
+
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (Pred == Succ || PredChain == &SuccChain ||
+ (BlockFilter && !BlockFilter->count(Pred)) ||
+ PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
+ // This check is redundant except for look ahead. This function is
+ // called for lookahead by isProfitableToTailDup when BB hasn't been
+ // placed yet.
+ (Pred == BB))
+ continue;
+ // Do backward checking.
+ // For all cases above, we need backward checking to filter out edges that
+ // are not 'strongly' biased.
+ // BB Pred
+ // \ /
+ // Succ
+ // We select edge BB->Succ if
+ // freq(BB->Succ) > freq(Succ) * HotProb
+ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
+ // HotProb
+ // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+ // Case 1 is covered too, because the first equation reduces to:
+ // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
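+ // For example, with a hypothetical HotProb of 80%: if freq(BB->Succ) = 90
+ // and freq(Pred->Succ) = 20, then 20 * 0.8 = 16 < 90 * 0.2 = 18 and this
+ // Pred is not a conflict; if freq(Pred->Succ) were 30, then 30 * 0.8 = 24 >=
+ // 18 and Succ would be rejected as having a better layout predecessor.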
+ BlockFrequency PredEdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+
+ if (BadCFGConflict) {
+ LLVM_DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> "
+ << SuccProb << " (prob) (non-cold CFG conflict)\n");
+ return true;
+ }
+
+ return false;
+}
+
+/// Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable, along
+/// with a boolean indicating if tail duplication is necessary.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(StaticLikelyProb, 100);
+
+ BlockAndTailDupResult BestSucc = { nullptr, false };
+ auto BestProb = BranchProbability::getZero();
+
+ SmallVector<MachineBasicBlock *, 4> Successors;
+ auto AdjustedSumProb =
+ collectViableSuccessors(BB, Chain, BlockFilter, Successors);
+
+ LLVM_DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB)
+ << "\n");
+
+ // If we already precomputed the best successor for BB, return it if still
+ // applicable.
+ auto FoundEdge = ComputedEdges.find(BB);
+ if (FoundEdge != ComputedEdges.end()) {
+ // Copy the cached result before erasing it; erase() invalidates FoundEdge.
+ BlockAndTailDupResult Cached = FoundEdge->second;
+ MachineBasicBlock *Succ = Cached.BB;
+ ComputedEdges.erase(FoundEdge);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
+ SuccChain != &Chain && Succ == *SuccChain->begin())
+ return Cached;
+ }
+
+ // If BB is part of a trellis, use the trellis to determine the optimal
+ // fallthrough edges.
+ if (isTrellis(BB, Successors, Chain, BlockFilter))
+ return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
+ BlockFilter);
+
+ // For blocks with CFG violations, we may be able to lay them out anyway with
+ // tail-duplication. We keep this vector so we can perform the probability
+ // calculations the minimum number of times.
+ SmallVector<std::pair<BranchProbability, MachineBasicBlock *>, 4>
+ DupCandidates;
+ for (MachineBasicBlock *Succ : Successors) {
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
+ BranchProbability SuccProb =
+ getAdjustedProbability(RealSuccProb, AdjustedSumProb);
+
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Skip the edge \c BB->Succ if block \c Succ has a better layout
+ // predecessor that yields lower global cost.
+ if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
+ Chain, BlockFilter)) {
+ // If tail duplication would make Succ profitable, place it.
+ if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
+ DupCandidates.emplace_back(SuccProb, Succ);
+ continue;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << " Candidate: " << getBlockName(Succ)
+ << ", probability: " << SuccProb
+ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+
+ if (BestSucc.BB && BestProb >= SuccProb) {
+ LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n");
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << " Setting it as best candidate\n");
+ BestSucc.BB = Succ;
+ BestProb = SuccProb;
+ }
+ // Handle the tail duplication candidates in order of decreasing probability.
+ // Stop at the first one that is profitable. Also stop if they are less
+ // profitable than BestSucc. Position is important because we preserve it and
+ // prefer first best match. Here we aren't comparing in order, so we capture
+ // the position instead.
+ llvm::stable_sort(DupCandidates,
+ [](std::tuple<BranchProbability, MachineBasicBlock *> L,
+ std::tuple<BranchProbability, MachineBasicBlock *> R) {
+ return std::get<0>(L) > std::get<0>(R);
+ });
+ for (auto &Tup : DupCandidates) {
+ BranchProbability DupProb;
+ MachineBasicBlock *Succ;
+ std::tie(DupProb, Succ) = Tup;
+ if (DupProb < BestProb)
+ break;
+ if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
+ && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
+ LLVM_DEBUG(dbgs() << " Candidate: " << getBlockName(Succ)
+ << ", probability: " << DupProb
+ << " (Tail Duplicate)\n");
+ BestSucc.BB = Succ;
+ BestSucc.ShouldTailDup = true;
+ break;
+ }
+ }
+
+ if (BestSucc.BB)
+ LLVM_DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
+
+ return BestSucc;
+}
+
+/// Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and select the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve i-cache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
+ // Once we need to walk the worklist looking for a candidate, cleanup the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ });
+
+ if (WorkList.empty())
+ return nullptr;
+
+ bool IsEHPad = WorkList[0]->isEHPad();
+
+ MachineBasicBlock *BestBlock = nullptr;
+ BlockFrequency BestFreq;
+ for (MachineBasicBlock *MBB : WorkList) {
+ assert(MBB->isEHPad() == IsEHPad &&
+ "EHPad mismatch between block and work list.");
+
+ BlockChain &SuccChain = *BlockToChain[MBB];
+ if (&SuccChain == &Chain)
+ continue;
+
+ assert(SuccChain.UnscheduledPredecessors == 0 &&
+ "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+ LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+
+ // For EH pads, we lay out the least probable first so as to avoid jumping
+ // back from less probable landing pads to more probable ones.
+ //
+ // FIXME: Using probability is probably (!) not the best way to achieve
+ // this. We should probably have a more principled approach to layout
+ // cleanup code.
+ //
+ // The goal is to get:
+ //
+ // +--------------------------+
+ // | V
+ // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume
+ //
+ // Rather than:
+ //
+ // +-------------------------------------+
+ // V |
+ // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup
+ if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
+ continue;
+
+ BestBlock = MBB;
+ BestFreq = CandidateFreq;
+ }
+
+ return BestBlock;
+}
+
+/// Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+ /// We walk through the function's blocks in order, starting from
+ /// PrevUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(&*I))
+ continue;
+ if (BlockToChain[&*I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[&*I]->begin();
+ }
+ }
+ return nullptr;
+}
+
+void MachineBlockPlacement::fillWorkLists(
+ const MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter = nullptr) {
+ BlockChain &Chain = *BlockToChain[MBB];
+ if (!UpdatedPreds.insert(&Chain).second)
+ return;
+
+ assert(
+ Chain.UnscheduledPredecessors == 0 &&
+ "Attempting to place block with unscheduled predecessors in worklist.");
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain &&
+ "Block in chain doesn't match BlockToChain map.");
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockFilter && !BlockFilter->count(Pred))
+ continue;
+ if (BlockToChain[Pred] == &Chain)
+ continue;
+ ++Chain.UnscheduledPredecessors;
+ }
+ }
+
+ if (Chain.UnscheduledPredecessors != 0)
+ return;
+
+ MachineBasicBlock *BB = *Chain.begin();
+ if (BB->isEHPad())
+ EHPadWorkList.push_back(BB);
+ else
+ BlockWorkList.push_back(BB);
+}
+
+void MachineBlockPlacement::buildChain(
+ const MachineBasicBlock *HeadBB, BlockChain &Chain,
+ BlockFilterSet *BlockFilter) {
+ assert(HeadBB && "BB must not be null.\n");
+ assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
+ MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
+
+ const MachineBasicBlock *LoopHeaderBB = HeadBB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
+ MachineBasicBlock *BB = *std::prev(Chain.end());
+ while (true) {
+ assert(BB && "null block found at end of chain in loop.");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
+ assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
+ MachineBasicBlock* BestSucc = Result.BB;
+ bool ShouldTailDup = Result.ShouldTailDup;
+ if (allowTailDupPlacement())
+ ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
+ Chain,
+ BlockFilter));
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
+ if (!BestSucc)
+ break;
+
+ LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Placement may have changed tail duplication opportunities.
+ // Check for that now.
+ if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
+ repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
+ BlockFilter, PrevUnplacedBlockIt);
+ // If the chosen successor was duplicated into BB, don't bother laying
+ // it out, just go round the loop again with BB as the chain end.
+ if (!BB->isSuccessor(BestSucc))
+ continue;
+ }
+
+ // Place this block, updating the datastructures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out UnscheduledPredecessors for the successor we're about to merge
+ // in case we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.UnscheduledPredecessors = 0;
+ LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
+ << getBlockName(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *std::prev(Chain.end());
+ }
+
+ LLVM_DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockName(*Chain.begin()) << "\n");
+}
+
+ // If the bottom block BB has only one successor OldTop, in most cases it is
+ // profitable to move it before OldTop, except in the following case:
+//
+// -->OldTop<-
+// | . |
+// | . |
+// | . |
+// ---Pred |
+// | |
+// BB-----
+//
+ // If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't
+ // lay out the other successor below it, so it can't reduce the number of
+ // taken branches. In this case we keep the original layout.
+bool
+MachineBlockPlacement::canMoveBottomBlockToTop(
+ const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop) {
+ if (BottomBlock->pred_size() != 1)
+ return true;
+ MachineBasicBlock *Pred = *BottomBlock->pred_begin();
+ if (Pred->succ_size() != 2)
+ return true;
+
+ MachineBasicBlock *OtherBB = *Pred->succ_begin();
+ if (OtherBB == BottomBlock)
+ OtherBB = *Pred->succ_rbegin();
+ if (OtherBB == OldTop)
+ return false;
+
+ return true;
+}
+
+ // Find the possible fallthrough frequency to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency MaxFreq = 0;
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ // Found a Pred block that can be placed before Top.
+ // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ // Check if Succ can be placed after Pred.
+ // Succ should not be in any chain, or it is the head of some chain.
+ if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, Top);
+ if (EdgeFreq > MaxFreq)
+ MaxFreq = EdgeFreq;
+ }
+ }
+ }
+ return MaxFreq;
+}
+
+ // Compute the fallthrough gains when moving NewTop before OldTop.
+//
+// In following diagram, edges marked as "-" are reduced fallthrough, edges
+// marked as "+" are increased fallthrough, this function computes
+//
+// SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+// |
+// | -
+// V
+// --->OldTop
+// | .
+// | .
+// +| . +
+// | Pred --->
+// | |-
+// | V
+// --- NewTop <---
+// |-
+// V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+ const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+ BlockFrequency FallThrough2Exit = 0;
+ if (ExitBB)
+ FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, ExitBB);
+ BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, OldTop);
+
+ // Find the best Pred of NewTop.
+ MachineBasicBlock *BestPred = nullptr;
+ BlockFrequency FallThroughFromPred = 0;
+ for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!PredChain || Pred == *std::prev(PredChain->end())) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, NewTop);
+ if (EdgeFreq > FallThroughFromPred) {
+ FallThroughFromPred = EdgeFreq;
+ BestPred = Pred;
+ }
+ }
+ }
+
+ // If NewTop is not placed after Pred, another successor can be placed
+ // after Pred.
+ BlockFrequency NewFreq = 0;
+ if (BestPred) {
+ for (MachineBasicBlock *Succ : BestPred->successors()) {
+ if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+ continue;
+ if (ComputedEdges.contains(Succ))
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if ((SuccChain && (Succ != *SuccChain->begin())) ||
+ (SuccChain == BlockToChain[BestPred]))
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, Succ);
+ if (EdgeFreq > NewFreq)
+ NewFreq = EdgeFreq;
+ }
+ BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, NewTop);
+ if (NewFreq > OrigEdgeFreq) {
+ // If NewTop is not the best successor of Pred, then Pred doesn't fall
+ // through to NewTop, so there is no FallThroughFromPred and no NewFreq
+ // gain.
+ NewFreq = 0;
+ FallThroughFromPred = 0;
+ }
+ }
+
+ BlockFrequency Result = 0;
+ BlockFrequency Gains = BackEdgeFreq + NewFreq;
+ BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+ FallThroughFromPred;
+ if (Gains > Lost)
+ Result = Gains - Lost;
+ return Result;
+}
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// from predecessors of old top.
+///
+/// Look for a block which is strictly better than the old top for laying
+/// out before the old top of the loop. This looks for only two patterns:
+///
+/// 1. a block has only one successor, the old loop top
+///
+/// Because such a block will always result in an unconditional jump,
+/// rotating it in front of the old top is always profitable.
+///
+ /// 2. a block has two successors, one is the old top, the other is the loop
+ /// exit, and it has more than one predecessor
+ ///
+ /// If it is below one of its predecessors P, only P can fall through to
+ /// it; all other predecessors need a jump to it, plus another conditional
+ /// jump to the loop header. If it is moved before the loop header, all its
+ /// predecessors jump to it and then fall through to the loop header. So all
+ /// its predecessors except P save one taken branch.
+ /// At the same time, moving it before the old top increases the taken
+ /// branch to the loop exit block, so the saved taken branches are weighed
+ /// against the increased taken branch to the loop exit block.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTopHelper(
+ MachineBasicBlock *OldTop,
+ const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Check that the header hasn't been fused with a preheader block due to
+ // crazy branches. If it has, we need to start with the header at the top to
+ // prevent pulling the preheader into the loop body.
+ BlockChain &HeaderChain = *BlockToChain[OldTop];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return OldTop;
+ if (OldTop != *HeaderChain.begin())
+ return OldTop;
+
+ LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+ << "\n");
+
+ BlockFrequency BestGains = 0;
+ MachineBasicBlock *BestPred = nullptr;
+ for (MachineBasicBlock *Pred : OldTop->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ if (Pred == L.getHeader())
+ continue;
+ LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has "
+ << Pred->succ_size() << " successors, ";
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ if (Pred->succ_size() > 2)
+ continue;
+
+ MachineBasicBlock *OtherBB = nullptr;
+ if (Pred->succ_size() == 2) {
+ OtherBB = *Pred->succ_begin();
+ if (OtherBB == OldTop)
+ OtherBB = *Pred->succ_rbegin();
+ }
+
+ if (!canMoveBottomBlockToTop(Pred, OldTop))
+ continue;
+
+ BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+ LoopBlockSet);
+ if ((Gains > 0) && (Gains > BestGains ||
+ ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
+ BestPred = Pred;
+ BestGains = Gains;
+ }
+ }
+
+ // If no direct predecessor is fine, just use the loop header.
+ if (!BestPred) {
+ LLVM_DEBUG(dbgs() << " final top unchanged\n");
+ return OldTop;
+ }
+
+ // Walk backwards through any straight line of predecessors.
+ while (BestPred->pred_size() == 1 &&
+ (*BestPred->pred_begin())->succ_size() == 1 &&
+ *BestPred->pred_begin() != L.getHeader())
+ BestPred = *BestPred->pred_begin();
+
+ LLVM_DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
+ return BestPred;
+}
+
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper, until no new better
+/// BB can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fallthrough to the loop header.
+ // In practice this never happens though: there always seems to be a preheader
+ // that can fallthrough and that is also placed before the header.
+ bool OptForSize = F->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
+ if (OptForSize)
+ return L.getHeader();
+
+ MachineBasicBlock *OldTop = nullptr;
+ MachineBasicBlock *NewTop = L.getHeader();
+ while (NewTop != OldTop) {
+ OldTop = NewTop;
+ NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+ if (NewTop != OldTop)
+ ComputedEdges[NewTop] = { OldTop, false };
+ }
+ return NewTop;
+}
+
+/// Find the best loop exiting block for layout.
+///
+ /// This routine implements the logic to analyze the loop looking for the best
+ /// block to use as the exiting block when the loop chain is rotated. Typically
+ /// this is done to maximize fallthrough opportunities out of the loop.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+ // header has been pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return nullptr;
+
+ BlockFrequency BestExitEdgeFreq;
+ unsigned BestExitLoopDepth = 0;
+ MachineBasicBlock *ExitingBB = nullptr;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ LLVM_DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[MBB];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an unanalyzable branch.
+ if (MBB != *std::prev(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ bool HasLoopingSucc = false;
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isEHPad())
+ continue;
+ if (Succ == MBB)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain) {
+ LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
+ continue;
+ }
+
+ auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
+ if (LoopBlockSet.count(Succ)) {
+ LLVM_DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccProb << ")\n");
+ HasLoopingSucc = true;
+ continue;
+ }
+
+ unsigned SuccLoopDepth = 0;
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
+ SuccLoopDepth = ExitLoop->getLoopDepth();
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(MBB);
+ }
+
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
+ LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth
+ << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ // Note that we bias this toward an existing layout successor to retain
+ // incoming order in the absence of better information. The exit must have
+ // a frequency higher than the current exit before we consider breaking
+ // the layout.
+ BranchProbability Bias(100 - ExitBlockBias, 100);
+ if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
+ ExitEdgeFreq > BestExitEdgeFreq ||
+ (MBB->isLayoutSuccessor(Succ) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = MBB;
+ }
+ }
+
+ if (!HasLoopingSucc) {
+ // Restore the old exiting state, no viable looping successor was found.
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ }
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB) {
+ LLVM_DEBUG(
+ dbgs() << " No other candidate exit blocks, using loop header\n");
+ return nullptr;
+ }
+ if (L.getNumBlocks() == 1) {
+ LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
+ return nullptr;
+ }
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
+ << "\n");
+ ExitFreq = BestExitEdgeFreq;
+ return ExitingBB;
+}
+
+/// Check if there is a fallthrough to loop header Top.
+///
+ /// 1. Look for a Pred that can be laid out before Top.
+ /// 2. Check if Top is the most probable successor of Pred.
+bool
+MachineBlockPlacement::hasViableTopFallthrough(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ // Found a Pred block that can be placed before Top.
+ // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ // Check if Succ can be placed after Pred.
+ // Succ should not be in any chain, or it is the head of some chain.
+ if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Attempt to rotate an exiting block to the bottom of the loop.
+///
+/// Once we have built a chain, try to rotate it to line up the hot exit block
+/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
+/// branches. For example, if the loop has fallthrough into its header and out
+/// of its bottom already, don't rotate it.
+void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
+ const MachineBasicBlock *ExitingBB,
+ BlockFrequency ExitFreq,
+ const BlockFilterSet &LoopBlockSet) {
+ if (!ExitingBB)
+ return;
+
+ MachineBasicBlock *Top = *LoopChain.begin();
+ MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
+
+ // If ExitingBB is already the last one in a chain then nothing to do.
+ if (Bottom == ExitingBB)
+ return;
+
+ // The entry block should always be the first BB in a function.
+ if (Top->isEntryBlock())
+ return;
+
+ bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
+
+ // If the header has viable fallthrough, check whether the current loop
+ // bottom is a viable exiting block. If so, bail out as rotating will
+ // introduce an unnecessary branch.
+ if (ViableTopFallthrough) {
+ for (MachineBasicBlock *Succ : Bottom->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin()))
+ return;
+ }
+
+ // Rotate will destroy the top fallthrough, we need to ensure the new exit
+ // frequency is larger than top fallthrough.
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+ if (FallThrough2Top >= ExitFreq)
+ return;
+ }
+
+ BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
+ if (ExitIt == LoopChain.end())
+ return;
+
+ // Rotating a loop exit to the bottom when there is a fallthrough to top
+ // trades the entry fallthrough for an exit fallthrough.
+ // If there is no bottom->top edge, but the chosen exit block does have
+ // a fallthrough, we break that fallthrough for nothing in return.
+
+ // Let's consider an example. We have a built chain of basic blocks
+ // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block.
+ // By doing a rotation we get
+ // Bk+1, ..., Bn, B1, ..., Bk
+ // Break of fallthrough to B1 is compensated by a fallthrough from Bk.
+ // If we had a fallthrough Bk -> Bk+1 it is broken now.
+ // It might be compensated by fallthrough Bn -> B1.
+ // So we have a condition to avoid creation of extra branch by loop rotation.
+ // All below must be true to avoid loop rotation:
+ // If there is a fallthrough to top (B1)
+ // There was fallthrough from chosen exit block (Bk) to next one (Bk+1)
+ // There is no fallthrough from bottom (Bn) to top (B1).
+ // Please note that there is no exit fallthrough from Bn because we checked it
+ // above.
+ if (ViableTopFallthrough) {
+ assert(std::next(ExitIt) != LoopChain.end() &&
+ "Exit should not be last BB");
+ MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
+ if (ExitingBB->isSuccessor(NextBlockInChain))
+ if (!Bottom->isSuccessor(Top))
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
+ << " at bottom\n");
+ std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
+}
+
+/// Attempt to rotate a loop based on profile data to reduce branch cost.
+///
+/// With profile data, we can determine the cost in terms of missed fall through
+/// opportunities when rotating a loop chain and select the best rotation.
+/// Basically, there are three kinds of cost to consider for each rotation:
+/// 1. The possibly missed fall through edge (if it exists) from BB out of
+/// the loop to the loop header.
+/// 2. The possibly missed fall through edges (if they exist) from the loop
+/// exits to BB out of the loop.
+/// 3. The missed fall through edge (if it exists) from the last BB to the
+/// first BB in the loop chain.
+/// Therefore, the cost for a given rotation is the sum of costs listed above.
+/// We select the best rotation with the smallest cost.
+void MachineBlockPlacement::rotateLoopWithProfile(
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ auto RotationPos = LoopChain.end();
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+
+ // The entry block should always be the first BB in a function.
+ if (ChainHeaderBB->isEntryBlock())
+ return;
+
+ BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
+
+ // A utility lambda that scales up a block frequency by dividing it by a
+ // branch probability which is the reciprocal of the scale.
+ auto ScaleBlockFrequency = [](BlockFrequency Freq,
+ unsigned Scale) -> BlockFrequency {
+ if (Scale == 0)
+ return 0;
+ // Use operator / between BlockFrequency and BranchProbability to implement
+ // saturating multiplication.
+ return Freq / BranchProbability(1, Scale);
+ };
+
+ // Compute the cost of the missed fall-through edge to the loop header if the
+ // chain head is not the loop header. As we only consider natural loops with
+ // single header, this computation can be done only once.
+ BlockFrequency HeaderFallThroughCost(0);
+ for (auto *Pred : ChainHeaderBB->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, ChainHeaderBB);
+ auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
+ // If the predecessor has only an unconditional jump to the header, we
+ // need to consider the cost of this jump.
+ if (Pred->succ_size() == 1)
+ FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
+ HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
+ }
+ }
+
+ // Here we collect all exit blocks in the loop, and for each exit we find out
+ // its hottest exit edge. For each loop rotation, we define the loop exit cost
+ // as the sum of frequencies of exit edges we collect here, excluding the exit
+ // edge from the tail of the loop chain.
+ SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
+ for (auto *BB : LoopChain) {
+ auto LargestExitEdgeProb = BranchProbability::getZero();
+ for (auto *Succ : BB->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
+ LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
+ }
+ }
+ if (LargestExitEdgeProb > BranchProbability::getZero()) {
+ auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
+ ExitsWithFreq.emplace_back(BB, ExitFreq);
+ }
+ }
+
+ // In this loop we iterate every block in the loop chain and calculate the
+ // cost assuming the block is the head of the loop chain. When the loop ends,
+ // we should have found the best candidate as the loop chain's head.
+ for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
+ EndIter = LoopChain.end();
+ Iter != EndIter; Iter++, TailIter++) {
+ // TailIter is used to track the tail of the loop chain if the block we are
+ // checking (pointed by Iter) is the head of the chain.
+ if (TailIter == LoopChain.end())
+ TailIter = LoopChain.begin();
+
+ auto TailBB = *TailIter;
+
+ // Calculate the cost by putting this BB to the top.
+ BlockFrequency Cost = 0;
+
+ // If the current BB is the loop header, we need to take into account the
+ // cost of the missed fall through edge from outside of the loop to the
+ // header.
+ if (Iter != LoopChain.begin())
+ Cost += HeaderFallThroughCost;
+
+ // Collect the loop exit cost by summing up frequencies of all exit edges
+ // except the one from the chain tail.
+ for (auto &ExitWithFreq : ExitsWithFreq)
+ if (TailBB != ExitWithFreq.first)
+ Cost += ExitWithFreq.second;
+
+ // The cost of breaking the formerly fall-through edge from the tail to the
+ // top of the loop chain. Here we need to consider three cases:
+ // 1. If the tail node has only one successor, then we will get an
+ // additional jmp instruction. So the cost here is (MisfetchCost +
+ // JumpInstCost) * tail node frequency.
+ // 2. If the tail node has two successors, then we may still get an
+ // additional jmp instruction if the layout successor after the loop
+ // chain is not its CFG successor. Note that the more frequently executed
+ // jmp instruction will be put ahead of the other one. Assume the
+ // frequencies of those two branches are x and y, where x is the frequency
+ // of the edge to the chain head; then the cost will be
+ // (x * MisfetchCost + min(x, y) * JumpInstCost) * tail node frequency.
+ // 3. If the tail node has more than two successors (this rarely happens),
+ // we won't consider any additional cost.
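+ // Worked example for case 2 (hypothetical numbers): with a tail frequency
+ // of 100 and a 0.75 probability edge from the tail to the candidate head,
+ // TailToHeadFreq is 75 and the colder edge frequency is 25, giving an added
+ // cost of 75 * MisfetchCost + 25 * JumpInstCost.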
+ if (TailBB->isSuccessor(*Iter)) {
+ auto TailBBFreq = MBFI->getBlockFreq(TailBB);
+ if (TailBB->succ_size() == 1)
+ Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
+ MisfetchCost + JumpInstCost);
+ else if (TailBB->succ_size() == 2) {
+ auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
+ auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
+ auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
+ ? TailBBFreq * TailToHeadProb.getCompl()
+ : TailToHeadFreq;
+ Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
+ ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "
+ << getBlockName(*Iter)
+ << " to the top: " << Cost.getFrequency() << "\n");
+
+ if (Cost < SmallestRotationCost) {
+ SmallestRotationCost = Cost;
+ RotationPos = Iter;
+ }
+ }
+
+ if (RotationPos != LoopChain.end()) {
+ LLVM_DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
+ << " to the top\n");
+ std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
+ }
+}
+
+/// Collect blocks in the given loop that are to be placed.
+///
+/// When profile data is available, exclude cold blocks from the returned set;
+/// otherwise, collect all blocks in the loop.
+MachineBlockPlacement::BlockFilterSet
+MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
+ BlockFilterSet LoopBlockSet;
+
+ // Filter cold blocks off from LoopBlockSet when profile data is available.
+ // Collect the sum of frequencies of incoming edges to the loop header from
+ // outside. If we treat the loop as a super block, this is the frequency of
+ // the loop. Then for each block in the loop, we calculate the ratio between
+ // its frequency and the loop frequency. When the ratio is too small, we
+ // don't add the block to the loop chain. If there are outer loops, this block
+ // will be merged into the first outer loop chain for which this block is not
+ // cold anymore. This needs precise profile data and we only do this when
+ // profile data is available.
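+ // For example (hypothetical numbers): with an incoming loop frequency of
+ // 1000 and LoopToColdBlockRatio == 5, blocks whose frequency is below
+ // roughly 200 are considered cold and left for an enclosing loop's chain.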
+ if (F->getFunction().hasProfileData() || ForceLoopColdBlock) {
+ BlockFrequency LoopFreq(0);
+ for (auto *LoopPred : L.getHeader()->predecessors())
+ if (!L.contains(LoopPred))
+ LoopFreq += MBFI->getBlockFreq(LoopPred) *
+ MBPI->getEdgeProbability(LoopPred, L.getHeader());
+
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ if (LoopBlockSet.count(LoopBB))
+ continue;
+ auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
+ if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
+ continue;
+ BlockChain *Chain = BlockToChain[LoopBB];
+ for (MachineBasicBlock *ChainBB : *Chain)
+ LoopBlockSet.insert(ChainBB);
+ }
+ } else
+ LoopBlockSet.insert(L.block_begin(), L.block_end());
+
+ return LoopBlockSet;
+}
+
+/// Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (const MachineLoop *InnerLoop : L)
+ buildLoopChains(*InnerLoop);
+
+ assert(BlockWorkList.empty() &&
+ "BlockWorkList not empty when starting to build loop chains.");
+ assert(EHPadWorkList.empty() &&
+ "EHPadWorkList not empty when starting to build loop chains.");
+ BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
+
+ // Check if we have profile data for this function. If yes, we will rotate
+ // this loop by modeling costs more precisely which requires the profile data
+ // for better layout.
+ bool RotateLoopWithProfile =
+ ForcePreciseRotationCost ||
+ (PreciseRotationCost && F->getFunction().hasProfileData());
+
+ // First check to see if there is an obviously preferable top block for the
+ // loop. This will default to the header, but may end up as one of the
+ // predecessors to the header if there is one which will result in strictly
+ // fewer branches in the loop body.
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+
+ // If we selected just the header for the loop top, look for a potentially
+ // profitable exit block in the event that rotating the loop can eliminate
+ // branches by placing an exit edge at the bottom.
+ //
+ // Loops are processed innermost to outermost, so make sure we clear
+ // PreferredLoopExit before processing a new loop.
+ PreferredLoopExit = nullptr;
+ BlockFrequency ExitFreq;
+ if (!RotateLoopWithProfile && LoopTop == L.getHeader())
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
+
+ BlockChain &LoopChain = *BlockToChain[LoopTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.UnscheduledPredecessors == 0 &&
+ "LoopChain should not have unscheduled predecessors.");
+ UpdatedPreds.insert(&LoopChain);
+
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
+ fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
+
+ buildChain(LoopTop, LoopChain, &LoopBlockSet);
+
+ if (RotateLoopWithProfile)
+ rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
+ else
+ rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
+
+ LLVM_DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.UnscheduledPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (MachineBasicBlock *ChainBB : LoopChain) {
+ dbgs() << " ... " << getBlockName(ChainBB) << "\n";
+ if (!LoopBlockSet.remove(ChainBB)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
+ }
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(LoopBB) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+void MachineBlockPlacement::buildCFGChains() {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
+ for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
+ ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain *Chain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ while (true) {
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI = std::next(FI);
+ MachineBasicBlock *NextBB = &*NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, nullptr);
+#ifndef NDEBUG
+ BlocksWithUnanalyzableExits.insert(&*BB);
+#endif
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Build any loop-based chains.
+ PreferredLoopExit = nullptr;
+ for (MachineLoop *L : *MLI)
+ buildLoopChains(*L);
+
+ assert(BlockWorkList.empty() &&
+ "BlockWorkList should be empty before building final chain.");
+ assert(EHPadWorkList.empty() &&
+ "EHPadWorkList should be empty before building final chain.");
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineBasicBlock &MBB : *F)
+ fillWorkLists(&MBB, UpdatedPreds);
+
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ buildChain(&F->front(), FunctionChain);
+
+#ifndef NDEBUG
+ using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
+#endif
+ LLVM_DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineBasicBlock &MBB : *F)
+ FunctionBlockSet.insert(&MBB);
+
+ for (MachineBasicBlock *ChainBB : FunctionChain)
+ if (!FunctionBlockSet.erase(ChainBB)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(RemainingBB) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Remember original layout ordering, so we can update terminators after
+ // reordering to point to the original layout successor.
+ SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
+ F->getNumBlockIDs());
+ {
+ MachineBasicBlock *LastMBB = nullptr;
+ for (auto &MBB : *F) {
+ if (LastMBB != nullptr)
+ OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;
+ LastMBB = &MBB;
+ }
+ OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;
+ }
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F->begin();
+ LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
+ if (InsertPos != MachineFunction::iterator(ChainBB))
+ F->splice(InsertPos, ChainBB);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
+
+ // FIXME: It would be awesome if updateTerminator would just return rather
+ // than assert when the branch cannot be analyzed in order to remove this
+ // boilerplate.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+
+#ifndef NDEBUG
+ if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
+ // Given the exact block placement we chose, we may actually not _need_ to
+ // be able to edit PrevBB's terminator sequence, but not being _able_ to
+ // do that at this point is a bug.
+ assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) ||
+ !PrevBB->canFallThrough()) &&
+ "Unexpected block with un-analyzable fallthrough!");
+ Cond.clear();
+ TBB = FBB = nullptr;
+ }
+#endif
+
+ // The "PrevBB" is not yet updated to reflect the current code layout, so
+ // o. it may fall through to a block without an explicit branch before
+ // layout and no longer fall through to it after layout; or
+ // o. just the opposite.
+ //
+ // analyzeBranch() may return an erroneous value for FBB when these two
+ // situations take place. In the first scenario FBB is mistakenly set to
+ // null; in the second scenario FBB, which is expected to be null, is
+ // mistakenly pointing to "*BI".
+ // Thus, if a future change needs to use FBB before the layout is finalized,
+ // it has to correct FBB first with code similar to the following:
+ //
+ // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
+ // PrevBB->updateTerminator();
+ // Cond.clear();
+ // TBB = FBB = nullptr;
+ // if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // // FIXME: This should never take place.
+ // TBB = FBB = nullptr;
+ // }
+ // }
+ if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]);
+ }
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond)) {
+ MachineBasicBlock *PrevBB = &F->back();
+ PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]);
+ }
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+void MachineBlockPlacement::optimizeBranches() {
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
+
+ // Now that all the basic blocks in the chain have the proper layout,
+ // make a final call to analyzeBranch with AllowModify set.
+ // Indeed, the target may be able to optimize the branches in a way we
+ // cannot because all branches may not be analyzable.
+ // E.g., the target may be able to remove an unconditional branch to
+ // a fallthrough when it occurs after predicated terminators.
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
+ // If ChainBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with the higher probability first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeProbability(ChainBB, FBB) >
+ MBPI->getEdgeProbability(ChainBB, TBB) &&
+ !TII->reverseBranchCondition(Cond)) {
+ LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(ChainBB) << "\n");
+ LLVM_DEBUG(dbgs() << " Edge probability: "
+ << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->removeBranch(*ChainBB);
+ TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
+ }
+ }
+ }
+}
+
+void MachineBlockPlacement::alignBlocks() {
+ // Walk through the backedges of the function now that we have fully laid out
+ // the basic blocks and align the destination of each backedge. We don't rely
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
+ if (F->getFunction().hasMinSize() ||
+ (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize()))
+ return;
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
+
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(ChainBB);
+ if (!L)
+ continue;
+
+ const Align Align = TLI->getPrefLoopAlignment(L);
+ if (Align == 1)
+ continue; // Don't care about loop alignment.
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // If the global profile indicates so, don't align it.
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
+ !TLI->alignLoopsWithOptSize())
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(ChainBB));
+
+ auto DetermineMaxAlignmentPadding = [&]() {
+ // Set the maximum bytes allowed to be emitted for alignment.
+ unsigned MaxBytes;
+ if (MaxBytesForAlignmentOverride.getNumOccurrences() > 0)
+ MaxBytes = MaxBytesForAlignmentOverride;
+ else
+ MaxBytes = TLI->getMaxPermittedBytesForAlignment(ChainBB);
+ ChainBB->setMaxBytesForAlignment(MaxBytes);
+ };
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(ChainBB)) {
+ ChainBB->setAlignment(Align);
+ DetermineMaxAlignmentPadding();
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb =
+ MBPI->getEdgeProbability(LayoutPred, ChainBB);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb)) {
+ ChainBB->setAlignment(Align);
+ DetermineMaxAlignmentPadding();
+ }
+ }
+}
+
+/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
+/// it was duplicated into its chain predecessor and removed.
+/// \p BB - Basic block that may be duplicated.
+///
+/// \p LPred - Chosen layout predecessor of \p BB.
+/// Updated to be the chain end if LPred is removed.
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+/// chosen in the given order due to an unnatural CFG;
+/// only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// @return true if \p BB was removed.
+bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ const MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt) {
+ bool Removed, DuplicatedToLPred;
+ bool DuplicatedToOriginalLPred;
+ Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ if (!Removed)
+ return false;
+ DuplicatedToOriginalLPred = DuplicatedToLPred;
+ // Iteratively try to duplicate again. It can happen that the block that was
+ // duplicated into is still small enough to be duplicated again.
+ // No need to call markBlockSuccessors in this case, as the blocks being
+ // duplicated from here on are already scheduled.
+ while (DuplicatedToLPred && Removed) {
+ MachineBasicBlock *DupBB, *DupPred;
+ // The removal callback causes Chain.end() to be updated when a block is
+ // removed. On the first pass through the loop, the chain end should be the
+ // same as it was on function entry. On subsequent passes, because we are
+ // duplicating the block at the end of the chain, if it is removed the
+ // chain will have shrunk by one block.
+ BlockChain::iterator ChainEnd = Chain.end();
+ DupBB = *(--ChainEnd);
+ // Now try to duplicate again.
+ if (ChainEnd == Chain.begin())
+ break;
+ DupPred = *std::prev(ChainEnd);
+ Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ }
+ // If BB was duplicated into LPred, it is now scheduled. But because it was
+ // removed, markChainSuccessors won't be called for its chain. Instead we
+ // call markBlockSuccessors for LPred to achieve the same effect. This must go
+ // at the end because repeating the tail duplication can increase the number
+ // of unscheduled predecessors.
+ LPred = *std::prev(Chain.end());
+ if (DuplicatedToOriginalLPred)
+ markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
+ return true;
+}
+
+/// Tail duplicate \p BB into (some) predecessors if profitable.
+/// \p BB - Basic block that may be duplicated
+/// \p LPred - Chosen layout predecessor of \p BB
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+/// chosen in the given order due to an unnatural CFG;
+/// only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// \p DuplicatedToLPred - True if the block was duplicated into LPred.
+/// \return - True if the block was duplicated into all preds and removed.
+bool MachineBlockPlacement::maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToLPred) {
+ DuplicatedToLPred = false;
+ if (!shouldTailDuplicate(BB))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Redoing tail duplication for Succ#" << BB->getNumber()
+ << "\n");
+
+ // This has to be a callback because none of it can be done after
+ // BB is deleted.
+ bool Removed = false;
+ auto RemovalCallback =
+ [&](MachineBasicBlock *RemBB) {
+ // Signal to outer function
+ Removed = true;
+
+ // Conservative default.
+ bool InWorkList = true;
+ // Remove from the Chain and Chain Map
+ if (BlockToChain.count(RemBB)) {
+ BlockChain *Chain = BlockToChain[RemBB];
+ InWorkList = Chain->UnscheduledPredecessors == 0;
+ Chain->remove(RemBB);
+ BlockToChain.erase(RemBB);
+ }
+
+ // Handle the unplaced block iterator
+ if (&(*PrevUnplacedBlockIt) == RemBB) {
+ PrevUnplacedBlockIt++;
+ }
+
+ // Handle the Work Lists
+ if (InWorkList) {
+ SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
+ if (RemBB->isEHPad())
+ RemoveList = EHPadWorkList;
+ llvm::erase_value(RemoveList, RemBB);
+ }
+
+ // Handle the filter set
+ if (BlockFilter) {
+ BlockFilter->remove(RemBB);
+ }
+
+ // Remove the block from loop info.
+ MLI->removeBlock(RemBB);
+ if (RemBB == PreferredLoopExit)
+ PreferredLoopExit = nullptr;
+
+ LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: "
+ << getBlockName(RemBB) << "\n");
+ };
+ auto RemovalCallbackRef =
+ function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+
+ SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ bool IsSimple = TailDup.isSimpleBB(BB);
+ SmallVector<MachineBasicBlock *, 8> CandidatePreds;
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr;
+ if (F->getFunction().hasProfileData()) {
+ // We can do partial duplication with precise profile information.
+ findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
+ if (CandidatePreds.size() == 0)
+ return false;
+ if (CandidatePreds.size() < BB->pred_size())
+ CandidatePtr = &CandidatePreds;
+ }
+ TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, &DuplicatedPreds,
+ &RemovalCallbackRef, CandidatePtr);
+
+ // Update UnscheduledPredecessors to reflect tail-duplication.
+ DuplicatedToLPred = false;
+ for (MachineBasicBlock *Pred : DuplicatedPreds) {
+ // We're only looking for unscheduled predecessors that match the filter.
+ BlockChain* PredChain = BlockToChain[Pred];
+ if (Pred == LPred)
+ DuplicatedToLPred = true;
+ if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
+ || PredChain == &Chain)
+ continue;
+ for (MachineBasicBlock *NewSucc : Pred->successors()) {
+ if (BlockFilter && !BlockFilter->count(NewSucc))
+ continue;
+ BlockChain *NewChain = BlockToChain[NewSucc];
+ if (NewChain != &Chain && NewChain != PredChain)
+ NewChain->UnscheduledPredecessors++;
+ }
+ }
+ return Removed;
+}
+
+// Count the number of actual machine instructions.
+static uint64_t countMBBInstruction(MachineBasicBlock *MBB) {
+ uint64_t InstrCount = 0;
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isPHI() && !MI.isMetaInstruction())
+ InstrCount += 1;
+ }
+ return InstrCount;
+}
+
+// The size cost of duplication is the instruction size of the duplicated block.
+// So we should scale the threshold accordingly. But the instruction size is not
+// available on all targets, so we use the number of instructions instead.
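+// For example, a block containing 10 real (non-PHI, non-meta) instructions
+// gets an effective duplication threshold of 10 * DupThreshold.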
+BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
+ return DupThreshold.getFrequency() * countMBBInstruction(BB);
+}
+
+// Returns true if BB is Pred's best successor.
+bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
+ MachineBasicBlock *Pred,
+ BlockFilterSet *BlockFilter) {
+ if (BB == Pred)
+ return false;
+ if (BlockFilter && !BlockFilter->count(Pred))
+ return false;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (PredChain && (Pred != *std::prev(PredChain->end())))
+ return false;
+
+ // Find the successor with largest probability excluding BB.
+ BranchProbability BestProb = BranchProbability::getZero();
+ for (MachineBasicBlock *Succ : Pred->successors())
+ if (Succ != BB) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (SuccChain && (Succ != *SuccChain->begin()))
+ continue;
+ BranchProbability SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ if (SuccProb > BestProb)
+ BestProb = SuccProb;
+ }
+
+ BranchProbability BBProb = MBPI->getEdgeProbability(Pred, BB);
+ if (BBProb <= BestProb)
+ return false;
+
+ // Compute the number of reduced taken branches if Pred falls through to BB
+ // instead of another successor. Then compare it with threshold.
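+ // Illustrative (hypothetical numbers): with PredFreq == 100, an edge
+ // probability of 0.6 to BB and 0.3 to the next-best successor, the gain is
+ // 100 * (0.6 - 0.3) == 30 fewer taken branches, compared against the
+ // scaled threshold.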
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
+ BlockFrequency Gain = PredFreq * (BBProb - BestProb);
+ return Gain > scaleThreshold(BB);
+}
+
+// Find the predecessors of BB into which BB can be beneficially duplicated.
+void MachineBlockPlacement::findDuplicateCandidates(
+ SmallVectorImpl<MachineBasicBlock *> &Candidates,
+ MachineBasicBlock *BB,
+ BlockFilterSet *BlockFilter) {
+ MachineBasicBlock *Fallthrough = nullptr;
+ BranchProbability DefaultBranchProb = BranchProbability::getZero();
+ BlockFrequency BBDupThreshold(scaleThreshold(BB));
+ SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());
+ SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());
+
+ // Sort for highest frequency.
+ auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return MBPI->getEdgeProbability(BB, A) > MBPI->getEdgeProbability(BB, B);
+ };
+ auto CmpPred = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);
+ };
+ llvm::stable_sort(Succs, CmpSucc);
+ llvm::stable_sort(Preds, CmpPred);
+
+ auto SuccIt = Succs.begin();
+ if (SuccIt != Succs.end()) {
+ DefaultBranchProb = MBPI->getEdgeProbability(BB, *SuccIt).getCompl();
+ }
+
+ // For each predecessor of BB, compute the benefit of duplicating BB;
+ // if it is larger than the threshold, add the predecessor to Candidates.
+ //
+ // Suppose we have the following control flow:
+ //
+ // PB1 PB2 PB3 PB4
+ // \ | / /\
+ // \ | / / \
+ // \ |/ / \
+ // BB----/ OB
+ // /\
+ // / \
+ // SB1 SB2
+ //
+ // And it can be partially duplicated as
+ //
+ // PB2+BB
+ // | PB1 PB3 PB4
+ // | | / /\
+ // | | / / \
+ // | |/ / \
+ // | BB----/ OB
+ // |\ /|
+ // | X |
+ // |/ \|
+ // SB2 SB1
+ //
+ // The benefit of duplicating into a predecessor is defined as
+ // Orig_taken_branch - Duplicated_taken_branch
+ //
+ // Orig_taken_branch is computed with the assumption that the predecessor
+ // jumps to BB and the most probable successor is laid out after BB.
+ //
+ // Duplicated_taken_branch is computed with the assumption that BB is
+ // duplicated into PB, and one successor is laid out after it (SB1 for PB1 and
+ // SB2 for PB2 in our case). If there is no available successor, the combined
+ // block jumps to all of BB's successors, like PB3 in this example.
+ //
+ // If a predecessor has multiple successors, BB can't be duplicated into it,
+ // but the predecessor may beneficially fall through to BB while BB is
+ // duplicated into the other predecessors.
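+ // Worked example (hypothetical numbers): if PredFreq == 100 and the hottest
+ // successor of BB has probability 0.6 (so DefaultBranchProb == 0.4), then
+ // OrigCost == 100 + 100 * 0.4 == 140. Duplicating BB into Pred with that
+ // successor laid out after the combined block gives DupCost == 100 - 100 *
+ // 0.6 == 40, so the benefit is 140 - 40 == 100, which is then compared
+ // against BBDupThreshold.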
+ for (MachineBasicBlock *Pred : Preds) {
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
+
+ if (!TailDup.canTailDuplicate(BB, Pred)) {
+ // BB can't be duplicated into Pred, but it may be laid out
+ // below Pred.
+ if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
+ Fallthrough = Pred;
+ if (SuccIt != Succs.end())
+ SuccIt++;
+ }
+ continue;
+ }
+
+ BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
+ BlockFrequency DupCost;
+ if (SuccIt == Succs.end()) {
+ // Jump to all successors;
+ if (Succs.size() > 0)
+ DupCost += PredFreq;
+ } else {
+ // Fallthrough to *SuccIt, jump to all other successors;
+ DupCost += PredFreq;
+ DupCost -= PredFreq * MBPI->getEdgeProbability(BB, *SuccIt);
+ }
+
+ assert(OrigCost >= DupCost);
+ OrigCost -= DupCost;
+ if (OrigCost > BBDupThreshold) {
+ Candidates.push_back(Pred);
+ if (SuccIt != Succs.end())
+ SuccIt++;
+ }
+ }
+
+ // No predecessor can optimally fall through to BB,
+ // so we can change one duplication into a fallthrough.
+ if (!Fallthrough) {
+ if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {
+ Candidates[0] = Candidates.back();
+ Candidates.pop_back();
+ }
+ }
+}
+
+void MachineBlockPlacement::initDupThreshold() {
+ DupThreshold = 0;
+ if (!F->getFunction().hasProfileData())
+ return;
+
+ // We prefer to use the profile count.
+ uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
+ if (HotThreshold != UINT64_MAX) {
+ UseProfileCount = true;
+ DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
+ return;
+ }
+
+ // The profile count is not available, so we use block frequency instead.
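+ // For instance, a TailDupPlacementPenalty of 2 puts the threshold at 2% of
+ // the hottest block frequency in the function.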
+ BlockFrequency MaxFreq = 0;
+ for (MachineBasicBlock &MBB : *F) {
+ BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
+ if (Freq > MaxFreq)
+ MaxFreq = Freq;
+ }
+
+ BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
+ DupThreshold = MaxFreq * ThresholdProb;
+ UseProfileCount = false;
+}
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ // Check for single-block functions and skip them.
+ if (std::next(MF.begin()) == MF.end())
+ return false;
+
+ F = &MF;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = std::make_unique<MBFIWrapper>(
+ getAnalysis<MachineBlockFrequencyInfo>());
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = MF.getSubtarget().getInstrInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
+ MPDT = nullptr;
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+ initDupThreshold();
+
+ // Initialize PreferredLoopExit to nullptr here since it may never be set if
+ // there are no MachineLoops.
+ PreferredLoopExit = nullptr;
+
+ assert(BlockToChain.empty() &&
+ "BlockToChain map should be empty before starting placement.");
+ assert(ComputedEdges.empty() &&
+ "Computed Edge map should be empty before starting placement.");
+
+ unsigned TailDupSize = TailDupPlacementThreshold;
+ // If only the aggressive threshold is explicitly set, use it.
+ if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 &&
+ TailDupPlacementThreshold.getNumOccurrences() == 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // For aggressive optimization, we can adjust some thresholds to be less
+ // conservative.
+ if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
+ // At O3 we should be more willing to copy blocks for tail duplication. This
+ // increases size pressure, so we only do it at O3.
+ // Do this unless only the regular threshold is explicitly set.
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
+ TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+ }
+
+ // If there's no threshold provided through options, query the target
+ // information for a threshold instead.
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
+ (PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
+ TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
+ TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
+
+ if (allowTailDupPlacement()) {
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ bool OptForSize = MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
+ if (OptForSize)
+ TailDupSize = 1;
+ bool PreRegAlloc = false;
+ TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
+ /* LayoutMode */ true, TailDupSize);
+ precomputeTriangleChains();
+ }
+
+ buildCFGChains();
+
+ // Changing the layout can create new tail merging opportunities.
+ // TailMerge can create jump into if branches that make CFG irreducible for
+ // HW that requires structured CFG.
+ bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge() &&
+ BranchFoldPlacement;
+ // No tail merging opportunities if the number of blocks is less than four.
+ if (MF.size() > 3 && EnableTailMerge) {
+ unsigned TailMergeSize = TailDupSize + 1;
+ BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false,
+ *MBFI, *MBPI, PSI, TailMergeSize);
+
+ if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
+ /*AfterPlacement=*/true)) {
+ // Redo the layout if tail merging creates/removes/moves blocks.
+ BlockToChain.clear();
+ ComputedEdges.clear();
+ // Must redo the post-dominator tree if blocks were changed.
+ if (MPDT)
+ MPDT->runOnMachineFunction(MF);
+ ChainAllocator.DestroyAll();
+ buildCFGChains();
+ }
+ }
+
+ // Apply a post-processing optimizing block placement.
+ if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
+ (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
+ // Find a new placement and modify the layout of the blocks in the function.
+ applyExtTsp();
+
+ // Re-create CFG chain so that we can optimizeBranches and alignBlocks.
+ createCFGChainExtTsp();
+ }
+
+ optimizeBranches();
+ alignBlocks();
+
+ BlockToChain.clear();
+ ComputedEdges.clear();
+ ChainAllocator.DestroyAll();
+
+ bool HasMaxBytesOverride =
+ MaxBytesForAlignmentOverride.getNumOccurrences() > 0;
+
+ if (AlignAllBlock)
+ // Align all of the blocks in the function to a specific alignment.
+ for (MachineBasicBlock &MBB : MF) {
+ if (HasMaxBytesOverride)
+ MBB.setAlignment(Align(1ULL << AlignAllBlock),
+ MaxBytesForAlignmentOverride);
+ else
+ MBB.setAlignment(Align(1ULL << AlignAllBlock));
+ }
+ else if (AlignAllNonFallThruBlocks) {
+ // Align all of the blocks that have no fall-through predecessors to a
+ // specific alignment.
+ for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
+ auto LayoutPred = std::prev(MBI);
+ if (!LayoutPred->isSuccessor(&*MBI)) {
+ if (HasMaxBytesOverride)
+ MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks),
+ MaxBytesForAlignmentOverride);
+ else
+ MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
+ }
+ }
+ }
+ if (ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ if (RenumberBlocksBeforeView)
+ MF.RenumberBlocks();
+ MBFI->view("MBP." + MF.getName(), false);
+ }
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+void MachineBlockPlacement::applyExtTsp() {
+ // Prepare data; blocks are indexed by their index in the current ordering.
+ DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
+ BlockIndex.reserve(F->size());
+ std::vector<const MachineBasicBlock *> CurrentBlockOrder;
+ CurrentBlockOrder.reserve(F->size());
+ size_t NumBlocks = 0;
+ for (const MachineBasicBlock &MBB : *F) {
+ BlockIndex[&MBB] = NumBlocks++;
+ CurrentBlockOrder.push_back(&MBB);
+ }
+
+ auto BlockSizes = std::vector<uint64_t>(F->size());
+ auto BlockCounts = std::vector<uint64_t>(F->size());
+ std::vector<EdgeCountT> JumpCounts;
+ for (MachineBasicBlock &MBB : *F) {
+ // Getting the block frequency.
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ BlockCounts[BlockIndex[&MBB]] = BlockFreq.getFrequency();
+ // Getting the block size:
+ // - approximate the size of an instruction by 4 bytes, and
+ // - ignore debug instructions.
+ // Note: getting the exact size of each block is target-dependent and can be
+ // done by extending the interface of MCCodeEmitter. Experimentally we do
+ // not see a perf improvement with the exact block sizes.
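+ // For example, a block with 7 non-debug instructions is modeled as 28 bytes.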
+ auto NonDbgInsts =
+ instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end());
+ int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
+ BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;
+ // Getting jump frequencies.
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ auto EP = MBPI->getEdgeProbability(&MBB, Succ);
+ BlockFrequency JumpFreq = BlockFreq * EP;
+ auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
+ JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency()));
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()
+ << " with profile = " << F->getFunction().hasProfileData()
+ << " (" << F->getName().str() << ")"
+ << "\n");
+ LLVM_DEBUG(
+ dbgs() << format(" original layout score: %0.2f\n",
+ calcExtTspScore(BlockSizes, BlockCounts, JumpCounts)));
+
+ // Run the layout algorithm.
+ auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
+ std::vector<const MachineBasicBlock *> NewBlockOrder;
+ NewBlockOrder.reserve(F->size());
+ for (uint64_t Node : NewOrder) {
+ NewBlockOrder.push_back(CurrentBlockOrder[Node]);
+ }
+ LLVM_DEBUG(dbgs() << format(" optimized layout score: %0.2f\n",
+ calcExtTspScore(NewOrder, BlockSizes, BlockCounts,
+ JumpCounts)));
+
+ // Assign new block order.
+ assignBlockOrder(NewBlockOrder);
+}
+
+void MachineBlockPlacement::assignBlockOrder(
+ const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
+ assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");
+ F->RenumberBlocks();
+
+ bool HasChanges = false;
+ for (size_t I = 0; I < NewBlockOrder.size(); I++) {
+ if (NewBlockOrder[I] != F->getBlockNumbered(I)) {
+ HasChanges = true;
+ break;
+ }
+ }
+ // Stop early if the new block order is identical to the existing one.
+ if (!HasChanges)
+ return;
+
+ SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs());
+ for (auto &MBB : *F) {
+ PrevFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ }
+
+ // Sort basic blocks in the function according to the computed order.
+ DenseMap<const MachineBasicBlock *, size_t> NewIndex;
+ for (const MachineBasicBlock *MBB : NewBlockOrder) {
+ NewIndex[MBB] = NewIndex.size();
+ }
+ F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
+ return NewIndex[&L] < NewIndex[&R];
+ });
+
+ // Update basic block branches by inserting explicit fallthrough branches
+ // when required and re-optimize branches when possible.
+ const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (auto &MBB : *F) {
+ MachineFunction::iterator NextMBB = std::next(MBB.getIterator());
+ MachineFunction::iterator EndIt = MBB.getParent()->end();
+ auto *FTMBB = PrevFallThroughs[MBB.getNumber()];
+ // If this block had a fallthrough before, we need an explicit unconditional
+ // branch to that block when the fallthrough block is not adjacent to this
+ // block in the new order.
+ if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
+ TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
+ }
+
+ // It might be possible to optimize branches by flipping the condition.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
+ continue;
+ MBB.updateTerminator(FTMBB);
+ }
+
+#ifndef NDEBUG
+ // Make sure we correctly constructed all branches.
+ F->verify(this, "After optimized block reordering");
+#endif
+}
+
+void MachineBlockPlacement::createCFGChainExtTsp() {
+ BlockToChain.clear();
+ ComputedEdges.clear();
+ ChainAllocator.DestroyAll();
+
+ MachineBasicBlock *HeadBB = &F->front();
+ BlockChain *FunctionChain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);
+
+ for (MachineBasicBlock &MBB : *F) {
+ if (HeadBB == &MBB)
+ continue; // Ignore head of the chain
+ FunctionChain->merge(&MBB, nullptr);
+ }
+}
+
+namespace {
+
+/// A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absence of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char MachineBlockPlacementStats::ID = 0;
+
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (std::next(F.begin()) == F.end())
+ return false;
+
+ if (!isFunctionInPrintList(F.getName()))
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ for (MachineBasicBlock &MBB : F) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ Statistic &NumBranches =
+ (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
+ Statistic &BranchTakenFreq =
+ (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ // Skip if this successor is a fallthrough.
+ if (MBB.isLayoutSuccessor(Succ))
+ continue;
+
+ BlockFrequency EdgeFreq =
+ BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..a84377d70855
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,79 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+namespace llvm {
+cl::opt<unsigned>
+ StaticLikelyProb("static-likely-prob",
+ cl::desc("branch probability threshold in percentage"
+ " to be considered very likely"),
+ cl::init(80), cl::Hidden);
+
+cl::opt<unsigned> ProfileLikelyProb(
+ "profile-likely-prob",
+ cl::desc("branch probability threshold in percentage to be considered"
+ " very likely when profile is available"),
+ cl::init(51), cl::Hidden);
+} // namespace llvm
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+MachineBranchProbabilityInfo::MachineBranchProbabilityInfo()
+ : ImmutablePass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeMachineBranchProbabilityInfoPass(Registry);
+}
+
+void MachineBranchProbabilityInfo::anchor() {}
+
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
+ return Src->getSuccProbability(Dst);
+}
+
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeProbability(Src, find(Src->successors(), Dst));
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
+ BranchProbability HotProb(StaticLikelyProb, 100);
+ return getEdgeProbability(Src, Dst) > HotProb;
+}
+
+raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
+ raw_ostream &OS, const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge " << printMBBReference(*Src) << " -> " << printMBBReference(*Dst)
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp
new file mode 100644
index 000000000000..7bfb81771380
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp
@@ -0,0 +1,95 @@
+//===- MachineCFGPrinter.cpp - DOT Printer for Machine Functions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the `-dot-machine-cfg` analysis pass, which emits the
+// Machine Function in DOT format to a file titled `<prefix>.<function-name>.dot`.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCFGPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dot-machine-cfg"
+
+static cl::opt<std::string>
+ MCFGFuncName("mcfg-func-name", cl::Hidden,
+ cl::desc("The name of a function (or its substring)"
+ " whose CFG is viewed/printed."));
+
+static cl::opt<std::string> MCFGDotFilenamePrefix(
+ "mcfg-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the Machine CFG dot file names."));
+
+static cl::opt<bool>
+ CFGOnly("dot-mcfg-only", cl::init(false), cl::Hidden,
+ cl::desc("Print only the CFG without block bodies"));
+
+static void writeMCFGToDotFile(MachineFunction &MF) {
+ std::string Filename =
+ (MCFGDotFilenamePrefix + "." + MF.getName() + ".dot").str();
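+ // E.g. with -mcfg-dot-filename-prefix=mcfg and a function named "main",
+ // this produces "mcfg.main.dot".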
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+
+ DOTMachineFuncInfo MCFGInfo(&MF);
+
+ if (!EC)
+ WriteGraph(File, &MCFGInfo, CFGOnly);
+ else
+ errs() << " error opening file for writing!";
+ errs() << '\n';
+}
+
+namespace {
+
+class MachineCFGPrinter : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCFGPrinter();
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace
+
+char MachineCFGPrinter::ID = 0;
+
+char &llvm::MachineCFGPrinterID = MachineCFGPrinter::ID;
+
+INITIALIZE_PASS(MachineCFGPrinter, DEBUG_TYPE, "Machine CFG Printer Pass",
+ false, true)
+
+/// Default construct and initialize the pass.
+MachineCFGPrinter::MachineCFGPrinter() : MachineFunctionPass(ID) {
+ initializeMachineCFGPrinterPass(*PassRegistry::getPassRegistry());
+}
+
+bool MachineCFGPrinter::runOnMachineFunction(MachineFunction &MF) {
+ if (!MCFGFuncName.empty() && !MF.getName().contains(MCFGFuncName))
+ return false;
+ errs() << "Writing Machine CFG for function ";
+ errs().write_escaped(MF.getName()) << '\n';
+
+ writeMCFGToDotFile(MF);
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 000000000000..f879c5fcf20c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,947 @@
+//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-cse"
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+STATISTIC(NumPREs, "Number of partially redundant expressions"
+ " transformed to fully redundant");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+
+// Threshold to avoid excessive cost to compute isProfitableToCSE.
+static cl::opt<int>
+ CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
+ cl::desc("Threshold for the size of CSUses"));
+
+namespace {
+
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+
+ public:
+ static char ID; // Pass identification
+
+ MachineCSE() : MachineFunctionPass(ID) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
+ void releaseMemory() override {
+ ScopeMap.clear();
+ PREMap.clear();
+ Exps.clear();
+ }
+
+ private:
+ using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr *, unsigned>>;
+ using ScopedHTType =
+ ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
+ AllocatorTy>;
+ using ScopeType = ScopedHTType::ScopeTy;
+ using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
+
+ unsigned LookAheadLimit = 0;
+ DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
+ DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
+ PREMap;
+ ScopedHTType VNT;
+ SmallVector<MachineInstr *, 64> Exps;
+ unsigned CurrVN = 0;
+
+ bool PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(MCRegister Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<MCRegister, 8> &PhysRefs,
+ PhysDefVector &PhysDefs, bool &PhysUseDef) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<MCRegister, 8> &PhysRefs,
+ PhysDefVector &PhysDefs, bool &NonLocal) const;
+ bool isCSECandidate(MachineInstr *MI);
+ bool isProfitableToCSE(Register CSReg, Register Reg,
+ MachineBasicBlock *CSBB, MachineInstr *MI);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlockCSE(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
+ bool PerformCSE(MachineDomTreeNode *Node);
+
+ bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
+ bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
+ bool PerformSimplePRE(MachineDominatorTree *DT);
+ /// Heuristics to see if it's profitable to move common computations of MBB
+ /// and MBB1 to CandidateBB.
+ bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1);
+ };
+
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+
+char &llvm::MachineCSEID = MachineCSE::ID;
+
+INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
+ "Machine Common Subexpression Elimination", false, false)
+
+/// The source register of a COPY machine instruction can be propagated to all
+/// its users, and this propagation could increase the probability of finding
+/// common subexpressions. If the COPY has only one user, the COPY itself can
+/// be removed.
+bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (MachineOperand &MO : MI->all_uses()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI->isCopy())
+ continue;
+ Register SrcReg = DefMI->getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ continue;
+ if (DefMI->getOperand(0).getSubReg())
+ continue;
+ // FIXME: We should trivially coalesce subregister copies to expose CSE
+ // opportunities on instructions with truncated operands (see
+ // cse-add-with-overflow.ll). This can be done here as follows:
+ // if (SrcSubReg)
+ // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
+ // SrcSubReg);
+ // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
+ //
+ // The 2-addr pass has been updated to handle coalesced subregs. However,
+ // some machine-specific code still can't handle it.
+ // To handle it properly we also need a way find a constrained subregister
+ // class given a super-reg class and subreg index.
+ if (DefMI->getOperand(1).getSubReg())
+ continue;
+ if (!MRI->constrainRegAttrs(SrcReg, Reg))
+ continue;
+ LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ LLVM_DEBUG(dbgs() << "*** to: " << *MI);
+
+ // Propagate SrcReg of copies to MI.
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ // Coalesce single use copies.
+ if (OnlyOneUse) {
+ // If (and only if) we've eliminated all uses of the copy, also
+ // copy-propagate to any debug-users of MI, or they'll be left using
+ // an undefined value.
+ DefMI->changeDebugValuesDefReg(SrcReg);
+
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ }
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::isPhysDefTriviallyDead(
+ MCRegister Reg, MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ I = skipDebugInstructionsForward(I, E);
+
+ if (I == E)
+ // Reached the end of the block; we don't know if the register is dead or not.
+ return false;
+
+ bool SeenDef = false;
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ // Found a use!
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // Saw a def of Reg (or an alias) before encountering any use; it's
+ // trivially dead.
+ return true;
+
+ --LookAheadLeft;
+ ++I;
+ }
+ return false;
+}
+
+static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
+ const MachineOperand &MO,
+ const MachineFunction &MF,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // MachineRegisterInfo::isConstantPhysReg directly called by
+ // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
+ // reserved registers to be frozen. That doesn't cause a problem post-ISel as
+ // most (if not all) targets freeze reserved registers right after ISel.
+ //
+ // It does cause issues mid-GlobalISel, however, hence the additional
+ // reservedRegsFrozen check.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
+ (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
+}
+
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers).
+/// Live physical register defs are returned by reference in PhysDefs, and
+/// PhysUseDef is set when the instruction defines a physical register that it
+/// (or an alias) also uses.
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<MCRegister, 8> &PhysRefs,
+ PhysDefVector &PhysDefs,
+ bool &PhysUseDef) const {
+ // First, add all uses to PhysRefs.
+ for (const MachineOperand &MO : MI->all_uses()) {
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg.isVirtual())
+ continue;
+ // Reading either caller preserved or constant physregs is ok.
+ if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
+ *TII))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = std::next(I);
+ for (const auto &MOP : llvm::enumerate(MI->operands())) {
+ const MachineOperand &MO = MOP.value();
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg.isVirtual())
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg.asMCReg()))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
+ PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
+ }
+
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
+ ++AI)
+ PhysRefs.insert(*AI);
+
+ return !PhysRefs.empty();
+}
+
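+// Return true if none of the physical registers in PhysRefs is clobbered
+// between CSMI (the earlier, equivalent instruction) and MI, scanning forward
+// at most LookAheadLimit instructions. NonLocal is set when the scan has to
+// cross from CSMI's block into MI's block.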
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<MCRegister, 8> &PhysRefs,
+ PhysDefVector &PhysDefs,
+ bool &NonLocal) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (MRI->isAllocatable(PhysDefs[i].second) ||
+ MRI->isReserved(PhysDefs[i].second))
+ // Avoid extending the live range of physical registers if they are
+ // allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
+ MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I != EE && I->isDebugInstr())
+ ++I;
+
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
+ if (I == E)
+ return true;
+
+ for (const MachineOperand &MO : I->operands()) {
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register MOReg = MO.getReg();
+ if (MOReg.isVirtual())
+ continue;
+ if (PhysRefs.count(MOReg.asMCReg()))
+ return false;
+ }
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
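+// Filter out instructions that can never be CSE'd: labels, PHIs, copies,
+// inline asm, debug instructions, stores, calls, terminators, anything with
+// unmodeled side effects, and loads that are not invariant and
+// dereferenceable.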
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
+ MI->isInlineAsm() || MI->isDebugInstr())
+ return false;
+
+ // Ignore copies.
+ if (MI->isCopyLike())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
+ MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
+ return false;
+
+ if (MI->mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, we can
+ // CSE it just like any other instruction.
+ if (!MI->isDereferenceableInvariantLoad())
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+
+ // Ignore stack guard loads; otherwise the register holding the CSEed value may
+ // be spilled and get loaded back with corrupted data.
+ if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
+ return false;
+
+ return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg. CSBB is the basic block where CSReg is
+/// defined.
+bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
+ MachineBasicBlock *CSBB, MachineInstr *MI) {
+ // FIXME: Heuristics that work around the lack of live range splitting.
+
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (CSReg.isVirtual() && Reg.isVirtual()) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ int NumOfUses = 0;
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
+ CSUses.insert(&MI);
+ // Too costly to compute if NumOfUses is very large. Conservatively assume
+ // MayIncreasePressure to avoid spending too much time here.
+ if (++NumOfUses > CSUsesThreshold) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ if (!MayIncreasePressure)
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ if (!CSUses.count(&MI)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
+ // Heuristic #1: Don't CSE a "cheap" computation if the def is not local or
+ // in an immediate predecessor. We don't want to increase register pressure
+ // and end up causing other computations to be spilled.
+ if (TII->isAsCheapAsAMove(*MI)) {
+ MachineBasicBlock *BB = MI->getParent();
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
+ return false;
+ }
+
+ // Heuristic #2: If the expression doesn't use a virtual register and the
+ // only uses of the redundant computation are copies, do not CSE.
+ bool HasVRegUse = false;
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg().isVirtual()) {
+ HasVRegUse = true;
+ break;
+ }
+ }
+ if (!HasVRegUse) {
+ bool HasNonCopyUse = false;
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ // Ignore copies.
+ if (!MI.isCopyLike()) {
+ HasNonCopyUse = true;
+ break;
+ }
+ }
+ if (!HasNonCopyUse)
+ return false;
+ }
+
+ // Heuristic #3: If the common subexpression is used by PHIs, do not reuse
+ // it unless the defined value is already used in the BB of the new use.
+ bool HasPHI = false;
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
+ HasPHI |= UseMI.isPHI();
+ if (UseMI.getParent() == MI->getParent())
+ return true;
+ }
+
+ return !HasPHI;
+}
+
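+// Open a new value-number scope for MBB. Entries added to VNT while this
+// scope is active are discarded when ExitScope() deletes it, so they are only
+// visible while processing MBB and the blocks it dominates.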
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
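+// Value-number every CSE candidate in MBB. When an equivalent instruction has
+// already been seen in the current scope chain, try to rewrite the defs of MI
+// to reuse the earlier defs and erase MI.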
+bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
+ SmallVector<unsigned, 2> ImplicitDefs;
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isCSECandidate(&MI))
+ continue;
+
+ bool FoundCSE = VNT.count(&MI);
+ if (!FoundCSE) {
+ // Using trivial copy propagation to find more CSE opportunities.
+ if (PerformTrivialCopyPropagation(&MI, MBB)) {
+ Changed = true;
+
+ // After coalescing MI itself may become a copy.
+ if (MI.isCopyLike())
+ continue;
+
+ // Try again to see if CSE is possible.
+ FoundCSE = VNT.count(&MI);
+ }
+ }
+
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI.isCommutable()) {
+ if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != &MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ // It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
+ SmallSet<MCRegister, 8> PhysRefs;
+ PhysDefVector PhysDefs;
+ bool PhysUseDef = false;
+ if (FoundCSE &&
+ hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
+ FoundCSE = false;
+
+ // ... Unless the CS is local or is in the sole predecessor block,
+ // and none of the physical registers it defines or uses is clobbered
+ // between the CS and MI.
+ // This can never be the case if the instruction both uses and
+ // defines the same physical register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(&MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
+ }
+
+ if (!FoundCSE) {
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(&MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ LLVM_DEBUG(dbgs() << "Examining: " << MI);
+ LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+ // Prevent CSE-ing non-local convergent instructions.
+ // LLVM's current definition of `isConvergent` does not necessarily prove
+ // that non-local CSE is illegal. The following check extends the definition
+ // of `isConvergent` to assume a convergent instruction is dependent not
+ // only on additional conditions, but also on fewer conditions. LLVM does
+ // not have a MachineInstr attribute which expresses this extended
+ // definition, so it's necessary to use `isConvergent` to prevent illegally
+ // CSE-ing the subset of `isConvergent` instructions which do fall into this
+ // extended definition.
+ if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
+ LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
+ "different BBs, avoid CSE!\n");
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
+ continue;
+ }
+
+ // Check if it's profitable to perform this CSE.
+ bool DoCSE = true;
+ unsigned NumDefs = MI.getNumDefs();
+
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register OldReg = MO.getReg();
+ Register NewReg = CSMI->getOperand(i).getReg();
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+
+ // Keep track of implicit defs of CSMI and MI, to clear possibly
+ // made-redundant kill flags.
+ if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
+ ImplicitDefs.push_back(OldReg);
+
+ if (OldReg == NewReg) {
+ --NumDefs;
+ continue;
+ }
+
+ assert(OldReg.isVirtual() && NewReg.isVirtual() &&
+ "Do not CSE physical register defs!");
+
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
+ LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ // Don't perform CSE if the result of the new instruction cannot exist
+ // within the constraints (register class, bank, or low-level type) of
+ // the old instruction.
+ if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
+ LLVM_DEBUG(
+ dbgs() << "*** Not the same register constraints, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+ --NumDefs;
+ }
+
+ // Actually perform the elimination.
+ if (DoCSE) {
+ for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
+ unsigned OldReg = CSEPair.first;
+ unsigned NewReg = CSEPair.second;
+ // OldReg may have been unused but is used now, clear the Dead flag
+ MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
+ assert(Def != nullptr && "CSEd register has no unique definition?");
+ Def->clearRegisterDeads(NewReg);
+ // Replace with NewReg and clear kill flags which may be wrong now.
+ MRI->replaceRegWith(OldReg, NewReg);
+ MRI->clearKillFlags(NewReg);
+ }
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
+ CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
+ for (const auto &PhysDef : PhysDefs)
+ if (!MI.getOperand(PhysDef.first).isDead())
+ CSMI->getOperand(PhysDef.first).setIsDead(false);
+
+ // Go through implicit defs of CSMI and MI, and clear the kill flags on
+ // their uses in all the instructions between CSMI and MI.
+ // We might have made some of the kill flags redundant, consider:
+ // subs ... implicit-def %nzcv <- CSMI
+ // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
+ // subs ... implicit-def %nzcv <- MI, to be eliminated
+ // csinc ... implicit killed %nzcv
+ // Since we eliminated MI, and reused a register imp-def'd by CSMI
+ // (here %nzcv), that register, if it was killed before MI, should have
+ // that kill flag removed, because its lifetime was extended.
+ if (CSMI->getParent() == MI.getParent()) {
+ for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
+ for (auto ImplicitDef : ImplicitDefs)
+ if (MachineOperand *MO = II->findRegisterUseOperand(
+ ImplicitDef, /*isKill=*/true, TRI))
+ MO->setIsKill(false);
+ } else {
+ // If the instructions aren't in the same BB, bail out and clear the
+ // kill flag on all uses of the imp-def'd register.
+ for (auto ImplicitDef : ImplicitDefs)
+ MRI->clearKillFlags(ImplicitDef);
+ }
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ auto LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn.second))
+ MBB->addLiveIn(LiveIn.second);
+ }
+ ++NumCrossBBCSEs;
+ }
+
+ MI.eraseFromParent();
+ ++NumCSEs;
+ if (!PhysRefs.empty())
+ ++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
+ } else {
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
+ }
+ CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
+ ImplicitDefs.clear();
+ }
+
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
+/// given dominator tree node if it is a leaf or all of its children are done.
+/// Walk up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offspring are all done.
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
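+// Walk the dominator tree rooted at Node in DFS order, entering a scope per
+// block so that expressions from dominating blocks are visible when their
+// dominated blocks are processed.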
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ CurrVN = 0;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ OpenChildren[Node] = Node->getNumChildren();
+ append_range(WorkList, Node->children());
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
+ bool Changed = false;
+ for (MachineDomTreeNode *Node : Scopes) {
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlockCSE(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren);
+ }
+
+ return Changed;
+}
+
+// We use stronger checks for PRE candidates than for CSE ones, covering the
+// checks inside ProcessBlockCSE() as well as those inside isCSECandidate().
+// This helps to exclude instrs created by PRE that won't be CSEed later.
+bool MachineCSE::isPRECandidate(MachineInstr *MI,
+ SmallSet<MCRegister, 8> &PhysRefs) {
+ if (!isCSECandidate(MI) ||
+ MI->isNotDuplicable() ||
+ MI->mayLoad() ||
+ TII->isAsCheapAsAMove(*MI) ||
+ MI->getNumDefs() != 1 ||
+ MI->getNumExplicitDefs() != 1)
+ return false;
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && !MO.getReg().isVirtual()) {
+ if (MO.isDef())
+ return false;
+ else
+ PhysRefs.insert(MO.getReg());
+ }
+ }
+
+ return true;
+}
+
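+// For each PRE candidate in MBB that has already been seen in another block,
+// duplicate it into the nearest common dominator (when legal and profitable)
+// so that the subsequent CSE walk can remove both occurrences.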
+bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ SmallSet<MCRegister, 8> PhysRefs;
+ if (!isPRECandidate(&MI, PhysRefs))
+ continue;
+
+ if (!PREMap.count(&MI)) {
+ PREMap[&MI] = MBB;
+ continue;
+ }
+
+ auto MBB1 = PREMap[&MI];
+ assert(
+ !DT->properlyDominates(MBB, MBB1) &&
+ "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
+ auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
+ if (!CMBB->isLegalToHoistInto())
+ continue;
+
+ if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
+ continue;
+
+ // Two instrs are partially redundant if their basic blocks are reachable
+ // from one another but neither dominates the other.
+ if (CMBB != MBB1) {
+ auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
+ if (BB != nullptr && BB1 != nullptr &&
+ (isPotentiallyReachable(BB1, BB) ||
+ isPotentiallyReachable(BB, BB1))) {
+ // The following check extends the definition of `isConvergent` to
+ // assume a convergent instruction is dependent not only on additional
+ // conditions, but also on fewer conditions. LLVM does not have a
+ // MachineInstr attribute which expresses this extended definition, so
+ // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
+ // subset of `isConvergent` instructions which do fall into this
+ // extended definition.
+ if (MI.isConvergent() && CMBB != MBB)
+ continue;
+
+ // If this instruction uses physical registers then we can only do PRE
+ // if it's using the value that is live at the place we're hoisting to.
+ bool NonLocal;
+ PhysDefVector PhysDefs;
+ if (!PhysRefs.empty() &&
+ !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
+ PhysDefs, NonLocal))
+ continue;
+
+ assert(MI.getOperand(0).isDef() &&
+ "First operand of instr with one explicit def must be this def");
+ Register VReg = MI.getOperand(0).getReg();
+ Register NewReg = MRI->cloneVirtualRegister(VReg);
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
+ continue;
+ MachineInstr &NewMI =
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
+
+ // When hoisting, make sure we don't carry the debug location of
+ // the original instruction, as that's not correct and can cause
+ // unexpected jumps when debugging optimized code.
+ auto EmptyDL = DebugLoc();
+ NewMI.setDebugLoc(EmptyDL);
+
+ NewMI.getOperand(0).setReg(NewReg);
+
+ PREMap[&MI] = CMBB;
+ ++NumPREs;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+// This simple PRE (partial redundancy elimination) pass doesn't actually
+// eliminate partial redundancy but transforms it into full redundancy,
+// anticipating that the next CSE step will eliminate this created redundancy.
+// If CSE doesn't eliminate it, the created instruction will remain dead and be
+// eliminated later by the Remove Dead Machine Instructions pass.
+bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
+ SmallVector<MachineDomTreeNode *, 32> BBs;
+
+ PREMap.clear();
+ bool Changed = false;
+ BBs.push_back(DT->getRootNode());
+ do {
+ auto Node = BBs.pop_back_val();
+ append_range(BBs, Node->children());
+
+ MachineBasicBlock *MBB = Node->getBlock();
+ Changed |= ProcessBlockPRE(DT, MBB);
+
+ } while (!BBs.empty());
+
+ return Changed;
+}
+
+bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1) {
+ if (CandidateBB->getParent()->getFunction().hasMinSize())
+ return true;
+ assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
+ assert(DT->dominates(CandidateBB, MBB1) &&
+ "CandidateBB should dominate MBB1");
+ return MBFI->getBlockFreq(CandidateBB) <=
+ MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
+}
+
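+// Pass entry point: run the simple PRE step first to turn partial
+// redundancies into full ones, then run the dominator-tree based CSE.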
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<MachineDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ LookAheadLimit = TII->getMachineCSELookAheadLimit();
+ bool ChangedPRE, ChangedCSE;
+ ChangedPRE = PerformSimplePRE(DT);
+ ChangedCSE = PerformCSE(DT->getRootNode());
+ return ChangedPRE || ChangedCSE;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
new file mode 100644
index 000000000000..874f726d2947
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -0,0 +1,127 @@
+//===- MachineCheckDebugify.cpp - Check debug info ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This checks debug info after mir-debugify (+ pass-to-test). Currently
+/// it simply checks the integrity of line info in DILocation and
+/// DILocalVariable which mir-debugify generated before.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "mir-check-debugify"
+
+using namespace llvm;
+
+namespace {
+
+struct CheckDebugMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
+ if (!NMD) {
+ errs() << "WARNING: Please run mir-debugify to generate "
+ "llvm.mir.debugify metadata first.\n";
+ return false;
+ }
+
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
+ return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+ ->getZExtValue();
+ };
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.mir.debugify should have exactly 2 operands!");
+ unsigned NumLines = getDebugifyOperand(0);
+ unsigned NumVars = getDebugifyOperand(1);
+ BitVector MissingLines{NumLines, true};
+ BitVector MissingVars{NumVars, true};
+
+ for (Function &F : M.functions()) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+ if (!MF)
+ continue;
+ for (MachineBasicBlock &MBB : *MF) {
+ // Find missing lines.
+ // TODO: Avoid meta instructions other than dbg_val.
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
+ const DebugLoc DL = MI.getDebugLoc();
+ if (DL && DL.getLine() != 0) {
+ MissingLines.reset(DL.getLine() - 1);
+ continue;
+ }
+
+ if (!DL) {
+ errs() << "WARNING: Instruction with empty DebugLoc in function ";
+ errs() << F.getName() << " --";
+ MI.print(errs());
+ }
+ }
+
+ // Find missing variables.
+ // TODO: Handle DBG_INSTR_REF which is under an experimental option now.
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isDebugValue())
+ continue;
+ const DILocalVariable *LocalVar = MI.getDebugVariable();
+ unsigned Var = ~0U;
+
+ (void)to_integer(LocalVar->getName(), Var, 10);
+ assert(Var <= NumVars && "Unexpected name for DILocalVariable");
+ MissingVars.reset(Var - 1);
+ }
+ }
+ }
+
+ bool Fail = false;
+ for (unsigned Idx : MissingLines.set_bits()) {
+ errs() << "WARNING: Missing line " << Idx + 1 << "\n";
+ Fail = true;
+ }
+
+ for (unsigned Idx : MissingVars.set_bits()) {
+ errs() << "WARNING: Missing variable " << Idx + 1 << "\n";
+ Fail = true;
+ }
+ errs() << "Machine IR debug info check: ";
+ errs() << (Fail ? "FAIL" : "PASS") << "\n";
+
+ return false;
+ }
+
+ CheckDebugMachineModule() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ static char ID; // Pass identification.
+};
+char CheckDebugMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CheckDebugMachineModule, DEBUG_TYPE,
+ "Machine Check Debug Module", false, false)
+INITIALIZE_PASS_END(CheckDebugMachineModule, DEBUG_TYPE,
+ "Machine Check Debug Module", false, false)
+
+ModulePass *llvm::createCheckDebugMachineModulePass() {
+ return new CheckDebugMachineModule();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
new file mode 100644
index 000000000000..c65937935ed8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -0,0 +1,769 @@
+//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The machine combiner pass uses machine trace metrics to ensure the combined
+// instructions do not lengthen the critical path or the resource depth.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-combiner"
+
+STATISTIC(NumInstCombined, "Number of machine instructions combined");
+
+static cl::opt<unsigned>
+inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
+ cl::desc("Incremental depth computation will be used for basic "
+ "blocks with more instructions than this threshold."), cl::init(500));
+
+static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
+ cl::desc("Dump all substituted intrs"),
+ cl::init(false));
+
+#ifdef EXPENSIVE_CHECKS
+static cl::opt<bool> VerifyPatternOrder(
+ "machine-combiner-verify-pattern-order", cl::Hidden,
+ cl::desc(
+ "Verify that the generated patterns are ordered by increasing latency"),
+ cl::init(true));
+#else
+static cl::opt<bool> VerifyPatternOrder(
+ "machine-combiner-verify-pattern-order", cl::Hidden,
+ cl::desc(
+ "Verify that the generated patterns are ordered by increasing latency"),
+ cl::init(false));
+#endif
+
+namespace {
+class MachineCombiner : public MachineFunctionPass {
+ const TargetSubtargetInfo *STI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
+ MachineTraceMetrics *Traces = nullptr;
+ MachineTraceMetrics::Ensemble *TraceEnsemble = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+ TargetSchedModel TSchedModel;
+
+ /// True if optimizing for code size.
+ bool OptSize = false;
+
+public:
+ static char ID;
+ MachineCombiner() : MachineFunctionPass(ID) {
+ initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "Machine InstCombiner"; }
+
+private:
+ bool combineInstructions(MachineBasicBlock *);
+ MachineInstr *getOperandDef(const MachineOperand &MO);
+ bool isTransientMI(const MachineInstr *MI);
+ unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace,
+ const MachineBasicBlock &MBB);
+ unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace);
+ bool
+ improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineCombinerPattern Pattern, bool SlackIsAccurate);
+ bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineCombinerPattern Pattern);
+ bool preservesResourceLen(MachineBasicBlock *MBB,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs);
+ void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
+ std::pair<unsigned, unsigned>
+ getLatenciesForInstrSequences(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Trace BlockTrace);
+
+ void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
+ SmallVector<MachineCombinerPattern, 16> &Patterns);
+};
+}
+
+char MachineCombiner::ID = 0;
+char &llvm::MachineCombinerID = MachineCombiner::ID;
+
+INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
+ "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
+ false, false)
+
+void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+MachineCombiner::getOperandDef(const MachineOperand &MO) {
+ MachineInstr *DefInstr = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && MO.getReg().isVirtual())
+ DefInstr = MRI->getUniqueVRegDef(MO.getReg());
+ // PHI's have no depth etc.
+ if (DefInstr && DefInstr->isPHI())
+ DefInstr = nullptr;
+ return DefInstr;
+}
+
+/// Return true if MI is unlikely to generate an actual target instruction.
+bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
+ if (!MI->isCopy())
+ return MI->isTransient();
+
+ // If MI is a COPY, check if its src and dst registers can be coalesced.
+ Register Dst = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+
+ if (!MI->isFullCopy()) {
+ // If src RC contains super registers of dst RC, it can also be coalesced.
+ if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
+ return false;
+
+ auto SrcSub = MI->getOperand(1).getSubReg();
+ auto SrcRC = MRI->getRegClass(Src);
+ auto DstRC = MRI->getRegClass(Dst);
+ return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr;
+ }
+
+ if (Src.isPhysical() && Dst.isPhysical())
+ return Src == Dst;
+
+ if (Src.isVirtual() && Dst.isVirtual()) {
+ auto SrcRC = MRI->getRegClass(Src);
+ auto DstRC = MRI->getRegClass(Dst);
+ return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
+ }
+
+ if (Src.isVirtual())
+ std::swap(Src, Dst);
+
+ // Now Src is physical register, Dst is virtual register.
+ auto DstRC = MRI->getRegClass(Dst);
+ return DstRC->contains(Src);
+}
+
+/// Computes the depth of instructions in the vector \p InsInstrs.
+///
+/// \param InsInstrs is a vector of machine instructions
+/// \param InstrIdxForVirtReg is a dense map of virtual register to index
+/// of defining machine instruction in \p InsInstrs
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Depth of the last instruction in \p InsInstrs ("NewRoot")
+unsigned
+MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace,
+ const MachineBasicBlock &MBB) {
+ SmallVector<unsigned, 16> InstrDepth;
+ // For each instruction in the new sequence compute the depth based on the
+ // operands. Use the trace information when possible. For new operands which
+ // are tracked in the InstrIdxForVirtReg map, the depth is looked up in InstrDepth.
+ for (auto *InstrPtr : InsInstrs) { // for each Use
+ unsigned IDepth = 0;
+ for (const MachineOperand &MO : InstrPtr->all_uses()) {
+ // Check for virtual register operand.
+ if (!MO.getReg().isVirtual())
+ continue;
+ unsigned DepthOp = 0;
+ unsigned LatencyOp = 0;
+ DenseMap<unsigned, unsigned>::iterator II =
+ InstrIdxForVirtReg.find(MO.getReg());
+ if (II != InstrIdxForVirtReg.end()) {
+ // Operand is new virtual register not in trace
+ assert(II->second < InstrDepth.size() && "Bad Index");
+ MachineInstr *DefInstr = InsInstrs[II->second];
+ assert(DefInstr &&
+ "There must be a definition for a new virtual register");
+ DepthOp = InstrDepth[II->second];
+ int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
+ int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
+ LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
+ InstrPtr, UseIdx);
+ } else {
+ MachineInstr *DefInstr = getOperandDef(MO);
+ if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
+ MachineTraceStrategy::TS_Local ||
+ DefInstr->getParent() == &MBB)) {
+ DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
+ if (!isTransientMI(DefInstr))
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ }
+ }
+ IDepth = std::max(IDepth, DepthOp + LatencyOp);
+ }
+ InstrDepth.push_back(IDepth);
+ }
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ return InstrDepth[NewRootIdx];
+}
+
+/// Computes instruction latency as max of latency of defined operands.
+///
+/// \param Root is a machine instruction that could be replaced by NewRoot.
+/// It is used to compute a more accurate latency information for NewRoot in
+/// case there is a dependent instruction in the same trace (\p BlockTrace)
+/// \param NewRoot is the instruction for which the latency is computed
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Latency of \p NewRoot
+unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace) {
+ // Check each definition in NewRoot and compute the latency
+ unsigned NewRootLatency = 0;
+
+ for (const MachineOperand &MO : NewRoot->all_defs()) {
+ // Check for virtual register operand.
+ if (!MO.getReg().isVirtual())
+ continue;
+ // Get the first instruction that uses MO
+ MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
+ RI++;
+ if (RI == MRI->reg_end())
+ continue;
+ MachineInstr *UseMO = RI->getParent();
+ unsigned LatencyOp = 0;
+ if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
+ LatencyOp = TSchedModel.computeOperandLatency(
+ NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
+ UseMO->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
+ }
+ NewRootLatency = std::max(NewRootLatency, LatencyOp);
+ }
+ return NewRootLatency;
+}
+
+/// The combiner's goal may differ based on which pattern it is attempting
+/// to optimize.
+enum class CombinerObjective {
+ MustReduceDepth, // The data dependency chain must be improved.
+ MustReduceRegisterPressure, // The register pressure must be reduced.
+ Default // The critical path must not be lengthened.
+};
+
+static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+ // TODO: If C++ ever gets a real enum class, make this part of the
+ // MachineCombinerPattern class.
+ switch (P) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case MachineCombinerPattern::SUBADD_OP1:
+ case MachineCombinerPattern::SUBADD_OP2:
+ case MachineCombinerPattern::FMADD_AX:
+ case MachineCombinerPattern::FMADD_XA:
+ case MachineCombinerPattern::FMSUB:
+ case MachineCombinerPattern::FNMSUB:
+ return CombinerObjective::MustReduceDepth;
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ return CombinerObjective::MustReduceRegisterPressure;
+ default:
+ return CombinerObjective::Default;
+ }
+}
+
+/// Estimate the latency of the new and original instruction sequences by summing
+/// up the latencies of the inserted and deleted instructions. This assumes
+/// that the inserted and deleted instructions are dependent instruction chains,
+/// which might not hold in all cases.
+std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
+ MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Trace BlockTrace) {
+ assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
+ unsigned NewRootLatency = 0;
+ // NewRoot is the last instruction in the \p InsInstrs vector.
+ MachineInstr *NewRoot = InsInstrs.back();
+ for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
+ NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
+ NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
+
+ unsigned RootLatency = 0;
+ for (auto *I : DelInstrs)
+ RootLatency += TSchedModel.computeInstrLatency(I);
+
+ return {NewRootLatency, RootLatency};
+}
+
+bool MachineCombiner::reduceRegisterPressure(
+ MachineInstr &Root, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineCombinerPattern Pattern) {
+ // FIXME: for now, we don't do any check for the register pressure patterns.
+ // We treat them as always profitable. But we can do better if we make
+ // RegPressureTracker class be aware of TIE attribute. Then we can get an
+ // accurate compare of register pressure with DelInstrs or InsInstrs.
+ return true;
+}
+
+/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
+/// The new code sequence ends in MI NewRoot. A necessary condition for the new
+/// sequence to replace the old sequence is that it cannot lengthen the critical
+/// path. The definition of "improve" may be restricted by specifying that the
+/// new path improves the data dependency chain (MustReduceDepth).
+bool MachineCombiner::improvesCriticalPathLen(
+ MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineCombinerPattern Pattern,
+ bool SlackIsAccurate) {
+ // Get depth and latency of NewRoot and Root.
+ unsigned NewRootDepth =
+ getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
+ unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
+
+ LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
+ << NewRootDepth << "\tRootDepth: " << RootDepth);
+
+ // For a transform such as reassociation, the cost equation is
+ // conservatively calculated so that we must improve the depth (data
+ // dependency cycles) in the critical path to proceed with the transform.
+ // Being conservative also protects against inaccuracies in the underlying
+ // machine trace metrics and CPU models.
+ if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
+ LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
+ LLVM_DEBUG(NewRootDepth < RootDepth
+ ? dbgs() << "\t and it does it\n"
+ : dbgs() << "\t but it does NOT do it\n");
+ return NewRootDepth < RootDepth;
+ }
+
+ // A more flexible cost calculation for the critical path includes the slack
+ // of the original code sequence. This may allow the transform to proceed
+ // even if the instruction depths (data dependency cycles) become worse.
+
+ // Account for the latency of the inserted and deleted instructions.
+ unsigned NewRootLatency, RootLatency;
+ if (TII->accumulateInstrSeqToRootLatency(*Root)) {
+ std::tie(NewRootLatency, RootLatency) =
+ getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
+ } else {
+ NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back());
+ RootLatency = TSchedModel.computeInstrLatency(Root);
+ }
+
+ unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
+ unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+ unsigned OldCycleCount =
+ RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
+ LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
+ << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
+ << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
+ << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
+ << "\n\tRootDepth + RootLatency + RootSlack = "
+ << OldCycleCount;);
+ LLVM_DEBUG(NewCycleCount <= OldCycleCount
+ ? dbgs() << "\n\t It IMPROVES PathLen because"
+ : dbgs() << "\n\t It DOES NOT improve PathLen because");
+ LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
+ << ", OldCycleCount = " << OldCycleCount << "\n");
+
+ return NewCycleCount <= OldCycleCount;
+}
+
+/// Helper routine to convert instructions into their scheduling classes (SC).
+void MachineCombiner::instr2instrSC(
+ SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
+ for (auto *InstrPtr : Instrs) {
+ unsigned Opc = InstrPtr->getOpcode();
+ unsigned Idx = TII->get(Opc).getSchedClass();
+ const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
+ InstrsSC.push_back(SC);
+ }
+}
+
+/// True when the new instructions do not increase resource length
+bool MachineCombiner::preservesResourceLen(
+ MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs) {
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
+
+ // Compute current resource length
+
+ //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+ SmallVector <const MachineBasicBlock *, 1> MBBarr;
+ MBBarr.push_back(MBB);
+ unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
+
+ // Deal with SC rather than Instructions.
+ SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
+ SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
+
+ instr2instrSC(InsInstrs, InsInstrsSC);
+ instr2instrSC(DelInstrs, DelInstrsSC);
+
+ ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
+ ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
+
+ // Compute new resource length.
+ unsigned ResLenAfterCombine =
+ BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
+
+ LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
+ << ResLenBeforeCombine
+ << " and after: " << ResLenAfterCombine << "\n";);
+ LLVM_DEBUG(
+ ResLenAfterCombine <=
+ ResLenBeforeCombine + TII->getExtendResourceLenLimit()
+ ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
+ : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
+ "Length\n");
+
+ return ResLenAfterCombine <=
+ ResLenBeforeCombine + TII->getExtendResourceLenLimit();
+}
+
+/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
+/// depths if requested.
+///
+/// \param MBB basic block to insert instructions in
+/// \param MI current machine instruction
+/// \param InsInstrs new instructions to insert in \p MBB
+/// \param DelInstrs instruction to delete from \p MBB
+/// \param TraceEnsemble is a pointer to the machine trace information
+/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param TII is target instruction info, used to call target hook
+/// \param Pattern is used to call target hook finalizeInsInstrs
+/// \param IncrementalUpdate if true, compute instruction depths incrementally,
+/// otherwise invalidate the trace
+static void insertDeleteInstructions(
+ MachineBasicBlock *MBB, MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Ensemble *TraceEnsemble,
+ SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII,
+ MachineCombinerPattern Pattern, bool IncrementalUpdate) {
+ // If we want to fix up some placeholder for some target, do it now.
+ // We need this because in genAlternativeCodeSequence, we have not yet decided
+ // whether InsInstrs or DelInstrs is the better choice, so we don't want to
+ // introduce side effects into the function. For example, we need to delay
+ // constant pool entry creation until after InsInstrs is selected as the
+ // better pattern; otherwise the constant pool entry created for InsInstrs
+ // will not be deleted even if InsInstrs is not the better pattern.
+ TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
+
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
+
+ for (auto *InstrPtr : DelInstrs) {
+ InstrPtr->eraseFromParent();
+ // Erase all LiveRegs defined by the removed instruction
+ for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
+ if (I->MI == InstrPtr)
+ I = RegUnits.erase(I);
+ else
+ I++;
+ }
+ }
+
+ if (IncrementalUpdate)
+ for (auto *InstrPtr : InsInstrs)
+ TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits);
+ else
+ TraceEnsemble->invalidate(MBB);
+
+ NumInstCombined++;
+}
+
+// Check that the difference between original and new latency is decreasing for
+// later patterns. This helps to discover sub-optimal pattern orderings.
+void MachineCombiner::verifyPatternOrder(
+ MachineBasicBlock *MBB, MachineInstr &Root,
+ SmallVector<MachineCombinerPattern, 16> &Patterns) {
+ long PrevLatencyDiff = std::numeric_limits<long>::max();
+ (void)PrevLatencyDiff; // Variable is used in assert only.
+ for (auto P : Patterns) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
+ continue;
+
+ unsigned NewRootLatency, RootLatency;
+ std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
+ Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(MBB));
+ long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
+ assert(CurrentLatencyDiff <= PrevLatencyDiff &&
+ "Current pattern is better than previous pattern.");
+ PrevLatencyDiff = CurrentLatencyDiff;
+ }
+}
+
+/// Substitute a slow code sequence with a faster one by
+/// evaluating instruction combining pattern.
+/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
+/// combining based on machine trace metrics. Only combine a sequence of
+/// instructions when this neither lengthens the critical path nor increases
+/// resource pressure. When optimizing for code size, always combine when the
+/// new sequence is shorter.
+bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+
+ bool IncrementalUpdate = false;
+ auto BlockIter = MBB->begin();
+ decltype(BlockIter) LastUpdate;
+ // Check if the block is in a loop.
+ const MachineLoop *ML = MLI->getLoopFor(MBB);
+ if (!TraceEnsemble)
+ TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
+
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(TRI->getNumRegUnits());
+
+ bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+
+ bool DoRegPressureReduce =
+ TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
+
+ while (BlockIter != MBB->end()) {
+ auto &MI = *BlockIter++;
+ SmallVector<MachineCombinerPattern, 16> Patterns;
+ // The motivating example is:
+ //
+ // MUL Other MUL_op1 MUL_op2 Other
+ // \ / \ | /
+ // ADD/SUB => MADD/MSUB
+ // (=Root) (=NewRoot)
+
+ // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
+ // usually beneficial for code size it unfortunately can hurt performance
+ // when the ADD is on the critical path, but the MUL is not. With the
+ // substitution the MUL becomes part of the critical path (in form of the
+ // MADD) and can lengthen it on architectures where the MADD latency is
+ // longer than the ADD latency.
+ //
+ // For each instruction we check if it can be the root of a combiner
+ // pattern. Then for each pattern the new code sequence in form of MI is
+ // generated and evaluated. When the efficiency criteria (don't lengthen
+ // critical path, don't use more resources) is met the new sequence gets
+ // hooked up into the basic block before the old sequence is removed.
+ //
+ // The algorithm does not try to evaluate all patterns and pick the best.
+ // This is only an artificial restriction though. In practice there is
+ // mostly one pattern, and getMachineCombinerPatterns() can order patterns
+ // based on an internal cost heuristic. If
+ // machine-combiner-verify-pattern-order is enabled, all patterns are
+ // checked to ensure later patterns do not provide better latency savings.
+
+ if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
+ continue;
+
+ if (VerifyPatternOrder)
+ verifyPatternOrder(MBB, MI, Patterns);
+
+ for (const auto P : Patterns) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (InsInstrs.empty())
+ continue;
+
+ LLVM_DEBUG(if (dump_intrs) {
+ dbgs() << "\tFor the Pattern (" << (int)P
+ << ") these instructions could be removed\n";
+ for (auto const *InstrPtr : DelInstrs)
+ InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+ /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
+ dbgs() << "\tThese instructions could replace the removed ones\n";
+ for (auto const *InstrPtr : InsInstrs)
+ InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+ /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
+ });
+
+ if (IncrementalUpdate && LastUpdate != BlockIter) {
+ // Update depths since the last incremental update.
+ TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
+ LastUpdate = BlockIter;
+ }
+
+ if (DoRegPressureReduce &&
+ getCombinerObjective(P) ==
+ CombinerObjective::MustReduceRegisterPressure) {
+ if (MBB->size() > inc_threshold) {
+ // Use incremental depth updates for basic blocks above threshold
+ IncrementalUpdate = true;
+ LastUpdate = BlockIter;
+ }
+ if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
+ // Replace DelInstrs with InsInstrs.
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
+ RegUnits, TII, P, IncrementalUpdate);
+ Changed |= true;
+
+ // Go back to previous instruction as it may have ILP reassociation
+ // opportunity.
+ BlockIter--;
+ break;
+ }
+ }
+
+ if (ML && TII->isThroughputPattern(P)) {
+ LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
+ RegUnits, TII, P, IncrementalUpdate);
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
+ LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
+ << InsInstrs.size() << " < "
+ << DelInstrs.size() << ")\n");
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
+ RegUnits, TII, P, IncrementalUpdate);
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ } else {
+ // For big basic blocks, we only compute the full trace the first time
+ // we hit this. We do not invalidate the trace, but instead update the
+ // instruction depths incrementally.
+ // NOTE: Only the instruction depths up to MI are accurate. All other
+ // trace information is not updated.
+ MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB);
+ Traces->verifyAnalysis();
+ if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg, P,
+ !IncrementalUpdate) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
+ if (MBB->size() > inc_threshold) {
+            // Use incremental depth updates for basic blocks above threshold
+ IncrementalUpdate = true;
+ LastUpdate = BlockIter;
+ }
+
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
+ RegUnits, TII, P, IncrementalUpdate);
+
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ }
+ // Cleanup instructions of the alternative code sequence. There is no
+ // use for them.
+ MachineFunction *MF = MBB->getParent();
+ for (auto *InstrPtr : InsInstrs)
+ MF->deleteMachineInstr(InstrPtr);
+ }
+ InstrIdxForVirtReg.clear();
+ }
+ }
+
+ if (Changed && IncrementalUpdate)
+ Traces->invalidate(MBB);
+ return Changed;
+}
+
+bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
+ STI = &MF.getSubtarget();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ SchedModel = STI->getSchedModel();
+ TSchedModel.init(STI);
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ TraceEnsemble = nullptr;
+ OptSize = MF.getFunction().hasOptSize();
+ RegClassInfo.runOnMachineFunction(MF);
+
+ LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
+ if (!TII->useMachineCombiner()) {
+ LLVM_DEBUG(
+ dbgs()
+ << " Skipping pass: Target does not support machine combiner\n");
+ return false;
+ }
+
+ bool Changed = false;
+
+ // Try to combine instructions.
+ for (auto &MBB : MF)
+ Changed |= combineInstructions(&MBB);
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 000000000000..3453e6c0b8be
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,1424 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+// This pass forwards the source of COPYs to the users of their destinations
+// when doing so is legal. For example:
+//
+// %reg1 = COPY %reg0
+// ...
+// ... = OP %reg1
+//
+// If
+// - %reg0 has not been clobbered by the time of the use of %reg1
+// - the register class constraints are satisfied
+// - the COPY def is the only value that reaches OP
+// then this pass replaces the above with:
+//
+// %reg1 = COPY %reg0
+// ...
+// ... = OP %reg0
+//
+// This pass also removes some redundant COPYs. For example:
+//
+// %R1 = COPY %R0
+// ... // No clobber of %R1
+// %R0 = COPY %R1 <<< Removed
+//
+// or
+//
+// %R1 = COPY %R0
+// ... // No clobber of %R0
+// %R1 = COPY %R0 <<< Removed
+//
+// or
+//
+// $R0 = OP ...
+// ... // No read/clobber of $R0 and $R1
+// $R1 = COPY $R0 // $R0 is killed
+// Replace $R0 with $R1 and remove the COPY
+// $R1 = OP ...
+// ...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-cp"
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
+STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
+STATISTIC(SpillageChainsLength, "Length of spillage chains");
+STATISTIC(NumSpillageChains, "Number of spillage chains");
+DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
+ "Controls which register COPYs are forwarded");
+
+static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
+ cl::Hidden);
+static cl::opt<cl::boolOrDefault>
+ EnableSpillageCopyElimination("enable-spill-copy-elim", cl::Hidden);
+
+namespace {
+
+static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ if (UseCopyInstr)
+ return TII.isCopyInstr(MI);
+
+ if (MI.isCopy())
+ return std::optional<DestSourcePair>(
+ DestSourcePair{MI.getOperand(0), MI.getOperand(1)});
+
+ return std::nullopt;
+}
+
+class CopyTracker {
+ struct CopyInfo {
+ MachineInstr *MI, *LastSeenUseInCopy;
+ SmallVector<MCRegister, 4> DefRegs;
+ bool Avail;
+ };
+
+ DenseMap<MCRegister, CopyInfo> Copies;
+
+public:
+ /// Mark all of the given registers and their subregisters as unavailable for
+ /// copying.
+ void markRegsUnavailable(ArrayRef<MCRegister> Regs,
+ const TargetRegisterInfo &TRI) {
+ for (MCRegister Reg : Regs) {
+ // Source of copy is no longer available for propagation.
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto CI = Copies.find(Unit);
+ if (CI != Copies.end())
+ CI->second.Avail = false;
+ }
+ }
+ }
+
+ /// Remove register from copy maps.
+ void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
+    // Since Reg might be a subreg of some registers, only invalidating Reg is
+    // not enough. We have to find the COPYs that define Reg or the registers
+    // defined by Reg and invalidate all of them.
+ SmallSet<MCRegister, 8> RegsToInvalidate;
+ RegsToInvalidate.insert(Reg);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto I = Copies.find(Unit);
+ if (I != Copies.end()) {
+ if (MachineInstr *MI = I->second.MI) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Expect copy");
+
+ RegsToInvalidate.insert(
+ CopyOperands->Destination->getReg().asMCReg());
+ RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg());
+ }
+ RegsToInvalidate.insert(I->second.DefRegs.begin(),
+ I->second.DefRegs.end());
+ }
+ }
+ for (MCRegister InvalidReg : RegsToInvalidate)
+ for (MCRegUnit Unit : TRI.regunits(InvalidReg))
+ Copies.erase(Unit);
+ }
+
+ /// Clobber a single register, removing it from the tracker's copy maps.
+ void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto I = Copies.find(Unit);
+ if (I != Copies.end()) {
+ // When we clobber the source of a copy, we need to clobber everything
+ // it defined.
+ markRegsUnavailable(I->second.DefRegs, TRI);
+ // When we clobber the destination of a copy, we need to clobber the
+ // whole register it defined.
+ if (MachineInstr *MI = I->second.MI) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
+ TRI);
+ }
+ // Now we can erase the copy.
+ Copies.erase(I);
+ }
+ }
+ }
+
+ /// Add this copy's registers into the tracker's copy maps.
+ void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Tracking non-copy?");
+
+ MCRegister Src = CopyOperands->Source->getReg().asMCReg();
+ MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
+
+ // Remember Def is defined by the copy.
+ for (MCRegUnit Unit : TRI.regunits(Def))
+ Copies[Unit] = {MI, nullptr, {}, true};
+
+    // Remember the source that's copied to Def. Once it's clobbered, it's no
+    // longer available for copy propagation.
+ for (MCRegUnit Unit : TRI.regunits(Src)) {
+ auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}});
+ auto &Copy = I.first->second;
+ if (!is_contained(Copy.DefRegs, Def))
+ Copy.DefRegs.push_back(Def);
+ Copy.LastSeenUseInCopy = MI;
+ }
+ }
+
+ bool hasAnyCopies() {
+ return !Copies.empty();
+ }
+
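+  /// Return the copy tracked for the given register unit, if any. If
+  /// MustBeAvailable is set, only return a copy that is still marked as
+  /// available, i.e. one that has not been invalidated by an intervening
+  /// clobber.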
+ MachineInstr *findCopyForUnit(MCRegister RegUnit,
+ const TargetRegisterInfo &TRI,
+ bool MustBeAvailable = false) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (MustBeAvailable && !CI->second.Avail)
+ return nullptr;
+ return CI->second.MI;
+ }
+
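+  /// Given a register unit used as a copy source, if exactly one destination
+  /// register was copied from it, return the available COPY that defines that
+  /// destination; otherwise return nullptr.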
+ MachineInstr *findCopyDefViaUnit(MCRegister RegUnit,
+ const TargetRegisterInfo &TRI) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (CI->second.DefRegs.size() != 1)
+ return nullptr;
+ MCRegUnit RU = *TRI.regunits(CI->second.DefRegs[0]).begin();
+ return findCopyForUnit(RU, TRI, true);
+ }
+
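+  /// Find a tracked copy whose source covers \p Reg and that is still valid
+  /// at instruction \p I, i.e. no regmask between the copy and \p I clobbers
+  /// the copy's source or destination.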
+ MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ MachineInstr *AvailCopy = findCopyDefViaUnit(RU, TRI);
+
+ if (!AvailCopy)
+ return nullptr;
+
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+ Register AvailSrc = CopyOperands->Source->getReg();
+ Register AvailDef = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(AvailSrc, Reg))
+ return nullptr;
+
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef?
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
+
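+  /// Find an available copy whose destination covers \p Reg and that can
+  /// still be forwarded at \p DestCopy, i.e. no regmask between the copy and
+  /// \p DestCopy clobbers the copy's source or destination.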
+ MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
+ // We check the first RegUnit here, since we'll only be interested in the
+ // copy if it copies the entire register anyway.
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ MachineInstr *AvailCopy =
+ findCopyForUnit(RU, TRI, /*MustBeAvailable=*/true);
+
+ if (!AvailCopy)
+ return nullptr;
+
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+ Register AvailSrc = CopyOperands->Source->getReg();
+ Register AvailDef = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(AvailDef, Reg))
+ return nullptr;
+
+ // Check that the available copy isn't clobbered by any regmasks between
+ // itself and the destination.
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
+
+ // Find last COPY that defines Reg before Current MachineInstr.
+ MachineInstr *findLastSeenDefInCopy(const MachineInstr &Current,
+ MCRegister Reg,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ auto CI = Copies.find(RU);
+ if (CI == Copies.end() || !CI->second.Avail)
+ return nullptr;
+
+ MachineInstr *DefCopy = CI->second.MI;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*DefCopy, TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(Def, Reg))
+ return nullptr;
+
+ for (const MachineInstr &MI :
+ make_range(static_cast<const MachineInstr *>(DefCopy)->getIterator(),
+ Current.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(Def)) {
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of "
+ << printReg(Def, &TRI) << "\n");
+ return nullptr;
+ }
+
+ return DefCopy;
+ }
+
+ // Find last COPY that uses Reg.
+ MachineInstr *findLastSeenUseInCopy(MCRegister Reg,
+ const TargetRegisterInfo &TRI) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ auto CI = Copies.find(RU);
+ if (CI == Copies.end())
+ return nullptr;
+ return CI->second.LastSeenUseInCopy;
+ }
+
+ void clear() {
+ Copies.clear();
+ }
+};
+
+class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+
+  // If true, use TargetInstrInfo::isCopyInstr to recognize copy-like
+  // instructions; otherwise only plain COPY instructions are handled.
+ bool UseCopyInstr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ MachineCopyPropagation(bool CopyInstr = false)
+ : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ typedef enum { DebugUse = false, RegularUse = true } DebugType;
+
+ void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
+ void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
+ void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
+ void EliminateSpillageCopies(MachineBasicBlock &MBB);
+ bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
+ void forwardUses(MachineInstr &MI);
+ void propagateDefs(MachineInstr &MI);
+ bool isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI, unsigned UseIdx);
+ bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI,
+ unsigned UseIdx);
+ bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+ bool hasOverlappingMultipleDef(const MachineInstr &MI,
+ const MachineOperand &MODef, Register Def);
+
+ /// Candidates for deletion.
+ SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
+
+ /// Multimap tracking debug users in current BB
+ DenseMap<MachineInstr *, SmallSet<MachineInstr *, 2>> CopyDbgUsers;
+
+ CopyTracker Tracker;
+
+ bool Changed = false;
+};
+
+} // end anonymous namespace
+
+char MachineCopyPropagation::ID = 0;
+
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
+ "Machine Copy Propagation Pass", false, false)
+
+void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
+ DebugType DT) {
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination. If a copy is "read" by a debug user, record the user
+ // for propagation.
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI)) {
+ if (DT == RegularUse) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
+ MaybeDeadCopies.remove(Copy);
+ } else {
+ CopyDbgUsers[Copy].insert(&Reader);
+ }
+ }
+ }
+}
+
+/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
+/// This fact may have been obscured by sub register usage or may not be true at
+/// all even though Src and Def are subregisters of the registers used in
+/// PreviousCopy. e.g.
+/// isNopCopy("ecx = COPY eax", AX, CX) == true
+/// isNopCopy("ecx = COPY eax", AH, CL) == false
+static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src,
+ MCRegister Def, const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII, bool UseCopyInstr) {
+
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(PreviousCopy, *TII, UseCopyInstr);
+ MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg();
+ MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg();
+ if (Src == PreviousSrc && Def == PreviousDef)
+ return true;
+ if (!TRI->isSubRegister(PreviousSrc, Src))
+ return false;
+ unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src);
+ return SubIdx == TRI->getSubRegIndex(PreviousDef, Def);
+}
+
+/// Remove instruction \p Copy if there exists a previous copy that copies the
+/// register \p Src to the register \p Def. This may happen indirectly by
+/// copying the super registers.
+bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
+ MCRegister Src, MCRegister Def) {
+  // Avoid eliminating a copy from/to a reserved register as we cannot predict
+ // the value (Example: The sparc zero register is writable but stays zero).
+ if (MRI->isReserved(Src) || MRI->isReserved(Def))
+ return false;
+
+ // Search for an existing copy.
+ MachineInstr *PrevCopy =
+ Tracker.findAvailCopy(Copy, Def, *TRI, *TII, UseCopyInstr);
+ if (!PrevCopy)
+ return false;
+
+ auto PrevCopyOperands = isCopyInstr(*PrevCopy, *TII, UseCopyInstr);
+ // Check that the existing copy uses the correct sub registers.
+ if (PrevCopyOperands->Destination->isDead())
+ return false;
+ if (!isNopCopy(*PrevCopy, Src, Def, TRI, TII, UseCopyInstr))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
+
+ // Copy was redundantly redefining either Src or Def. Remove earlier kill
+ // flags between Copy and PrevCopy because the value will be reused now.
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(Copy, *TII, UseCopyInstr);
+ assert(CopyOperands);
+
+ Register CopyDef = CopyOperands->Destination->getReg();
+ assert(CopyDef == Src || CopyDef == Def);
+ for (MachineInstr &MI :
+ make_range(PrevCopy->getIterator(), Copy.getIterator()))
+ MI.clearRegisterKills(CopyDef, TRI);
+
+ // Clear undef flag from remaining copy if needed.
+ if (!CopyOperands->Source->isUndef()) {
+ PrevCopy->getOperand(PrevCopyOperands->Source->getOperandNo())
+ .setIsUndef(false);
+ }
+
+ Copy.eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ return true;
+}
+
+bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
+ const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(Copy, *TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+
+ if (const TargetRegisterClass *URC =
+ UseI.getRegClassConstraint(UseIdx, TII, TRI))
+ return URC->contains(Def);
+
+ // We don't process further if UseI is a COPY, since forward copy propagation
+ // should handle that.
+ return false;
+}
+
+/// Decide whether we should forward the source of \param Copy to its use in
+/// \param UseI, based on the physical register class constraints of the
+/// opcode and on avoiding the introduction of more cross-class COPYs.
+bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI,
+ unsigned UseIdx) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(Copy, *TII, UseCopyInstr);
+ Register CopySrcReg = CopyOperands->Source->getReg();
+
+ // If the new register meets the opcode register constraints, then allow
+ // forwarding.
+ if (const TargetRegisterClass *URC =
+ UseI.getRegClassConstraint(UseIdx, TII, TRI))
+ return URC->contains(CopySrcReg);
+
+ auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr);
+ if (!UseICopyOperands)
+ return false;
+
+ /// COPYs don't have register class constraints, so if the user instruction
+ /// is a COPY, we just try to avoid introducing additional cross-class
+ /// COPYs. For example:
+ ///
+ /// RegClassA = COPY RegClassB // Copy parameter
+ /// ...
+ /// RegClassB = COPY RegClassA // UseI parameter
+ ///
+ /// which after forwarding becomes
+ ///
+ /// RegClassA = COPY RegClassB
+ /// ...
+ /// RegClassB = COPY RegClassB
+ ///
+ /// so we have reduced the number of cross-class COPYs and potentially
+ /// introduced a nop COPY that can be removed.
+
+ // Allow forwarding if src and dst belong to any common class, so long as they
+ // don't belong to any (possibly smaller) common class that requires copies to
+ // go via a different class.
+ Register UseDstReg = UseICopyOperands->Destination->getReg();
+ bool Found = false;
+ bool IsCrossClass = false;
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(CopySrcReg) && RC->contains(UseDstReg)) {
+ Found = true;
+ if (TRI->getCrossCopyRegClass(RC) != RC) {
+ IsCrossClass = true;
+ break;
+ }
+ }
+ }
+ if (!Found)
+ return false;
+ if (!IsCrossClass)
+ return true;
+ // The forwarded copy would be cross-class. Only do this if the original copy
+ // was also cross-class.
+ Register CopyDstReg = CopyOperands->Destination->getReg();
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) &&
+ TRI->getCrossCopyRegClass(RC) != RC)
+ return true;
+ }
+ return false;
+}
+
+/// Check that \p MI does not have implicit uses that overlap with its \p Use
+/// operand (the register being replaced), since these can sometimes be
+/// implicitly tied to other operands. For example, on AMDGPU:
+///
+/// V_MOVRELS_B32_e32 %VGPR2, %M0<imp-use>, %EXEC<imp-use>, %VGPR2_VGPR3_VGPR4_VGPR5<imp-use>
+///
+/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no
+/// way of knowing we need to update the latter when updating the former.
+bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
+ const MachineOperand &Use) {
+ for (const MachineOperand &MIUse : MI.uses())
+ if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() &&
+ MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg()))
+ return true;
+
+ return false;
+}
+
+/// For an MI that has multiple definitions, check whether \p MI has
+/// a definition that overlaps with another of its definitions.
+/// For example, on ARM: umull r9, r9, lr, r0
+/// The umull instruction is unpredictable unless RdHi and RdLo are different.
+bool MachineCopyPropagation::hasOverlappingMultipleDef(
+ const MachineInstr &MI, const MachineOperand &MODef, Register Def) {
+ for (const MachineOperand &MIDef : MI.defs()) {
+ if ((&MIDef != &MODef) && MIDef.isReg() &&
+ TRI->regsOverlap(Def, MIDef.getReg()))
+ return true;
+ }
+
+ return false;
+}
+
+/// Look for available copies whose destination register is used by \p MI and
+/// replace the use in \p MI with the copy's source register.
+void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
+ if (!Tracker.hasAnyCopies())
+ return;
+
+ // Look for non-tied explicit vreg uses that have an active COPY
+ // instruction that defines the physical register allocated to them.
+ // Replace the vreg with the source of the active COPY.
+ for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd;
+ ++OpIdx) {
+ MachineOperand &MOUse = MI.getOperand(OpIdx);
+    // Don't forward into undef use operands since doing so can cause problems
+    // with the machine verifier: it doesn't treat undef reads as reads, so we
+    // could end up with a live range that ends on an undef read, leading to
+    // an error that the live range doesn't end on a read of the live range
+    // register.
+ if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() ||
+ MOUse.isImplicit())
+ continue;
+
+ if (!MOUse.getReg())
+ continue;
+
+ // Check that the register is marked 'renamable' so we know it is safe to
+ // rename it without violating any constraints that aren't expressed in the
+ // IR (e.g. ABI or opcode requirements).
+ if (!MOUse.isRenamable())
+ continue;
+
+ MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(),
+ *TRI, *TII, UseCopyInstr);
+ if (!Copy)
+ continue;
+
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register CopyDstReg = CopyOperands->Destination->getReg();
+ const MachineOperand &CopySrc = *CopyOperands->Source;
+ Register CopySrcReg = CopySrc.getReg();
+
+ Register ForwardedReg = CopySrcReg;
+ // MI might use a sub-register of the Copy destination, in which case the
+ // forwarded register is the matching sub-register of the Copy source.
+ if (MOUse.getReg() != CopyDstReg) {
+ unsigned SubRegIdx = TRI->getSubRegIndex(CopyDstReg, MOUse.getReg());
+ assert(SubRegIdx &&
+ "MI source is not a sub-register of Copy destination");
+ ForwardedReg = TRI->getSubReg(CopySrcReg, SubRegIdx);
+ if (!ForwardedReg) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source does not have sub-register "
+ << TRI->getSubRegIndexName(SubRegIdx) << '\n');
+ continue;
+ }
+ }
+
+ // Don't forward COPYs of reserved regs unless they are constant.
+ if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
+ continue;
+
+ if (!isForwardableRegClassCopy(*Copy, MI, OpIdx))
+ continue;
+
+ if (hasImplicitOverlap(MI, MOUse))
+ continue;
+
+ // Check that the instruction is not a copy that partially overwrites the
+ // original copy source that we are about to use. The tracker mechanism
+ // cannot cope with that.
+ if (isCopyInstr(MI, *TII, UseCopyInstr) &&
+ MI.modifiesRegister(CopySrcReg, TRI) &&
+ !MI.definesRegister(CopySrcReg)) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI);
+ continue;
+ }
+
+ if (!DebugCounter::shouldExecute(FwdCounter)) {
+ LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
+ << MI);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
+ << "\n with " << printReg(ForwardedReg, TRI)
+ << "\n in " << MI << " from " << *Copy);
+
+ MOUse.setReg(ForwardedReg);
+
+ if (!CopySrc.isRenamable())
+ MOUse.setIsRenamable(false);
+ MOUse.setIsUndef(CopySrc.isUndef());
+
+ LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+
+ // Clear kill markers that may have been invalidated.
+ for (MachineInstr &KMI :
+ make_range(Copy->getIterator(), std::next(MI.getIterator())))
+ KMI.clearRegisterKills(CopySrcReg, TRI);
+
+ ++NumCopyForwards;
+ Changed = true;
+ }
+}
+
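+/// Forward-propagate copies within \p MBB: rewrite uses of copy destinations
+/// to use the copy sources instead, and erase copies that become provably
+/// redundant or dead.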
+void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
+ << "\n");
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ // Analyze copies (which don't overlap themselves).
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
+ if (CopyOperands) {
+
+ Register RegSrc = CopyOperands->Source->getReg();
+ Register RegDef = CopyOperands->Destination->getReg();
+
+ if (!TRI->regsOverlap(RegDef, RegSrc)) {
+ assert(RegDef.isPhysical() && RegSrc.isPhysical() &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+ MCRegister Def = RegDef.asMCReg();
+ MCRegister Src = RegSrc.asMCReg();
+
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %eax = COPY %ecx
+ // =>
+ // %ecx = COPY %eax
+ //
+ // or
+ //
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %ecx = COPY %eax
+ // =>
+ // %ecx = COPY %eax
+ if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
+ continue;
+
+ forwardUses(MI);
+
+ // Src may have been changed by forwardUses()
+ CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ Src = CopyOperands->Source->getReg().asMCReg();
+
+ // If Src is defined by a previous copy, the previous copy cannot be
+ // eliminated.
+ ReadRegister(Src, MI, RegularUse);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ ReadRegister(Reg, MI, RegularUse);
+ }
+
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
+
+ // Copy is now a candidate for deletion.
+ if (!MRI->isReserved(Def))
+ MaybeDeadCopies.insert(&MI);
+
+        // If 'Def' was previously the source of another copy, then that
+        // earlier copy's source is no longer available. e.g.
+ // %xmm9 = copy %xmm2
+ // ...
+ // %xmm2 = copy %xmm0
+ // ...
+ // %xmm2 = copy %xmm9
+ Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ }
+
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+
+ continue;
+ }
+ }
+
+ // Clobber any earlyclobber regs first.
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isEarlyClobber()) {
+ MCRegister Reg = MO.getReg().asMCReg();
+ // If we have a tied earlyclobber, that means it is also read by this
+ // instruction, so we need to make sure we don't remove it as dead
+ // later.
+ if (MO.isTied())
+ ReadRegister(Reg, MI, RegularUse);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ }
+
+ forwardUses(MI);
+
+ // Not a copy.
+ SmallVector<Register, 2> Defs;
+ const MachineOperand *RegMask = nullptr;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask())
+ RegMask = &MO;
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ assert(!Reg.isVirtual() &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+ if (MO.isDef() && !MO.isEarlyClobber()) {
+ Defs.push_back(Reg.asMCReg());
+ continue;
+ } else if (MO.readsReg())
+ ReadRegister(Reg.asMCReg(), MI, MO.isDebug() ? DebugUse : RegularUse);
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. Treat clobbered registers the same way as
+ // defined registers.
+ if (RegMask) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ for (SmallSetVector<MachineInstr *, 8>::iterator DI =
+ MaybeDeadCopies.begin();
+ DI != MaybeDeadCopies.end();) {
+ MachineInstr *MaybeDead = *DI;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
+ MCRegister Reg = CopyOperands->Destination->getReg().asMCReg();
+ assert(!MRI->isReserved(Reg));
+
+ if (!RegMask->clobbersPhysReg(Reg)) {
+ ++DI;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
+ MaybeDead->dump());
+
+ // Make sure we invalidate any entries in the copy maps before erasing
+ // the instruction.
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+
+ // erase() will return the next valid iterator pointing to the next
+ // element after the erased one.
+ DI = MaybeDeadCopies.erase(DI);
+ MaybeDead->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+
+ // Any previous copy definition or reading the Defs is no longer available.
+ for (MCRegister Reg : Defs)
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+  // If MBB does have successors, then conservatively assume the defs are
+  // live-out since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (MachineInstr *MaybeDead : MaybeDeadCopies) {
+ LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
+ MaybeDead->dump());
+
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
+ assert(CopyOperands);
+
+ Register SrcReg = CopyOperands->Source->getReg();
+ Register DestReg = CopyOperands->Destination->getReg();
+ assert(!MRI->isReserved(DestReg));
+
+ // Update matching debug values, if any.
+ SmallVector<MachineInstr *> MaybeDeadDbgUsers(
+ CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end());
+ MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(),
+ MaybeDeadDbgUsers);
+
+ MaybeDead->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
+static bool isBackwardPropagatableCopy(const DestSourcePair &CopyOperands,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII) {
+ Register Def = CopyOperands.Destination->getReg();
+ Register Src = CopyOperands.Source->getReg();
+
+ if (!Def || !Src)
+ return false;
+
+ if (MRI.isReserved(Def) || MRI.isReserved(Src))
+ return false;
+
+ return CopyOperands.Source->isRenamable() && CopyOperands.Source->isKill();
+}
+
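+/// For each register defined by \p MI that is later copied away by a tracked
+/// COPY, rewrite \p MI to define the COPY's destination directly, so that the
+/// COPY becomes a candidate for deletion.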
+void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
+ if (!Tracker.hasAnyCopies())
+ return;
+
+ for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+ ++OpIdx) {
+ MachineOperand &MODef = MI.getOperand(OpIdx);
+
+ if (!MODef.isReg() || MODef.isUse())
+ continue;
+
+ // Ignore non-trivial cases.
+ if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit())
+ continue;
+
+ if (!MODef.getReg())
+ continue;
+
+    // We only handle the case where the register comes from a vreg.
+ if (!MODef.isRenamable())
+ continue;
+
+ MachineInstr *Copy = Tracker.findAvailBackwardCopy(
+ MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr);
+ if (!Copy)
+ continue;
+
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+ Register Src = CopyOperands->Source->getReg();
+
+ if (MODef.getReg() != Src)
+ continue;
+
+ if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx))
+ continue;
+
+ if (hasImplicitOverlap(MI, MODef))
+ continue;
+
+ if (hasOverlappingMultipleDef(MI, MODef, Def))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
+ << "\n with " << printReg(Def, TRI) << "\n in "
+ << MI << " from " << *Copy);
+
+ MODef.setReg(Def);
+ MODef.setIsRenamable(CopyOperands->Destination->isRenamable());
+
+ LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+ MaybeDeadCopies.insert(Copy);
+ Changed = true;
+ ++NumCopyBackwardPropagated;
+ }
+}
+
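+/// Walk \p MBB bottom-up, tracking backward-propagatable copies and invoking
+/// propagateDefs() on the remaining instructions; copies made redundant this
+/// way are erased once the whole block has been processed.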
+void MachineCopyPropagation::BackwardCopyPropagateBlock(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
+ << "\n");
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
+ // Ignore non-trivial COPYs.
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
+ if (CopyOperands && MI.getNumOperands() == 2) {
+ Register DefReg = CopyOperands->Destination->getReg();
+ Register SrcReg = CopyOperands->Source->getReg();
+
+ if (!TRI->regsOverlap(DefReg, SrcReg)) {
+ // Unlike forward cp, we don't invoke propagateDefs here,
+ // just let forward cp do COPY-to-COPY propagation.
+ if (isBackwardPropagatableCopy(*CopyOperands, *MRI, *TII)) {
+ Tracker.invalidateRegister(SrcReg.asMCReg(), *TRI, *TII,
+ UseCopyInstr);
+ Tracker.invalidateRegister(DefReg.asMCReg(), *TRI, *TII,
+ UseCopyInstr);
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+ continue;
+ }
+ }
+ }
+
+ // Invalidate any earlyclobber regs first.
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isEarlyClobber()) {
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr);
+ }
+
+ propagateDefs(MI);
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ if (!MO.getReg())
+ continue;
+
+ if (MO.isDef())
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
+ UseCopyInstr);
+
+ if (MO.readsReg()) {
+ if (MO.isDebug()) {
+ // Check if the register in the debug instruction is utilized
+ // in a copy instruction, so we can update the debug info if the
+ // register is changed.
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg().asMCReg())) {
+ if (auto *Copy = Tracker.findCopyDefViaUnit(Unit, *TRI)) {
+ CopyDbgUsers[Copy].insert(&MI);
+ }
+ }
+ } else {
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
+ UseCopyInstr);
+ }
+ }
+ }
+ }
+
+ for (auto *Copy : MaybeDeadCopies) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(),
+ CopyDbgUsers[Copy].end());
+
+ MRI->updateDbgUsersToReg(Src.asMCReg(), Def.asMCReg(), MaybeDeadDbgUsers);
+ Copy->eraseFromParent();
+ ++NumDeletes;
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
+static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
+ MachineInstr *Leader) {
+ auto &SC = SpillChain[Leader];
+ auto &RC = ReloadChain[Leader];
+ for (auto I = SC.rbegin(), E = SC.rend(); I != E; ++I)
+ (*I)->dump();
+ for (MachineInstr *MI : RC)
+ MI->dump();
+}
+
+// Remove spill-reload like copy chains. For example
+// r0 = COPY r1
+// r1 = COPY r2
+// r2 = COPY r3
+// r3 = COPY r4
+// <def-use r4>
+// r4 = COPY r3
+// r3 = COPY r2
+// r2 = COPY r1
+// r1 = COPY r0
+// will be folded into
+// r0 = COPY r1
+// r1 = COPY r4
+// <def-use r4>
+// r4 = COPY r1
+// r1 = COPY r0
+// TODO: Currently we don't track usage of r0 outside the chain, so we
+// conservatively keep its value as it was before the rewrite.
+//
+// The algorithm is trying to keep
+// property#1: No Def of a spill COPY in the chain is used or defined until
+// the paired reload COPY in the chain uses the Def.
+//
+// property#2: No Source of a COPY in the chain is used or defined until the
+// next COPY in the chain defines the Source, except for the innermost
+// spill-reload pair.
+//
+// The algorithm checks every COPY inside the MBB, assumes it is a reload
+// COPY, and then tries to find the paired spill COPY by searching backward
+// for the COPY that defines the Src of the reload COPY. If such a pair is
+// found, it is added either to an existing chain or to a new chain, depending
+// on whether the last available COPY uses the Def of the reload COPY.
+// Implementation note: we use CopyTracker::findLastSeenDefInCopy(Reg, ...) to
+// find the last COPY that defines Reg, and
+// CopyTracker::findLastSeenUseInCopy(Reg, ...) to find the last COPY that
+// uses Reg. When we encounter a non-COPY instruction, we check the registers
+// in its operands. If such a Reg is defined by a COPY, we untrack it via
+// CopyTracker::clobberRegister(Reg, ...).
+void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
+  // ChainLeader maps MI inside a spill-reload chain to its innermost reload
+  // COPY. Thus we can track if a MI belongs to an existing spill-reload chain.
+ DenseMap<MachineInstr *, MachineInstr *> ChainLeader;
+ // SpillChain maps innermost reload COPY of a spill-reload chain to a sequence
+ // of COPYs that forms spills of a spill-reload chain.
+ // ReloadChain maps innermost reload COPY of a spill-reload chain to a
+ // sequence of COPYs that forms reloads of a spill-reload chain.
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> SpillChain, ReloadChain;
+  // If a COPY's Source has a use or def before the next COPY defines the
+  // Source, we put the COPY in this set to keep property#2.
+ DenseSet<const MachineInstr *> CopySourceInvalid;
+
+ auto TryFoldSpillageCopies =
+ [&, this](const SmallVectorImpl<MachineInstr *> &SC,
+ const SmallVectorImpl<MachineInstr *> &RC) {
+ assert(SC.size() == RC.size() && "Spill-reload should be paired");
+
+        // We need at least 3 pairs of copies for the transformation to apply,
+        // because the outermost pair cannot be removed since we don't recolor
+        // outside of the chain, and we need at least one temporary spill slot
+        // to shorten the chain. If we only have a chain of two pairs, we
+        // already have the shortest sequence this code can handle: the
+        // outermost pair for the temporary spill slot, and the pair that uses
+        // that temporary spill slot for the other end of the chain.
+        // TODO: We might be able to simplify to one spill-reload pair by
+        // collecting more information about the outermost COPY.
+ if (SC.size() <= 2)
+ return;
+
+        // If property#2 is violated, we don't fold the chain.
+ for (const MachineInstr *Spill : make_range(SC.begin() + 1, SC.end()))
+ if (CopySourceInvalid.count(Spill))
+ return;
+
+ for (const MachineInstr *Reload : make_range(RC.begin(), RC.end() - 1))
+ if (CopySourceInvalid.count(Reload))
+ return;
+
+ auto CheckCopyConstraint = [this](Register Def, Register Src) {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(Def) && RC->contains(Src))
+ return true;
+ }
+ return false;
+ };
+
+ auto UpdateReg = [](MachineInstr *MI, const MachineOperand *Old,
+ const MachineOperand *New) {
+ for (MachineOperand &MO : MI->operands()) {
+ if (&MO == Old)
+ MO.setReg(New->getReg());
+ }
+ };
+
+ std::optional<DestSourcePair> InnerMostSpillCopy =
+ isCopyInstr(*SC[0], *TII, UseCopyInstr);
+ std::optional<DestSourcePair> OuterMostSpillCopy =
+ isCopyInstr(*SC.back(), *TII, UseCopyInstr);
+ std::optional<DestSourcePair> InnerMostReloadCopy =
+ isCopyInstr(*RC[0], *TII, UseCopyInstr);
+ std::optional<DestSourcePair> OuterMostReloadCopy =
+ isCopyInstr(*RC.back(), *TII, UseCopyInstr);
+ if (!CheckCopyConstraint(OuterMostSpillCopy->Source->getReg(),
+ InnerMostSpillCopy->Source->getReg()) ||
+ !CheckCopyConstraint(InnerMostReloadCopy->Destination->getReg(),
+ OuterMostReloadCopy->Destination->getReg()))
+ return;
+
+ SpillageChainsLength += SC.size() + RC.size();
+ NumSpillageChains += 1;
+ UpdateReg(SC[0], InnerMostSpillCopy->Destination,
+ OuterMostSpillCopy->Source);
+ UpdateReg(RC[0], InnerMostReloadCopy->Source,
+ OuterMostReloadCopy->Destination);
+
+ for (size_t I = 1; I < SC.size() - 1; ++I) {
+ SC[I]->eraseFromParent();
+ RC[I]->eraseFromParent();
+ NumDeletes += 2;
+ }
+ };
+
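+  // A COPY takes part in chain folding only if it has no implicit operands,
+  // its source and destination registers do not overlap, and both operands
+  // are renamable.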
+ auto IsFoldableCopy = [this](const MachineInstr &MaybeCopy) {
+ if (MaybeCopy.getNumImplicitOperands() > 0)
+ return false;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MaybeCopy, *TII, UseCopyInstr);
+ if (!CopyOperands)
+ return false;
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ return Src && Def && !TRI->regsOverlap(Src, Def) &&
+ CopyOperands->Source->isRenamable() &&
+ CopyOperands->Destination->isRenamable();
+ };
+
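+  // Spill and Reload form a pair when the reload copies back exactly what the
+  // spill copied away, i.e. Spill is "b = COPY a" and Reload is "a = COPY b".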
+ auto IsSpillReloadPair = [&, this](const MachineInstr &Spill,
+ const MachineInstr &Reload) {
+ if (!IsFoldableCopy(Spill) || !IsFoldableCopy(Reload))
+ return false;
+ std::optional<DestSourcePair> SpillCopy =
+ isCopyInstr(Spill, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> ReloadCopy =
+ isCopyInstr(Reload, *TII, UseCopyInstr);
+ if (!SpillCopy || !ReloadCopy)
+ return false;
+ return SpillCopy->Source->getReg() == ReloadCopy->Destination->getReg() &&
+ SpillCopy->Destination->getReg() == ReloadCopy->Source->getReg();
+ };
+
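+  // Current extends the chain ending at Prev when Prev's source register is
+  // Current's destination register.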
+ auto IsChainedCopy = [&, this](const MachineInstr &Prev,
+ const MachineInstr &Current) {
+ if (!IsFoldableCopy(Prev) || !IsFoldableCopy(Current))
+ return false;
+ std::optional<DestSourcePair> PrevCopy =
+ isCopyInstr(Prev, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CurrentCopy =
+ isCopyInstr(Current, *TII, UseCopyInstr);
+ if (!PrevCopy || !CurrentCopy)
+ return false;
+ return PrevCopy->Source->getReg() == CurrentCopy->Destination->getReg();
+ };
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
+
+ // Update track information via non-copy instruction.
+ SmallSet<Register, 8> RegsToClobber;
+ if (!CopyOperands) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ MachineInstr *LastUseCopy =
+ Tracker.findLastSeenUseInCopy(Reg.asMCReg(), *TRI);
+ if (LastUseCopy) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source of\n");
+ LLVM_DEBUG(LastUseCopy->dump());
+ LLVM_DEBUG(dbgs() << "might be invalidated by\n");
+ LLVM_DEBUG(MI.dump());
+ CopySourceInvalid.insert(LastUseCopy);
+ }
+        // Note that Tracker.clobberRegister(Reg, ...) removes the tracking of
+        // Reg, i.e., the COPY that defines Reg is removed from the mapping
+        // and COPYs that use Reg are marked unavailable.
+        // We don't invoke CopyTracker::clobberRegister(Reg, ...) if Reg is
+        // not defined by a previous COPY, since we don't want to make COPYs
+        // that use Reg unavailable.
+ if (Tracker.findLastSeenDefInCopy(MI, Reg.asMCReg(), *TRI, *TII,
+ UseCopyInstr))
+ // Thus we can keep the property#1.
+ RegsToClobber.insert(Reg);
+ }
+ for (Register Reg : RegsToClobber) {
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Reg, TRI)
+ << "\n");
+ }
+ continue;
+ }
+
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ // Check if we can find a pair spill-reload copy.
+ LLVM_DEBUG(dbgs() << "MCP: Searching paired spill for reload: ");
+ LLVM_DEBUG(MI.dump());
+ MachineInstr *MaybeSpill =
+        Tracker.findLastSeenDefInCopy(MI, Src.asMCReg(), *TRI, *TII,
+                                      UseCopyInstr);
+ bool MaybeSpillIsChained = ChainLeader.count(MaybeSpill);
+ if (!MaybeSpillIsChained && MaybeSpill &&
+ IsSpillReloadPair(*MaybeSpill, MI)) {
+ // Check if we already have an existing chain. Now we have a
+ // spill-reload pair.
+ // L2: r2 = COPY r3
+ // L5: r3 = COPY r2
+ // Looking for a valid COPY before L5 which uses r3.
+      // There can be several cases.
+      // Case #1:
+      // No COPY is found, which can happen if r3 is defined or used between
+      // (L2, L5); we create a new chain for L2 and L5.
+ // Case #2:
+ // L2: r2 = COPY r3
+ // L5: r3 = COPY r2
+      // Such a COPY is found and it is L2; we create a new chain for L2 and
+      // L5.
+ // Case #3:
+ // L2: r2 = COPY r3
+ // L3: r1 = COPY r3
+ // L5: r3 = COPY r2
+ // we create a new chain for L2 and L5.
+ // Case #4:
+ // L2: r2 = COPY r3
+ // L3: r1 = COPY r3
+ // L4: r3 = COPY r1
+ // L5: r3 = COPY r2
+      // Such a COPY won't be found since L4 defines r3. We create a new chain
+      // for L2 and L5.
+ // Case #5:
+ // L2: r2 = COPY r3
+ // L3: r3 = COPY r1
+ // L4: r1 = COPY r3
+ // L5: r3 = COPY r2
+      // The COPY found is L4, which belongs to an existing chain; we add L2
+      // and L5 to this chain.
+ LLVM_DEBUG(dbgs() << "MCP: Found spill: ");
+ LLVM_DEBUG(MaybeSpill->dump());
+ MachineInstr *MaybePrevReload =
+ Tracker.findLastSeenUseInCopy(Def.asMCReg(), *TRI);
+ auto Leader = ChainLeader.find(MaybePrevReload);
+ MachineInstr *L = nullptr;
+ if (Leader == ChainLeader.end() ||
+ (MaybePrevReload && !IsChainedCopy(*MaybePrevReload, MI))) {
+ L = &MI;
+ assert(!SpillChain.count(L) &&
+ "SpillChain should not have contained newly found chain");
+ } else {
+ assert(MaybePrevReload &&
+               "Finding a valid leader through nullptr should not happen");
+ L = Leader->second;
+ assert(SpillChain[L].size() > 0 &&
+ "Existing chain's length should be larger than zero");
+ }
+ assert(!ChainLeader.count(&MI) && !ChainLeader.count(MaybeSpill) &&
+ "Newly found paired spill-reload should not belong to any chain "
+ "at this point");
+ ChainLeader.insert({MaybeSpill, L});
+ ChainLeader.insert({&MI, L});
+ SpillChain[L].push_back(MaybeSpill);
+ ReloadChain[L].push_back(&MI);
+ LLVM_DEBUG(dbgs() << "MCP: Chain " << L << " now is:\n");
+ LLVM_DEBUG(printSpillReloadChain(SpillChain, ReloadChain, L));
+ } else if (MaybeSpill && !MaybeSpillIsChained) {
+      // MaybeSpill is unable to pair with MI. That is to say, adding MI
+      // makes the chain invalid.
+      // The COPY that defines Src is no longer considered a candidate for a
+      // valid chain, since we expect the Def of a spill copy not to be used
+      // by any COPY instruction until a reload copy. For example:
+ // L1: r1 = COPY r2
+ // L2: r3 = COPY r1
+ // If we later have
+ // L1: r1 = COPY r2
+ // L2: r3 = COPY r1
+ // L3: r2 = COPY r1
+ // L1 and L3 can't be a valid spill-reload pair.
+ // Thus we keep the property#1.
+ LLVM_DEBUG(dbgs() << "MCP: Not paired spill-reload:\n");
+ LLVM_DEBUG(MaybeSpill->dump());
+ LLVM_DEBUG(MI.dump());
+ Tracker.clobberRegister(Src.asMCReg(), *TRI, *TII, UseCopyInstr);
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Src, TRI)
+ << "\n");
+ }
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+ }
+
+ for (auto I = SpillChain.begin(), E = SpillChain.end(); I != E; ++I) {
+ auto &SC = I->second;
+ assert(ReloadChain.count(I->first) &&
+ "Reload chain of the same leader should exist");
+ auto &RC = ReloadChain[I->first];
+ TryFoldSpillageCopies(SC, RC);
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ bool isSpillageCopyElimEnabled = false;
+ switch (EnableSpillageCopyElimination) {
+ case cl::BOU_UNSET:
+ isSpillageCopyElimEnabled =
+ MF.getSubtarget().enableSpillageCopyElimination();
+ break;
+ case cl::BOU_TRUE:
+ isSpillageCopyElimEnabled = true;
+ break;
+ case cl::BOU_FALSE:
+ isSpillageCopyElimEnabled = false;
+ break;
+ }
+
+ Changed = false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (isSpillageCopyElimEnabled)
+ EliminateSpillageCopies(MBB);
+ BackwardCopyPropagateBlock(MBB);
+ ForwardCopyPropagateBlock(MBB);
+ }
+
+ return Changed;
+}
+
+MachineFunctionPass *
+llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) {
+ return new MachineCopyPropagation(UseCopyInstr);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
new file mode 100644
index 000000000000..57f7a098ac17
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -0,0 +1,151 @@
+//===- MachineCycleAnalysis.cpp - Compute CycleInfo for Machine IR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
+#include "llvm/ADT/GenericCycleImpl.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
+template class llvm::GenericCycle<llvm::MachineSSAContext>;
+
+char MachineCycleInfoWrapperPass::ID = 0;
+
+MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineCycleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineCycleInfoWrapperPass, "machine-cycles",
+ "Machine Cycle Info Analysis", true, true)
+INITIALIZE_PASS_END(MachineCycleInfoWrapperPass, "machine-cycles",
+ "Machine Cycle Info Analysis", true, true)
+
+void MachineCycleInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineCycleInfoWrapperPass::runOnMachineFunction(MachineFunction &Func) {
+ CI.clear();
+
+ F = &Func;
+ CI.compute(Func);
+ return false;
+}
+
+void MachineCycleInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
+ OS << "MachineCycleInfo for function: " << F->getName() << "\n";
+ CI.print(OS);
+}
+
+void MachineCycleInfoWrapperPass::releaseMemory() {
+ CI.clear();
+ F = nullptr;
+}
+
+namespace {
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCycleInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // namespace
+
+char MachineCycleInfoPrinterPass::ID = 0;
+
+MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineCycleInfoPrinterPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineCycleInfoPrinterPass, "print-machine-cycles",
+ "Print Machine Cycle Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_END(MachineCycleInfoPrinterPass, "print-machine-cycles",
+ "Print Machine Cycle Info Analysis", true, true)
+
+void MachineCycleInfoPrinterPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
+ auto &CI = getAnalysis<MachineCycleInfoWrapperPass>();
+ CI.print(errs());
+ return false;
+}
+
+bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
+ MachineFunction *MF = I.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
+
+ // The instruction is cycle invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // An instruction that uses or defines a physical register can't e.g. be
+ // hoisted, so mark this as not invariant.
+ if (Reg.isPhysical()) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead can't be moved.
+ return false;
+ } else if (any_of(Cycle->getEntries(),
+ [&](const MachineBasicBlock *Block) {
+ return Block->isLiveIn(Reg);
+ })) {
+ // If the reg is live into any header of the cycle we can't hoist an
+ // instruction which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!");
+
+ // If the cycle contains the definition of an operand, then the instruction
+ // isn't cycle invariant.
+ if (Cycle->contains(MRI->getVRegDef(Reg)->getParent()))
+ return false;
+ }
+
+ // If we got this far, the instruction is cycle invariant!
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
new file mode 100644
index 000000000000..c264e199cf47
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -0,0 +1,207 @@
+//===- MachineDebugify.cpp - Attach synthetic debug info to everything ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This pass attaches synthetic debug info to everything. It can be used
+/// to create targeted tests for debug info preservation, or test for CodeGen
+/// differences with vs. without debug info.
+///
+/// This isn't intended to have feature parity with Debugify.
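+///
+/// A rough way to exercise the pass on MIR input (assuming an llc binary with
+/// the usual -run-pass plumbing) is, for example:
+///   llc -run-pass=mir-debugify foo.mir -o -
+/// The companion mir-check-debugify pass can then verify the synthetic debug
+/// info attached here.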
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/Debugify.h"
+
+#define DEBUG_TYPE "mir-debugify"
+
+using namespace llvm;
+
+namespace {
+bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
+ DIBuilder &DIB, Function &F) {
+ MachineFunction *MaybeMF = MMI.getMachineFunction(F);
+ if (!MaybeMF)
+ return false;
+ MachineFunction &MF = *MaybeMF;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ DISubprogram *SP = F.getSubprogram();
+ assert(SP && "IR Debugify just created it?");
+
+ Module &M = *F.getParent();
+ LLVMContext &Ctx = M.getContext();
+
+ unsigned NextLine = SP->getLine();
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // This will likely emit line numbers beyond the end of the imagined
+ // source function and into subsequent ones. We don't do anything about
+ // that as it doesn't really matter to the compiler where the line is in
+ // the imaginary source code.
+ MI.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP));
+ }
+ }
+
+ // Find local variables defined by debugify. No attempt is made to match up
+ // MIR-level regs to the 'correct' IR-level variables: there isn't a simple
+ // way to do that, and it isn't necessary to find interesting CodeGen bugs.
+ // Instead, simply keep track of one variable per line. Later, we can insert
+ // DBG_VALUE insts that point to these local variables. Emitting DBG_VALUEs
+ // which cover a wide range of lines can help stress the debug info passes:
+ // if we can't do that, fall back to using the local variable which precedes
+ // all the others.
+ Function *DbgValF = M.getFunction("llvm.dbg.value");
+ DbgValueInst *EarliestDVI = nullptr;
+ DenseMap<unsigned, DILocalVariable *> Line2Var;
+ DIExpression *Expr = nullptr;
+ if (DbgValF) {
+ for (const Use &U : DbgValF->uses()) {
+ auto *DVI = dyn_cast<DbgValueInst>(U.getUser());
+ if (!DVI || DVI->getFunction() != &F)
+ continue;
+ unsigned Line = DVI->getDebugLoc().getLine();
+ assert(Line != 0 && "debugify should not insert line 0 locations");
+ Line2Var[Line] = DVI->getVariable();
+ if (!EarliestDVI || Line < EarliestDVI->getDebugLoc().getLine())
+ EarliestDVI = DVI;
+ Expr = DVI->getExpression();
+ }
+ }
+ if (Line2Var.empty())
+ return true;
+
+ // Now, try to insert a DBG_VALUE instruction after each real instruction.
+ // Do this by introducing debug uses of each register definition. If that is
+ // not possible (e.g. we have a phi or a meta instruction), emit a constant.
+ uint64_t NextImm = 0;
+ SmallSet<DILocalVariable *, 16> VarSet;
+ const MCInstrDesc &DbgValDesc = TII.get(TargetOpcode::DBG_VALUE);
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator FirstNonPHIIt = MBB.getFirstNonPHI();
+ for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I;
+ ++I;
+
+ // `I` may point to a DBG_VALUE created in the previous loop iteration.
+ if (MI.isDebugInstr())
+ continue;
+
+ // It's not allowed to insert DBG_VALUEs after a terminator.
+ if (MI.isTerminator())
+ continue;
+
+ // Find a suitable insertion point for the DBG_VALUE.
+ auto InsertBeforeIt = MI.isPHI() ? FirstNonPHIIt : I;
+
+ // Find a suitable local variable for the DBG_VALUE.
+ unsigned Line = MI.getDebugLoc().getLine();
+ if (!Line2Var.count(Line))
+ Line = EarliestDVI->getDebugLoc().getLine();
+ DILocalVariable *LocalVar = Line2Var[Line];
+ assert(LocalVar && "No variable for current line?");
+ VarSet.insert(LocalVar);
+
+ // Emit DBG_VALUEs for register definitions.
+ SmallVector<MachineOperand *, 4> RegDefs;
+ for (MachineOperand &MO : MI.all_defs())
+ if (MO.getReg())
+ RegDefs.push_back(&MO);
+ for (MachineOperand *MO : RegDefs)
+ BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc,
+ /*IsIndirect=*/false, *MO, LocalVar, Expr);
+
+ // If no register definitions were found, fall back to emitting a constant
+ // DBG_VALUE instead.
+ if (RegDefs.empty()) {
+ auto ImmOp = MachineOperand::CreateImm(NextImm++);
+ BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc,
+ /*IsIndirect=*/false, ImmOp, LocalVar, Expr);
+ }
+ }
+ }
+
+ // Save the number of lines and variables into the "llvm.mir.debugify" named
+ // metadata; it is consumed by mir-check-debugify.
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
+ IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+ if (!NMD) {
+ NMD = M.getOrInsertNamedMetadata("llvm.mir.debugify");
+ auto addDebugifyOperand = [&](unsigned N) {
+ NMD->addOperand(MDNode::get(
+ Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N))));
+ };
+ // Add number of lines.
+ addDebugifyOperand(NextLine - 1);
+ // Add number of variables.
+ addDebugifyOperand(VarSet.size());
+ } else {
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.mir.debugify should have exactly 2 operands!");
+ auto setDebugifyOperand = [&](unsigned Idx, unsigned N) {
+ NMD->setOperand(Idx, MDNode::get(Ctx, ValueAsMetadata::getConstant(
+ ConstantInt::get(Int32Ty, N))));
+ };
+ auto getDebugifyOperand = [&](unsigned Idx) {
+ return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+ ->getZExtValue();
+ };
+ // Set number of lines.
+ setDebugifyOperand(0, NextLine - 1);
+ // Set number of variables.
+ auto OldNumVars = getDebugifyOperand(1);
+ setDebugifyOperand(1, OldNumVars + VarSet.size());
+ }
+
+ return true;
+}
+
+/// ModulePass for attaching synthetic debug info to everything, used with the
+/// legacy module pass manager.
+struct DebugifyMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ // We will insert new debugify metadata, so any old metadata must already
+ // have been stripped.
+ assert(!M.getNamedMetadata("llvm.mir.debugify") &&
+ "llvm.mir.debugify metadata already exists! Strip it first");
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return applyDebugifyMetadata(
+ M, M.functions(),
+ "ModuleDebugify: ", [&](DIBuilder &DIB, Function &F) -> bool {
+ return applyDebugifyMetadataToMachineFunction(MMI, DIB, F);
+ });
+ }
+
+ DebugifyMachineModule() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ static char ID; // Pass identification.
+};
+char DebugifyMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DebugifyMachineModule, DEBUG_TYPE,
+ "Machine Debugify Module", false, false)
+INITIALIZE_PASS_END(DebugifyMachineModule, DEBUG_TYPE,
+ "Machine Debugify Module", false, false)
+
+ModulePass *llvm::createDebugifyMachineModulePass() {
+ return new DebugifyMachineModule();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
new file mode 100644
index 000000000000..346cfedde390
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -0,0 +1,53 @@
+//===- MachineDominanceFrontier.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+namespace llvm {
+template class DominanceFrontierBase<MachineBasicBlock, false>;
+template class DominanceFrontierBase<MachineBasicBlock, true>;
+template class ForwardDominanceFrontierBase<MachineBasicBlock>;
+} // namespace llvm
+
+char MachineDominanceFrontier::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+
+MachineDominanceFrontier::MachineDominanceFrontier() : MachineFunctionPass(ID) {
+ initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry());
+}
+
+char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID;
+
+bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Base.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineDominanceFrontier::releaseMemory() {
+ Base.releaseMemory();
+}
+
+void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 000000000000..0632cde9c6f4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,152 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace llvm {
+// Always verify dominfo if expensive checking is enabled.
+#ifdef EXPENSIVE_CHECKS
+bool VerifyMachineDomInfo = true;
+#else
+bool VerifyMachineDomInfo = false;
+#endif
+} // namespace llvm
+
+static cl::opt<bool, true> VerifyMachineDomInfoX(
+ "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden,
+ cl::desc("Verify machine dominator info (time consuming)"));
+
+namespace llvm {
+template class DomTreeNodeBase<MachineBasicBlock>;
+template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase
+} // namespace llvm
+
+char MachineDominatorTree::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ calculate(F);
+ return false;
+}
+
+void MachineDominatorTree::calculate(MachineFunction &F) {
+ CriticalEdgesToSplit.clear();
+ NewBBs.clear();
+ DT.reset(new DomTreeBase<MachineBasicBlock>());
+ DT->recalculate(F);
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+}
+
+void MachineDominatorTree::releaseMemory() {
+ CriticalEdgesToSplit.clear();
+ DT.reset(nullptr);
+}
+
+void MachineDominatorTree::verifyAnalysis() const {
+ if (DT && VerifyMachineDomInfo)
+ if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {
+ errs() << "MachineDominatorTree verification failed\n";
+ abort();
+ }
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ if (DT)
+ DT->print(OS);
+}
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+ // Bail out early if there is nothing to do.
+ if (CriticalEdgesToSplit.empty())
+ return;
+
+ // For each element in CriticalEdgesToSplit, remember whether or not that
+ // element is the new immediate dominator of its successor. The mapping is done
+ // by index, i.e., the information for the ith element of CriticalEdgesToSplit
+ // is the ith element of IsNewIDom.
+ SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+ size_t Idx = 0;
+
+ // Collect all the dominance properties info, before invalidating
+ // the underlying DT.
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // Update dominator information.
+ MachineBasicBlock *Succ = Edge.ToBB;
+ MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+ for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+ if (PredBB == Edge.NewBB)
+ continue;
+ // If we are in this situation:
+ // FromBB1 FromBB2
+ // + +
+ // + + + +
+ // + + + +
+ // ... Split1 Split2 ...
+ // + +
+ // + +
+ // +
+ // Succ
+ // Instead of checking the dominance property against Split2, we check it
+ // against FromBB2, since Split2 is not yet known to the underlying DT
+ // structure.
+ if (NewBBs.count(PredBB)) {
+ assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+ "critical edge split has more "
+ "than one predecessor!");
+ PredBB = *PredBB->pred_begin();
+ }
+ if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+ IsNewIDom[Idx] = false;
+ break;
+ }
+ }
+ ++Idx;
+ }
+
+ // Now, update DT with the collected dominance properties info.
+ Idx = 0;
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // We know FromBB dominates NewBB.
+ MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx])
+ DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
+ ++Idx;
+ }
+ NewBBs.clear();
+ CriticalEdgesToSplit.clear();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
new file mode 100644
index 000000000000..280d3a6a41ed
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -0,0 +1,256 @@
+//===-- MachineFrameInfo.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements MachineFrameInfo that manages the stack frame.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+#define DEBUG_TYPE "codegen"
+
+using namespace llvm;
+
+void MachineFrameInfo::ensureMaxAlignment(Align Alignment) {
+ if (!StackRealignable)
+ assert(Alignment <= StackAlignment &&
+ "For targets without stack realignment, Alignment is out of limit!");
+ if (MaxAlignment < Alignment)
+ MaxAlignment = Alignment;
+}
+
+/// Clamp the alignment if requested and emit a warning.
+static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment,
+ Align StackAlignment) {
+ if (!ShouldClamp || Alignment <= StackAlignment)
+ return Alignment;
+ LLVM_DEBUG(dbgs() << "Warning: requested alignment " << DebugStr(Alignment)
+ << " exceeds the stack alignment "
+ << DebugStr(StackAlignment)
+ << " when stack realignment is off" << '\n');
+ return StackAlignment;
+}
+
+int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment,
+ bool IsSpillSlot,
+ const AllocaInst *Alloca,
+ uint8_t StackID) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.push_back(StackObject(Size, Alignment, 0, false, IsSpillSlot, Alloca,
+ !IsSpillSlot, StackID));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ if (contributesToMaxAlignment(StackID))
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, Align Alignment) {
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ CreateStackObject(Size, Alignment, true);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+int MachineFrameInfo::CreateVariableSizedObject(Align Alignment,
+ const AllocaInst *Alloca) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool IsImmutable, bool IsAliased) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned. Note that unlike the non-fixed case, if the
+ // stack needs realignment, we can't assume that the stack will in fact be
+ // aligned.
+ Align Alignment =
+ commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.insert(Objects.begin(),
+ StackObject(Size, Alignment, SPOffset, IsImmutable,
+ /*IsSpillSlot=*/false, /*Alloca=*/nullptr,
+ IsAliased));
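+ // Fixed objects are addressed with negative frame indices: the Nth fixed
+ // object created gets index -N.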
+ return -++NumFixedObjects;
+}
+
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+ int64_t SPOffset,
+ bool IsImmutable) {
+ Align Alignment =
+ commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.insert(Objects.begin(),
+ StackObject(Size, Alignment, SPOffset, IsImmutable,
+ /*IsSpillSlot=*/true, /*Alloca=*/nullptr,
+ /*IsAliased=*/false));
+ return -++NumFixedObjects;
+}
+
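+/// Return the registers that are "pristine" in \p MF: callee-saved registers
+/// that this function neither saves nor restores, and which must therefore be
+/// left untouched so they keep their values from function entry.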
+BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+ ++CSR)
+ BV.set(*CSR);
+
+ // Saved CSRs are not pristine.
+ for (const auto &I : getCalleeSavedInfo())
+ for (MCPhysReg S : TRI->subregs_inclusive(I.getReg()))
+ BV.reset(S);
+
+ return BV;
+}
+
+uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ Align MaxAlign = getMaxAlign();
+ int64_t Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ // Only estimate stack size of default stack.
+ if (getStackID(i) != TargetStackID::Default)
+ continue;
+ int64_t FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ // Only estimate stack size of live objects on default stack.
+ if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default)
+ continue;
+ Offset += getObjectSize(i);
+ Align Alignment = getObjectAlign(i);
+ // Adjust to alignment boundary
+ Offset = alignTo(Offset, Alignment);
+
+ MaxAlign = std::max(Alignment, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or allocas, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ Align StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->hasStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlign();
+ else
+ StackAlign = TFI->getTransientStackAlign();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ return alignTo(Offset, StackAlign);
+}
+
+void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u &&
+ "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known");
+
+ MaxCallFrameSize = 0;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) {
+ unsigned Size = TII.getFrameSize(MI);
+ MaxCallFrameSize = std::max(MaxCallFrameSize, Size);
+ AdjustsStack = true;
+ } else if (MI.isInlineAsm()) {
+ // Some inline asm needs a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+ }
+ }
+}
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
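+ // Each frame object is printed on its own line; for example, a fixed 8-byte,
+ // 8-aligned slot at SP+8 would come out roughly as (illustrative):
+ //   fi#-1: size=8, align=8, fixed, at location [SP+8]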
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+
+ if (SO.StackID != 0)
+ OS << "id=" << static_cast<unsigned>(SO.StackID) << ' ';
+
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment.value();
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 000000000000..88939e96e07f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,1521 @@
+//===- MachineFunction.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "LiveDebugValues/LiveDebugValues.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "codegen"
+
+static cl::opt<unsigned> AlignAllFunctions(
+ "align-all-functions",
+ cl::desc("Force the alignment of all functions in log2 format (e.g. 4 "
+ "means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
+
+static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
+ using P = MachineFunctionProperties::Property;
+
+ // clang-format off
+ switch(Prop) {
+ case P::FailedISel: return "FailedISel";
+ case P::IsSSA: return "IsSSA";
+ case P::Legalized: return "Legalized";
+ case P::NoPHIs: return "NoPHIs";
+ case P::NoVRegs: return "NoVRegs";
+ case P::RegBankSelected: return "RegBankSelected";
+ case P::Selected: return "Selected";
+ case P::TracksLiveness: return "TracksLiveness";
+ case P::TiedOpsRewritten: return "TiedOpsRewritten";
+ case P::FailsVerification: return "FailsVerification";
+ case P::TracksDebugUserValues: return "TracksDebugUserValues";
+ }
+ // clang-format on
+ llvm_unreachable("Invalid machine function property");
+}
+
+void setUnsafeStackSize(const Function &F, MachineFrameInfo &FrameInfo) {
+ if (!F.hasFnAttribute(Attribute::SafeStack))
+ return;
+
+ auto *Existing =
+ dyn_cast_or_null<MDTuple>(F.getMetadata(LLVMContext::MD_annotation));
+
+ if (!Existing || Existing->getNumOperands() != 2)
+ return;
+
+ auto *MetadataName = "unsafe-stack-size";
+ if (auto &N = Existing->getOperand(0)) {
+ if (N.equalsStr(MetadataName)) {
+ if (auto &Op = Existing->getOperand(1)) {
+ auto Val = mdconst::extract<ConstantInt>(Op)->getZExtValue();
+ FrameInfo.setUnsafeStackSize(Val);
+ }
+ }
+ }
+}
+
+// Pin the vtable to this file.
+void MachineFunction::Delegate::anchor() {}
+
+void MachineFunctionProperties::print(raw_ostream &OS) const {
+ const char *Separator = "";
+ for (BitVector::size_type I = 0; I < Properties.size(); ++I) {
+ if (!Properties[I])
+ continue;
+ OS << Separator << getPropertyName(static_cast<Property>(I));
+ Separator = ", ";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out-of-line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() = default;
+
+void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->deleteMachineBasicBlock(MBB);
+}
+
+static inline Align getFnStackAlignment(const TargetSubtargetInfo *STI,
+ const Function &F) {
+ if (auto MA = F.getFnStackAlign())
+ return *MA;
+ return STI->getFrameLowering()->getStackAlign();
+}
+
+MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target,
+ const TargetSubtargetInfo &STI,
+ unsigned FunctionNum, MachineModuleInfo &mmi)
+ : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
+ FunctionNumber = FunctionNum;
+ init();
+}
+
+void MachineFunction::handleInsertion(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleInsertion(MI);
+}
+
+void MachineFunction::handleRemoval(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleRemoval(MI);
+}
+
+void MachineFunction::init() {
+ // Assume the function starts in SSA form with correct liveness.
+ Properties.set(MachineFunctionProperties::Property::IsSSA);
+ Properties.set(MachineFunctionProperties::Property::TracksLiveness);
+ if (STI->getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(this);
+ else
+ RegInfo = nullptr;
+
+ MFInfo = nullptr;
+
+ // We can realign the stack if the target supports it and the user hasn't
+ // explicitly asked us not to.
+ bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
+ !F.hasFnAttribute("no-realign-stack");
+ FrameInfo = new (Allocator) MachineFrameInfo(
+ getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
+ /*ForcedRealign=*/CanRealignSP &&
+ F.hasFnAttribute(Attribute::StackAlignment));
+
+ setUnsafeStackSize(F, *FrameInfo);
+
+ if (F.hasFnAttribute(Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(*F.getFnStackAlign());
+
+ ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
+ Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
+
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on F.
+ // FIXME: Use Function::hasOptSize().
+ if (!F.hasFnAttribute(Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ STI->getTargetLowering()->getPrefFunctionAlignment());
+
+ // -fsanitize=function and -fsanitize=kcfi instrument indirect function calls
+ // to load a type hash before the function label. Ensure functions are aligned
+ // by at least 4 to avoid unaligned access, which is especially important for
+ // -mno-unaligned-access.
+ if (F.hasMetadata(LLVMContext::MD_func_sanitize) ||
+ F.getMetadata(LLVMContext::MD_kcfi_type))
+ Alignment = std::max(Alignment, Align(4));
+
+ if (AlignAllFunctions)
+ Alignment = Align(1ULL << AlignAllFunctions);
+
+ JumpTableInfo = nullptr;
+
+ if (isFuncletEHPersonality(classifyEHPersonality(
+ F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) {
+ WinEHInfo = new (Allocator) WinEHFuncInfo();
+ }
+
+ if (isScopedEHPersonality(classifyEHPersonality(
+ F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) {
+ WasmEHInfo = new (Allocator) WasmEHFuncInfo();
+ }
+
+ assert(Target.isCompatibleDataLayout(getDataLayout()) &&
+ "Can't create a MachineFunction using a Module with a "
+ "Target-incompatible DataLayout attached\n");
+
+ PSVManager = std::make_unique<PseudoSourceValueManager>(getTarget());
+}
+
+void MachineFunction::initTargetMachineFunctionInfo(
+ const TargetSubtargetInfo &STI) {
+ assert(!MFInfo && "MachineFunctionInfo already set");
+ MFInfo = Target.createMachineFunctionInfo(Allocator, F, &STI);
+}
+
+MachineFunction::~MachineFunction() {
+ clear();
+}
+
+void MachineFunction::clear() {
+ Properties.reset();
+ // Don't call destructors on MachineInstr and MachineOperand. All of their
+ // memory comes from the BumpPtrAllocator which is about to be purged.
+ //
+ // Do call MachineBasicBlock destructors; they contain std::vectors.
+ for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+ I->Insts.clearAndLeakNodesUnsafely();
+ MBBNumbering.clear();
+
+ InstructionRecycler.clear(Allocator);
+ OperandRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ CodeViewAnnotations.clear();
+ VariableDbgInfos.clear();
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+
+ FrameInfo->~MachineFrameInfo();
+ Allocator.Deallocate(FrameInfo);
+
+ ConstantPool->~MachineConstantPool();
+ Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+
+ if (WinEHInfo) {
+ WinEHInfo->~WinEHFuncInfo();
+ Allocator.Deallocate(WinEHInfo);
+ }
+
+ if (WasmEHInfo) {
+ WasmEHInfo->~WasmEHFuncInfo();
+ Allocator.Deallocate(WasmEHInfo);
+ }
+}
+
+const DataLayout &MachineFunction::getDataLayout() const {
+ return F.getParent()->getDataLayout();
+}
+
+/// Get the JumpTableInfo for this function.
+/// If it does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator)
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const {
+ return F.getDenormalMode(FPType);
+}
+
+/// Should we be emitting segmented-stack code for this function?
+bool MachineFunction::shouldSplitStack() const {
+ return getFunction().hasFnAttribute("split-stack");
+}
+
+[[nodiscard]] unsigned
+MachineFunction::addFrameInst(const MCCFIInstruction &Inst) {
+ FrameInstructions.push_back(Inst);
+ return FrameInstructions.size() - 1;
+}
+
+/// This discards all of the MachineBasicBlock numbers and recomputes them.
+/// This guarantees that the MBB numbers are sequential, dense, and match the
+/// ordering of the blocks within the function. If a specific MachineBasicBlock
+/// is specified, only that block and those after it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == nullptr)
+ MBBI = begin();
+ else
+ MBBI = MBB->getIterator();
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = std::prev(MBBI)->getNumber() + 1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = nullptr;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = &*MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
+
+/// This method iterates over the basic blocks and assigns their IsBeginSection
+/// and IsEndSection fields. This must be called after MBB layout is finalized
+/// and the SectionID's are assigned to MBBs.
+void MachineFunction::assignBeginEndSections() {
+ front().setIsBeginSection();
+ auto CurrentSectionID = front().getSectionID();
+ for (auto MBBI = std::next(begin()), E = end(); MBBI != E; ++MBBI) {
+ if (MBBI->getSectionID() == CurrentSectionID)
+ continue;
+ MBBI->setIsBeginSection();
+ std::prev(MBBI)->setIsEndSection();
+ CurrentSectionID = MBBI->getSectionID();
+ }
+ back().setIsEndSection();
+}
+
+/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
+MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ DebugLoc DL,
+ bool NoImplicit) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, MCID, std::move(DL), NoImplicit);
+}
+
+/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
+/// identical in all ways except the instruction has no parent, prev, or next.
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+MachineInstr &MachineFunction::cloneMachineInstrBundle(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const MachineInstr &Orig) {
+ MachineInstr *FirstClone = nullptr;
+ MachineBasicBlock::const_instr_iterator I = Orig.getIterator();
+ while (true) {
+ MachineInstr *Cloned = CloneMachineInstr(&*I);
+ MBB.insert(InsertBefore, Cloned);
+ if (FirstClone == nullptr) {
+ FirstClone = Cloned;
+ } else {
+ Cloned->bundleWithPred();
+ }
+
+ if (!I->isBundledWithSucc())
+ break;
+ ++I;
+ }
+ // Copy over call site info to the cloned instruction if needed. If Orig is in
+ // a bundle, copyCallSiteInfo takes care of finding the call instruction in
+ // the bundle.
+ if (Orig.shouldUpdateCallSiteInfo())
+ copyCallSiteInfo(&Orig, FirstClone);
+ return *FirstClone;
+}
+
+/// Delete the given MachineInstr.
+///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
+void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
+ // Verify that the call site info is in a valid state. This assertion may
+ // fire while implementing call site info support for a new architecture; if
+ // it does, the backtrace will show where a call to updateCallSiteInfo()
+ // needs to be inserted.
+ assert((!MI->isCandidateForCallSiteEntry() || !CallSitesInfo.contains(MI)) &&
+ "Call site info was not updated!");
+ // Strip it for parts. The operand array and the MI object itself are
+ // independently recyclable.
+ if (MI->Operands)
+ deallocateOperandArray(MI->CapOperands, MI->Operands);
+ // Don't call ~MachineInstr() which must be trivial anyway because
+ // ~MachineFunction drops whole lists of MachineInstrs without calling their
+ // destructors.
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// Allocate a new MachineBasicBlock. Use this instead of
+/// `new MachineBasicBlock'.
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ MachineBasicBlock *MBB =
+ new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+ // Set BBID for `-basic-block-sections=labels` and
+ // `-basic-block-sections=list` to allow robust mapping of profiles to basic
+ // blocks.
+ if (Target.getBBSectionsType() == BasicBlockSection::Labels ||
+ Target.getBBSectionsType() == BasicBlockSection::List)
+ MBB->setBBID(NextBBID++);
+ return MBB;
+}
+
+/// Delete the given MachineBasicBlock.
+void MachineFunction::deleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ // Clean up any references to MBB in jump tables before deleting it.
+ if (JumpTableInfo)
+ JumpTableInfo->RemoveMBBFromJumpTables(MBB);
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
+ Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ SyncScope::ID SSID, AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges,
+ SSID, Ordering, FailureOrdering);
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy,
+ Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ SyncScope::ID SSID, AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, f, MemTy, base_alignment, AAInfo, Ranges, SSID,
+ Ordering, FailureOrdering);
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, LLT Ty) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, MMO->getFlags(), Ty, MMO->getBaseAlign(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, LLT Ty) {
+ const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+ // If there is no pointer value, the offset isn't tracked so we need to adjust
+ // the base alignment.
+ Align Alignment = PtrInfo.V.isNull()
+ ? commonAlignment(MMO->getBaseAlign(), Offset)
+ : MMO->getBaseAlign();
+
+ // Do not preserve ranges, since we don't necessarily know what the high bits
+ // are anymore.
+ return new (Allocator) MachineMemOperand(
+ PtrInfo.getWithOffset(Offset), MMO->getFlags(), Ty, Alignment,
+ MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ const AAMDNodes &AAInfo) {
+ MachinePointerInfo MPI = MMO->getValue() ?
+ MachinePointerInfo(MMO->getValue(), MMO->getOffset()) :
+ MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset());
+
+ return new (Allocator) MachineMemOperand(
+ MPI, MMO->getFlags(), MMO->getSize(), MMO->getBaseAlign(), AAInfo,
+ MMO->getRanges(), MMO->getSyncScopeID(), MMO->getSuccessOrdering(),
+ MMO->getFailureOrdering());
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ MachineMemOperand::Flags Flags) {
+ return new (Allocator) MachineMemOperand(
+ MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlign(),
+ MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
+}
+
+MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo(
+ ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections,
+ uint32_t CFIType) {
+ return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
+ PostInstrSymbol, HeapAllocMarker,
+ PCSections, CFIType);
+}
+
+const char *MachineFunction::createExternalSymbolName(StringRef Name) {
+ char *Dest = Allocator.Allocate<char>(Name.size() + 1);
+ llvm::copy(Name, Dest);
+ Dest[Name.size()] = 0;
+ return Dest;
+}
+
+uint32_t *MachineFunction::allocateRegMask() {
+ unsigned NumRegs = getSubtarget().getRegisterInfo()->getNumRegs();
+ unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
+ uint32_t *Mask = Allocator.Allocate<uint32_t>(Size);
+ memset(Mask, 0, Size * sizeof(Mask[0]));
+ return Mask;
+}
+
+ArrayRef<int> MachineFunction::allocateShuffleMask(ArrayRef<int> Mask) {
+ int* AllocMask = Allocator.Allocate<int>(Mask.size());
+ copy(Mask, AllocMask);
+ return {AllocMask, Mask.size()};
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineFunction::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineFunction::getName() const {
+ return getFunction().getName();
+}
+
+void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
+ OS << "# Machine code for function " << getName() << ": ";
+ getProperties().print(OS);
+ OS << '\n';
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ OS << printReg(I->first, TRI);
+ if (I->second)
+ OS << " in " << printReg(I->second, TRI);
+ if (std::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+
+ ModuleSlotTracker MST(getFunction().getParent());
+ MST.incorporateFunction(getFunction());
+ for (const auto &BB : *this) {
+ OS << '\n';
+ // If we print the whole function, print it at its most verbose level.
+ BB.print(OS, MST, Indexes, /*IsStandalone=*/true);
+ }
+
+ OS << "\n# End machine code for function " << getName() << ".\n\n";
+}
+
+/// True if this function needs frame moves for debug or exceptions.
+bool MachineFunction::needsFrameMoves() const {
+ return getMMI().hasDebugInfo() ||
+ getTarget().Options.ForceDwarfFrameSection ||
+ F.needsUnwindTableEntry();
+}
+
+namespace llvm {
+
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return ("CFG for '" + F->getName() + "' function").str();
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << printMBBReference(*Node);
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+
+} // end namespace llvm
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName());
+#else
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName(), true);
+#else
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+Register MachineFunction::addLiveIn(MCRegister PReg,
+ const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = getRegInfo();
+ Register VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg);
+ (void)VRegRC;
+ // A physical register can be added several times.
+ // Between two calls, the register class of the related virtual register
+ // may have been constrained to match some operation constraints.
+ // In that case, check that the current register class includes the
+ // physical register and is a sub class of the specified RC.
+ assert((VRegRC == RC || (VRegRC->contains(PReg) &&
+ RC->hasSubClassEq(VRegRC))) &&
+ "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+/// Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
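+/// For example, with the 'L' prefix this yields names like "LJTI3_0" for jump
+/// table 0 of function number 3.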
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ const DataLayout &DL = getDataLayout();
+ assert(JumpTableInfo && "No jump tables");
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+
+ StringRef Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
+ : DL.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.getOrCreateSymbol(Name);
+}
+
+/// Return a function-local symbol to represent the PIC base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const DataLayout &DL = getDataLayout();
+ return Ctx.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "$pb");
+}
+
+/// \name Exception Handling
+/// \{
+
+LandingPadInfo &
+MachineFunction::getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+void MachineFunction::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Ctx.createTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+
+ const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) {
+ // If there's no typeid list specified, then "cleanup" is implicit.
+ // Otherwise, id 0 is reserved for the cleanup action.
+ if (LPI->isCleanup() && LPI->getNumClauses() != 0)
+ LP.TypeIds.push_back(0);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100%
+ // correct, but we need to do it this way because of how the DWARF EH
+ // emitter processes the clauses.
+ for (unsigned I = LPI->getNumClauses(); I != 0; --I) {
+ Value *Val = LPI->getClause(I - 1);
+ if (LPI->isCatch(I - 1)) {
+ LP.TypeIds.push_back(
+ getTypeIDFor(dyn_cast<GlobalValue>(Val->stripPointerCasts())));
+ } else {
+ // Add filters in a list.
+ auto *CVal = cast<Constant>(Val);
+ SmallVector<unsigned, 4> FilterList;
+ for (const Use &U : CVal->operands())
+ FilterList.push_back(
+ getTypeIDFor(cast<GlobalValue>(U->stripPointerCasts())));
+
+ LP.TypeIds.push_back(getFilterIDFor(FilterList));
+ }
+ }
+
+ } else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) {
+ for (unsigned I = CPI->arg_size(); I != 0; --I) {
+ auto *TypeInfo =
+ dyn_cast<GlobalValue>(CPI->getArgOperand(I - 1)->stripPointerCasts());
+ LP.TypeIds.push_back(getTypeIDFor(TypeInfo));
+ }
+
+ } else {
+ assert(isa<CleanupPadInst>(FirstI) && "Invalid landingpad!");
+ }
+
+ return LandingPadLabel;
+}
+
+void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
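+/// Return the 1-based type id for the given typeinfo, adding it to the list if
+/// it is not already present; id 0 is reserved for the cleanup action.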
+unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+int MachineFunction::getFilterIDFor(ArrayRef<unsigned> TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
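+ // For example, if FilterIds already holds {7, 5, 0} (the filter {7, 5} plus
+ // its 0 terminator), a new filter {5} matches the existing tail starting at
+ // index 1 and gets the existing ID -(1 + 1) = -2 instead of being appended.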
+ for (unsigned i : FilterEnds) {
+ unsigned j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ llvm::append_range(FilterIds, TyIds);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+MachineFunction::CallSiteInfoMap::iterator
+MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
+ assert(MI->isCandidateForCallSiteEntry() &&
+ "Call site info refers only to call (MI) candidates");
+
+ if (!Target.Options.EmitCallSiteInfo)
+ return CallSitesInfo.end();
+ return CallSitesInfo.find(MI);
+}
+
+/// Return the call machine instruction or find a call within bundle.
+static const MachineInstr *getCallInstr(const MachineInstr *MI) {
+ if (!MI->isBundle())
+ return MI;
+
+ for (const auto &BMI : make_range(getBundleStart(MI->getIterator()),
+ getBundleEnd(MI->getIterator())))
+ if (BMI.isCandidateForCallSiteEntry())
+ return &BMI;
+
+ llvm_unreachable("Unexpected bundle without a call site candidate");
+}
+
+void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) {
+ assert(MI->shouldUpdateCallSiteInfo() &&
+ "Call site info refers only to call (MI) candidates or "
+ "candidates inside bundles");
+
+ const MachineInstr *CallMI = getCallInstr(MI);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(CallMI);
+ if (CSIt == CallSitesInfo.end())
+ return;
+ CallSitesInfo.erase(CSIt);
+}
+
+void MachineFunction::copyCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(Old->shouldUpdateCallSiteInfo() &&
+ "Call site info refers only to call (MI) candidates or "
+ "candidates inside bundles");
+
+ if (!New->isCandidateForCallSiteEntry())
+ return eraseCallSiteInfo(Old);
+
+ const MachineInstr *OldCallMI = getCallInstr(Old);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(OldCallMI);
+ if (CSIt == CallSitesInfo.end())
+ return;
+
+ CallSiteInfo CSInfo = CSIt->second;
+ CallSitesInfo[New] = CSInfo;
+}
+
+void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(Old->shouldUpdateCallSiteInfo() &&
+ "Call site info refers only to call (MI) candidates or "
+ "candidates inside bundles");
+
+ if (!New->isCandidateForCallSiteEntry())
+ return eraseCallSiteInfo(Old);
+
+ const MachineInstr *OldCallMI = getCallInstr(Old);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(OldCallMI);
+ if (CSIt == CallSitesInfo.end())
+ return;
+
+ CallSiteInfo CSInfo = std::move(CSIt->second);
+ CallSitesInfo.erase(CSIt);
+ CallSitesInfo[New] = CSInfo;
+}
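
As an aside (not part of the diff): the three helpers above differ only in what happens to the old map entry: erase drops it, copy leaves the old key mapped alongside the new one, and move rekeys the entry. A toy sketch of those semantics over std::map, with hypothetical Instr/Payload stand-ins for MachineInstr* and CallSiteInfo:

// Toy sketch of erase/copy/move call-site-info semantics over a std::map.
// "Instr" and "Payload" are stand-ins; the real map is keyed by MachineInstr*.
#include <cassert>
#include <map>
#include <string>

using Instr = int;
using Payload = std::string;
static std::map<Instr, Payload> CallSitesInfo;

static void copyInfo(Instr Old, Instr New) {
  auto It = CallSitesInfo.find(Old);
  if (It != CallSitesInfo.end())
    CallSitesInfo[New] = It->second;      // old key stays mapped
}

static void moveInfo(Instr Old, Instr New) {
  auto It = CallSitesInfo.find(Old);
  if (It == CallSitesInfo.end())
    return;
  Payload P = std::move(It->second);
  CallSitesInfo.erase(It);                // old key is dropped
  CallSitesInfo[New] = std::move(P);
}

int main() {
  CallSitesInfo[1] = "args-in-regs";
  copyInfo(1, 2);
  assert(CallSitesInfo.count(1) && CallSitesInfo.count(2));
  moveInfo(2, 3);
  assert(!CallSitesInfo.count(2) && CallSitesInfo[3] == "args-in-regs");
}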
+
+void MachineFunction::setDebugInstrNumberingCount(unsigned Num) {
+ DebugInstrNumberingCount = Num;
+}
+
+void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
+ DebugInstrOperandPair B,
+ unsigned Subreg) {
+ // Catch any accidental self-loops.
+ assert(A.first != B.first);
+ // Don't allow any substitutions _from_ the memory operand number.
+ assert(A.second != DebugOperandMemNumber);
+
+ DebugValueSubstitutions.push_back({A, B, Subreg});
+}
+
+void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
+ MachineInstr &New,
+ unsigned MaxOperand) {
+ // If the Old instruction wasn't tracked at all, there is no work to do.
+ unsigned OldInstrNum = Old.peekDebugInstrNum();
+ if (!OldInstrNum)
+ return;
+
+  // Iterate over all operands looking for defs to create substitutions for.
+  // Avoid creating new instr numbers unless we actually create a substitution;
+  // unnecessary numbers have no functional effect, but they risk confusing
+  // anyone reading the MIR output.
+  // Examine all the operands, or the first N specified by the caller.
+ MaxOperand = std::min(MaxOperand, Old.getNumOperands());
+ for (unsigned int I = 0; I < MaxOperand; ++I) {
+ const auto &OldMO = Old.getOperand(I);
+ auto &NewMO = New.getOperand(I);
+ (void)NewMO;
+
+ if (!OldMO.isReg() || !OldMO.isDef())
+ continue;
+ assert(NewMO.isDef());
+
+ unsigned NewInstrNum = New.getDebugInstrNum();
+ makeDebugValueSubstitution(std::make_pair(OldInstrNum, I),
+ std::make_pair(NewInstrNum, I));
+ }
+}
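
As an illustration (not part of the diff): each substitution simply records that debug users of (old instruction number, operand index) should now consult (new instruction number, operand index). A standalone sketch of building that table for the def operands of a replaced instruction; the types here are toy stand-ins, and the real entries also carry an optional subregister qualifier:

// Standalone sketch of the (instr number, operand index) substitution table.
// Toy types only; real entries also carry an optional subregister qualifier.
#include <cassert>
#include <utility>
#include <vector>

using InstrOperandPair = std::pair<unsigned, unsigned>;

struct Subst { InstrOperandPair From, To; unsigned Subreg; };
static std::vector<Subst> DebugValueSubstitutions;

// When instruction #OldNum is replaced by #NewNum, redirect every def
// operand index that debug users might reference.
static void substituteDefs(unsigned OldNum, unsigned NewNum,
                           const std::vector<unsigned> &DefOperandIdxs) {
  for (unsigned Idx : DefOperandIdxs)
    DebugValueSubstitutions.push_back({{OldNum, Idx}, {NewNum, Idx}, 0});
}

int main() {
  substituteDefs(/*OldNum=*/5, /*NewNum=*/9, /*DefOperandIdxs=*/{0, 2});
  assert(DebugValueSubstitutions.size() == 2);
  assert(DebugValueSubstitutions[0].From == InstrOperandPair(5, 0));
  assert(DebugValueSubstitutions[0].To == InstrOperandPair(9, 0));
}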
+
+auto MachineFunction::salvageCopySSA(
+ MachineInstr &MI, DenseMap<Register, DebugInstrOperandPair> &DbgPHICache)
+ -> DebugInstrOperandPair {
+ const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+
+ // Check whether this copy-like instruction has already been salvaged into
+ // an operand pair.
+ Register Dest;
+ if (auto CopyDstSrc = TII.isCopyInstr(MI)) {
+ Dest = CopyDstSrc->Destination->getReg();
+ } else {
+ assert(MI.isSubregToReg());
+ Dest = MI.getOperand(0).getReg();
+ }
+
+ auto CacheIt = DbgPHICache.find(Dest);
+ if (CacheIt != DbgPHICache.end())
+ return CacheIt->second;
+
+ // Calculate the instruction number to use, or install a DBG_PHI.
+ auto OperandPair = salvageCopySSAImpl(MI);
+ DbgPHICache.insert({Dest, OperandPair});
+ return OperandPair;
+}
+
+auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
+ -> DebugInstrOperandPair {
+ MachineRegisterInfo &MRI = getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+
+ // Chase the value read by a copy-like instruction back to the instruction
+ // that ultimately _defines_ that value. This may pass:
+ // * Through multiple intermediate copies, including subregister moves /
+ // copies,
+ // * Copies from physical registers that must then be traced back to the
+ // defining instruction,
+ // * Or, physical registers may be live-in to (only) the entry block, which
+ // requires a DBG_PHI to be created.
+ // We can pursue this problem in that order: trace back through copies,
+ // optionally through a physical register, to a defining instruction. We
+ // should never move from physreg to vreg. As we're still in SSA form, no need
+ // to worry about partial definitions of registers.
+
+ // Helper lambda to interpret a copy-like instruction. Takes instruction,
+ // returns the register read and any subregister identifying which part is
+ // read.
+ auto GetRegAndSubreg =
+ [&](const MachineInstr &Cpy) -> std::pair<Register, unsigned> {
+ Register NewReg, OldReg;
+ unsigned SubReg;
+ if (Cpy.isCopy()) {
+ OldReg = Cpy.getOperand(0).getReg();
+ NewReg = Cpy.getOperand(1).getReg();
+ SubReg = Cpy.getOperand(1).getSubReg();
+ } else if (Cpy.isSubregToReg()) {
+ OldReg = Cpy.getOperand(0).getReg();
+ NewReg = Cpy.getOperand(2).getReg();
+ SubReg = Cpy.getOperand(3).getImm();
+ } else {
+ auto CopyDetails = *TII.isCopyInstr(Cpy);
+ const MachineOperand &Src = *CopyDetails.Source;
+ const MachineOperand &Dest = *CopyDetails.Destination;
+ OldReg = Dest.getReg();
+ NewReg = Src.getReg();
+ SubReg = Src.getSubReg();
+ }
+
+ return {NewReg, SubReg};
+ };
+
+ // First seek either the defining instruction, or a copy from a physreg.
+ // During search, the current state is the current copy instruction, and which
+ // register we've read. Accumulate qualifying subregisters into SubregsSeen;
+ // deal with those later.
+ auto State = GetRegAndSubreg(MI);
+ auto CurInst = MI.getIterator();
+ SmallVector<unsigned, 4> SubregsSeen;
+ while (true) {
+ // If we've found a copy from a physreg, first portion of search is over.
+ if (!State.first.isVirtual())
+ break;
+
+ // Record any subregister qualifier.
+ if (State.second)
+ SubregsSeen.push_back(State.second);
+
+ assert(MRI.hasOneDef(State.first));
+ MachineInstr &Inst = *MRI.def_begin(State.first)->getParent();
+ CurInst = Inst.getIterator();
+
+ // Any non-copy instruction is the defining instruction we're seeking.
+ if (!Inst.isCopyLike() && !TII.isCopyInstr(Inst))
+ break;
+ State = GetRegAndSubreg(Inst);
+  }
+
+ // Helper lambda to apply additional subregister substitutions to a known
+ // instruction/operand pair. Adds new (fake) substitutions so that we can
+ // record the subregister. FIXME: this isn't very space efficient if multiple
+ // values are tracked back through the same copies; cache something later.
+ auto ApplySubregisters =
+ [&](DebugInstrOperandPair P) -> DebugInstrOperandPair {
+ for (unsigned Subreg : reverse(SubregsSeen)) {
+ // Fetch a new instruction number, not attached to an actual instruction.
+ unsigned NewInstrNumber = getNewDebugInstrNum();
+ // Add a substitution from the "new" number to the known one, with a
+ // qualifying subreg.
+ makeDebugValueSubstitution({NewInstrNumber, 0}, P, Subreg);
+ // Return the new number; to find the underlying value, consumers need to
+ // deal with the qualifying subreg.
+ P = {NewInstrNumber, 0};
+ }
+ return P;
+ };
+
+ // If we managed to find the defining instruction after COPYs, return an
+ // instruction / operand pair after adding subregister qualifiers.
+ if (State.first.isVirtual()) {
+ // Virtual register def -- we can just look up where this happens.
+ MachineInstr *Inst = MRI.def_begin(State.first)->getParent();
+ for (auto &MO : Inst->all_defs()) {
+ if (MO.getReg() != State.first)
+ continue;
+ return ApplySubregisters({Inst->getDebugInstrNum(), MO.getOperandNo()});
+ }
+
+ llvm_unreachable("Vreg def with no corresponding operand?");
+ }
+
+ // Our search ended in a copy from a physreg: walk back up the function
+ // looking for whatever defines the physreg.
+ assert(CurInst->isCopyLike() || TII.isCopyInstr(*CurInst));
+ State = GetRegAndSubreg(*CurInst);
+ Register RegToSeek = State.first;
+
+ auto RMII = CurInst->getReverseIterator();
+ auto PrevInstrs = make_range(RMII, CurInst->getParent()->instr_rend());
+ for (auto &ToExamine : PrevInstrs) {
+ for (auto &MO : ToExamine.all_defs()) {
+ // Test for operand that defines something aliasing RegToSeek.
+ if (!TRI.regsOverlap(RegToSeek, MO.getReg()))
+ continue;
+
+ return ApplySubregisters(
+ {ToExamine.getDebugInstrNum(), MO.getOperandNo()});
+ }
+ }
+
+ MachineBasicBlock &InsertBB = *CurInst->getParent();
+
+ // We reached the start of the block before finding a defining instruction.
+ // There are numerous scenarios where this can happen:
+ // * Constant physical registers,
+  // * Several intrinsics that allow LLVM-IR to read arbitrary registers,
+ // * Arguments in the entry block,
+ // * Exception handling landing pads.
+ // Validating all of them is too difficult, so just insert a DBG_PHI reading
+ // the variable value at this position, rather than checking it makes sense.
+
+ // Create DBG_PHI for specified physreg.
+ auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
+ TII.get(TargetOpcode::DBG_PHI));
+ Builder.addReg(State.first);
+ unsigned NewNum = getNewDebugInstrNum();
+ Builder.addImm(NewNum);
+ return ApplySubregisters({NewNum, 0u});
+}
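
As an illustration of the walk above (not part of the diff): follow the copy chain through virtual registers, stop at the first non-copy def, and only then worry about physregs and the DBG_PHI fallback. A heavily simplified standalone sketch of the vreg-chasing portion, assuming every vreg has exactly one def and modeling each copy as a (defining instruction, source vreg) pair:

// Simplified sketch of chasing a value through SSA copies.
// Toy model: each vreg has one defining "instruction"; copies record a source.
#include <cassert>
#include <map>

struct ToyDef {
  unsigned InstrNum;   // debug instruction number of the defining instr
  bool IsCopy;
  unsigned CopySrc;    // valid only when IsCopy
};

// Returns the instruction number that ultimately defines Reg.
static unsigned chaseCopies(const std::map<unsigned, ToyDef> &Defs,
                            unsigned Reg) {
  for (;;) {
    const ToyDef &D = Defs.at(Reg);   // SSA: exactly one def per vreg
    if (!D.IsCopy)
      return D.InstrNum;              // found the real defining instruction
    Reg = D.CopySrc;                  // keep walking up the copy chain
  }
}

int main() {
  std::map<unsigned, ToyDef> Defs = {
      {/*vreg*/ 3, {/*instr*/ 10, /*IsCopy=*/false, 0}}, // %3 = real def
      {4, {11, true, 3}},                                // %4 = COPY %3
      {5, {12, true, 4}},                                // %5 = COPY %4
  };
  assert(chaseCopies(Defs, 5) == 10);
}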
+
+void MachineFunction::finalizeDebugInstrRefs() {
+ auto *TII = getSubtarget().getInstrInfo();
+
+ auto MakeUndefDbgValue = [&](MachineInstr &MI) {
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE_LIST);
+ MI.setDesc(RefII);
+ MI.setDebugValueUndef();
+ };
+
+ DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs;
+ for (auto &MBB : *this) {
+ for (auto &MI : MBB) {
+ if (!MI.isDebugRef())
+ continue;
+
+ bool IsValidRef = true;
+
+ for (MachineOperand &MO : MI.debug_operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+
+ // Some vregs can be deleted as redundant in the meantime. Mark those
+ // as DBG_VALUE $noreg. Additionally, some normal instructions are
+ // quickly deleted, leaving dangling references to vregs with no def.
+ if (Reg == 0 || !RegInfo->hasOneDef(Reg)) {
+ IsValidRef = false;
+ break;
+ }
+
+ assert(Reg.isVirtual());
+ MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
+
+ // If we've found a copy-like instruction, follow it back to the
+ // instruction that defines the source value, see salvageCopySSA docs
+ // for why this is important.
+ if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
+ auto Result = salvageCopySSA(DefMI, ArgDbgPHIs);
+ MO.ChangeToDbgInstrRef(Result.first, Result.second);
+ } else {
+ // Otherwise, identify the operand number that the VReg refers to.
+ unsigned OperandIdx = 0;
+ for (const auto &DefMO : DefMI.operands()) {
+ if (DefMO.isReg() && DefMO.isDef() && DefMO.getReg() == Reg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI.getNumOperands());
+
+ // Morph this instr ref to point at the given instruction and operand.
+ unsigned ID = DefMI.getDebugInstrNum();
+ MO.ChangeToDbgInstrRef(ID, OperandIdx);
+ }
+ }
+
+ if (!IsValidRef)
+ MakeUndefDbgValue(MI);
+ }
+ }
+}
+
+bool MachineFunction::shouldUseDebugInstrRef() const {
+ // Disable instr-ref at -O0: it's very slow (in compile time). We can still
+ // have optimized code inlined into this unoptimized code, however with
+ // fewer and less aggressive optimizations happening, coverage and accuracy
+ // should not suffer.
+ if (getTarget().getOptLevel() == CodeGenOpt::None)
+ return false;
+
+ // Don't use instr-ref if this function is marked optnone.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ if (llvm::debuginfoShouldUseDebugInstrRef(getTarget().getTargetTriple()))
+ return true;
+
+ return false;
+}
+
+bool MachineFunction::useDebugInstrRef() const {
+ return UseDebugInstrRef;
+}
+
+void MachineFunction::setUseDebugInstrRef(bool Use) {
+ UseDebugInstrRef = Use;
+}
+
+// Use one million as a high / reserved number.
+const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
+
+/// \}
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ case MachineJumpTableInfo::EK_Inline:
+ return 0;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment(0).value();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64).value();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32).value();
+ case MachineJumpTableInfo::EK_Inline:
+ return 1;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// Create a new jump table entry in the jump table info.
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+/// If Old is the target of any jump tables, update the jump tables to branch
+/// to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+    MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// If MBB is present in any jump tables, remove it.
+bool MachineJumpTableInfo::RemoveMBBFromJumpTables(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ for (MachineJumpTableEntry &JTE : JumpTables) {
+ auto removeBeginItr = std::remove(JTE.MBBs.begin(), JTE.MBBs.end(), MBB);
+ MadeChange |= (removeBeginItr != JTE.MBBs.end());
+ JTE.MBBs.erase(removeBeginItr, JTE.MBBs.end());
+ }
+ return MadeChange;
+}
+
+/// If Old is a target of the jump tables, update the jump table to branch to
+/// New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (MachineBasicBlock *&MBB : JTE.MBBs)
+ if (MBB == Old) {
+ MBB = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << printJumpTableEntryReference(i) << ':';
+ for (const MachineBasicBlock *MBB : JumpTables[i].MBBs)
+ OS << ' ' << printMBBReference(*MBB);
+    if (i != e - 1)
+ OS << '\n';
+ }
+
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
+
+Printable llvm::printJumpTableEntryReference(unsigned Idx) {
+ return Printable([Idx](raw_ostream &OS) { OS << "%jump-table." << Idx; });
+}
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+void MachineConstantPoolValue::anchor() {}
+
+unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const {
+ return DL.getTypeAllocSize(Ty);
+}
+
+unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getSizeInBytes(DL);
+ return DL.getTypeAllocSize(Val.ConstVal->getType());
+}
+
+bool MachineConstantPoolEntry::needsRelocation() const {
+ if (isMachineConstantPoolEntry())
+ return true;
+ return Val.ConstVal->needsDynamicRelocation();
+}
+
+SectionKind
+MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
+ if (needsRelocation())
+ return SectionKind::getReadOnlyWithRel();
+ switch (getSizeInBytes(*DL)) {
+ case 4:
+ return SectionKind::getMergeableConst4();
+ case 8:
+ return SectionKind::getMergeableConst8();
+ case 16:
+ return SectionKind::getMergeableConst16();
+ case 32:
+ return SectionKind::getMergeableConst32();
+ default:
+ return SectionKind::getReadOnly();
+ }
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ // A constant may be a member of both Constants and MachineCPVsSharingEntries,
+ // so keep track of which we've deleted to avoid double deletions.
+ DenseSet<MachineConstantPoolValue*> Deleted;
+ for (const MachineConstantPoolEntry &C : Constants)
+ if (C.isMachineConstantPoolEntry()) {
+ Deleted.insert(C.Val.MachineCPVal);
+ delete C.Val.MachineCPVal;
+ }
+ for (MachineConstantPoolValue *CPV : MachineCPVsSharingEntries) {
+ if (Deleted.count(CPV) == 0)
+ delete CPV;
+ }
+}
+
+/// Test whether the two given constants can be allocated to the same constant
+/// pool entry, i.e. the entry referenced by \p A.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+ const DataLayout &DL) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // If they have the same type but weren't the same constant, quickly
+ // reject them.
+ if (A->getType() == B->getType()) return false;
+
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
+ // For now, only support constants with the same size.
+ uint64_t StoreSize = DL.getTypeStoreSize(A->getType());
+ if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128)
+ return false;
+
+ bool ContainsUndefOrPoisonA = A->containsUndefOrPoisonElement();
+
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both instructions to an integer. If we
+ // get two identical ConstantInt's, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // DataLayout.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldCastOperand(Instruction::PtrToInt,
+ const_cast<Constant *>(A), IntTy, DL);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(A),
+ IntTy, DL);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldCastOperand(Instruction::PtrToInt,
+ const_cast<Constant *>(B), IntTy, DL);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(B),
+ IntTy, DL);
+
+ if (A != B)
+ return false;
+
+ // Constants only safely match if A doesn't contain undef/poison.
+  // As we'll be reusing A, it doesn't matter if B contains undef/poison.
+ // TODO: Handle cases where A and B have the same undef/poison elements.
+ // TODO: Merge A and B with mismatching undef/poison elements.
+ return !ContainsUndefOrPoisonA;
+}
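
As an illustration (not part of the diff): the net effect of the folded-to-integer comparison is that two constants of different types can share one pool slot when they lower to the same bytes, e.g. float 1.0 and i32 0x3F800000. A rough standalone stand-in for that "same store size, same bits" test, comparing host representations with memcmp rather than constant-folding through DataLayout:

// Rough standalone stand-in for the "same size, same bits" sharing test.
// The real code uses constant folding through DataLayout; this sketch just
// compares the in-memory representation of two host values.
#include <cassert>
#include <cstdint>
#include <cstring>

template <typename A, typename B>
static bool canShareBits(const A &X, const B &Y) {
  if (sizeof(A) != sizeof(B))
    return false;                       // only identical store sizes can share
  return std::memcmp(&X, &Y, sizeof(A)) == 0;
}

int main() {
  float F = 1.0f;
  std::uint32_t I = 0x3F800000u;        // IEEE-754 encoding of 1.0f
  assert(canShareBits(F, I));           // same 4 bytes -> shareable
  std::uint32_t J = 0x3F800001u;
  assert(!canShareBits(F, J));          // any differing bit -> separate entries
}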
+
+/// Create a new entry in the constant pool or return an existing one.
+/// The caller must specify the minimum required alignment for the object.
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+ Align Alignment) {
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) {
+ if (Constants[i].getAlign() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ Align Alignment) {
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
+ return (unsigned)Idx;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false);
+ OS << ", align=" << Constants[i].getAlign().value();
+ OS << "\n";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Template specialization for MachineFunction implementation of
+// ProfileSummaryInfo::getEntryCount().
+//===----------------------------------------------------------------------===//
+template <>
+std::optional<Function::ProfileCount>
+ProfileSummaryInfo::getEntryCount<llvm::MachineFunction>(
+ const llvm::MachineFunction *F) const {
+ return F->getFunction().getEntryCount();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineConstantPool::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 000000000000..3a1e1720be9c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,188 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PrintPasses.h"
+
+using namespace llvm;
+using namespace ore;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
+
+ MachineFunctionProperties &MFProps = MF.getProperties();
+
+#ifndef NDEBUG
+ if (!MFProps.verifyRequiredProperties(RequiredProperties)) {
+ errs() << "MachineFunctionProperties required by " << getPassName()
+ << " pass are not met by function " << F.getName() << ".\n"
+ << "Required properties: ";
+ RequiredProperties.print(errs());
+ errs() << "\nCurrent properties: ";
+ MFProps.print(errs());
+ errs() << "\n";
+ llvm_unreachable("MachineFunctionProperties check failed");
+ }
+#endif
+  // MI counts of the function before and after the pass, used for size
+  // remarks.
+  unsigned CountBefore, CountAfter;
+
+ // Check if the user asked for size remarks.
+ bool ShouldEmitSizeRemarks =
+ F.getParent()->shouldEmitInstrCountChangedRemark();
+
+ // If we want size remarks, collect the number of MachineInstrs in our
+ // MachineFunction before the pass runs.
+ if (ShouldEmitSizeRemarks)
+ CountBefore = MF.getInstructionCount();
+
+ // For --print-changed, if the function name is a candidate, save the
+ // serialized MF to be compared later.
+ SmallString<0> BeforeStr, AfterStr;
+ StringRef PassID;
+ if (PrintChanged != ChangePrinter::None) {
+ if (const PassInfo *PI = Pass::lookupPassInfo(getPassID()))
+ PassID = PI->getPassArgument();
+ }
+ const bool IsInterestingPass = isPassInPrintList(PassID);
+ const bool ShouldPrintChanged = PrintChanged != ChangePrinter::None &&
+ IsInterestingPass &&
+ isFunctionInPrintList(MF.getName());
+ if (ShouldPrintChanged) {
+ raw_svector_ostream OS(BeforeStr);
+ MF.print(OS);
+ }
+
+ bool RV = runOnMachineFunction(MF);
+
+ if (ShouldEmitSizeRemarks) {
+ // We wanted size remarks. Check if there was a change to the number of
+ // MachineInstrs in the module. Emit a remark if there was a change.
+ CountAfter = MF.getInstructionCount();
+ if (CountBefore != CountAfter) {
+ MachineOptimizationRemarkEmitter MORE(MF, nullptr);
+ MORE.emit([&]() {
+ int64_t Delta = static_cast<int64_t>(CountAfter) -
+ static_cast<int64_t>(CountBefore);
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ MF.getFunction().getSubprogram(),
+ &MF.front());
+ R << NV("Pass", getPassName())
+ << ": Function: " << NV("Function", F.getName()) << ": "
+ << "MI Instruction count changed from "
+ << NV("MIInstrsBefore", CountBefore) << " to "
+ << NV("MIInstrsAfter", CountAfter)
+ << "; Delta: " << NV("Delta", Delta);
+ return R;
+ });
+ }
+ }
+
+ MFProps.set(SetProperties);
+ MFProps.reset(ClearedProperties);
+
+ // For --print-changed, print if the serialized MF has changed. Modes other
+ // than quiet/verbose are unimplemented and treated the same as 'quiet'.
+ if (ShouldPrintChanged || !IsInterestingPass) {
+ if (ShouldPrintChanged) {
+ raw_svector_ostream OS(AfterStr);
+ MF.print(OS);
+ }
+ if (IsInterestingPass && BeforeStr != AfterStr) {
+ errs() << ("*** IR Dump After " + getPassName() + " (" + PassID +
+ ") on " + MF.getName() + " ***\n");
+ switch (PrintChanged) {
+ case ChangePrinter::None:
+ llvm_unreachable("");
+ case ChangePrinter::Quiet:
+ case ChangePrinter::Verbose:
+ case ChangePrinter::DotCfgQuiet: // unimplemented
+ case ChangePrinter::DotCfgVerbose: // unimplemented
+ errs() << AfterStr;
+ break;
+ case ChangePrinter::DiffQuiet:
+ case ChangePrinter::DiffVerbose:
+ case ChangePrinter::ColourDiffQuiet:
+ case ChangePrinter::ColourDiffVerbose: {
+ bool Color = llvm::is_contained(
+ {ChangePrinter::ColourDiffQuiet, ChangePrinter::ColourDiffVerbose},
+ PrintChanged.getValue());
+ StringRef Removed = Color ? "\033[31m-%l\033[0m\n" : "-%l\n";
+ StringRef Added = Color ? "\033[32m+%l\033[0m\n" : "+%l\n";
+ StringRef NoChange = " %l\n";
+ errs() << doSystemDiff(BeforeStr, AfterStr, Removed, Added, NoChange);
+ break;
+ }
+ }
+ } else if (llvm::is_contained({ChangePrinter::Verbose,
+ ChangePrinter::DiffVerbose,
+ ChangePrinter::ColourDiffVerbose},
+ PrintChanged.getValue())) {
+ const char *Reason =
+ IsInterestingPass ? " omitted because no change" : " filtered out";
+ errs() << "*** IR Dump After " << getPassName();
+ if (!PassID.empty())
+ errs() << " (" << PassID << ")";
+ errs() << " on " << MF.getName() + Reason + " ***\n";
+ }
+ }
+ return RV;
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<DominanceFrontierWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<IVUsersWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+
+ FunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 000000000000..c31c065b1976
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,71 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+ StringRef getPassName() const override { return "MachineFunction Printer"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addUsedIfAvailable<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!isFunctionInPrintList(MF.getName()))
+ return false;
+ OS << "# " << Banner << ":\n";
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
+ return false;
+ }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
+ "Machine Function Printer", false, false)
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner){
+ return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
new file mode 100644
index 000000000000..fbc071536d22
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -0,0 +1,224 @@
+//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Uses profile information to split out cold blocks.
+//
+// This pass splits out cold machine basic blocks from the parent function. This
+// implementation leverages the basic block section framework. Blocks marked
+// cold by this pass are grouped together in a separate section prefixed with
+// ".text.unlikely.*". The linker can then group these together as a cold
+// section. The split part of the function is a contiguous region identified by
+// the symbol "foo.cold". Grouping all cold blocks across functions together
+// decreases fragmentation and improves icache and itlb utilization. Note that
+// the overall changes to the binary size are negligible; only a small number of
+// additional jump instructions may be introduced.
+//
+// For the original RFC of this pass please see
+// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/EHUtils.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include <optional>
+
+using namespace llvm;
+
+// FIXME: This cutoff value is CPU dependent and should be moved to
+// TargetTransformInfo once we consider enabling this on other platforms.
+// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
+// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
+// The default was empirically determined to be optimal when considering cutoff
+// values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
+// Intel CPUs.
+static cl::opt<unsigned>
+ PercentileCutoff("mfs-psi-cutoff",
+ cl::desc("Percentile profile summary cutoff used to "
+ "determine cold blocks. Unused if set to zero."),
+ cl::init(999950), cl::Hidden);
+
+static cl::opt<unsigned> ColdCountThreshold(
+ "mfs-count-threshold",
+ cl::desc(
+        "Minimum execution count for a block to be kept in the hot section."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<bool> SplitAllEHCode(
+ "mfs-split-ehcode",
+    cl::desc("Splits all EH code and its descendants by default."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class MachineFunctionSplitter : public MachineFunctionPass {
+public:
+ static char ID;
+ MachineFunctionSplitter() : MachineFunctionPass(ID) {
+ initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Machine Function Splitter Transformation";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+} // end anonymous namespace
+
+/// setDescendantEHBlocksCold - Mark all EH pads, and blocks reachable only
+/// from EH pads, as cold. This helps treat EH pads as statically cold instead
+/// of relying on profile data.
+static void setDescendantEHBlocksCold(MachineFunction &MF) {
+ DenseSet<MachineBasicBlock *> EHBlocks;
+ computeEHOnlyBlocks(MF, EHBlocks);
+ for (auto Block : EHBlocks) {
+ Block->setSectionID(MBBSectionID::ColdSectionID);
+ }
+}
+
+static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
+ auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+ return X.getSectionID().Type < Y.getSectionID().Type;
+ };
+ llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+ llvm::avoidZeroOffsetLandingPad(MF);
+}
+
+static bool isColdBlock(const MachineBasicBlock &MBB,
+ const MachineBlockFrequencyInfo *MBFI,
+ ProfileSummaryInfo *PSI) {
+ std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+ // For instrumentation profiles and sample profiles, we use different ways
+ // to judge whether a block is cold and should be split.
+ if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
+    // If using an instrumentation profile, which is deemed "accurate", no
+    // count means cold.
+ if (!Count)
+ return true;
+ if (PercentileCutoff > 0)
+ return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ // Fallthrough to end of function.
+ } else if (PSI->hasSampleProfile()) {
+    // For a sample profile, no count means "do not judge coldness".
+ if (!Count)
+ return false;
+ }
+
+ return (*Count < ColdCountThreshold);
+}
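
Summarizing the decision above (illustration only, not part of the diff): with an instrumentation or CS-instrumentation profile a missing count means cold and the percentile cutoff is consulted when non-zero; with a sample profile a missing count means "don't judge"; otherwise the raw count is compared against the count threshold. A standalone sketch of that decision table, with the profile kind and thresholds passed in explicitly instead of read from ProfileSummaryInfo and the mfs-* options:

// Standalone sketch of the coldness decision in isColdBlock.
// Profile kind and thresholds are parameters here; in the pass they come from
// ProfileSummaryInfo and the mfs-psi-cutoff / mfs-count-threshold options.
#include <cassert>
#include <cstdint>
#include <functional>
#include <optional>

enum class ProfileKind { Instrumentation, Sample };

static bool isColdBlock(std::optional<uint64_t> Count, ProfileKind Kind,
                        unsigned PercentileCutoff, uint64_t ColdCountThreshold,
                        const std::function<bool(uint64_t)> &IsColdAtCutoff) {
  if (Kind == ProfileKind::Instrumentation) {
    if (!Count)
      return true;                      // accurate profile: no count == cold
    if (PercentileCutoff > 0)
      return IsColdAtCutoff(*Count);    // defer to the percentile summary
  } else {                              // sample profile
    if (!Count)
      return false;                     // no count: don't judge coldness
  }
  return *Count < ColdCountThreshold;   // fallback: raw execution count
}

int main() {
  auto NeverColdByCutoff = [](uint64_t) { return false; };
  assert(isColdBlock(std::nullopt, ProfileKind::Instrumentation, 0, 1,
                     NeverColdByCutoff));
  assert(!isColdBlock(std::nullopt, ProfileKind::Sample, 0, 1,
                      NeverColdByCutoff));
  assert(isColdBlock(0, ProfileKind::Sample, 0, 1, NeverColdByCutoff));
}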
+
+bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+ // We target functions with profile data. Static information in the form
+ // of exception handling code may be split to cold if user passes the
+ // mfs-split-ehcode flag.
+ bool UseProfileData = MF.getFunction().hasProfileData();
+ if (!UseProfileData && !SplitAllEHCode)
+ return false;
+
+ // TODO: We don't split functions where a section attribute has been set
+ // since the split part may not be placed in a contiguous region. It may also
+ // be more beneficial to augment the linker to ensure contiguous layout of
+ // split functions within the same section as specified by the attribute.
+ if (MF.getFunction().hasSection() ||
+ MF.getFunction().hasFnAttribute("implicit-section-name"))
+ return false;
+
+ // We don't want to proceed further for cold functions
+ // or functions of unknown hotness. Lukewarm functions have no prefix.
+ std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
+ if (SectionPrefix &&
+ (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
+ return false;
+ }
+
+ // Renumbering blocks here preserves the order of the blocks as
+ // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
+ // blocks. Preserving the order of blocks is essential to retaining decisions
+ // made by prior passes such as MachineBlockPlacement.
+ MF.RenumberBlocks();
+ MF.setBBSectionsType(BasicBlockSection::Preset);
+
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
+ if (UseProfileData) {
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    // If we don't have a high-quality profile (a sample profile is not deemed
+    // high quality) and the function is not hot, return early: with a
+    // low-quality profile we can only trust hot functions.
+ if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
+      // Split all EH code and its descendants statically by default.
+ if (SplitAllEHCode)
+ setDescendantEHBlocksCold(MF);
+ finishAdjustingBasicBlocksAndLandingPads(MF);
+ return true;
+ }
+ }
+
+ SmallVector<MachineBasicBlock *, 2> LandingPads;
+ for (auto &MBB : MF) {
+ if (MBB.isEntryBlock())
+ continue;
+
+ if (MBB.isEHPad())
+ LandingPads.push_back(&MBB);
+ else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
+
+  // Split all EH code and its descendants statically by default.
+ if (SplitAllEHCode)
+ setDescendantEHBlocksCold(MF);
+  // We only split out EH pads if all of them are cold.
+ else {
+ // Here we have UseProfileData == true.
+ bool HasHotLandingPads = false;
+ for (const MachineBasicBlock *LP : LandingPads) {
+ if (!isColdBlock(*LP, MBFI, PSI))
+ HasHotLandingPads = true;
+ }
+ if (!HasHotLandingPads) {
+ for (MachineBasicBlock *LP : LandingPads)
+ LP->setSectionID(MBBSectionID::ColdSectionID);
+ }
+ }
+
+ finishAdjustingBasicBlocksAndLandingPads(MF);
+ return true;
+}
+
+void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+}
+
+char MachineFunctionSplitter::ID = 0;
+INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
+ "Split machine functions using profile information", false,
+ false)
+
+MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
+ return new MachineFunctionSplitter();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 000000000000..a9309487a7a7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,2462 @@
+//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+
+using namespace llvm;
+
+static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) {
+ if (const MachineBasicBlock *MBB = MI.getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF;
+ return nullptr;
+}
+
+// Try to crawl up to the machine function and get TRI and IntrinsicInfo from
+// it.
+static void tryToGetTargetInfo(const MachineInstr &MI,
+ const TargetRegisterInfo *&TRI,
+ const MachineRegisterInfo *&MRI,
+ const TargetIntrinsicInfo *&IntrinsicInfo,
+ const TargetInstrInfo *&TII) {
+
+ if (const MachineFunction *MF = getMFIfAvailable(MI)) {
+ TRI = MF->getSubtarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ }
+}
+
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
+ for (MCPhysReg ImpDef : MCID->implicit_defs())
+ addOperand(MF, MachineOperand::CreateReg(ImpDef, true, true));
+ for (MCPhysReg ImpUse : MCID->implicit_uses())
+ addOperand(MF, MachineOperand::CreateReg(ImpUse, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
+ DebugLoc DL, bool NoImp)
+ : MCID(&TID), NumOperands(0), Flags(0), AsmPrinterFlags(0),
+ DbgLoc(std::move(DL)), DebugInstrNum(0) {
+ assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
+
+ // Reserve space for the expected number of operands.
+ if (unsigned NumOps = MCID->getNumOperands() + MCID->implicit_defs().size() +
+ MCID->implicit_uses().size()) {
+ CapOperands = OperandCapacity::get(NumOps);
+ Operands = MF.allocateOperandArray(CapOperands);
+ }
+
+ if (!NoImp)
+ addImplicitDefUseOperands(MF);
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly.
+/// Does not copy the number from debug instruction numbering, to preserve
+/// uniqueness.
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : MCID(&MI.getDesc()), NumOperands(0), Flags(0), AsmPrinterFlags(0),
+ Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0) {
+ assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
+
+ CapOperands = OperandCapacity::get(MI.getNumOperands());
+ Operands = MF.allocateOperandArray(CapOperands);
+
+ // Copy operands.
+ for (const MachineOperand &MO : MI.operands())
+ addOperand(MF, MO);
+
+ // Replicate ties between the operands, which addOperand was not
+ // able to do reliably.
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ MachineOperand &NewMO = getOperand(i);
+ const MachineOperand &OrigMO = MI.getOperand(i);
+ NewMO.TiedTo = OrigMO.TiedTo;
+ }
+
+ // Copy all the sensible flags.
+ setFlags(MI.Flags);
+}
+
+void MachineInstr::moveBefore(MachineInstr *MovePos) {
+ MovePos->getParent()->splice(MovePos, getParent(), getIterator());
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return nullptr;
+}
+
+const MachineRegisterInfo *MachineInstr::getRegInfo() const {
+ if (const MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return nullptr;
+}
+
+void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.removeRegOperandFromUseList(&MO);
+}
+
+void MachineInstr::addRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.addRegOperandToUseList(&MO);
+}
+
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ MachineBasicBlock *MBB = getParent();
+ assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+ addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+ unsigned NumOps, MachineRegisterInfo *MRI) {
+ if (MRI)
+ return MRI->moveOperands(Dst, Src, NumOps);
+ // MachineOperand is a trivially copyable type so we can just use memmove.
+ assert(Dst && Src && "Unknown operands");
+ std::memmove(Dst, Src, NumOps * sizeof(MachineOperand));
+}
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+ assert(isUInt<LLVM_MI_NUMOPERANDS_BITS>(NumOperands + 1) &&
+ "Cannot add more operands.");
+ assert(MCID && "Cannot add operands before providing an instr descriptor");
+
+ // Check if we're adding one of our existing operands.
+ if (&Op >= Operands && &Op < Operands + NumOperands) {
+ // This is unusual: MI->addOperand(MI->getOperand(i)).
+ // If adding Op requires reallocating or moving existing operands around,
+ // the Op reference could go stale. Support it by copying Op.
+ MachineOperand CopyOp(Op);
+ return addOperand(MF, CopyOp);
+ }
+
+ // Find the insert location for the new operand. Implicit registers go at
+ // the end, everything else goes before the implicit regs.
+ //
+ // FIXME: Allow mixed explicit and implicit operands on inline asm.
+ // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+ // implicit-defs, but they must not be moved around. See the FIXME in
+ // InstrEmitter.cpp.
+ unsigned OpNo = getNumOperands();
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ if (!isImpReg && !isInlineAsm()) {
+ while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+ --OpNo;
+ assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
+ }
+ }
+
+  // OpNo now points to the desired insertion point. Unless this is a variadic
+ // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+ // RegMask operands go between the explicit and implicit operands.
+ assert((MCID->isVariadic() || OpNo < MCID->getNumOperands() ||
+ Op.isValidExcessOperand()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *MRI = getRegInfo();
+
+ // Determine if the Operands array needs to be reallocated.
+ // Save the old capacity and operand array.
+ OperandCapacity OldCap = CapOperands;
+ MachineOperand *OldOperands = Operands;
+ if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+ CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+ Operands = MF.allocateOperandArray(CapOperands);
+ // Move the operands before the insertion point.
+ if (OpNo)
+ moveOperands(Operands, OldOperands, OpNo, MRI);
+ }
+
+ // Move the operands following the insertion point.
+ if (OpNo != NumOperands)
+ moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo,
+ MRI);
+ ++NumOperands;
+
+ // Deallocate the old operand array.
+ if (OldOperands != Operands && OldOperands)
+ MF.deallocateOperandArray(OldCap, OldOperands);
+
+ // Copy Op into place. It still needs to be inserted into the MRI use lists.
+ MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op);
+ NewMO->ParentMI = this;
+
+ // When adding a register operand, tell MRI about it.
+ if (NewMO->isReg()) {
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ NewMO->Contents.Reg.Prev = nullptr;
+ // Ignore existing ties. This is not a property that can be copied.
+ NewMO->TiedTo = 0;
+ // Add the new operand to MRI, but only for instructions in an MBB.
+ if (MRI)
+ MRI->addRegOperandToUseList(NewMO);
+ // The MCID operand information isn't accurate until we start adding
+ // explicit operands. The implicit operands are added first, then the
+ // explicits are inserted before them.
+ if (!isImpReg) {
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (NewMO->isUse()) {
+ int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ tieOperands(DefIdx, OpNo);
+ }
+      // If the register operand is flagged as early-clobber, mark it as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ NewMO->setIsEarlyClobber(true);
+ }
+ // Ensure debug instructions set debug flag on register uses.
+ if (NewMO->isUse() && isDebugInstr())
+ NewMO->setIsDebug();
+ }
+}
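
As an illustration (not part of the diff): the insertion-point logic keeps explicit operands ahead of the trailing implicit register operands by walking OpNo backwards past implicit regs, except for inline asm, where operand order must not change. A toy sketch of just that placement rule over a plain vector; the real code additionally grows the operand array and maintains MRI use lists:

// Toy sketch of the "explicit operands before trailing implicit regs" rule.
// Operands are modeled as a flag only; the real code also moves MRI use lists.
#include <cassert>
#include <vector>

struct ToyOperand { bool IsImplicitReg; };

static void addOperand(std::vector<ToyOperand> &Ops, ToyOperand Op,
                       bool IsInlineAsm) {
  unsigned OpNo = Ops.size();
  // Explicit operands are inserted before the trailing implicit registers,
  // except on inline asm, where operands must not be reordered.
  if (!Op.IsImplicitReg && !IsInlineAsm)
    while (OpNo && Ops[OpNo - 1].IsImplicitReg)
      --OpNo;
  Ops.insert(Ops.begin() + OpNo, Op);
}

int main() {
  std::vector<ToyOperand> Ops;
  addOperand(Ops, {/*IsImplicitReg=*/false}, /*IsInlineAsm=*/false); // explicit
  addOperand(Ops, {true}, false);   // implicit-def, goes to the end
  addOperand(Ops, {false}, false);  // explicit, lands before the implicit reg
  assert(!Ops[0].IsImplicitReg && !Ops[1].IsImplicitReg && Ops[2].IsImplicitReg);
}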
+
+void MachineInstr::removeOperand(unsigned OpNo) {
+ assert(OpNo < getNumOperands() && "Invalid operand number");
+ untieRegOperand(OpNo);
+
+#ifndef NDEBUG
+ // Moving tied operands would break the ties.
+ for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ assert(!Operands[i].isTied() && "Cannot move tied operands");
+#endif
+
+ MachineRegisterInfo *MRI = getRegInfo();
+ if (MRI && Operands[OpNo].isReg())
+ MRI->removeRegOperandFromUseList(Operands + OpNo);
+
+ // Don't call the MachineOperand destructor. A lot of this code depends on
+ // MachineOperand having a trivial destructor anyway, and adding a call here
+ // wouldn't make it 'destructor-correct'.
+
+ if (unsigned N = NumOperands - 1 - OpNo)
+ moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI);
+ --NumOperands;
+}
+
+void MachineInstr::setExtraInfo(MachineFunction &MF,
+ ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol,
+ MDNode *HeapAllocMarker, MDNode *PCSections,
+ uint32_t CFIType) {
+ bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
+ bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
+ bool HasHeapAllocMarker = HeapAllocMarker != nullptr;
+ bool HasPCSections = PCSections != nullptr;
+ bool HasCFIType = CFIType != 0;
+ int NumPointers = MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol +
+ HasHeapAllocMarker + HasPCSections + HasCFIType;
+
+ // Drop all extra info if there is none.
+ if (NumPointers <= 0) {
+ Info.clear();
+ return;
+ }
+
+ // If more than one pointer, then store out of line. Store heap alloc markers
+ // out of line because PointerSumType cannot hold more than 4 tag types with
+ // 32-bit pointers.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ else if (NumPointers > 1 || HasHeapAllocMarker || HasPCSections ||
+ HasCFIType) {
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(MMOs, PreInstrSymbol, PostInstrSymbol,
+ HeapAllocMarker, PCSections, CFIType));
+ return;
+ }
+
+ // Otherwise store the single pointer inline.
+ if (HasPreInstrSymbol)
+ Info.set<EIIK_PreInstrSymbol>(PreInstrSymbol);
+ else if (HasPostInstrSymbol)
+ Info.set<EIIK_PostInstrSymbol>(PostInstrSymbol);
+ else
+ Info.set<EIIK_MMO>(MMOs[0]);
+}
+
+void MachineInstr::dropMemRefs(MachineFunction &MF) {
+ if (memoperands_empty())
+ return;
+
+ setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), getPCSections(), getCFIType());
+}
+
+void MachineInstr::setMemRefs(MachineFunction &MF,
+ ArrayRef<MachineMemOperand *> MMOs) {
+ if (MMOs.empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), getPCSections(), getCFIType());
+}
+
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ MMOs.append(memoperands_begin(), memoperands_end());
+ MMOs.push_back(MO);
+ setMemRefs(MF, MMOs);
+}
+
+void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
+
+ assert(&MF == MI.getMF() &&
+         "Invalid machine functions when cloning memory references!");
+ // See if we can just steal the extra info already allocated for the
+ // instruction. We can do this whenever the pre- and post-instruction symbols
+ // are the same (including null).
+ if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
+ getPostInstrSymbol() == MI.getPostInstrSymbol() &&
+ getHeapAllocMarker() == MI.getHeapAllocMarker() &&
+ getPCSections() == MI.getPCSections()) {
+ Info = MI.Info;
+ return;
+ }
+
+ // Otherwise, fall back on a copy-based clone.
+ setMemRefs(MF, MI.memoperands());
+}
+
+/// Check to see if the MMOs pointed to by the two MemRefs arrays are
+/// identical.
+static bool hasIdenticalMMOs(ArrayRef<MachineMemOperand *> LHS,
+ ArrayRef<MachineMemOperand *> RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ auto LHSPointees = make_pointee_range(LHS);
+ auto RHSPointees = make_pointee_range(RHS);
+ return std::equal(LHSPointees.begin(), LHSPointees.end(),
+ RHSPointees.begin());
+}
+
+void MachineInstr::cloneMergedMemRefs(MachineFunction &MF,
+ ArrayRef<const MachineInstr *> MIs) {
+ // Try handling easy numbers of MIs with simpler mechanisms.
+ if (MIs.empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+ if (MIs.size() == 1) {
+ cloneMemRefs(MF, *MIs[0]);
+ return;
+ }
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the only
+ // way to merge with it is to drop all other memoperands.
+ if (MIs[0]->memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Handle the general case.
+ SmallVector<MachineMemOperand *, 2> MergedMMOs;
+ // Start with the first instruction.
+ assert(&MF == MIs[0]->getMF() &&
+ "Invalid machine functions when cloning memory references!");
+ MergedMMOs.append(MIs[0]->memoperands_begin(), MIs[0]->memoperands_end());
+ // Now walk all the other instructions and accumulate any different MMOs.
+ for (const MachineInstr &MI : make_pointee_range(MIs.slice(1))) {
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory references!");
+
+ // Skip MIs with identical operands to the first. This is a somewhat
+ // arbitrary hack but will catch common cases without being quadratic.
+ // TODO: We could fully implement merge semantics here if needed.
+ if (hasIdenticalMMOs(MIs[0]->memoperands(), MI.memoperands()))
+ continue;
+
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the
+ // only way to merge with it is to drop all other memoperands.
+ if (MI.memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Otherwise accumulate these into our temporary buffer of the merged state.
+ MergedMMOs.append(MI.memoperands_begin(), MI.memoperands_end());
+ }
+
+ setMemRefs(MF, MergedMMOs);
+}
+
+void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ // Do nothing if old and new symbols are the same.
+ if (Symbol == getPreInstrSymbol())
+ return;
+
+ // If there was only one symbol and we're removing it, just clear info.
+ if (!Symbol && Info.is<EIIK_PreInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
+
+ setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(),
+ getHeapAllocMarker(), getPCSections(), getCFIType());
+}
+
+void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ // Do nothing if old and new symbols are the same.
+ if (Symbol == getPostInstrSymbol())
+ return;
+
+ // If there was only one symbol and we're removing it, just clear info.
+ if (!Symbol && Info.is<EIIK_PostInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol,
+ getHeapAllocMarker(), getPCSections(), getCFIType());
+}
+
+void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) {
+ // Do nothing if old and new markers are the same.
+ if (Marker == getHeapAllocMarker())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ Marker, getPCSections(), getCFIType());
+}
+
+void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) {
+ // Do nothing if old and new metadata are the same.
+ if (PCSections == getPCSections())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), PCSections, getCFIType());
+}
+
+void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) {
+ // Do nothing if old and new types are the same.
+ if (Type == getCFIType())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), getPCSections(), Type);
+}
+
+void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
+ const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
+
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning instruction symbols!");
+
+ setPreInstrSymbol(MF, MI.getPreInstrSymbol());
+ setPostInstrSymbol(MF, MI.getPostInstrSymbol());
+ setHeapAllocMarker(MF, MI.getHeapAllocMarker());
+ setPCSections(MF, MI.getPCSections());
+}
+
+uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
+ // For now, just return the union of the flags. If the flags get more
+ // complicated over time, we might need more logic here.
+ return getFlags() | Other.getFlags();
+}
+
+uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+ uint32_t MIFlags = 0;
+ // Copy the wrapping flags.
+ if (const OverflowingBinaryOperator *OB =
+ dyn_cast<OverflowingBinaryOperator>(&I)) {
+ if (OB->hasNoSignedWrap())
+ MIFlags |= MachineInstr::MIFlag::NoSWrap;
+ if (OB->hasNoUnsignedWrap())
+ MIFlags |= MachineInstr::MIFlag::NoUWrap;
+ }
+
+ // Copy the exact flag.
+ if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
+ if (PE->isExact())
+ MIFlags |= MachineInstr::MIFlag::IsExact;
+
+ // Copy the fast-math flags.
+ if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
+ const FastMathFlags Flags = FP->getFastMathFlags();
+ if (Flags.noNaNs())
+ MIFlags |= MachineInstr::MIFlag::FmNoNans;
+ if (Flags.noInfs())
+ MIFlags |= MachineInstr::MIFlag::FmNoInfs;
+ if (Flags.noSignedZeros())
+ MIFlags |= MachineInstr::MIFlag::FmNsz;
+ if (Flags.allowReciprocal())
+ MIFlags |= MachineInstr::MIFlag::FmArcp;
+ if (Flags.allowContract())
+ MIFlags |= MachineInstr::MIFlag::FmContract;
+ if (Flags.approxFunc())
+ MIFlags |= MachineInstr::MIFlag::FmAfn;
+ if (Flags.allowReassoc())
+ MIFlags |= MachineInstr::MIFlag::FmReassoc;
+ }
+
+ if (I.getMetadata(LLVMContext::MD_unpredictable))
+ MIFlags |= MachineInstr::MIFlag::Unpredictable;
+
+ return MIFlags;
+}
+
+void MachineInstr::copyIRFlags(const Instruction &I) {
+ Flags = copyFlagsFromInstruction(I);
+}
+
+bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
+ assert(!isBundledWithPred() && "Must be called on bundle header");
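+ // Walk every instruction in the bundle, starting at the bundle header, and
+ // combine each MCInstrDesc's flags according to the requested query type.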
+ for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle && !MII->isBundle())
+ return false;
+ }
+ // This was the last instruction in the bundle.
+ if (!MII->isBundledWithSucc())
+ return Type == AllInBundle;
+ }
+}
+
+bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other.getOpcode() != getOpcode() ||
+ Other.getNumOperands() != getNumOperands())
+ return false;
+
+ if (isBundle()) {
+ // We have passed the test above that both instructions have the same
+ // opcode, so we know that both instructions are bundles here. Let's compare
+ // MIs inside the bundle.
+ assert(Other.isBundle() && "Expected that both instructions are bundles.");
+ MachineBasicBlock::const_instr_iterator I1 = getIterator();
+ MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
+ // Loop until we have analysed the last instruction inside at least one of
+ // the bundles.
+ while (I1->isBundledWithSucc() && I2->isBundledWithSucc()) {
+ ++I1;
+ ++I2;
+ if (!I1->isIdenticalTo(*I2, Check))
+ return false;
+ }
+ // If we've reached the end of just one of the two bundles, but not both,
+ // the instructions are not identical.
+ if (I1->isBundledWithSucc() || I2->isBundledWithSucc())
+ return false;
+ }
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other.getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ else if (Check == IgnoreVRegDefs) {
+ if (!MO.getReg().isVirtual() || !OMO.getReg().isVirtual())
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
+ }
+ // If DebugLoc does not match then two debug instructions are not identical.
+ if (isDebugInstr())
+ if (getDebugLoc() && Other.getDebugLoc() &&
+ getDebugLoc() != Other.getDebugLoc())
+ return false;
+ // If pre- or post-instruction symbols do not match then the two instructions
+ // are not identical.
+ if (getPreInstrSymbol() != Other.getPreInstrSymbol() ||
+ getPostInstrSymbol() != Other.getPostInstrSymbol())
+ return false;
+ // Call instructions with different CFI types are not identical.
+ if (isCall() && getCFIType() != Other.getCFIType())
+ return false;
+
+ return true;
+}
+
+bool MachineInstr::isEquivalentDbgInstr(const MachineInstr &Other) const {
+ if (!isDebugValueLike() || !Other.isDebugValueLike())
+ return false;
+ if (getDebugLoc() != Other.getDebugLoc())
+ return false;
+ if (getDebugVariable() != Other.getDebugVariable())
+ return false;
+ if (getNumDebugOperands() != Other.getNumDebugOperands())
+ return false;
+ for (unsigned OpIdx = 0; OpIdx < getNumDebugOperands(); ++OpIdx)
+ if (!getDebugOperand(OpIdx).isIdenticalTo(Other.getDebugOperand(OpIdx)))
+ return false;
+ if (!DIExpression::isEqualExpression(
+ getDebugExpression(), isIndirectDebugValue(),
+ Other.getDebugExpression(), Other.isIndirectDebugValue()))
+ return false;
+ return true;
+}
+
+const MachineFunction *MachineInstr::getMF() const {
+ return getParent()->getParent();
+}
+
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove(this);
+}
+
+MachineInstr *MachineInstr::removeFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove_instr(this);
+}
+
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+void MachineInstr::eraseFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase_instr(this);
+}
+
+bool MachineInstr::isCandidateForCallSiteEntry(QueryType Type) const {
+ if (!isCall(Type))
+ return false;
+ switch (getOpcode()) {
+ case TargetOpcode::PATCHPOINT:
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::STATEPOINT:
+ case TargetOpcode::FENTRY_CALL:
+ return false;
+ }
+ return true;
+}
+
+bool MachineInstr::shouldUpdateCallSiteInfo() const {
+ if (isBundle())
+ return isCandidateForCallSiteEntry(MachineInstr::AnyInBundle);
+ return isCandidateForCallSiteEntry();
+}
+
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
+ return NumOperands;
+
+ for (unsigned I = NumOperands, E = getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = getOperand(I);
+ // The operands must always be in the following order:
+ // - explicit reg defs,
+ // - other explicit operands (reg uses, immediates, etc.),
+ // - implicit reg defs,
+ // - implicit reg uses.
+ if (MO.isReg() && MO.isImplicit())
+ break;
+ ++NumOperands;
+ }
+ return NumOperands;
+}
+
+unsigned MachineInstr::getNumExplicitDefs() const {
+ unsigned NumDefs = MCID->getNumDefs();
+ if (!MCID->isVariadic())
+ return NumDefs;
+
+ for (unsigned I = NumDefs, E = getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+ break;
+ ++NumDefs;
+ }
+ return NumDefs;
+}
+
+void MachineInstr::bundleWithPred() {
+ assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+ setFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = getIterator();
+ --Pred;
+ assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+ assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+ setFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = getIterator();
+ ++Succ;
+ assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+ assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+ clearFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = getIterator();
+ --Pred;
+ assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+ assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+ clearFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = getIterator();
+ ++Succ;
+ assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->clearFlag(BundledPred);
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const {
+ assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!");
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0);
+}
+
+int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
+ unsigned *GroupNo) const {
+ assert(isInlineAsm() && "Expected an inline asm instruction");
+ assert(OpIdx < getNumOperands() && "OpIdx out of range");
+
+ // Ignore queries about the initial operands.
+ if (OpIdx < InlineAsm::MIOp_FirstOperand)
+ return -1;
+
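+ // Inline asm operands are laid out as the asm string, the extra-info
+ // immediate, and then a sequence of operand groups; each group starts with
+ // an immediate flag word encoding its kind and the number of operands that
+ // follow it. Trailing implicit register operands end the sequence.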
+ unsigned Group = 0;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ // If we reach the implicit register operands, stop looking.
+ if (!FlagMO.isImm())
+ return -1;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ if (i + NumOps > OpIdx) {
+ if (GroupNo)
+ *GroupNo = Group;
+ return i;
+ }
+ ++Group;
+ }
+ return -1;
+}
+
+const DILabel *MachineInstr::getDebugLabel() const {
+ assert(isDebugLabel() && "not a DBG_LABEL");
+ return cast<DILabel>(getOperand(0).getMetadata());
+}
+
+const MachineOperand &MachineInstr::getDebugVariableOp() const {
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned VariableOp = isNonListDebugValue() ? 2 : 0;
+ return getOperand(VariableOp);
+}
+
+MachineOperand &MachineInstr::getDebugVariableOp() {
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned VariableOp = isNonListDebugValue() ? 2 : 0;
+ return getOperand(VariableOp);
+}
+
+const DILocalVariable *MachineInstr::getDebugVariable() const {
+ return cast<DILocalVariable>(getDebugVariableOp().getMetadata());
+}
+
+const MachineOperand &MachineInstr::getDebugExpressionOp() const {
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned ExpressionOp = isNonListDebugValue() ? 3 : 1;
+ return getOperand(ExpressionOp);
+}
+
+MachineOperand &MachineInstr::getDebugExpressionOp() {
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned ExpressionOp = isNonListDebugValue() ? 3 : 1;
+ return getOperand(ExpressionOp);
+}
+
+const DIExpression *MachineInstr::getDebugExpression() const {
+ return cast<DIExpression>(getDebugExpressionOp().getMetadata());
+}
+
+bool MachineInstr::isDebugEntryValue() const {
+ return isDebugValue() && getDebugExpression()->isEntryValue();
+}
+
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) const {
+ assert(getParent() && "Can't have an MBB reference here!");
+ assert(getMF() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getMF();
+
+ // Most opcodes have fixed constraints in their MCInstrDesc.
+ if (!isInlineAsm())
+ return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
+
+ if (!getOperand(OpIdx).isReg())
+ return nullptr;
+
+ // For tied uses on inline asm, get the constraint from the def.
+ unsigned DefIdx;
+ if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+ OpIdx = DefIdx;
+
+ // Inline asm stores register class constraints in the flag word.
+ int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0)
+ return nullptr;
+
+ unsigned Flag = getOperand(FlagIdx).getImm();
+ unsigned RCID;
+ if ((InlineAsm::getKind(Flag) == InlineAsm::Kind_RegUse ||
+ InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDef ||
+ InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDefEarlyClobber) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID))
+ return TRI->getRegClass(RCID);
+
+ // Assume that all registers in a memory operand are pointers.
+ if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ return TRI->getPointerRegClass(MF);
+
+ return nullptr;
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
+ Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI, bool ExploreBundle) const {
+ // Check every operand inside the bundle if we have been asked to.
+ if (ExploreBundle)
+ for (ConstMIBundleOperands OpndIt(*this); OpndIt.isValid() && CurRC;
+ ++OpndIt)
+ CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl(
+ OpndIt.getOperandNo(), Reg, CurRC, TII, TRI);
+ else
+ // Otherwise, just check the current operands.
+ for (unsigned i = 0, e = NumOperands; i < e && CurRC; ++i)
+ CurRC = getRegClassConstraintEffectForVRegImpl(i, Reg, CurRC, TII, TRI);
+ return CurRC;
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl(
+ unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
+ assert(CurRC && "Invalid initial register class");
+ // Check if Reg is constrained by some of its use/def from MI.
+ const MachineOperand &MO = getOperand(OpIdx);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ return CurRC;
+ // If yes, accumulate the constraints through the operand.
+ return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI);
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
+ unsigned OpIdx, const TargetRegisterClass *CurRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
+ const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI);
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isReg() &&
+ "Cannot get register constraints for non-register operand");
+ assert(CurRC && "Invalid initial register class");
+ if (unsigned SubIdx = MO.getSubReg()) {
+ if (OpRC)
+ CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx);
+ else
+ CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx);
+ } else if (OpRC)
+ CurRC = TRI->getCommonSubClass(CurRC, OpRC);
+ return CurRC;
+}
+
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
+unsigned MachineInstr::getBundleSize() const {
+ MachineBasicBlock::const_instr_iterator I = getIterator();
+ unsigned Size = 0;
+ while (I->isBundledWithSucc()) {
+ ++Size;
+ ++I;
+ }
+ return Size;
+}
+
+/// Returns true if the MachineInstr has an implicit-use operand of exactly
+/// the given register (not considering sub/super-registers).
+bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
+ return true;
+ }
+ return false;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+/// the specified register, or -1 if it is not found. It further tightens the
+/// search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(
+ Register Reg, bool isKill, const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
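+/// For example, a sub-register def that is not marked <undef> (a partial
+/// redefine) counts as both a read and a write of Reg, since the lanes it
+/// does not touch are preserved.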
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(Register Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg() && !MO.isUndef())
+ // A partial def undef doesn't count as reading the register.
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
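+/// For example, when TargetRegisterInfo is provided, a query for a
+/// sub-register will also match an operand that defines one of its
+/// super-registers.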
+int
+MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = Reg.isPhysical();
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register MOReg = MO.getReg();
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys && MOReg.isPhysical()) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ // Don't call MCID.findFirstPredOperandIdx() because this variant
+ // is sometimes called on an instruction that's not yet complete, and
+ // so the number of operands is less than the MCID indicates. In
+ // particular, the PTX target does this.
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (MCID.operands()[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
+
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0: Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax: Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
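+/// As an illustrative example, tieOperands(0, 1) on a two-address instruction
+/// leaves getOperand(1).TiedTo == 1 (tied to operand 0) and
+/// getOperand(0).TiedTo == 2 (tied to operand 1).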
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+ MachineOperand &DefMO = getOperand(DefIdx);
+ MachineOperand &UseMO = getOperand(UseIdx);
+ assert(DefMO.isDef() && "DefIdx must be a def operand");
+ assert(UseMO.isUse() && "UseIdx must be a use operand");
+ assert(!DefMO.isTied() && "Def is already tied to another use");
+ assert(!UseMO.isTied() && "Use is already tied to another def");
+
+ if (DefIdx < TiedMax)
+ UseMO.TiedTo = DefIdx + 1;
+ else {
+ // Inline asm can use the group descriptors to find tied operands,
+ // statepoint tied operands are trivial to match (1-1 reg def with reg use),
+ // but on a normal instruction, the tied def must be within the first TiedMax
+ // operands.
+ assert((isInlineAsm() || getOpcode() == TargetOpcode::STATEPOINT) &&
+ "DefIdx out of range");
+ UseMO.TiedTo = TiedMax;
+ }
+
+ // UseIdx can be out of range; we'll search for it in findTiedOperandIdx().
+ DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
+}
+
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isTied() && "Operand isn't tied");
+
+ // Normally TiedTo is in range.
+ if (MO.TiedTo < TiedMax)
+ return MO.TiedTo - 1;
+
+ // Uses on normal instructions can be out of range.
+ if (!isInlineAsm() && getOpcode() != TargetOpcode::STATEPOINT) {
+ // Normal tied defs must be in the 0..TiedMax-1 range.
+ if (MO.isUse())
+ return TiedMax - 1;
+ // MO is a def. Search for the tied use.
+ for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &UseMO = getOperand(i);
+ if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1)
+ return i;
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
+ if (getOpcode() == TargetOpcode::STATEPOINT) {
+ // In a STATEPOINT, defs correspond 1-1 to the GC pointer operands passed
+ // in registers.
+ StatepointOpers SO(this);
+ unsigned CurUseIdx = SO.getFirstGCPtrIdx();
+ assert(CurUseIdx != -1U && "only gc pointer statepoint operands can be tied");
+ unsigned NumDefs = getNumDefs();
+ for (unsigned CurDefIdx = 0; CurDefIdx < NumDefs; ++CurDefIdx) {
+ while (!getOperand(CurUseIdx).isReg())
+ CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx);
+ if (OpIdx == CurDefIdx)
+ return CurUseIdx;
+ if (OpIdx == CurUseIdx)
+ return CurDefIdx;
+ CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx);
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
+ // Now deal with inline asm by parsing the operand group descriptor flags.
+ // Find the beginning of each operand group.
+ SmallVector<unsigned, 8> GroupIdx;
+ unsigned OpIdxGroup = ~0u;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
+ unsigned CurGroup = GroupIdx.size();
+ GroupIdx.push_back(i);
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ // OpIdx belongs to this operand group.
+ if (OpIdx > i && OpIdx < i + NumOps)
+ OpIdxGroup = CurGroup;
+ unsigned TiedGroup;
+ if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+ continue;
+ // Operands in this group are tied to operands in TiedGroup which must be
+ // earlier. Find the number of operands between the two groups.
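+ // E.g., if the tied group begins three operands before this group, Delta is
+ // 3 and the tied operand sits exactly three slots away from OpIdx.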
+ unsigned Delta = i - GroupIdx[TiedGroup];
+
+ // OpIdx is a use tied to TiedGroup.
+ if (OpIdxGroup == CurGroup)
+ return OpIdx - Delta;
+
+ // OpIdx is a def tied to this use group.
+ if (OpIdxGroup == TiedGroup)
+ return OpIdx + Delta;
+ }
+ llvm_unreachable("Invalid tied operand on inline asm");
+}
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (MachineOperand &MO : operands()) {
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
+void MachineInstr::substituteRegister(Register FromReg, Register ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (ToReg.isPhysical()) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ //
+ // Treat volatile loads as stores. This is not strictly necessary for
+ // volatiles, but it is required for atomic loads. It is not allowed to move
+ // a load across an atomic load with Ordering > Monotonic.
+ if (mayStore() || isCall() || isPHI() ||
+ (mayLoad() && hasOrderedMemoryRef())) {
+ SawStore = true;
+ return false;
+ }
+
+ if (isPosition() || isDebugInstr() || isTerminator() ||
+ mayRaiseFPException() || hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended
+ // destination. The check for isInvariantLoad gives the target the chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (mayLoad() && !isDereferenceableInvariantLoad())
+ // Otherwise, this is a real load. If there is a store between the load and
+ // end of block, we can't move it.
+ return !SawStore;
+
+ return true;
+}
+
+static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
+ bool UseTBAA, const MachineMemOperand *MMOa,
+ const MachineMemOperand *MMOb) {
+ // The following interface to AA is fashioned after DAGCombiner::isAlias and
+ // operates with MachineMemOperand offset with some important assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and cannot
+ // affect queries other than the trivial case of overlap checking.
+ // - These offsets never wrap and never step outside of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // Even before we go to AA we can reason locally about some memory objects. It
+ // can save compile time, and possibly catch some corner cases not currently
+ // covered.
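+ // As a concrete example, two operands on the same underlying value with
+ // (Offset, Width) of (0, 4) and (4, 4) do not overlap: the lower access ends
+ // exactly where the higher one begins.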
+
+ int64_t OffsetA = MMOa->getOffset();
+ int64_t OffsetB = MMOb->getOffset();
+ int64_t MinOffset = std::min(OffsetA, OffsetB);
+
+ uint64_t WidthA = MMOa->getSize();
+ uint64_t WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+ bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
+ const Value *ValA = MMOa->getValue();
+ const Value *ValB = MMOb->getValue();
+ bool SameVal = (ValA && ValB && (ValA == ValB));
+ if (!SameVal) {
+ const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+ const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+ if (PSVa && ValB && !PSVa->mayAlias(&MFI))
+ return false;
+ if (PSVb && ValA && !PSVb->mayAlias(&MFI))
+ return false;
+ if (PSVa && PSVb && (PSVa == PSVb))
+ SameVal = true;
+ }
+
+ if (SameVal) {
+ if (!KnownWidthA || !KnownWidthB)
+ return true;
+ int64_t MaxOffset = std::max(OffsetA, OffsetB);
+ int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+ return (MinOffset + LowWidth > MaxOffset);
+ }
+
+ if (!AA)
+ return true;
+
+ if (!ValA || !ValB)
+ return true;
+
+ assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
+ assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t OverlapA =
+ KnownWidthA ? WidthA + OffsetA - MinOffset : MemoryLocation::UnknownSize;
+ int64_t OverlapB =
+ KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize;
+
+ return !AA->isNoAlias(
+ MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValB, OverlapB,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+}
+
+bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
+ bool UseTBAA) const {
+ const MachineFunction *MF = getMF();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ // Exclude call instruction which may alter the memory but can not be handled
+ // by this function.
+ if (isCall() || Other.isCall())
+ return true;
+
+ // If neither instruction stores to memory, they can't alias in any
+ // meaningful way, even if they read from the same address.
+ if (!mayStore() && !Other.mayStore())
+ return false;
+
+ // Both instructions must be memory operations to be able to alias.
+ if (!mayLoadOrStore() || !Other.mayLoadOrStore())
+ return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
+ return false;
+
+ // Memory operations without memory operands may access anything. Be
+ // conservative and assume `MayAlias`.
+ if (memoperands_empty() || Other.memoperands_empty())
+ return true;
+
+ // Skip if there are too many memory operands.
+ auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
+ if (NumChecks > TII->getMemOperandAACheckLimit())
+ return true;
+
+ // Check each pair of memory operands from both instructions. The two
+ // instructions are known not to alias only if every such pair is known not
+ // to alias.
+ for (auto *MMOa : memoperands())
+ for (auto *MMOb : Other.memoperands())
+ if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb))
+ return true;
+
+ return false;
+}
+
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
+ !hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check if any of our memory operands are ordered.
+ return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
+ return !MMO->isUnordered();
+ });
+}
+
+/// isDereferenceableInvariantLoad - Return true if this instruction will never
+/// trap and is loading from a location whose value is invariant across a run of
+/// this function.
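+/// For example, a constant-pool load qualifies, while a volatile or otherwise
+/// ordered load does not.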
+bool MachineInstr::isDereferenceableInvariantLoad() const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();
+
+ for (MachineMemOperand *MMO : memoperands()) {
+ if (!MMO->isUnordered())
+ // If the memory operand has ordering side effects, we can't move the
+ // instruction. Such an instruction is technically an invariant load,
+ // but the caller code would need to be updated to expect that.
+ return false;
+ if (MMO->isStore()) return false;
+ if (MMO->isInvariant() && MMO->isDereferenceable())
+ continue;
+
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
+ if (PSV->isConstant(&MFI))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
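+/// For example, "%x = PHI %a, %bb.1, %a, %bb.2" always produces the value of
+/// %a, so %a is returned; if the incoming registers differ, 0 is returned.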
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ Register Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (hasProperty(MCID::UnmodeledSideEffects))
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
+bool MachineInstr::isLoadFoldBarrier() const {
+ return mayStore() || isCall() ||
+ (hasUnmodeledSideEffects() && !isPseudoProbe());
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (const MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(MachineFunction &MF,
+ const MachineInstr &MI) {
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands()))
+ if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
+ addOperand(MF, MO);
+}
+
+bool MachineInstr::hasComplexRegisterTies() const {
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.Opcode == TargetOpcode::STATEPOINT)
+ return true;
+ for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
+ const auto &Operand = getOperand(I);
+ if (!Operand.isReg() || Operand.isDef())
+ // Ignore the defined registers as MCID marks only the uses as tied.
+ continue;
+ int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+ int TiedIdx = Operand.isTied() ? int(findTiedOperandIdx(I)) : -1;
+ if (ExpectedTiedIdx != TiedIdx)
+ return true;
+ }
+ return false;
+}
+
+LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
+ const MachineRegisterInfo &MRI) const {
+ const MachineOperand &Op = getOperand(OpIdx);
+ if (!Op.isReg())
+ return LLT{};
+
+ if (isVariadic() || OpIdx >= getNumExplicitOperands())
+ return MRI.getType(Op.getReg());
+
+ auto &OpInfo = getDesc().operands()[OpIdx];
+ if (!OpInfo.isGenericType())
+ return MRI.getType(Op.getReg());
+
+ if (PrintedTypes[OpInfo.getGenericTypeIndex()])
+ return LLT{};
+
+ LLT TypeToPrint = MRI.getType(Op.getReg());
+ // Don't mark the type index printed if it wasn't actually printed: maybe
+ // another operand with the same type index has an actual type attached:
+ if (TypeToPrint.isValid())
+ PrintedTypes.set(OpInfo.getGenericTypeIndex());
+ return TypeToPrint;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
+ dbgs() << " ";
+ print(dbgs());
+}
+
+LLVM_DUMP_METHOD void MachineInstr::dumprImpl(
+ const MachineRegisterInfo &MRI, unsigned Depth, unsigned MaxDepth,
+ SmallPtrSetImpl<const MachineInstr *> &AlreadySeenInstrs) const {
+ if (Depth >= MaxDepth)
+ return;
+ if (!AlreadySeenInstrs.insert(this).second)
+ return;
+ // PadToColumn always inserts at least one space.
+ // Don't mess up the alignment if we don't want any space.
+ if (Depth)
+ fdbgs().PadToColumn(Depth * 2);
+ print(fdbgs());
+ for (const MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical())
+ continue;
+ const MachineInstr *NewMI = MRI.getUniqueVRegDef(Reg);
+ if (NewMI == nullptr)
+ continue;
+ NewMI->dumprImpl(MRI, Depth + 1, MaxDepth, AlreadySeenInstrs);
+ }
+}
+
+LLVM_DUMP_METHOD void MachineInstr::dumpr(const MachineRegisterInfo &MRI,
+ unsigned MaxDepth) const {
+ SmallPtrSet<const MachineInstr *, 16> AlreadySeenInstrs;
+ dumprImpl(MRI, 0, MaxDepth, AlreadySeenInstrs);
+}
+#endif
+
+void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
+ bool SkipDebugLoc, bool AddNewLine,
+ const TargetInstrInfo *TII) const {
+ const Module *M = nullptr;
+ const Function *F = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ F = &MF->getFunction();
+ M = F->getParent();
+ if (!TII)
+ TII = MF->getSubtarget().getInstrInfo();
+ }
+
+ ModuleSlotTracker MST(M);
+ if (F)
+ MST.incorporateFunction(*F);
+ print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, AddNewLine, TII);
+}
+
+void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ bool IsStandalone, bool SkipOpers, bool SkipDebugLoc,
+ bool AddNewLine, const TargetInstrInfo *TII) const {
+ // We can be a bit tidier if we know the MachineFunction.
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
+ tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII);
+
+ if (isCFIInstruction())
+ assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+
+ SmallBitVector PrintedTypes(8);
+ bool ShouldPrintRegisterTies = IsStandalone || hasComplexRegisterTies();
+ auto getTiedOperandIdx = [&](unsigned OpIdx) {
+ if (!ShouldPrintRegisterTies)
+ return 0U;
+ const MachineOperand &MO = getOperand(OpIdx);
+ if (MO.isReg() && MO.isTied() && !MO.isDef())
+ return findTiedOperandIdx(OpIdx);
+ return 0U;
+ };
+ unsigned StartOp = 0;
+ unsigned e = getNumOperands();
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ while (StartOp < e) {
+ const MachineOperand &MO = getOperand(StartOp);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+ break;
+
+ if (StartOp != 0)
+ OS << ", ";
+
+ LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
+ MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ ++StartOp;
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ if (getFlag(MachineInstr::FrameSetup))
+ OS << "frame-setup ";
+ if (getFlag(MachineInstr::FrameDestroy))
+ OS << "frame-destroy ";
+ if (getFlag(MachineInstr::FmNoNans))
+ OS << "nnan ";
+ if (getFlag(MachineInstr::FmNoInfs))
+ OS << "ninf ";
+ if (getFlag(MachineInstr::FmNsz))
+ OS << "nsz ";
+ if (getFlag(MachineInstr::FmArcp))
+ OS << "arcp ";
+ if (getFlag(MachineInstr::FmContract))
+ OS << "contract ";
+ if (getFlag(MachineInstr::FmAfn))
+ OS << "afn ";
+ if (getFlag(MachineInstr::FmReassoc))
+ OS << "reassoc ";
+ if (getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (getFlag(MachineInstr::IsExact))
+ OS << "exact ";
+ if (getFlag(MachineInstr::NoFPExcept))
+ OS << "nofpexcept ";
+ if (getFlag(MachineInstr::NoMerge))
+ OS << "nomerge ";
+
+ // Print the opcode name.
+ if (TII)
+ OS << TII->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
+
+ if (SkipOpers)
+ return;
+
+ // Print the rest of the operands.
+ bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
+ // Print asm string.
+ OS << " ";
+ const unsigned OpIdx = InlineAsm::MIOp_AsmString;
+ LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
+ getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI,
+ IntrinsicInfo);
+
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_MayLoad)
+ OS << " [mayload]";
+ if (ExtraInfo & InlineAsm::Extra_MayStore)
+ OS << " [maystore]";
+ if (ExtraInfo & InlineAsm::Extra_IsConvergent)
+ OS << " [isconvergent]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+ if (getInlineAsmDialect() == InlineAsm::AD_ATT)
+ OS << " [attdialect]";
+ if (getInlineAsmDialect() == InlineAsm::AD_Intel)
+ OS << " [inteldialect]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+
+ if (isDebugValueLike() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE* instructions.
+ auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
+ if (DIV && !DIV->getName().empty())
+ OS << "!\"" << DIV->getName() << '\"';
+ else {
+ LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(i);
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ }
+ } else if (isDebugLabel() && MO.isMetadata()) {
+ // Pretty print DBG_LABEL instructions.
+ auto *DIL = dyn_cast<DILabel>(MO.getMetadata());
+ if (DIL && !DIL->getName().empty())
+ OS << "\"" << DIL->getName() << '\"';
+ else {
+ LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(i);
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ }
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ OS << ":[";
+ OS << InlineAsm::getKindName(InlineAsm::getKind(Flag));
+
+ unsigned RCID = 0;
+ if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
+ } else
+ OS << ":RC" << RCID;
+ }
+
+ if (InlineAsm::isMemKind(Flag)) {
+ unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ OS << ":" << InlineAsm::getMemConstraintName(MCID);
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ OS << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ } else {
+ LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(i);
+ if (MO.isImm() && isOperandSubregIdx(i))
+ MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI);
+ else
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ }
+ }
+
+ // Print any optional symbols attached to this instruction as-if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = getPreInstrSymbol()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ }
+ if (MCSymbol *PostInstrSymbol = getPostInstrSymbol()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ }
+ if (MDNode *HeapAllocMarker = getHeapAllocMarker()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " heap-alloc-marker ";
+ HeapAllocMarker->printAsOperand(OS, MST);
+ }
+ if (MDNode *PCSections = getPCSections()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " pcsections ";
+ PCSections->printAsOperand(OS, MST);
+ }
+ if (uint32_t CFIType = getCFIType()) {
+ if (!FirstOp)
+ OS << ',';
+ OS << " cfi-type " << CFIType;
+ }
+
+ if (DebugInstrNum) {
+ if (!FirstOp)
+ OS << ",";
+ OS << " debug-instr-number " << DebugInstrNum;
+ }
+
+ if (!SkipDebugLoc) {
+ if (const DebugLoc &DL = getDebugLoc()) {
+ if (!FirstOp)
+ OS << ',';
+ OS << " debug-location ";
+ DL->printAsOperand(OS, MST);
+ }
+ }
+
+ if (!memoperands_empty()) {
+ SmallVector<StringRef, 0> SSNs;
+ const LLVMContext *Context = nullptr;
+ std::unique_ptr<LLVMContext> CtxPtr;
+ const MachineFrameInfo *MFI = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ MFI = &MF->getFrameInfo();
+ Context = &MF->getFunction().getContext();
+ } else {
+ CtxPtr = std::make_unique<LLVMContext>();
+ Context = CtxPtr.get();
+ }
+
+ OS << " :: ";
+ bool NeedComma = false;
+ for (const MachineMemOperand *Op : memoperands()) {
+ if (NeedComma)
+ OS << ", ";
+ Op->print(OS, MST, SSNs, *Context, MFI, TII);
+ NeedComma = true;
+ }
+ }
+
+ if (SkipDebugLoc)
+ return;
+
+ bool HaveSemi = false;
+
+ // Print debug location information.
+ if (const DebugLoc &DL = getDebugLoc()) {
+ if (!HaveSemi) {
+ OS << ';';
+ HaveSemi = true;
+ }
+ OS << ' ';
+ DL.print(OS);
+ }
+
+ // Print extra comments for DEBUG_VALUE.
+ if (isDebugValueLike() && getDebugVariableOp().isMetadata()) {
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
+ auto *DV = getDebugVariable();
+ OS << " line no:" << DV->getLine();
+ if (isIndirectDebugValue())
+ OS << " indirect";
+ }
+ // TODO: DBG_LABEL
+
+ if (AddNewLine)
+ OS << '\n';
+}
+
+bool MachineInstr::addRegisterKilled(Register IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = IncomingReg.isPhysical();
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+
+ // DEBUG_VALUE nodes do not contribute to code generation and should
+ // always be ignored. Failure to do so may result in trying to modify
+ // KILL flags on DEBUG_VALUE nodes.
+ if (MO.isDebug())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() && Reg.isPhysical()) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
+ removeOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
+
+void MachineInstr::clearRegisterKills(Register Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!Reg.isPhysical())
+ RegInfo = nullptr;
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ Register OpReg = MO.getReg();
+ if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg)
+ MO.setIsKill(false);
+ }
+}
+
+bool MachineInstr::addRegisterDead(Register Reg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = Reg.isPhysical();
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(Reg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+
+ if (MOReg == Reg) {
+ MO.setIsDead();
+ Found = true;
+ } else if (hasAliases && MO.isDead() && MOReg.isPhysical()) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(Reg, MOReg))
+ return true;
+ if (RegInfo->isSubRegister(Reg, MOReg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
+ removeOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(Reg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::clearRegisterDeads(Register Reg) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
+ continue;
+ MO.setIsDead(false);
+ }
+}
+
+void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
+ continue;
+ MO.setIsUndef(IsUndef);
+ }
+}
+
+void MachineInstr::addRegisterDefined(Register Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (Reg.isPhysical()) {
+ MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (const MachineOperand &MO : operands()) {
+ if (MO.isReg() && MO.getReg() == Reg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(Reg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
+ for (MachineOperand &MO : operands()) {
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
+ // If there are no uses, including partial uses, the def is dead.
+ if (llvm::none_of(UsedRegs,
+ [&](MCRegister Use) { return TRI.regsOverlap(Use, Reg); }))
+ MO.setIsDead();
+ }
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (const Register &UsedReg : UsedRegs)
+ addRegisterDefined(UsedReg, &TRI);
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ // Build up a buffer of hash code components.
+ SmallVector<size_t, 16> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
+}
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ uint64_t LocCookie = 0;
+ const MDNode *LocMD = nullptr;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+ const MCInstrDesc &MCID, bool IsIndirect,
+ Register Reg, const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = BuildMI(MF, DL, MCID).addReg(Reg);
+ if (IsIndirect)
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U);
+ return MIB.addMetadata(Variable).addMetadata(Expr);
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+ const MCInstrDesc &MCID, bool IsIndirect,
+ ArrayRef<MachineOperand> DebugOps,
+ const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ if (MCID.Opcode == TargetOpcode::DBG_VALUE) {
+ assert(DebugOps.size() == 1 &&
+ "DBG_VALUE must contain exactly one debug operand");
+ MachineOperand DebugOp = DebugOps[0];
+ if (DebugOp.isReg())
+ return BuildMI(MF, DL, MCID, IsIndirect, DebugOp.getReg(), Variable,
+ Expr);
+
+ auto MIB = BuildMI(MF, DL, MCID).add(DebugOp);
+ if (IsIndirect)
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U);
+ return MIB.addMetadata(Variable).addMetadata(Expr);
+ }
+
+ auto MIB = BuildMI(MF, DL, MCID);
+ MIB.addMetadata(Variable).addMetadata(Expr);
+ for (const MachineOperand &DebugOp : DebugOps)
+ if (DebugOp.isReg())
+ MIB.addReg(DebugOp.getReg());
+ else
+ MIB.add(DebugOp);
+ return MIB;
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const MCInstrDesc &MCID,
+ bool IsIndirect, Register Reg,
+ const MDNode *Variable, const MDNode *Expr) {
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MF, MI);
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const MCInstrDesc &MCID,
+ bool IsIndirect,
+ ArrayRef<MachineOperand> DebugOps,
+ const MDNode *Variable, const MDNode *Expr) {
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI =
+ BuildMI(MF, DL, MCID, IsIndirect, DebugOps, Variable, Expr);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MF, *MI);
+}
+
+/// Compute the new DIExpression to use with a DBG_VALUE for a spill slot.
+/// This prepends DW_OP_deref when spilling an indirect DBG_VALUE.
+static const DIExpression *
+computeExprForSpill(const MachineInstr &MI,
+ SmallVectorImpl<const MachineOperand *> &SpilledOperands) {
+ assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ const DIExpression *Expr = MI.getDebugExpression();
+ if (MI.isIndirectDebugValue()) {
+ assert(MI.getDebugOffset().getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ } else if (MI.isDebugValueList()) {
+ // We will replace the spilled register with a frame index, so
+ // immediately deref all references to the spilled register.
+ std::array<uint64_t, 1> Ops{{dwarf::DW_OP_deref}};
+ for (const MachineOperand *Op : SpilledOperands) {
+ unsigned OpIdx = MI.getDebugOperandIndex(Op);
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, OpIdx);
+ }
+ }
+ return Expr;
+}
+static const DIExpression *computeExprForSpill(const MachineInstr &MI,
+ Register SpillReg) {
+ assert(MI.hasDebugOperandForReg(SpillReg) && "Spill Reg is not used in MI.");
+ SmallVector<const MachineOperand *> SpillOperands;
+ for (const MachineOperand &Op : MI.getDebugOperandsForReg(SpillReg))
+ SpillOperands.push_back(&Op);
+ return computeExprForSpill(MI, SpillOperands);
+}
+
+MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const MachineInstr &Orig,
+ int FrameIndex, Register SpillReg) {
+ assert(!Orig.isDebugRef() &&
+ "DBG_INSTR_REF should not reference a virtual register.");
+ const DIExpression *Expr = computeExprForSpill(Orig, SpillReg);
+ MachineInstrBuilder NewMI =
+ BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc());
+ // Non-Variadic Operands: Location, Offset, Variable, Expression
+ // Variadic Operands: Variable, Expression, Locations...
+ if (Orig.isNonListDebugValue())
+ NewMI.addFrameIndex(FrameIndex).addImm(0U);
+ NewMI.addMetadata(Orig.getDebugVariable()).addMetadata(Expr);
+ if (Orig.isDebugValueList()) {
+ for (const MachineOperand &Op : Orig.debug_operands())
+ if (Op.isReg() && Op.getReg() == SpillReg)
+ NewMI.addFrameIndex(FrameIndex);
+ else
+ NewMI.add(MachineOperand(Op));
+ }
+ return NewMI;
+}
+MachineInstr *llvm::buildDbgValueForSpill(
+ MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+ const MachineInstr &Orig, int FrameIndex,
+ SmallVectorImpl<const MachineOperand *> &SpilledOperands) {
+ const DIExpression *Expr = computeExprForSpill(Orig, SpilledOperands);
+ MachineInstrBuilder NewMI =
+ BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc());
+ // Non-Variadic Operands: Location, Offset, Variable, Expression
+ // Variadic Operands: Variable, Expression, Locations...
+ if (Orig.isNonListDebugValue())
+ NewMI.addFrameIndex(FrameIndex).addImm(0U);
+ NewMI.addMetadata(Orig.getDebugVariable()).addMetadata(Expr);
+ if (Orig.isDebugValueList()) {
+ for (const MachineOperand &Op : Orig.debug_operands())
+ if (is_contained(SpilledOperands, &Op))
+ NewMI.addFrameIndex(FrameIndex);
+ else
+ NewMI.add(MachineOperand(Op));
+ }
+ return NewMI;
+}
+
+void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex,
+ Register Reg) {
+ const DIExpression *Expr = computeExprForSpill(Orig, Reg);
+ if (Orig.isNonListDebugValue())
+ Orig.getDebugOffset().ChangeToImmediate(0U);
+ for (MachineOperand &Op : Orig.getDebugOperandsForReg(Reg))
+ Op.ChangeToFrameIndex(FrameIndex);
+ Orig.getDebugExpressionOp().setMetadata(Expr);
+}
+
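+/// Collect the DBG_VALUE instructions that immediately follow this instruction
+/// and refer to the register defined by its first operand, stopping at the
+/// first non-DBG_VALUE instruction.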
+void MachineInstr::collectDebugValues(
+ SmallVectorImpl<MachineInstr *> &DbgValues) {
+ MachineInstr &MI = *this;
+ if (!MI.getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI.getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->hasDebugOperandForReg(MI.getOperand(0).getReg()))
+ DbgValues.push_back(&*DI);
+ }
+}
+
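+/// Rewrite every DBG_VALUE that refers to the register defined by this
+/// instruction's first operand so that it uses \p Reg instead.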
+void MachineInstr::changeDebugValuesDefReg(Register Reg) {
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValues;
+
+ if (!getOperand(0).isReg())
+ return;
+
+ Register DefReg = getOperand(0).getReg();
+ auto *MRI = getRegInfo();
+ for (auto &MO : MRI->use_operands(DefReg)) {
+ auto *DI = MO.getParent();
+ if (!DI->isDebugValue())
+ continue;
+ if (DI->hasDebugOperandForReg(DefReg)) {
+ DbgValues.push_back(DI);
+ }
+ }
+
+ // Propagate Reg to debug value instructions.
+ for (auto *DBI : DbgValues)
+ for (MachineOperand &Op : DBI->getDebugOperandsForReg(DefReg))
+ Op.setReg(Reg);
+}
+
+using MMOList = SmallVector<const MachineMemOperand *, 2>;
+
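+/// Sum the sizes of the memory accesses in \p Accesses that target spill-slot
+/// stack objects.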
+static unsigned getSpillSlotSize(const MMOList &Accesses,
+ const MachineFrameInfo &MFI) {
+ unsigned Size = 0;
+ for (const auto *A : Accesses)
+ if (MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex()))
+ Size += A->getSize();
+ return Size;
+}
+
+std::optional<unsigned>
+MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
+ int FI;
+ if (TII->isStoreToStackSlotPostFE(*this, FI)) {
+ const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+ if (MFI.isSpillSlotObjectIndex(FI))
+ return (*memoperands_begin())->getSize();
+ }
+ return std::nullopt;
+}
+
+std::optional<unsigned>
+MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
+ MMOList Accesses;
+ if (TII->hasStoreToStackSlot(*this, Accesses))
+ return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+ return std::nullopt;
+}
+
+std::optional<unsigned>
+MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
+ int FI;
+ if (TII->isLoadFromStackSlotPostFE(*this, FI)) {
+ const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+ if (MFI.isSpillSlotObjectIndex(FI))
+ return (*memoperands_begin())->getSize();
+ }
+ return std::nullopt;
+}
+
+std::optional<unsigned>
+MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
+ MMOList Accesses;
+ if (TII->hasLoadFromStackSlot(*this, Accesses))
+ return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+ return std::nullopt;
+}
+
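+// Lazily assign a function-unique debug instruction number the first time one
+// is requested for this instruction.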
+unsigned MachineInstr::getDebugInstrNum() {
+ if (DebugInstrNum == 0)
+ DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum();
+ return DebugInstrNum;
+}
+
+unsigned MachineInstr::getDebugInstrNum(MachineFunction &MF) {
+ if (DebugInstrNum == 0)
+ DebugInstrNum = MF.getNewDebugInstrNum();
+ return DebugInstrNum;
+}
+
+std::tuple<LLT, LLT> MachineInstr::getFirst2LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT> MachineInstr::getFirst3LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT, LLT> MachineInstr::getFirst4LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()),
+ getRegInfo()->getType(getOperand(3).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT, LLT, LLT> MachineInstr::getFirst5LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()),
+ getRegInfo()->getType(getOperand(3).getReg()),
+ getRegInfo()->getType(getOperand(4).getReg()));
+}
+
+std::tuple<Register, LLT, Register, LLT>
+MachineInstr::getFirst2RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ return std::tuple(Reg0, getRegInfo()->getType(Reg0), Reg1,
+ getRegInfo()->getType(Reg1));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT>
+MachineInstr::getFirst3RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ return std::tuple(Reg0, getRegInfo()->getType(Reg0), Reg1,
+ getRegInfo()->getType(Reg1), Reg2,
+ getRegInfo()->getType(Reg2));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT>
+MachineInstr::getFirst4RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ Register Reg3 = getOperand(3).getReg();
+ return std::tuple(
+ Reg0, getRegInfo()->getType(Reg0), Reg1, getRegInfo()->getType(Reg1),
+ Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT, Register,
+ LLT>
+MachineInstr::getFirst5RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ Register Reg3 = getOperand(3).getReg();
+ Register Reg4 = getOperand(4).getReg();
+ return std::tuple(
+ Reg0, getRegInfo()->getType(Reg0), Reg1, getRegInfo()->getType(Reg1),
+ Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3),
+ Reg4, getRegInfo()->getType(Reg4));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 000000000000..b9db34f7be95
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,387 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include <utility>
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles(
+ std::function<bool(const MachineFunction &)> Ftor = nullptr)
+ : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ std::function<bool(const MachineFunction &)> PredicateFtor;
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ if (PredicateFtor && !PredicateFtor(MF))
+ return false;
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+ MIE = MBB.instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isBundledWithPred()) {
+ MII->unbundleFromPred();
+ for (MachineOperand &MO : MII->operands()) {
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *
+llvm::createUnpackMachineBundles(
+ std::function<bool(const MachineFunction &)> Ftor) {
+ return new UnpackMachineBundles(std::move(Ftor));
+}
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+/// Return the first found DebugLoc that has a DILocation, given a range of
+/// instructions. The search range is from FirstMI to LastMI (exclusive). If no
+/// DILocation is found, then an empty location is returned.
+static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ for (auto MII = FirstMI; MII != LastMI; ++MII)
+ if (MII->getDebugLoc())
+ return MII->getDebugLoc();
+ return DebugLoc();
+}
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, adds
+/// IsInternalRead markers to MachineOperands that are defined inside the
+/// bundle, and copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+ MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
+ Bundle.prepend(MIB);
+
+ SmallVector<Register, 32> LocalDefs;
+ SmallSet<Register, 32> LocalDefSet;
+ SmallSet<Register, 8> DeadDefSet;
+ SmallSet<Register, 16> KilledDefSet;
+ SmallVector<Register, 8> ExternUses;
+ SmallSet<Register, 8> ExternUseSet;
+ SmallSet<Register, 8> KilledUseSet;
+ SmallSet<Register, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
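+ // For each instruction, uses are classified first and its defs are only
+ // recorded afterwards, so a use is matched against defs from earlier
+ // instructions in the bundle.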
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ // Debug instructions have no effects to track.
+ if (MII->isDebugInstr())
+ continue;
+
+ for (MachineOperand &MO : MII->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg).second) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // The externally defined value is killed by this use.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg).second) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead() && Reg.isPhysical()) {
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
+ if (LocalDefSet.insert(SubReg).second)
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ Defs.clear();
+ }
+
+ SmallSet<Register, 32> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ Register Reg = LocalDefs[i];
+ if (Added.insert(Reg).second) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ Register Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+
+ // Set FrameSetup/FrameDestroy for the bundle. If any of the bundled
+ // instructions has the flag set, set it on the BUNDLE instruction as well.
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ if (MII->getFlag(MachineInstr::FrameSetup))
+ MIB.setMIFlag(MachineInstr::FrameSetup);
+ if (MII->getFlag(MachineInstr::FrameDestroy))
+ MIB.setMIFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = std::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, std::prev(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+VirtRegInfo llvm::AnalyzeVirtRegInBundle(
+ MachineInstr &MI, Register Reg,
+ SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) {
+ VirtRegInfo RI = {false, false, false};
+ for (MIBundleOperands O(MI); O.isValid(); ++O) {
+ MachineOperand &MO = *O;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), O.getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied &&
+ MO.getParent()->isRegTiedToDefOperand(O.getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
+
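+/// Return the lane masks of \p Reg that are read (first element) and written
+/// (second element) by the operands of the bundle starting at \p MI.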
+std::pair<LaneBitmask, LaneBitmask>
+llvm::AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
+
+ LaneBitmask UseMask, DefMask;
+
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 && MO.isUse() && !MO.isUndef())
+ UseMask |= MRI.getMaxLaneMaskForVReg(Reg);
+
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
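+ // A subregister def that is not marked 'undef' implicitly reads the lanes it
+ // does not define; a non-undef use reads the lanes of its subregister index.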
+ if (MO.isDef()) {
+ if (!MO.isUndef())
+ UseMask |= ~SubRegMask;
+ DefMask |= SubRegMask;
+ } else if (!MO.isUndef())
+ UseMask |= SubRegMask;
+ }
+
+ return {UseMask, DefMask};
+}
+
+PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
+ const TargetRegisterInfo *TRI) {
+ bool AllDefsDead = true;
+ PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
+
+ assert(Reg.isPhysical() && "analyzePhysReg not given a physical register!");
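+ // Walk every operand in the bundle and record how it reads, defines, or
+ // clobbers Reg or any register that overlaps it.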
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
+ PRI.Clobbered = true;
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+
+ Register MOReg = MO.getReg();
+ if (!MOReg || !MOReg.isPhysical())
+ continue;
+
+ if (!TRI->regsOverlap(MOReg, Reg))
+ continue;
+
+ bool Covered = TRI->isSuperRegisterEq(Reg, MOReg);
+ if (MO.readsReg()) {
+ PRI.Read = true;
+ if (Covered) {
+ PRI.FullyRead = true;
+ if (MO.isKill())
+ PRI.Killed = true;
+ }
+ } else if (MO.isDef()) {
+ PRI.Defined = true;
+ if (Covered)
+ PRI.FullyDefined = true;
+ if (!MO.isDead())
+ AllDefsDead = false;
+ }
+ }
+
+ if (AllDefsDead) {
+ if (PRI.FullyDefined || PRI.Clobbered)
+ PRI.DeadDef = true;
+ else if (PRI.Defined)
+ PRI.PartialDeadDef = true;
+ }
+
+ return PRI;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 000000000000..523e077fd9a2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1522 @@
+//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machinelicm"
+
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+ cl::desc("MachineLICM should avoid speculation"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+HoistCheapInsts("hoist-cheap-insts",
+ cl::desc("MachineLICM should hoist even cheap instructions"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+HoistConstStores("hoist-const-stores",
+ cl::desc("Hoist invariant stores"),
+ cl::init(true), cl::Hidden);
+// The default threshold of 100 (i.e. if target block is 100 times hotter)
+// is based on empirical data on a single target and is subject to tuning.
+static cl::opt<unsigned>
+BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
+ cl::desc("Do not hoist instructions if the target "
+ "block is N times hotter than the source."),
+ cl::init(100), cl::Hidden);
+
+enum class UseBFI { None, PGO, All };
+
+static cl::opt<UseBFI>
+DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
+ cl::desc("Disable hoisting instructions to"
+ " hotter blocks"),
+ cl::init(UseBFI::PGO), cl::Hidden,
+ cl::values(clEnumValN(UseBFI::None, "none",
+ "disable the feature"),
+ clEnumValN(UseBFI::PGO, "pgo",
+ "enable the feature when using profile data"),
+ clEnumValN(UseBFI::All, "all",
+ "enable the feature with or without profile data")));
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
+STATISTIC(NumStoreConst,
+ "Number of stores of const phys reg hoisted out of loops");
+STATISTIC(NumNotHoistedDueToHotness,
+ "Number of instructions not hoisted due to block frequency");
+
+namespace {
+
+ class MachineLICMBase : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetLoweringBase *TLI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ TargetSchedModel SchedModel;
+ bool PreRegAlloc = false;
+ bool HasProfileData = false;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA = nullptr; // Alias analysis info.
+ MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequency info
+ MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree for the current loop
+
+ // State that is updated as we process loops
+ bool Changed = false; // True if a loop is changed.
+ bool FirstInLoop = false; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop = nullptr; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop.
+
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock *, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return is_contained(ExitBlocks, MBB);
+ }
+
+ // Track 'estimated' register pressure.
+ SmallSet<Register, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register pressure set. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
+
+ enum {
+ SpeculateFalse = 0,
+ SpeculateTrue = 1,
+ SpeculateUnknown = 2
+ };
+
+ // If an MBB does not dominate all loop-exiting blocks, then it may not be
+ // safe to hoist loads from this block.
+ // Tri-state: 0 - false, 1 - true, 2 - unknown
+ unsigned SpeculationState = SpeculateUnknown;
+
+ public:
+ MachineLICMBase(char &PassID, bool PreRegAlloc)
+ : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void releaseMemory() override {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ CSEMap.clear();
+ }
+
+ private:
+ /// Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ void HoistRegionPostRA();
+
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+ void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
+ SmallVectorImpl<CandidateInfo> &Candidates);
+
+ void AddToLiveIns(MCRegister Reg);
+
+ bool IsLICMCandidate(MachineInstr &I);
+
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
+
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ Register Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
+ bool Cheap);
+
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
+ bool isTriviallyReMaterializable(const MachineInstr &MI) const;
+
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+ void ExitScopeIfDone(
+ MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
+ const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
+
+ void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
+
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
+ bool ConsiderSeen,
+ bool ConsiderUnseenAsDef);
+
+ void UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef = false);
+
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+ MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<MachineInstr *> &PrevMIs);
+
+ bool
+ EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
+
+ bool MayCSE(MachineInstr *MI);
+
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+ void InitCSEMap(MachineBasicBlock *BB);
+
+ bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock);
+ MachineBasicBlock *getCurPreheader();
+ };
+
+ class MachineLICM : public MachineLICMBase {
+ public:
+ static char ID;
+ MachineLICM() : MachineLICMBase(ID, false) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+ };
+
+ class EarlyMachineLICM : public MachineLICMBase {
+ public:
+ static char ID;
+ EarlyMachineLICM() : MachineLICMBase(ID, true) {
+ initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+ };
+
+} // end anonymous namespace
+
+char MachineLICM::ID;
+char EarlyMachineLICM::ID;
+
+char &llvm::MachineLICMID = MachineLICM::ID;
+char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
+
+INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
+ "Machine Loop Invariant Code Motion", false, false)
+
+INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
+ "Early Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
+ "Early Machine Loop Invariant Code Motion", false, false)
+
+/// Test if the given loop is the outer-most loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPredecessor())
+ return false;
+ // None of them did, so this is the outermost with a unique predecessor.
+ return true;
+}
+
+bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ Changed = FirstInLoop = false;
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ TLI = ST.getTargetLowering();
+ TRI = ST.getRegisterInfo();
+ MFI = &MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(&ST);
+
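+ // Before register allocation the function is still in SSA form, which is how
+ // the pre-RA and post-RA variants of the pass are told apart here.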
+ PreRegAlloc = MRI->isSSA();
+ HasProfileData = MF.getFunction().hasProfileData();
+
+ if (PreRegAlloc)
+ LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
+
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRPS = TRI->getNumRegPressureSets();
+ RegPressure.resize(NumRPS);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRPS);
+ for (unsigned i = 0, e = NumRPS; i != e; ++i)
+ RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
+ }
+
+ // Get our Loop information...
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = nullptr;
+ ExitBlocks.clear();
+
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
+ continue;
+ }
+
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
+ HoistOutOfLoop(N);
+ CSEMap.clear();
+ }
+ }
+
+ return Changed;
+}
+
+/// Return true if the instruction stores to the specified frame index.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ // Check mayStore before looking at the memory operands so that instructions
+ // with no memory operands (e.g. DBG_VALUEs) do not fall through to the
+ // conservative "stores to every slot" answer below.
+ if (!MI->mayStore())
+ return false;
+ // If we lost memory operands, conservatively assume that the instruction
+ // writes to all slots.
+ if (MI->memoperands_empty())
+ return true;
+ for (const MachineMemOperand *MemOp : MI->memoperands()) {
+ if (!MemOp->isStore() || !MemOp->getPseudoValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Examine the instruction as a potential LICM candidate. Also gather
+/// register def and frame object update information.
+void MachineLICMBase::ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVectorImpl<CandidateInfo> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(Reg.isPhysical() && "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegClobbers.set(*AI);
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+ // register, then this is not safe. Two defs are indicated by setting a
+ // PhysRegClobbers bit.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ }
+ // Need a second loop because MCRegAliasIterator can visit the same
+ // register twice.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+ PhysRegDefs.set(*AS);
+
+ if (PhysRegClobbers.test(Reg))
+ // The register defined by MI is also defined by another instruction in
+ // the loop, so MI cannot be a LICM candidate.
+ RuledOut = true;
+ }
+
+ // Only consider reloads for now, plus remats which do not have register
+ // operands. FIXME: Consider unfolding load-folding instructions.
+ if (Def && !RuledOut) {
+ int FI = std::numeric_limits<int>::min();
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// Walk the specified region of the CFG and hoist loop invariants out to the
+/// preheader.
+void MachineLICMBase::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ unsigned NumRegs = TRI->getNumRegs();
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isEHPad()) continue;
+
+ // Conservatively treat live-ins as external defs.
+ // FIXME: That means a reload that is reused in successor block(s) will not
+ // be LICM'ed.
+ for (const auto &LI : BB->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
+ PhysRegDefs.set(*AI);
+ }
+
+ // Funclet entry blocks will clobber all registers
+ if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
+ PhysRegClobbers.setBitsNotInMask(Mask);
+
+ SpeculationState = SpeculateUnknown;
+ for (MachineInstr &MI : *BB)
+ ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (const MachineOperand &MO : TI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ TermRegs.set(*AI);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+ // 3. Make sure the candidate's def does not clobber registers read by the
+ //    terminator, and that the def itself is not clobbered by the terminator.
+ for (CandidateInfo &Candidate : Candidates) {
+ if (Candidate.FI != std::numeric_limits<int>::min() &&
+ StoredFIs.count(Candidate.FI))
+ continue;
+
+ unsigned Def = Candidate.Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
+ bool Safe = true;
+ MachineInstr *MI = Candidate.MI;
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (!MO.getReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidate.Def);
+ }
+ }
+}
+
+/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
+/// sure it is not killed by any instructions in the loop.
+void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineInstr &MI : *BB) {
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (!MO.getReg())
+ continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// When an instruction that uses only loop-invariant operands is found to be
+/// safe to hoist, this function is called to do the dirty work.
+void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+
+ // Now move the instructions to the predecessor, inserting it before any
+ // terminator instructions.
+ LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
+ << " from " << printMBBReference(*MI->getParent()) << ": "
+ << *MI);
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+ // Since we are moving the instruction out of its basic block, we do not
+ // retain its debug location. Doing so would degrade the debugging
+ // experience and adversely affect the accuracy of profiling information.
+ assert(!MI->isDebugInstr() && "Should not hoist debug inst");
+ MI->setDebugLoc(DebugLoc());
+
+ // Add the register to the livein list of all the BBs in the current loop,
+ // since a loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
+/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
+/// may not be safe to hoist.
+bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+ if (SpeculationState != SpeculateUnknown)
+ return SpeculationState == SpeculateFalse;
+
+ if (BB != CurLoop->getHeader()) {
+ // Check loop exiting blocks.
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+ CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+ for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
+ if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
+ SpeculationState = SpeculateTrue;
+ return false;
+ }
+ }
+
+ SpeculationState = SpeculateFalse;
+ return true;
+}
+
+/// Check if \p MI is trivially rematerializable and has no virtual register
+/// uses. Even if an instruction is rematerializable, RA might not actually
+/// rematerialize it in this scenario; in that case we do not want to hoist
+/// such an instruction out of the loop in the belief that RA will sink it
+/// back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(
+ const MachineInstr &MI) const {
+ if (!TII->isTriviallyReMaterializable(MI))
+ return false;
+
+ for (const MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg().isVirtual())
+ return false;
+ }
+
+ return true;
+}
+
+void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
+
+void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
+ BackTrace.pop_back();
+}
+
+/// Destroy the scope for the MBB that corresponds to the given dominator tree
+/// node if it is a leaf or all of its children are done. Walk up the dominator
+/// tree to destroy ancestors which are now done.
+void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ const DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ for(;;) {
+ ExitScope(Node->getBlock());
+ // Now traverse upwards to pop ancestors whose children are all done.
+ MachineDomTreeNode *Parent = ParentMap.lookup(Node);
+ if (!Parent || --OpenChildren[Parent] != 0)
+ break;
+ Node = Parent;
+ }
+}
+
+/// Walk the specified loop in the CFG (defined by all blocks dominated by the
+/// specified header block, and that are in the current loop) in depth-first
+/// order w.r.t. the DominatorTree. This allows us to visit definitions before
+/// uses, allowing us to hoist a loop body in one pass without iteration.
+void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ while (!WorkList.empty()) {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isEHPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ unsigned NumChildren = Node->getNumChildren();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ if (NumChildren) {
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (MachineDomTreeNode *Child : reverse(Node->children())) {
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ }
+ }
+
+ if (Scopes.size() == 0)
+ return;
+
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+
+ // Now perform LICM.
+ for (MachineDomTreeNode *Node : Scopes) {
+ MachineBasicBlock *MBB = Node->getBlock();
+
+ EnterScope(MBB);
+
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!Hoist(&MI, Preheader))
+ UpdateRegPressure(&MI);
+ // If we have hoisted an instruction that may store, it can only be a
+ // constant store.
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
+}
+
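+/// Treat an operand as a kill either when it is explicitly marked as killed or
+/// when it is the only non-debug use of its register.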
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// Find all virtual register references that are live out of the preheader to
+/// initialize the starting "register pressure". Note this does not count
+/// live-through (live-in but not used) registers.
+void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (const MachineInstr &MI : *BB)
+ UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
+}
+
+/// Update estimate of register pressure after the specified instruction.
+void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef) {
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
+ for (const auto &RPIdAndCost : Cost) {
+ unsigned Class = RPIdAndCost.first;
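+ // A negative cost (the last use of a register was killed) can only lower the
+ // tracked pressure to zero, never below it.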
+ if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
+ RegPressure[Class] = 0;
+ else
+ RegPressure[Class] += RPIdAndCost.second;
+ }
+}
+
+/// Calculate the additional register pressure that the registers used in MI
+/// cause.
+///
+/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
+/// figure out which usages are live-ins.
+/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
+DenseMap<unsigned, int>
+MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
+ bool ConsiderUnseenAsDef) {
+ DenseMap<unsigned, int> Cost;
+ if (MI->isImplicitDef())
+ return Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+
+ // FIXME: It seems bad to use RegSeen only for some of these calculations.
+ bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+ RegClassWeight W = TRI->getRegClassWeight(RC);
+ int RCCost = 0;
+ if (MO.isDef())
+ RCCost = W.RegWeight;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill && ConsiderUnseenAsDef)
+ // Haven't seen this, it must be a livein.
+ RCCost = W.RegWeight;
+ else if (!isNew && isKill)
+ RCCost = -W.RegWeight;
+ }
+ if (RCCost == 0)
+ continue;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ for (; *PS != -1; ++PS) {
+ if (!Cost.contains(*PS))
+ Cost[*PS] = RCCost;
+ else
+ Cost[*PS] += RCCost;
+ }
+ }
+ return Cost;
+}
+
+/// Return true if this machine instruction loads from the global offset table
+/// or a constant pool.
+static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert(MI.mayLoad() && "Expected MI that loads!");
+
+ // If we lost the memory operands, conservatively assume that the instruction
+ // reads from everything.
+ if (MI.memoperands_empty())
+ return true;
+
+ for (MachineMemOperand *MemOp : MI.memoperands())
+ if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
+ if (PSV->isGOT() || PSV->isConstantPool())
+ return true;
+
+ return false;
+}
+
+// This function iterates through all the operands of the input store MI and
+// checks that each register operand satisfies isCallerPreservedPhysReg.
+// This means that the value being stored and the address at which it is being
+// stored are constant throughout the body of the function (not including the
+// prologue and epilogue). When called with an MI that isn't a store, it
+// returns false. A future improvement would be to check that the store
+// registers are constant throughout the loop rather than the whole function.
+static bool isInvariantStore(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+
+ bool FoundCallerPresReg = false;
+ if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
+ (MI.getNumOperands() == 0))
+ return false;
+
+ // Check that all register operands are caller-preserved physical registers.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ // If operand is a virtual register, check if it comes from a copy of a
+ // physical register.
+ if (Reg.isVirtual())
+ Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
+ if (Reg.isVirtual())
+ return false;
+ if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
+ return false;
+ else
+ FoundCallerPresReg = true;
+ } else if (!MO.isImm()) {
+ return false;
+ }
+ }
+ return FoundCallerPresReg;
+}
+
+// Return true if the input MI is a copy instruction that feeds an invariant
+// store instruction. This means that the source of the copy has to satisfy
+// isCallerPreservedPhysReg and at least one of its users has to satisfy
+// isInvariantStore.
+static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+
+ // FIXME: If targets would like to look through instructions that aren't
+ // pure copies, this can be updated to a query.
+ if (!MI.isCopy())
+ return false;
+
+ const MachineFunction *MF = MI.getMF();
+ // Check that we are copying a constant physical register.
+ Register CopySrcReg = MI.getOperand(1).getReg();
+ if (CopySrcReg.isVirtual())
+ return false;
+
+ if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
+ return false;
+
+ Register CopyDstReg = MI.getOperand(0).getReg();
+ // Check if any of the uses of the copy are invariant stores.
+ assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
+
+ for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
+ if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
+ return true;
+ }
+ return false;
+}
+
+/// Returns true if the instruction may be a suitable candidate for LICM.
+/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
+ !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
+ LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
+ return false;
+ }
+
+ // If it is a load then check if it is guaranteed to execute by making sure
+ // that it dominates all exiting blocks. If it doesn't, then there is a path
+ // out of the loop which does not execute this load, so we can't hoist it.
+ // Loads from constant memory are safe to speculate, for example indexed load
+ // from a jump table.
+ // Stores and side effects are already checked by isSafeToMove.
+ if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent())) {
+ LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
+ return false;
+ }
+
+ // The convergent attribute is used on operations that involve inter-thread
+ // communication and whose results are implicitly affected by the enclosing
+ // control flow. It is not safe to hoist or sink such operations across
+ // control flow.
+ if (I.isConvergent())
+ return false;
+
+ if (!TII->shouldHoist(I, CurLoop))
+ return false;
+
+ return true;
+}
+
+/// Returns true if the instruction is loop invariant.
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I)) {
+ LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
+ return false;
+ }
+ return CurLoop->isLoopInvariant(I);
+}
+
+/// Return true if the specified instruction is used by a phi node and hoisting
+/// it could cause a copy to be inserted.
+bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
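+ // Worklist search: follow in-loop copies of MI's defs and look for PHI users
+ // either inside the loop or in one of its exit blocks.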
+ do {
+ MI = Work.pop_back_val();
+ for (const MachineOperand &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+ // A PHI may cause a copy to be inserted.
+ if (UseMI.isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(&UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI.getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI.isCopy() && CurLoop->contains(&UseMI))
+ Work.push_back(&UseMI);
+ }
+ }
+ } while (!Work.empty());
+ return false;
+}
+
+/// Compute the operand latency between a def of 'Reg' and a use in the current
+/// loop; return true if the target considers it high.
+bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ Register Reg) const {
+ if (MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+ if (UseMI.isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI.getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// Return true if the instruction is marked "cheap" or the operand latency
+/// between its def and a use is one or less.
+bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
+ if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
+ return true;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ Register Reg = DefMO.getReg();
+ if (Reg.isPhysical())
+ continue;
+
+ if (!TII->hasLowDefLatency(SchedModel, MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// Visit BBs from the loop header to the current BB and check whether hoisting
+/// an instruction with the given register-pressure cost can cause high
+/// register pressure.
+bool
+MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
+ bool CheapInstr) {
+ for (const auto &RPIdAndCost : Cost) {
+ if (RPIdAndCost.second <= 0)
+ continue;
+
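+ // Class here is a register pressure set id; Limit is the pressure limit
+ // recorded for that set when the pass was initialized.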
+ unsigned Class = RPIdAndCost.first;
+ int Limit = RegLimit[Class];
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr && !HoistCheapInsts)
+ return true;
+
+ for (const auto &RP : BackTrace)
+ if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
+ return true;
+ }
+
+ return false;
+}
+
+/// Traverse the back trace from header to the current block and update their
+/// register pressures to reflect the effect of hoisting MI from the current
+/// block to the preheader.
+void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
+
+ // Update register pressure of blocks from loop header to current block.
+ for (auto &RP : BackTrace)
+ for (const auto &RPIdAndCost : Cost)
+ RP[RPIdAndCost.first] += RPIdAndCost.second;
+}
+
+/// Return true if it is potentially profitable to hoist the given loop
+/// invariant.
+bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return true;
+
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
+ return true;
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Rematerializable instructions should always be hoisted, provided that the
+ // register allocator can just pull them down again when needed.
+ if (isTriviallyReMaterializable(MI))
+ return true;
+
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
+ LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
+ }
+ }
+
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situations, we can be more aggressive about
+ // hoisting. Also, favor hoisting long-latency instructions even in
+ // moderately high register pressure situations.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
+
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
+
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Do not "speculate" in high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
+ }
+
+ // In a high register pressure situation, only hoist if the instruction is
+ // going to be rematerialized.
+ if (!isTriviallyReMaterializable(MI) &&
+ !MI.isDereferenceableInvariantLoad()) {
+ LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
+ }
+
+ return true;
+}
+
+/// Unfold a load from the given MachineInstr if the load itself could be
+/// hoisted. Return the unfolded and hoistable load, or null if the load
+/// couldn't be unfolded or if it wouldn't be hoistable.
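+/// For example (illustrative, target-specific): on X86 an ADD32rm whose memory
+/// operand is loop invariant may be unfolded into a MOV32rm load, which can be
+/// hoisted, plus an ADD32rr that remains in the loop.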
+MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->canFoldAsLoad())
+ return nullptr;
+
+ // If not, we may be able to unfold a load and hoist that.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->isDereferenceableInvariantLoad())
+ return nullptr;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return nullptr;
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ MachineFunction &MF = *MI->getMF();
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ Register Reg = MRI->createVirtualRegister(RC);
+
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return nullptr;
+ }
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
+ // Otherwise we successfully unfolded a load that we can hoist.
+
+ // Update the call site info.
+ if (MI->shouldUpdateCallSiteInfo())
+ MF.eraseCallSiteInfo(MI);
+
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
+/// Initialize the CSE map with instructions that are in the current loop
+/// preheader that may become duplicates of instructions that are hoisted
+/// out of the loop.
+void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineInstr &MI : *BB)
+ CSEMap[MI.getOpcode()].push_back(&MI);
+}
+
+/// Find an instruction among PrevMIs that is a duplicate of MI.
+/// Return that instruction if it is found.
+MachineInstr *
+MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
+ std::vector<MachineInstr *> &PrevMIs) {
+ for (MachineInstr *PrevMI : PrevMIs)
+ if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
+ return PrevMI;
+
+ return nullptr;
+}
+
+/// Given a LICM'ed instruction, look for an instruction in the preheader that
+/// computes the same value. If one is found, replace all uses of the
+/// instruction's definitions with the corresponding definitions of the
+/// existing instruction rather than hoisting the instruction to the
+/// preheader.
+bool MachineLICMBase::EliminateCSE(
+ MachineInstr *MI,
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
+ SmallVector<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
+ Defs.push_back(i);
+ }
+
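+ // Try to constrain each register defined by Dup to the register class of
+ // the corresponding def of MI; if any constraint fails, restore the
+ // original classes and give up on CSE.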
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ Register Reg = MI->getOperand(Idx).getReg();
+ Register DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+ // Restore the old register classes if there is more than one def.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
+ }
+ }
+
+ for (unsigned Idx : Defs) {
+ Register Reg = MI->getOperand(Idx).getReg();
+ Register DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ // Clear Dup's dead flag, if any, since its def is now reused for Reg.
+ if (!MRI->use_nodbg_empty(DupReg))
+ Dup->getOperand(Idx).setIsDead(false);
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// Return true if the given instruction will be CSE'd if it's hoisted out of
+/// the loop.
+bool MachineLICMBase::MayCSE(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ CSEMap.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ return LookForDuplicate(MI, CI->second) != nullptr;
+}
+
+/// When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+/// It returns true if the instruction is hoisted.
+bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ MachineBasicBlock *SrcBlock = MI->getParent();
+
+ // Do not hoist the instruction if the target (preheader) block is hotter
+ // than the source block.
+ if ((DisableHoistingToHotterBlocks == UseBFI::All ||
+ (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
+ isTgtHotterThanSrc(SrcBlock, Preheader)) {
+ ++NumNotHoistedDueToHotness;
+ return false;
+ }
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return false;
+ }
+
+ // If we are hoisting an instruction that may store, it can only be a
+ // constant store.
+ if (MI->mayStore())
+ NumStoreConst++;
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ LLVM_DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from " << printMBBReference(*MI->getParent());
+ if (Preheader->getBasicBlock())
+ dbgs() << " to " << printMBBReference(*Preheader);
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
+
+ // Look for opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ Preheader->splice(Preheader->getFirstTerminator(), MI->getParent(), MI);
+
+ // Since we are moving the instruction out of its basic block, we do not
+ // retain its debug location. Doing so would degrade the debugging
+ // experience and adversely affect the accuracy of profiling information.
+ assert(!MI->isDebugInstr() && "Should not hoist debug inst");
+ MI->setDebugLoc(DebugLoc());
+
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (MachineOperand &MO : MI->all_defs())
+ if (!MO.isDead())
+ MRI->clearKillFlags(MO.getReg());
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else
+ CSEMap[Opcode].push_back(MI);
+ }
+
+ ++NumHoisted;
+ Changed = true;
+
+ return true;
+}
+
+/// Get the preheader for the current loop, splitting a critical edge if needed.
+MachineBasicBlock *MachineLICMBase::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return nullptr;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return nullptr;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return nullptr;
+ }
+ }
+ }
+ return CurPreheader;
+}
+
+/// Return true if the target basic block is at least
+/// "BlockFrequencyRatioThreshold" times hotter than the source basic block.
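+/// For example, if the threshold is 100, a preheader whose block frequency is
+/// more than 100x that of the instruction's current block is considered too
+/// hot to hoist into.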
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock) {
+ // Get the source and target basic block frequencies from MBFI.
+ uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
+ uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
+
+ // Disable the hoisting if source block frequency is zero
+ if (!SrcBF)
+ return true;
+
+ double Ratio = (double)DstBF / SrcBF;
+
+ // Compare the block frequency ratio with the threshold
+ return Ratio > BlockFrequencyRatioThreshold;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
new file mode 100644
index 000000000000..c44b968b317d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -0,0 +1,249 @@
+//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass removes any identical and redundant immediate or address
+// loads to the same register. The immediate loads removed are typically the
+// result of rematerialization, while the redundant address loads are frame
+// addressing anchor points created during frame index elimination.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-latecleanup"
+
+STATISTIC(NumRemoved, "Number of redundant instructions removed.");
+
+namespace {
+
+class MachineLateInstrsCleanup : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ // Data structures to map regs to their definitions and kills per MBB.
+ struct Reg2MIMap : public SmallDenseMap<Register, MachineInstr *> {
+ bool hasIdentical(Register Reg, MachineInstr *ArgMI) {
+ MachineInstr *MI = lookup(Reg);
+ return MI && MI->isIdenticalTo(*ArgMI);
+ }
+ };
+
+ std::vector<Reg2MIMap> RegDefs;
+ std::vector<Reg2MIMap> RegKills;
+
+ // Walk through the instructions in MBB and remove any redundant
+ // instructions.
+ bool processBlock(MachineBasicBlock *MBB);
+
+ void removeRedundantDef(MachineInstr *MI);
+ void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ MachineLateInstrsCleanup() : MachineFunctionPass(ID) {
+ initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
+} // end anonymous namespace
+
+char MachineLateInstrsCleanup::ID = 0;
+
+char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID;
+
+INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE,
+ "Machine Late Instructions Cleanup Pass", false, false)
+
+bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ RegDefs.clear();
+ RegDefs.resize(MF.getNumBlockIDs());
+ RegKills.clear();
+ RegKills.resize(MF.getNumBlockIDs());
+
+ // Visit all MBBs in an order that maximises the reuse from predecessors.
+ bool Changed = false;
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ for (MachineBasicBlock *MBB : RPOT)
+ Changed |= processBlock(MBB);
+
+ return Changed;
+}
+
+// Clear any previous kill flag on Reg found before I in MBB. Clear the kill
+// recorded for this block if there is one; otherwise, if Reg has no local
+// def, mark it live-in and continue the search in any not-yet-visited
+// predecessors.
+void MachineLateInstrsCleanup::
+clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds) {
+ VisitedPreds.set(MBB->getNumber());
+
+ // Kill flag in MBB
+ if (MachineInstr *KillMI = RegKills[MBB->getNumber()].lookup(Reg)) {
+ KillMI->clearRegisterKills(Reg, TRI);
+ return;
+ }
+
+ // Def in MBB (missing kill flag)
+ if (MachineInstr *DefMI = RegDefs[MBB->getNumber()].lookup(Reg))
+ if (DefMI->getParent() == MBB)
+ return;
+
+ // If an earlier def is not in MBB, continue in predecessors.
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ assert(!MBB->pred_empty() && "Predecessor def not found!");
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ if (!VisitedPreds.test(Pred->getNumber()))
+ clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds);
+}
+
+void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
+ Register Reg = MI->getOperand(0).getReg();
+ BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
+ clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds);
+ MI->eraseFromParent();
+ ++NumRemoved;
+}
+
+// Return true if MI is a potential candidate for reuse/removal and, if so,
+// also return the register it defines in DefedReg. A candidate is a simple
+// instruction that does not touch memory, has only one register definition
+// and the only reg it may use is FrameReg. Typically this is an immediate
+// load or a load-address instruction.
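+// Illustrative candidates (pseudo-MIR; opcode names are placeholders):
+//   $r1 = LOAD_IMMEDIATE 8              ; rematerialized immediate
+//   $r1 = LOAD_ADDRESS $framereg, 4096  ; frame-addressing anchor point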
+static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
+ Register FrameReg) {
+ DefedReg = MCRegister::NoRegister;
+ bool SawStore = true;
+ if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() ||
+ MI->isInlineAsm())
+ return false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ if (MO.isDef()) {
+ if (i == 0 && !MO.isImplicit() && !MO.isDead())
+ DefedReg = MO.getReg();
+ else
+ return false;
+ } else if (MO.getReg() && MO.getReg() != FrameReg)
+ return false;
+ } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
+ MO.isGlobal() || MO.isSymbol()))
+ return false;
+ }
+ return DefedReg.isValid();
+}
+
+bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ Reg2MIMap &MBBDefs = RegDefs[MBB->getNumber()];
+ Reg2MIMap &MBBKills = RegKills[MBB->getNumber()];
+
+ // Find reusable definitions in the predecessor(s).
+ if (!MBB->pred_empty() && !MBB->isEHPad() &&
+ !MBB->isInlineAsmBrIndirectTarget()) {
+ MachineBasicBlock *FirstPred = *MBB->pred_begin();
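+ // A definition recorded for the first predecessor is reusable in MBB only
+ // if every other predecessor has recorded an identical definition of the
+ // same register.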
+ for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
+ if (llvm::all_of(
+ drop_begin(MBB->predecessors()),
+ [&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
+ return RegDefs[Pred->getNumber()].hasIdentical(Reg, DefMI);
+ })) {
+ MBBDefs[Reg] = DefMI;
+ LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
+ << printMBBReference(*MBB) << ": " << *DefMI;);
+ }
+ }
+
+ // Process MBB.
+ MachineFunction *MF = MBB->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ Register FrameReg = TRI->getFrameRegister(*MF);
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ // If FrameReg is modified, no previous load-address instructions (using
+ // it) are valid.
+ if (MI.modifiesRegister(FrameReg, TRI)) {
+ MBBDefs.clear();
+ MBBKills.clear();
+ continue;
+ }
+
+ Register DefedReg;
+ bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
+
+ // Check for an earlier identical and reusable instruction.
+ if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
+ LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
+ << printMBBReference(*MBB) << ": " << MI;);
+ removeRedundantDef(&MI);
+ Changed = true;
+ continue;
+ }
+
+ // Clear any entries in map that MI clobbers.
+ for (auto DefI : llvm::make_early_inc_range(MBBDefs)) {
+ Register Reg = DefI.first;
+ if (MI.modifiesRegister(Reg, TRI)) {
+ MBBDefs.erase(Reg);
+ MBBKills.erase(Reg);
+ } else if (MI.findRegisterUseOperandIdx(Reg, true /*isKill*/, TRI) != -1)
+ // Keep track of register kills.
+ MBBKills[Reg] = &MI;
+ }
+
+ // Record this MI for potential later reuse.
+ if (IsCandidate) {
+ LLVM_DEBUG(dbgs() << "Found interesting instruction in "
+ << printMBBReference(*MBB) << ": " << MI;);
+ MBBDefs[DefedReg] = &MI;
+ assert(!MBBKills.count(DefedReg) && "Should already have been removed.");
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 000000000000..37a0ff3d71c8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,214 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/GenericLoopInfoImpl.h"
+
+using namespace llvm;
+
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
+
+char MachineLoopInfo::ID = 0;
+MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) {
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+}
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ calculate(getAnalysis<MachineDominatorTree>());
+ return false;
+}
+
+void MachineLoopInfo::calculate(MachineDominatorTree &MDT) {
+ releaseMemory();
+ LI.analyze(MDT.getBase());
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
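+// Return the "top" block of the loop: starting from the header, walk
+// backwards over layout-adjacent blocks for as long as they are contained in
+// the loop.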
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB->getIterator() != Begin) {
+ MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator());
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB->getIterator() == Begin)
+ break;
+ PriorMBB = &*std::prev(TopMBB->getIterator());
+ }
+ }
+ return TopMBB;
+}
+
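+// Return the "bottom" block of the loop: starting from the header, walk
+// forwards over layout-adjacent blocks for as long as they are contained in
+// the loop.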
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB->getIterator() != std::prev(End)) {
+ MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator());
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+ if (BotMBB == &*std::prev(End))
+ break;
+ NextMBB = &*std::next(BotMBB->getIterator());
+ }
+ }
+ return BotMBB;
+}
+
+MachineBasicBlock *MachineLoop::findLoopControlBlock() {
+ if (MachineBasicBlock *Latch = getLoopLatch()) {
+ if (isLoopExiting(Latch))
+ return Latch;
+ else
+ return getExitingBlock();
+ }
+ return nullptr;
+}
+
+DebugLoc MachineLoop::getStartLoc() const {
+ // Try the pre-header first.
+ if (MachineBasicBlock *PHeadMBB = getLoopPreheader())
+ if (const BasicBlock *PHeadBB = PHeadMBB->getBasicBlock())
+ if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
+ return DL;
+
+ // If we have no pre-header or there are no instructions with debug
+ // info in it, try the header.
+ if (MachineBasicBlock *HeadMBB = getHeader())
+ if (const BasicBlock *HeadBB = HeadMBB->getBasicBlock())
+ return HeadBB->getTerminator()->getDebugLoc();
+
+ return DebugLoc();
+}
+
+MachineBasicBlock *
+MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
+ bool FindMultiLoopPreheader) const {
+ if (MachineBasicBlock *PB = L->getLoopPreheader())
+ return PB;
+
+ if (!SpeculativePreheader)
+ return nullptr;
+
+ MachineBasicBlock *HB = L->getHeader(), *LB = L->getLoopLatch();
+ if (HB->pred_size() != 2 || HB->hasAddressTaken())
+ return nullptr;
+ // Find the predecessor of the header that is not the latch block.
+ MachineBasicBlock *Preheader = nullptr;
+ for (MachineBasicBlock *P : HB->predecessors()) {
+ if (P == LB)
+ continue;
+ // Sanity check: there can only be one non-latch predecessor.
+ if (Preheader)
+ return nullptr;
+ Preheader = P;
+ }
+
+ // Check if the preheader candidate is a successor of any other loop's
+ // header. We want to avoid having two loop setups in the same block.
+ if (!FindMultiLoopPreheader) {
+ for (MachineBasicBlock *S : Preheader->successors()) {
+ if (S == HB)
+ continue;
+ MachineLoop *T = getLoopFor(S);
+ if (T && T->getHeader() == S)
+ return nullptr;
+ }
+ }
+ return Preheader;
+}
+
+bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
+ MachineFunction *MF = I.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
+
+ // The instruction is loop invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // An instruction that uses or defines a physical register can't e.g. be
+ // hoisted, so mark this as not invariant.
+ if (Reg.isPhysical()) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead can't be moved.
+ return false;
+ } else if (getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineLoop::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
new file mode 100644
index 000000000000..0e8335d4974d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -0,0 +1,134 @@
+//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+// MI's parent and BB are clones of each other. Find the equivalent copy of MI
+// in BB.
+MachineInstr &findEquivalentInstruction(MachineInstr &MI,
+ MachineBasicBlock *BB) {
+ MachineBasicBlock *PB = MI.getParent();
+ unsigned Offset = std::distance(PB->instr_begin(), MachineBasicBlock::instr_iterator(MI));
+ return *std::next(BB->instr_begin(), Offset);
+}
+} // namespace
+
+MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
+ MachineBasicBlock *Loop,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
+ MachineFunction &MF = *Loop->getParent();
+ MachineBasicBlock *Preheader = *Loop->pred_begin();
+ if (Preheader == Loop)
+ Preheader = *std::next(Loop->pred_begin());
+ MachineBasicBlock *Exit = *Loop->succ_begin();
+ if (Exit == Loop)
+ Exit = *std::next(Loop->succ_begin());
+
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock());
+ if (Direction == LPD_Front)
+ MF.insert(Loop->getIterator(), NewBB);
+ else
+ MF.insert(std::next(Loop->getIterator()), NewBB);
+
+ DenseMap<Register, Register> Remaps;
+ auto InsertPt = NewBB->end();
+ for (MachineInstr &MI : *Loop) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
+ NewBB->insert(InsertPt, NewMI);
+ for (MachineOperand &MO : NewMI->defs()) {
+ Register OrigR = MO.getReg();
+ if (OrigR.isPhysical())
+ continue;
+ Register &R = Remaps[OrigR];
+ R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+ MO.setReg(R);
+
+ if (Direction == LPD_Back) {
+ // Replace all uses outside the original loop with the new register.
+ // FIXME: is the use_iterator stable enough to mutate register uses
+ // while iterating?
+ SmallVector<MachineOperand *, 4> Uses;
+ for (auto &Use : MRI.use_operands(OrigR))
+ if (Use.getParent()->getParent() != Loop)
+ Uses.push_back(&Use);
+ for (auto *Use : Uses) {
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ assert(ConstrainRegClass &&
+ "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
+ Use->setReg(R);
+ }
+ }
+ }
+ }
+
+ for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I)
+ for (MachineOperand &MO : I->uses())
+ if (MO.isReg() && Remaps.count(MO.getReg()))
+ MO.setReg(Remaps[MO.getReg()]);
+
+ for (auto I = NewBB->begin(); I->isPHI(); ++I) {
+ MachineInstr &MI = *I;
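+ // A PHI in a single-block loop has exactly two incoming (value, block)
+ // operand pairs, in operands 1-2 and 3-4. Assume the preheader value is in
+ // operands 1-2 and the loop-carried value in 3-4; swap the indices if
+ // operand 2's block is not the preheader.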
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (MI.getOperand(2).getMBB() != Preheader)
+ std::swap(LoopRegIdx, InitRegIdx);
+ MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop);
+ assert(OrigPhi.isPHI());
+ if (Direction == LPD_Front) {
+ // When peeling front, we are only left with the initial value from the
+ // preheader.
+ Register R = MI.getOperand(LoopRegIdx).getReg();
+ if (Remaps.count(R))
+ R = Remaps[R];
+ OrigPhi.getOperand(InitRegIdx).setReg(R);
+ MI.removeOperand(LoopRegIdx + 1);
+ MI.removeOperand(LoopRegIdx + 0);
+ } else {
+ // When peeling back, the initial value is the loop-carried value from
+ // the original loop.
+ Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg();
+ MI.getOperand(LoopRegIdx).setReg(LoopReg);
+ MI.removeOperand(InitRegIdx + 1);
+ MI.removeOperand(InitRegIdx + 0);
+ }
+ }
+
+ DebugLoc DL;
+ if (Direction == LPD_Front) {
+ Preheader->ReplaceUsesOfBlockWith(Loop, NewBB);
+ NewBB->addSuccessor(Loop);
+ Loop->replacePhiUsesWith(Preheader, NewBB);
+ Preheader->updateTerminator(Loop);
+ TII->removeBranch(*NewBB);
+ TII->insertBranch(*NewBB, Loop, nullptr, {}, DL);
+ } else {
+ Loop->replaceSuccessor(Exit, NewBB);
+ Exit->replacePhiUsesWith(Loop, NewBB);
+ NewBB->addSuccessor(Exit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond);
+ (void)CanAnalyzeBr;
+ assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+ TII->removeBranch(*Loop);
+ TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB,
+ FBB == Exit ? NewBB : FBB, Cond, DL);
+ if (TII->removeBranch(*NewBB) > 0)
+ TII->insertBranch(*NewBB, Exit, nullptr, {}, DL);
+ }
+
+ return NewBB;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 000000000000..921feb253d64
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,247 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static cl::opt<bool>
+ DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
+
+void MachineModuleInfo::initialize() {
+ ObjFileMMI = nullptr;
+ CurCallSite = 0;
+ NextFnNum = 0;
+ UsesMSVCFloatingPoint = false;
+ DbgInfoAvailable = false;
+}
+
+void MachineModuleInfo::finalize() {
+ Context.reset();
+ // We don't clear the ExternalContext.
+
+ delete ObjFileMMI;
+ ObjFileMMI = nullptr;
+}
+
+MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
+ : TM(std::move(MMI.TM)),
+ Context(TM.getTargetTriple(), TM.getMCAsmInfo(), TM.getMCRegisterInfo(),
+ TM.getMCSubtargetInfo(), nullptr, &TM.Options.MCOptions, false),
+ MachineFunctions(std::move(MMI.MachineFunctions)) {
+ Context.setObjectFileInfo(TM.getObjFileLowering());
+ ObjFileMMI = MMI.ObjFileMMI;
+ CurCallSite = MMI.CurCallSite;
+ ExternalContext = MMI.ExternalContext;
+ TheModule = MMI.TheModule;
+}
+
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
+ : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
+ TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
+ nullptr, &TM->Options.MCOptions, false) {
+ Context.setObjectFileInfo(TM->getObjFileLowering());
+ initialize();
+}
+
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
+ MCContext *ExtContext)
+ : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
+ TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
+ nullptr, &TM->Options.MCOptions, false),
+ ExternalContext(ExtContext) {
+ Context.setObjectFileInfo(TM->getObjFileLowering());
+ initialize();
+}
+
+MachineModuleInfo::~MachineModuleInfo() { finalize(); }
+
+MachineFunction *
+MachineModuleInfo::getMachineFunction(const Function &F) const {
+ auto I = MachineFunctions.find(&F);
+ return I != MachineFunctions.end() ? I->second.get() : nullptr;
+}
+
+MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) {
+ // Shortcut for the common case where a sequence of MachineFunctionPasses
+ // all query for the same Function.
+ if (LastRequest == &F)
+ return *LastResult;
+
+ auto I = MachineFunctions.insert(
+ std::make_pair(&F, std::unique_ptr<MachineFunction>()));
+ MachineFunction *MF;
+ if (I.second) {
+ // No pre-existing machine function, create a new one.
+ const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
+ MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
+ MF->initTargetMachineFunctionInfo(STI);
+
+ // MRI callback for target specific initializations.
+ TM.registerMachineRegisterInfoCallback(*MF);
+
+ // Update the map entry.
+ I.first->second.reset(MF);
+ } else {
+ MF = I.first->second.get();
+ }
+
+ LastRequest = &F;
+ LastResult = MF;
+ return *MF;
+}
+
+void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
+ MachineFunctions.erase(&F);
+ LastRequest = nullptr;
+ LastResult = nullptr;
+}
+
+void MachineModuleInfo::insertFunction(const Function &F,
+ std::unique_ptr<MachineFunction> &&MF) {
+ auto I = MachineFunctions.insert(std::make_pair(&F, std::move(MF)));
+ assert(I.second && "machine function already mapped");
+ (void)I;
+}
+
+namespace {
+
+/// This pass frees the MachineFunction object associated with a Function.
+class FreeMachineFunction : public FunctionPass {
+public:
+ static char ID;
+
+ FreeMachineFunction() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ MMI.deleteMachineFunctionFor(F);
+ return true;
+ }
+
+ StringRef getPassName() const override {
+ return "Free MachineFunction";
+ }
+};
+
+} // end anonymous namespace
+
+char FreeMachineFunction::ID;
+
+FunctionPass *llvm::createFreeMachineFunctionPass() {
+ return new FreeMachineFunction();
+}
+
+MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
+ const LLVMTargetMachine *TM)
+ : ImmutablePass(ID), MMI(TM) {
+ initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
+ const LLVMTargetMachine *TM, MCContext *ExtContext)
+ : ImmutablePass(ID), MMI(TM, ExtContext) {
+ initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+// Handle the pass registration stuff necessary to use this pass.
+INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfoWrapperPass::ID = 0;
+
+static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
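+ // LocInfos holds one metadata node per inline-asm source buffer; the node's
+ // operands are location cookies indexed by asm line.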
+ // Look up a LocInfo for the buffer this diagnostic is coming from.
+ unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc());
+ const MDNode *LocInfo = nullptr;
+ if (BufNum > 0 && BufNum <= LocInfos.size())
+ LocInfo = LocInfos[BufNum - 1];
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (LocInfo) {
+ unsigned ErrorLine = SMD.getLineNo() - 1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ return LocCookie;
+}
+
+bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
+ MMI.initialize();
+ MMI.TheModule = &M;
+ // FIXME: Do this for new pass manager.
+ LLVMContext &Ctx = M.getContext();
+ MMI.getContext().setDiagnosticHandler(
+ [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm,
+ const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
+ unsigned LocCookie = 0;
+ if (IsInlineAsm)
+ LocCookie = getLocCookie(SMD, SrcMgr, LocInfos);
+ Ctx.diagnose(
+ DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
+ });
+ MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
+ !M.debug_compile_units().empty();
+ return false;
+}
+
+bool MachineModuleInfoWrapperPass::doFinalization(Module &M) {
+ MMI.finalize();
+ return false;
+}
+
+AnalysisKey MachineModuleAnalysis::Key;
+
+MachineModuleInfo MachineModuleAnalysis::run(Module &M,
+ ModuleAnalysisManager &) {
+ MachineModuleInfo MMI(TM);
+ MMI.TheModule = &M;
+ MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
+ !M.debug_compile_units().empty();
+ return MMI;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 000000000000..9c3b31935f6d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,43 @@
+//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
+void MachineModuleInfoCOFF::anchor() {}
+void MachineModuleInfoWasm::anchor() {}
+
+using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
+static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
+ return LHS->first->getName().compare(RHS->first->getName());
+}
+
+MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
+ DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+ array_pod_sort(List.begin(), List.end(), SortSymbolPair);
+
+ Map.clear();
+ return List;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
new file mode 100644
index 000000000000..aa63411df965
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
@@ -0,0 +1,80 @@
+//===-- llvm/CodeGen/MachineModuleSlotTracker.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleSlotTracker.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+void MachineModuleSlotTracker::processMachineFunctionMetadata(
+ AbstractSlotTrackerStorage *AST, const MachineFunction &MF) {
+ // Create metadata slots for metadata created within the backend.
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB.instrs())
+ for (const MachineMemOperand *MMO : MI.memoperands()) {
+ AAMDNodes AAInfo = MMO->getAAInfo();
+ if (AAInfo.TBAA)
+ AST->createMetadataSlot(AAInfo.TBAA);
+ if (AAInfo.TBAAStruct)
+ AST->createMetadataSlot(AAInfo.TBAAStruct);
+ if (AAInfo.Scope)
+ AST->createMetadataSlot(AAInfo.Scope);
+ if (AAInfo.NoAlias)
+ AST->createMetadataSlot(AAInfo.NoAlias);
+ }
+}
+
+void MachineModuleSlotTracker::processMachineModule(
+ AbstractSlotTrackerStorage *AST, const Module *M,
+ bool ShouldInitializeAllMetadata) {
+ if (ShouldInitializeAllMetadata) {
+ for (const Function &F : *M) {
+ if (&F != &TheFunction)
+ continue;
+ MDNStartSlot = AST->getNextMetadataSlot();
+ if (auto *MF = TheMMI.getMachineFunction(F))
+ processMachineFunctionMetadata(AST, *MF);
+ MDNEndSlot = AST->getNextMetadataSlot();
+ break;
+ }
+ }
+}
+
+void MachineModuleSlotTracker::processMachineFunction(
+ AbstractSlotTrackerStorage *AST, const Function *F,
+ bool ShouldInitializeAllMetadata) {
+ if (!ShouldInitializeAllMetadata && F == &TheFunction) {
+ MDNStartSlot = AST->getNextMetadataSlot();
+ if (auto *MF = TheMMI.getMachineFunction(*F))
+ processMachineFunctionMetadata(AST, *MF);
+ MDNEndSlot = AST->getNextMetadataSlot();
+ }
+}
+
+void MachineModuleSlotTracker::collectMachineMDNodes(
+ MachineMDNodeListType &L) const {
+ collectMDNodes(L, MDNStartSlot, MDNEndSlot);
+}
+
+MachineModuleSlotTracker::MachineModuleSlotTracker(
+ const MachineFunction *MF, bool ShouldInitializeAllMetadata)
+ : ModuleSlotTracker(MF->getFunction().getParent(),
+ ShouldInitializeAllMetadata),
+ TheFunction(MF->getFunction()), TheMMI(MF->getMMI()) {
+ setProcessHook([this](AbstractSlotTrackerStorage *AST, const Module *M,
+ bool ShouldInitializeAllMetadata) {
+ this->processMachineModule(AST, M, ShouldInitializeAllMetadata);
+ });
+ setProcessHook([this](AbstractSlotTrackerStorage *AST, const Function *F,
+ bool ShouldInitializeAllMetadata) {
+ this->processMachineFunction(AST, F, ShouldInitializeAllMetadata);
+ });
+}
+
+MachineModuleSlotTracker::~MachineModuleSlotTracker() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
new file mode 100644
index 000000000000..788c134b6ee8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -0,0 +1,1256 @@
+//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Methods common to all machine operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/MIRFormatter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StableHashing.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <optional>
+
+using namespace llvm;
+
+static cl::opt<int>
+ PrintRegMaskNumRegs("print-regmask-num-regs",
+ cl::desc("Number of registers to limit to when "
+ "printing regmask operands in IR dumps. "
+ "unlimited = -1"),
+ cl::init(32), cl::Hidden);
+
+static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) {
+ if (const MachineInstr *MI = MO.getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF;
+ return nullptr;
+}
+
+static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
+ return const_cast<MachineFunction *>(
+ getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
+}
+
+unsigned MachineOperand::getOperandNo() const {
+ assert(getParent() && "Operand does not belong to any instruction!");
+ return getParent()->getOperandNo(this);
+}
+
+void MachineOperand::setReg(Register Reg) {
+ if (getReg() == Reg)
+ return; // No change.
+
+ // Clear the IsRenamable bit to keep it conservatively correct.
+ IsRenamable = false;
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineFunction *MF = getMFIfAvailable(*this)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ SmallContents.RegNo = Reg;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(Reg.isVirtual());
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) {
+ assert(Register::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ if (isDef())
+ setIsUndef(false);
+ }
+ setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ assert(!IsDeadOrKill && "Changing def/use with dead/kill set not supported");
+ // MRI may keep uses and defs in different list positions.
+ if (MachineFunction *MF = getMFIfAvailable(*this)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
+bool MachineOperand::isRenamable() const {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert(getReg().isPhysical() &&
+ "isRenamable should only be checked on physical registers");
+ if (!IsRenamable)
+ return false;
+
+ const MachineInstr *MI = getParent();
+ if (!MI)
+ return true;
+
+ if (isDef())
+ return !MI->hasExtraDefRegAllocReq(MachineInstr::IgnoreBundle);
+
+ assert(isUse() && "Reg is not def or use");
+ return !MI->hasExtraSrcRegAllocReq(MachineInstr::IgnoreBundle);
+}
+
+void MachineOperand::setIsRenamable(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert(getReg().isPhysical() &&
+ "setIsRenamable should only be called on physical registers");
+ IsRenamable = Val;
+}
+
+// If this operand is currently a register operand, and if this is in a
+// function, deregister the operand from the register's use/def list.
+void MachineOperand::removeRegFromUses() {
+ if (!isReg() || !isOnRegUseList())
+ return;
+
+ if (MachineFunction *MF = getMFIfAvailable(*this))
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm,
+ unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_FPImmediate;
+ Contents.CFP = FPImm;
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToES(const char *SymName,
+ unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an external symbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_ExternalSymbol;
+ Contents.OffsetedInfo.Val.SymbolName = SymName;
+ setOffset(0); // Offset is always 0.
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
+ unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a global address");
+
+ removeRegFromUses();
+
+ OpKind = MO_GlobalAddress;
+ Contents.OffsetedInfo.Val.GV = GV;
+ setOffset(Offset);
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an MCSymbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_MCSymbol;
+ Contents.Sym = Sym;
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToFrameIndex(int Idx, unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a FrameIndex");
+
+ removeRegFromUses();
+
+ OpKind = MO_FrameIndex;
+ setIndex(Idx);
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
+ unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a FrameIndex");
+
+ removeRegFromUses();
+
+ OpKind = MO_TargetIndex;
+ setIndex(Idx);
+ setOffset(Offset);
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToDbgInstrRef(unsigned InstrIdx, unsigned OpIdx,
+ unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a DbgInstrRef");
+
+ removeRegFromUses();
+
+ OpKind = MO_DbgInstrRef;
+ setInstrRefInstrIndex(InstrIdx);
+ setInstrRefOpIndex(OpIdx);
+ setTargetFlags(TargetFlags);
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ MachineRegisterInfo *RegInfo = nullptr;
+ if (MachineFunction *MF = getMFIfAvailable(*this))
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
+ // register's use/def lists.
+ bool WasReg = isReg();
+ if (RegInfo && WasReg)
+ RegInfo->removeRegOperandFromUseList(this);
+
+ // Ensure debug instructions set debug flag on register uses.
+ const MachineInstr *MI = getParent();
+ if (!isDef && MI && MI->isDebugInstr())
+ isDebug = true;
+
+ // Change this to a register and set the reg#.
+ assert(!(isDead && !isDef) && "Dead flag on non-def");
+ assert(!(isKill && isDef) && "Kill flag on def");
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg_TargetFlags = 0;
+ IsDef = isDef;
+ IsImp = isImp;
+ IsDeadOrKill = isKill | isDead;
+ IsRenamable = false;
+ IsUndef = isUndef;
+ IsInternalRead = false;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = nullptr;
+ // Preserve the tie when the operand was already a register.
+ if (!WasReg)
+ TiedTo = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
+}
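+
+// Example (illustrative): rewriting an existing register operand in place.
+// MO.ChangeToRegister(NewReg, /*isDef=*/false, /*isImp=*/false,
+// /*isKill=*/true);
+// Because the operand was already a register, any tie to a def is preserved;
+// the remaining status flags are reset as above.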
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut: {
+ // Shallow compare of the two RegMasks
+ const uint32_t *RegMask = getRegMask();
+ const uint32_t *OtherRegMask = Other.getRegMask();
+ if (RegMask == OtherRegMask)
+ return true;
+
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ // Deep compare of the two RegMasks
+ return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+ }
+ // We don't know the size of the RegMask, so we can't deep compare the two
+ // reg masks.
+ return false;
+ }
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_DbgInstrRef:
+ return getInstrRefInstrIndex() == Other.getInstrRefInstrIndex() &&
+ getInstrRefOpIndex() == Other.getInstrRefOpIndex();
+ case MachineOperand::MO_CFIIndex:
+ return getCFIIndex() == Other.getCFIIndex();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ case MachineOperand::MO_IntrinsicID:
+ return getIntrinsicID() == Other.getIntrinsicID();
+ case MachineOperand::MO_Predicate:
+ return getPredicate() == Other.getPredicate();
+ case MachineOperand::MO_ShuffleMask:
+ return getShuffleMask() == Other.getShuffleMask();
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ // Register operands don't have target flags.
+ return hash_combine(MO.getType(), (unsigned)MO.getReg(), MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ StringRef(MO.getSymbolName()));
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(),
+ MO.getOffset());
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut: {
+ if (const MachineFunction *MF = getMFIfAvailable(MO)) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ const uint32_t *RegMask = MO.getRegMask();
+ std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(RegMaskHashes.data(),
+ RegMaskHashes.size()));
+ }
+
+ assert(0 && "MachineOperand not associated with any MachineFunction");
+ return hash_combine(MO.getType(), MO.getTargetFlags());
+ }
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ case MachineOperand::MO_DbgInstrRef:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getInstrRefInstrIndex(), MO.getInstrRefOpIndex());
+ case MachineOperand::MO_CFIIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+ case MachineOperand::MO_ShuffleMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getShuffleMask());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+// Try to crawl up to the machine function and get TRI and IntrinsicInfo from
+// it.
+static void tryToGetTargetInfo(const MachineOperand &MO,
+ const TargetRegisterInfo *&TRI,
+ const TargetIntrinsicInfo *&IntrinsicInfo) {
+ if (const MachineFunction *MF = getMFIfAvailable(MO)) {
+ TRI = MF->getSubtarget().getRegisterInfo();
+ IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
+ }
+}
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ auto Found = find_if(Indices, [&](const std::pair<int, const char *> &I) {
+ return I.first == Index;
+ });
+ if (Found != Indices.end())
+ return Found->second;
+ return nullptr;
+}
+
+const char *MachineOperand::getTargetIndexName() const {
+ const MachineFunction *MF = getMFIfAvailable(*this);
+ return MF ? ::getTargetIndexName(*MF, this->getIndex()) : nullptr;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TF) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ if (!TRI) {
+ OS << "%dwarfreg." << DwarfReg;
+ return;
+ }
+
+ if (std::optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true))
+ OS << printReg(*Reg, TRI);
+ else
+ OS << "<badreg>";
+}
+
+static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
+ ModuleSlotTracker &MST) {
+ OS << "%ir-block.";
+ if (BB.hasName()) {
+ printLLVMNameWithoutPrefix(OS, BB.getName());
+ return;
+ }
+ std::optional<int> Slot;
+ if (const Function *F = BB.getParent()) {
+ if (F == MST.getCurrentFunction()) {
+ Slot = MST.getLocalSlot(&BB);
+ } else if (const Module *M = F->getParent()) {
+ ModuleSlotTracker CustomMST(M, /*ShouldInitializeAllMetadata=*/false);
+ CustomMST.incorporateFunction(*F);
+ Slot = CustomMST.getLocalSlot(&BB);
+ }
+ }
+ if (Slot)
+ MachineOperand::printIRSlotNumber(OS, *Slot);
+ else
+ OS << "<unknown>";
+}
+
+static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
+ SyncScope::ID SSID,
+ SmallVectorImpl<StringRef> &SSNs) {
+ switch (SSID) {
+ case SyncScope::System:
+ break;
+ default:
+ if (SSNs.empty())
+ Context.getSyncScopeNames(SSNs);
+
+ OS << "syncscope(\"";
+ printEscapedString(SSNs[SSID], OS);
+ OS << "\") ";
+ break;
+ }
+}
+
+static const char *getTargetMMOFlagName(const TargetInstrInfo &TII,
+ unsigned TMMOFlag) {
+ auto Flags = TII.getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TMMOFlag) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+static void printFrameIndex(raw_ostream &OS, int FrameIndex, bool IsFixed,
+ const MachineFrameInfo *MFI) {
+ StringRef Name;
+ if (MFI) {
+ IsFixed = MFI->isFixedObjectIndex(FrameIndex);
+ if (const AllocaInst *Alloca = MFI->getObjectAllocation(FrameIndex))
+ if (Alloca->hasName())
+ Name = Alloca->getName();
+ if (IsFixed)
+ FrameIndex -= MFI->getObjectIndexBegin();
+ }
+ MachineOperand::printStackObjectReference(OS, FrameIndex, IsFixed, Name);
+}
+
+void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index,
+ const TargetRegisterInfo *TRI) {
+ OS << "%subreg.";
+ if (TRI && Index != 0 && Index < TRI->getNumSubRegIndices())
+ OS << TRI->getSubRegIndexName(Index);
+ else
+ OS << Index;
+}
+
+void MachineOperand::printTargetFlags(raw_ostream &OS,
+ const MachineOperand &Op) {
+ if (!Op.getTargetFlags())
+ return;
+ const MachineFunction *MF = getMFIfAvailable(Op);
+ if (!MF)
+ return;
+
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+ OS << "target-flags(";
+ const bool HasDirectFlags = Flags.first;
+ const bool HasBitmaskFlags = Flags.second;
+ if (!HasDirectFlags && !HasBitmaskFlags) {
+ OS << "<unknown>) ";
+ return;
+ }
+ if (HasDirectFlags) {
+ if (const auto *Name = getTargetFlagName(TII, Flags.first))
+ OS << Name;
+ else
+ OS << "<unknown target flag>";
+ }
+ if (!HasBitmaskFlags) {
+ OS << ") ";
+ return;
+ }
+ bool IsCommaNeeded = HasDirectFlags;
+ unsigned BitMask = Flags.second;
+ auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &Mask : BitMasks) {
+ // Check if the flag's bitmask has the bits of the current mask set.
+ if ((BitMask & Mask.first) == Mask.first) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ IsCommaNeeded = true;
+ OS << Mask.second;
+ // Clear the bits which were serialized from the flag's bitmask.
+ BitMask &= ~(Mask.first);
+ }
+ }
+ if (BitMask) {
+ // When the resulting flag's bitmask isn't zero, we know that we didn't
+ // serialize all of the bit flags.
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << "<unknown bitmask target flag>";
+ }
+ OS << ") ";
+}
+
+void MachineOperand::printSymbol(raw_ostream &OS, MCSymbol &Sym) {
+ OS << "<mcsymbol " << Sym << ">";
+}
+
+void MachineOperand::printStackObjectReference(raw_ostream &OS,
+ unsigned FrameIndex,
+ bool IsFixed, StringRef Name) {
+ if (IsFixed) {
+ OS << "%fixed-stack." << FrameIndex;
+ return;
+ }
+
+ OS << "%stack." << FrameIndex;
+ if (!Name.empty())
+ OS << '.' << Name;
+}
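+
+// Typical renderings of the two forms above (the alloca name is illustrative):
+// %fixed-stack.0 for a fixed frame index
+// %stack.1.myalloca for an ordinary frame index named after its alloca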
+
+void MachineOperand::printOperandOffset(raw_ostream &OS, int64_t Offset) {
+ if (Offset == 0)
+ return;
+ if (Offset < 0) {
+ OS << " - " << -Offset;
+ return;
+ }
+ OS << " + " << Offset;
+}
+
+void MachineOperand::printIRSlotNumber(raw_ostream &OS, int Slot) {
+ if (Slot == -1)
+ OS << "<badref>";
+ else
+ OS << Slot;
+}
+
+static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
+ const TargetRegisterInfo *TRI) {
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpSameValue:
+ OS << "same_value ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpRememberState:
+ OS << "remember_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ OS << "restore_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ case MCCFIInstruction::OpOffset:
+ OS << "offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OS << "def_cfa_register ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ OS << "def_cfa_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OS << "def_cfa ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpLLVMDefAspaceCfa:
+ OS << "llvm_def_aspace_cfa ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ OS << ", " << CFI.getAddressSpace();
+ break;
+ case MCCFIInstruction::OpRelOffset:
+ OS << "rel_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OS << "adjust_cfa_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRestore:
+ OS << "restore ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpEscape: {
+ OS << "escape ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ if (!CFI.getValues().empty()) {
+ size_t e = CFI.getValues().size() - 1;
+ for (size_t i = 0; i < e; ++i)
+ OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", ";
+ OS << format("0x%02x", uint8_t(CFI.getValues()[e]));
+ }
+ break;
+ }
+ case MCCFIInstruction::OpUndefined:
+ OS << "undefined ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpRegister:
+ OS << "register ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", ";
+ printCFIRegister(CFI.getRegister2(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpWindowSave:
+ OS << "window_save ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ case MCCFIInstruction::OpNegateRAState:
+ OS << "negate_ra_sign_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ default:
+ // TODO: Print the other CFI Operations.
+ OS << "<unserializable cfi directive>";
+ break;
+ }
+}
+
+void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
+ print(OS, LLT{}, TRI, IntrinsicInfo);
+}
+
+void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint,
+ const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
+ tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
+ ModuleSlotTracker DummyMST(nullptr);
+ print(OS, DummyMST, TypeToPrint, std::nullopt, /*PrintDef=*/false,
+ /*IsStandalone=*/true,
+ /*ShouldPrintRegisterTies=*/true,
+ /*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
+}
+
+void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ LLT TypeToPrint, std::optional<unsigned> OpIdx,
+ bool PrintDef, bool IsStandalone,
+ bool ShouldPrintRegisterTies,
+ unsigned TiedOperandIdx,
+ const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
+ printTargetFlags(OS, *this);
+ switch (getType()) {
+ case MachineOperand::MO_Register: {
+ Register Reg = getReg();
+ if (isImplicit())
+ OS << (isDef() ? "implicit-def " : "implicit ");
+ else if (PrintDef && isDef())
+ // Print the 'def' flag only when the operand is defined after '='.
+ OS << "def ";
+ if (isInternalRead())
+ OS << "internal ";
+ if (isDead())
+ OS << "dead ";
+ if (isKill())
+ OS << "killed ";
+ if (isUndef())
+ OS << "undef ";
+ if (isEarlyClobber())
+ OS << "early-clobber ";
+ if (getReg().isPhysical() && isRenamable())
+ OS << "renamable ";
+ // isDebug() is exactly true for register operands of a DBG_VALUE. So we
+ // simply infer it when parsing and do not need to print it.
+
+ const MachineRegisterInfo *MRI = nullptr;
+ if (Reg.isVirtual()) {
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ MRI = &MF->getRegInfo();
+ }
+ }
+
+ OS << printReg(Reg, TRI, 0, MRI);
+ // Print the sub register.
+ if (unsigned SubReg = getSubReg()) {
+ if (TRI)
+ OS << '.' << TRI->getSubRegIndexName(SubReg);
+ else
+ OS << ".subreg" << SubReg;
+ }
+ // Print the register class / bank.
+ if (Reg.isVirtual()) {
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) {
+ OS << ':';
+ OS << printRegClassOrBank(Reg, MRI, TRI);
+ }
+ }
+ }
+ // Print ties.
+ if (ShouldPrintRegisterTies && isTied() && !isDef())
+ OS << "(tied-def " << TiedOperandIdx << ")";
+ // Print types.
+ if (TypeToPrint.isValid())
+ OS << '(' << TypeToPrint << ')';
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ const MIRFormatter *Formatter = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ Formatter = TII->getMIRFormatter();
+ }
+ if (Formatter)
+ Formatter->printImm(OS, *getParent(), OpIdx, getImm());
+ else
+ OS << getImm();
+ break;
+ }
+ case MachineOperand::MO_CImmediate:
+ getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << printMBBReference(*getMBB());
+ break;
+ case MachineOperand::MO_FrameIndex: {
+ int FrameIndex = getIndex();
+ bool IsFixed = false;
+ const MachineFrameInfo *MFI = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this))
+ MFI = &MF->getFrameInfo();
+ printFrameIndex(OS, FrameIndex, IsFixed, MFI);
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "%const." << getIndex();
+ printOperandOffset(OS, getOffset());
+ break;
+ case MachineOperand::MO_TargetIndex: {
+ OS << "target-index(";
+ const char *Name = "<unknown>";
+ if (const MachineFunction *MF = getMFIfAvailable(*this))
+ if (const auto *TargetIndexName = ::getTargetIndexName(*MF, getIndex()))
+ Name = TargetIndexName;
+ OS << Name << ')';
+ printOperandOffset(OS, getOffset());
+ break;
+ }
+ case MachineOperand::MO_JumpTableIndex:
+ OS << printJumpTableEntryReference(getIndex());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
+ printOperandOffset(OS, getOffset());
+ break;
+ case MachineOperand::MO_ExternalSymbol: {
+ StringRef Name = getSymbolName();
+ OS << '&';
+ if (Name.empty()) {
+ OS << "\"\"";
+ } else {
+ printLLVMNameWithoutPrefix(OS, Name);
+ }
+ printOperandOffset(OS, getOffset());
+ break;
+ }
+ case MachineOperand::MO_BlockAddress: {
+ OS << "blockaddress(";
+ getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+ MST);
+ OS << ", ";
+ printIRBlockReference(OS, *getBlockAddress()->getBasicBlock(), MST);
+ OS << ')';
+ MachineOperand::printOperandOffset(OS, getOffset());
+ break;
+ }
+ case MachineOperand::MO_RegisterMask: {
+ OS << "<regmask";
+ if (TRI) {
+ unsigned NumRegsInMask = 0;
+ unsigned NumRegsEmitted = 0;
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ unsigned MaskWord = i / 32;
+ unsigned MaskBit = i % 32;
+ if (getRegMask()[MaskWord] & (1 << MaskBit)) {
+ if (PrintRegMaskNumRegs < 0 ||
+ NumRegsEmitted <= static_cast<unsigned>(PrintRegMaskNumRegs)) {
+ OS << " " << printReg(i, TRI);
+ NumRegsEmitted++;
+ }
+ NumRegsInMask++;
+ }
+ }
+ if (NumRegsEmitted != NumRegsInMask)
+ OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
+ } else {
+ OS << " ...";
+ }
+ OS << ">";
+ break;
+ }
+ case MachineOperand::MO_RegisterLiveOut: {
+ const uint32_t *RegMask = getRegLiveOut();
+ OS << "liveout(";
+ if (!TRI) {
+ OS << "<unknown>";
+ } else {
+ bool IsCommaNeeded = false;
+ for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+ if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << printReg(Reg, TRI);
+ IsCommaNeeded = true;
+ }
+ }
+ }
+ OS << ")";
+ break;
+ }
+ case MachineOperand::MO_Metadata:
+ getMetadata()->printAsOperand(OS, MST);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ printSymbol(OS, *getMCSymbol());
+ break;
+ case MachineOperand::MO_DbgInstrRef: {
+ OS << "dbg-instr-ref(" << getInstrRefInstrIndex() << ", "
+ << getInstrRefOpIndex() << ')';
+ break;
+ }
+ case MachineOperand::MO_CFIIndex: {
+ if (const MachineFunction *MF = getMFIfAvailable(*this))
+ printCFI(OS, MF->getFrameInstructions()[getCFIIndex()], TRI);
+ else
+ OS << "<cfi directive>";
+ break;
+ }
+ case MachineOperand::MO_IntrinsicID: {
+ Intrinsic::ID ID = getIntrinsicID();
+ if (ID < Intrinsic::num_intrinsics)
+ OS << "intrinsic(@" << Intrinsic::getBaseName(ID) << ')';
+ else if (IntrinsicInfo)
+ OS << "intrinsic(@" << IntrinsicInfo->getName(ID) << ')';
+ else
+ OS << "intrinsic(" << ID << ')';
+ break;
+ }
+ case MachineOperand::MO_Predicate: {
+ auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
+ OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
+ << Pred << ')';
+ break;
+ }
+ case MachineOperand::MO_ShuffleMask:
+ OS << "shufflemask(";
+ ArrayRef<int> Mask = getShuffleMask();
+ StringRef Separator;
+ for (int Elt : Mask) {
+ if (Elt == -1)
+ OS << Separator << "undef";
+ else
+ OS << Separator << Elt;
+ Separator = ", ";
+ }
+
+ OS << ')';
+ break;
+ }
+}
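+
+// A few sample renderings produced by the switch above (the physical register
+// name is target-specific and purely illustrative):
+// def renamable $r0 a renamable physical-register def
+// %const.0 + 8 a constant-pool index with a positive offset
+// <regmask ...> a register mask printed without a TargetRegisterInfo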
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineOperand::dump() const { dbgs() << *this << '\n'; }
+#endif
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; }
+
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!isa<const Value *>(V))
+ return false;
+
+ const Value *BasePtr = cast<const Value *>(V);
+ if (BasePtr == nullptr)
+ return false;
+
+ return isDereferenceableAndAlignedPointer(
+ BasePtr, Align(1), APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
+ int FI, int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
+ int64_t Offset, uint8_t ID) {
+ return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID);
+}
+
+MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getDataLayout().getAllocaAddrSpace());
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
+ LLT type, Align a, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, SyncScope::ID SSID,
+ AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering)
+ : PtrInfo(ptrinfo), MemoryType(type), FlagVals(f), BaseAlign(a),
+ AAInfo(AAInfo), Ranges(Ranges) {
+ assert((PtrInfo.V.isNull() || isa<const PseudoSourceValue *>(PtrInfo.V) ||
+ isa<PointerType>(cast<const Value *>(PtrInfo.V)->getType())) &&
+ "invalid pointer value");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+
+ AtomicInfo.SSID = static_cast<unsigned>(SSID);
+ assert(getSyncScopeID() == SSID && "Value truncated");
+ AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
+ assert(getSuccessOrdering() == Ordering && "Value truncated");
+ AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
+ assert(getFailureOrdering() == FailureOrdering && "Value truncated");
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
+ uint64_t s, Align a,
+ const AAMDNodes &AAInfo,
+ const MDNode *Ranges, SyncScope::ID SSID,
+ AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering)
+ : MachineMemOperand(ptrinfo, f,
+ s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a,
+ AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
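+
+// The size-based constructor above simply forwards to the LLT-based one: an
+// unknown size (~UINT64_C(0)) becomes an invalid LLT, while e.g. s == 4 maps
+// to LLT::scalar(32), i.e. a 4-byte scalar memory type.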
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) ||
+ MMO->getSize() == getSize()) &&
+ "Size mismatch!");
+
+ if (MMO->getBaseAlign() >= getBaseAlign()) {
+ // Update the alignment value.
+ BaseAlign = MMO->getBaseAlign();
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlign - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+Align MachineMemOperand::getAlign() const {
+ return commonAlignment(getBaseAlign(), getOffset());
+}
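+
+// Example: with a base alignment of 16 and an offset of 4,
+// commonAlignment(Align(16), 4) yields Align(4), i.e. the alignment that is
+// actually guaranteed at the referenced address.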
+
+void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ SmallVectorImpl<StringRef> &SSNs,
+ const LLVMContext &Context,
+ const MachineFrameInfo *MFI,
+ const TargetInstrInfo *TII) const {
+ OS << '(';
+ if (isVolatile())
+ OS << "volatile ";
+ if (isNonTemporal())
+ OS << "non-temporal ";
+ if (isDereferenceable())
+ OS << "dereferenceable ";
+ if (isInvariant())
+ OS << "invariant ";
+ if (TII) {
+ if (getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1)
+ << "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2)
+ << "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3)
+ << "\" ";
+ } else {
+ if (getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << "\"MOTargetFlag1\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << "\"MOTargetFlag2\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << "\"MOTargetFlag3\" ";
+ }
+
+ assert((isLoad() || isStore()) &&
+ "machine memory operand must be a load or store (or both)");
+ if (isLoad())
+ OS << "load ";
+ if (isStore())
+ OS << "store ";
+
+ printSyncScope(OS, Context, getSyncScopeID(), SSNs);
+
+ if (getSuccessOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(getSuccessOrdering()) << ' ';
+ if (getFailureOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(getFailureOrdering()) << ' ';
+
+ if (getMemoryType().isValid())
+ OS << '(' << getMemoryType() << ')';
+ else
+ OS << "unknown-size";
+
+ if (const Value *Val = getValue()) {
+ OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
+ MIRFormatter::printIRValue(OS, *Val, MST);
+ } else if (const PseudoSourceValue *PVal = getPseudoValue()) {
+ OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
+ assert(PVal && "Expected a pseudo source value");
+ switch (PVal->kind()) {
+ case PseudoSourceValue::Stack:
+ OS << "stack";
+ break;
+ case PseudoSourceValue::GOT:
+ OS << "got";
+ break;
+ case PseudoSourceValue::JumpTable:
+ OS << "jump-table";
+ break;
+ case PseudoSourceValue::ConstantPool:
+ OS << "constant-pool";
+ break;
+ case PseudoSourceValue::FixedStack: {
+ int FrameIndex = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ bool IsFixed = true;
+ printFrameIndex(OS, FrameIndex, IsFixed, MFI);
+ break;
+ }
+ case PseudoSourceValue::GlobalValueCallEntry:
+ OS << "call-entry ";
+ cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+ OS, /*PrintType=*/false, MST);
+ break;
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ OS << "call-entry &";
+ printLLVMNameWithoutPrefix(
+ OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+ break;
+ default: {
+ const MIRFormatter *Formatter = TII->getMIRFormatter();
+ // FIXME: This is not necessarily the correct MIR serialization format for
+ // a custom pseudo source value, but at least it allows
+ // MIR printing to work on a target with custom pseudo source
+ // values.
+ OS << "custom \"";
+ Formatter->printCustomPseudoSourceValue(OS, MST, *PVal);
+ OS << '\"';
+ break;
+ }
+ }
+ } else if (getOpaqueValue() == nullptr && getOffset() != 0) {
+ OS << ((isLoad() && isStore()) ? " on "
+ : isLoad() ? " from "
+ : " into ")
+ << "unknown-address";
+ }
+ MachineOperand::printOperandOffset(OS, getOffset());
+ if (getSize() > 0 && getAlign() != getSize())
+ OS << ", align " << getAlign().value();
+ if (getAlign() != getBaseAlign())
+ OS << ", basealign " << getBaseAlign().value();
+ auto AAInfo = getAAInfo();
+ if (AAInfo.TBAA) {
+ OS << ", !tbaa ";
+ AAInfo.TBAA->printAsOperand(OS, MST);
+ }
+ if (AAInfo.Scope) {
+ OS << ", !alias.scope ";
+ AAInfo.Scope->printAsOperand(OS, MST);
+ }
+ if (AAInfo.NoAlias) {
+ OS << ", !noalias ";
+ AAInfo.NoAlias->printAsOperand(OS, MST);
+ }
+ if (getRanges()) {
+ OS << ", !range ";
+ getRanges()->printAsOperand(OS, MST);
+ }
+ // FIXME: Implement addrspace printing/parsing in MIR.
+ // For now, print this even though parsing it is not available in MIR.
+ if (unsigned AS = getAddrSpace())
+ OS << ", addrspace " << AS;
+
+ OS << ')';
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
new file mode 100644
index 000000000000..1c31eba909e7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -0,0 +1,97 @@
+///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
+///
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// Optimization diagnostic interfaces for machine passes. It's packaged as an
+/// analysis pass so that passes using this service also become dependent on
+/// MBFI. MBFI is used to compute the "hotness" of the diagnostic message.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include <optional>
+
+using namespace llvm;
+
+DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
+ StringRef MKey, const MachineInstr &MI) {
+ Key = std::string(MKey);
+
+ raw_string_ostream OS(Val);
+ MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
+ /*SkipDebugLoc=*/true);
+}
+
+std::optional<uint64_t>
+MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
+ if (!MBFI)
+ return std::nullopt;
+
+ return MBFI->getBlockProfileCount(&MBB);
+}
+
+void MachineOptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoMIROptimization &Remark) {
+ const MachineBasicBlock *MBB = Remark.getBlock();
+ if (MBB)
+ Remark.setHotness(computeHotness(*MBB));
+}
+
+void MachineOptimizationRemarkEmitter::emit(
+ DiagnosticInfoOptimizationBase &OptDiagCommon) {
+ auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
+ computeHotness(OptDiag);
+
+ LLVMContext &Ctx = MF.getFunction().getContext();
+
+ // Only emit it if its hotness meets the threshold.
+ if (OptDiag.getHotness().value_or(0) < Ctx.getDiagnosticsHotnessThreshold())
+ return;
+
+ Ctx.diagnose(OptDiag);
+}
+
+MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineOptimizationRemarkEmitterPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ MachineBlockFrequencyInfo *MBFI;
+
+ if (MF.getFunction().getContext().getDiagnosticsHotnessRequested())
+ MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ else
+ MBFI = nullptr;
+
+ ORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ return false;
+}
+
+void MachineOptimizationRemarkEmitterPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+char MachineOptimizationRemarkEmitterPass::ID = 0;
+static const char ore_name[] = "Machine Optimization Remark Emitter";
+#define ORE_NAME "machine-opt-remark-emitter"
+
+INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(LazyMachineBlockFrequencyInfoPass)
+INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ true, true)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
new file mode 100644
index 000000000000..a0769105c929
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -0,0 +1,1213 @@
+//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Replaces repeated sequences of instructions with function calls.
+///
+/// This works by placing every instruction from every basic block in a
+/// suffix tree, and repeatedly querying that tree for repeated sequences of
+/// instructions. If a sequence of instructions appears often, then it ought
+/// to be beneficial to pull it out into a function.
+///
+/// The MachineOutliner communicates with a given target using hooks defined in
+/// TargetInstrInfo.h. The target supplies the outliner with information on how
+/// a specific sequence of instructions should be outlined. This information
+/// is used to deduce the number of instructions necessary to
+///
+/// * Create an outlined function
+/// * Call that outlined function
+///
+/// Targets must implement
+/// * getOutliningCandidateInfo
+/// * buildOutlinedFrame
+/// * insertOutlinedCall
+/// * isFunctionSafeToOutlineFrom
+///
+/// in order to make use of the MachineOutliner.
+///
+/// This was originally presented at the 2016 LLVM Developers' Meeting in the
+/// talk "Reducing Code Size Using Outlining". For a high-level overview of
+/// how this pass works, the talk is available on YouTube at
+///
+/// https://www.youtube.com/watch?v=yorld-WSOeU
+///
+/// The slides for the talk are available at
+///
+/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
+///
+/// The talk provides an overview of how the outliner finds candidates and
+/// ultimately outlines them. It describes how the main data structure for this
+/// pass, the suffix tree, is queried and purged for candidates. It also gives
+/// a simplified suffix tree construction algorithm for suffix trees based off
+/// of the algorithm actually used here, Ukkonen's algorithm.
+///
+/// For the original RFC for this pass, please see
+///
+/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
+///
+/// For more information on the suffix tree data structure, please see
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineOutliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SuffixTree.h"
+#include "llvm/Support/raw_ostream.h"
+#include <functional>
+#include <tuple>
+#include <vector>
+
+#define DEBUG_TYPE "machine-outliner"
+
+using namespace llvm;
+using namespace ore;
+using namespace outliner;
+
+// Statistics for outlined functions.
+STATISTIC(NumOutlined, "Number of candidates outlined");
+STATISTIC(FunctionsCreated, "Number of functions created");
+
+// Statistics for instruction mapping.
+STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
+STATISTIC(NumIllegalInUnsignedVec,
+ "Unoutlinable instructions mapped + number of sentinel values");
+STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
+STATISTIC(NumInvisible,
+ "Invisible instructions skipped during mapping");
+STATISTIC(UnsignedVecSize,
+ "Total number of instructions mapped and saved to mapping vector");
+
+// Set to true if the user wants the outliner to run on linkonceodr linkage
+// functions. This is false by default because the linker can dedupe
+// linkonceodr functions across modules, while the outliner is confined to a
+// single module (modulo LTO). It should, however, be the default behaviour
+// in LTO.
+static cl::opt<bool> EnableLinkOnceODROutlining(
+ "enable-linkonceodr-outlining", cl::Hidden,
+ cl::desc("Enable the machine outliner on linkonceodr functions"),
+ cl::init(false));
+
+/// Number of times to re-run the outliner after the initial run, so the total
+/// number of runs is 1 + OutlinerReruns. The default of 0 means the outliner
+/// runs once and is never rerun.
+static cl::opt<unsigned> OutlinerReruns(
+ "machine-outliner-reruns", cl::init(0), cl::Hidden,
+ cl::desc(
+ "Number of times to rerun the outliner after the initial outline"));
+
+static cl::opt<unsigned> OutlinerBenefitThreshold(
+ "outliner-benefit-threshold", cl::init(1), cl::Hidden,
+ cl::desc(
+ "The minimum size in bytes before an outlining candidate is accepted"));
+
+namespace {
+
+/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
+struct InstructionMapper {
+
+ /// The next available integer to assign to a \p MachineInstr that
+ /// cannot be outlined.
+ ///
+ /// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.
+ unsigned IllegalInstrNumber = -3;
+
+ /// The next available integer to assign to a \p MachineInstr that can
+ /// be outlined.
+ unsigned LegalInstrNumber = 0;
+
+ /// Correspondence from \p MachineInstrs to unsigned integers.
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
+ InstructionIntegerMap;
+
+ /// Correspondence between \p MachineBasicBlocks and target-defined flags.
+ DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
+
+ /// The vector of unsigned integers that the module is mapped to.
+ SmallVector<unsigned> UnsignedVec;
+
+ /// Stores the location of the instruction associated with the integer
+ /// at index i in \p UnsignedVec for each index i.
+ SmallVector<MachineBasicBlock::iterator> InstrList;
+
+ /// Set if we added an illegal number in the previous step.
+ /// Since each illegal number is unique, we only need one of them between
+ /// each range of legal numbers. This lets us make sure we don't add more
+ /// than one illegal number per range.
+ bool AddedIllegalLastTime = false;
+
+ /// Maps \p *It to a legal integer.
+ ///
+ /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
+ /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToLegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ bool &HaveLegalRange, unsigned &NumLegalInBlock,
+ SmallVector<unsigned> &UnsignedVecForMBB,
+ SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // We added something legal, so we should unset the AddedIllegalLastTime
+ // flag.
+ AddedIllegalLastTime = false;
+
+ // If we have at least two adjacent legal instructions (which may have
+ // invisible instructions in between), remember that.
+ if (CanOutlineWithPrevInstr)
+ HaveLegalRange = true;
+ CanOutlineWithPrevInstr = true;
+
+ // Keep track of the number of legal instructions we insert.
+ NumLegalInBlock++;
+
+ // Get the integer for this instruction or give it the current
+ // LegalInstrNumber.
+ InstrListForMBB.push_back(It);
+ MachineInstr &MI = *It;
+ bool WasInserted;
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
+ ResultIt;
+ std::tie(ResultIt, WasInserted) =
+ InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
+ unsigned MINumber = ResultIt->second;
+
+ // There was an insertion.
+ if (WasInserted)
+ LegalInstrNumber++;
+
+ UnsignedVecForMBB.push_back(MINumber);
+
+ // Make sure we don't overflow or use any integers reserved by the DenseMap.
+ if (LegalInstrNumber >= IllegalInstrNumber)
+ report_fatal_error("Instruction mapping overflow!");
+
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+ "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "Tried to assign DenseMap tombstone or empty key to instruction.");
+
+ // Statistics.
+ ++NumLegalInUnsignedVec;
+ return MINumber;
+ }
+
+ /// Maps \p *It to an illegal integer.
+ ///
+ /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
+ /// IllegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToIllegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ SmallVector<unsigned> &UnsignedVecForMBB,
+ SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // Can't outline an illegal instruction. Set the flag.
+ CanOutlineWithPrevInstr = false;
+
+ // Only add one illegal number per range of legal numbers.
+ if (AddedIllegalLastTime)
+ return IllegalInstrNumber;
+
+ // Remember that we added an illegal number last time.
+ AddedIllegalLastTime = true;
+ unsigned MINumber = IllegalInstrNumber;
+
+ InstrListForMBB.push_back(It);
+ UnsignedVecForMBB.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+ // Statistics.
+ ++NumIllegalInUnsignedVec;
+
+ assert(LegalInstrNumber < IllegalInstrNumber &&
+ "Instruction mapping overflow!");
+
+ assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ return MINumber;
+ }
+
+ /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
+ /// and appends it to \p UnsignedVec and \p InstrList.
+ ///
+ /// Two instructions are assigned the same integer if they are identical.
+ /// If an instruction is deemed unsafe to outline, then it will be assigned a
+ /// unique integer. The resulting mapping is placed into a suffix tree and
+ /// queried for candidates.
+ ///
+ /// \param MBB The \p MachineBasicBlock to be translated into integers.
+ /// \param TII \p TargetInstrInfo for the function.
+ void convertToUnsignedVec(MachineBasicBlock &MBB,
+ const TargetInstrInfo &TII) {
+ LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
+ << "' to unsigned vector ***\n");
+ unsigned Flags = 0;
+
+ // Don't even map in this case.
+ if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
+ return;
+
+ auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
+ LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
+ << " outlinable range(s)\n");
+ if (OutlinableRanges.empty())
+ return;
+
+ // Store info for the MBB for later outlining.
+ MBBFlagsMap[&MBB] = Flags;
+
+ MachineBasicBlock::iterator It = MBB.begin();
+
+ // The number of instructions in this block that will be considered for
+ // outlining.
+ unsigned NumLegalInBlock = 0;
+
+ // True if we have at least two legal instructions which aren't separated
+ // by an illegal instruction.
+ bool HaveLegalRange = false;
+
+ // True if we can perform outlining given the last mapped (non-invisible)
+ // instruction. This lets us know if we have a legal range.
+ bool CanOutlineWithPrevInstr = false;
+
+ // FIXME: Should this all just be handled in the target, rather than using
+ // repeated calls to getOutliningType?
+ SmallVector<unsigned> UnsignedVecForMBB;
+ SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
+
+ LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
+ for (auto &OutlinableRange : OutlinableRanges) {
+ auto OutlinableRangeBegin = OutlinableRange.first;
+ auto OutlinableRangeEnd = OutlinableRange.second;
+#ifndef NDEBUG
+ LLVM_DEBUG(
+ dbgs() << "Mapping "
+ << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
+ << " instruction range\n");
+ // Everything outside of an outlinable range is illegal.
+ unsigned NumSkippedInRange = 0;
+#endif
+ for (; It != OutlinableRangeBegin; ++It) {
+#ifndef NDEBUG
+ ++NumSkippedInRange;
+#endif
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ }
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
+ << " instructions outside outlinable range\n");
+#endif
+ assert(It != MBB.end() && "Should still have instructions?");
+ // `It` is now positioned at the beginning of a range of instructions
+ // which may be outlinable. Check if each instruction is known to be safe.
+ for (; It != OutlinableRangeEnd; ++It) {
+ // Keep track of where this instruction is in the module.
+ switch (TII.getOutliningType(It, Flags)) {
+ case InstrType::Illegal:
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::Legal:
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::LegalTerminator:
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB,
+ InstrListForMBB);
+ // The instruction also acts as a terminator, so we have to record
+ // that in the string.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::Invisible:
+ // Normally this is set by mapTo(Blah)Unsigned, but we just want to
+ // skip this instruction. So, unset the flag here.
+ ++NumInvisible;
+ AddedIllegalLastTime = false;
+ break;
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
+
+ // Are there enough legal instructions in the block for outlining to be
+ // possible?
+ if (HaveLegalRange) {
+ // After we're done every insertion, uniquely terminate this part of the
+ // "string". This makes sure we won't match across basic block or function
+ // boundaries since the "end" is encoded uniquely and thus appears in no
+ // repeated substring.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ ++NumSentinels;
+ append_range(InstrList, InstrListForMBB);
+ append_range(UnsignedVec, UnsignedVecForMBB);
+ }
+ }
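+
+ // Worked example (illustrative): an outlinable range containing the
+ // instruction sequence A B A C maps to the integers [0, 1, 0, 2]; the block
+ // is then terminated with a unique illegal value (starting from (unsigned)-3
+ // and decreasing), so no repeated substring can span two blocks.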
+
+ InstructionMapper() {
+ // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
+ // changed.
+ assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
+ "DenseMapInfo<unsigned>'s empty key isn't -1!");
+ assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
+ "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+ }
+};
+
+/// An interprocedural pass which finds repeated sequences of
+/// instructions and replaces them with calls to functions.
+///
+/// Each instruction is mapped to an unsigned integer and placed in a string.
+/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
+/// is then repeatedly queried for repeated sequences of instructions. Each
+/// non-overlapping repeated sequence is then placed in its own
+/// \p MachineFunction and each instance is then replaced with a call to that
+/// function.
+struct MachineOutliner : public ModulePass {
+
+ static char ID;
+
+ /// Set to true if the outliner should consider functions with
+ /// linkonceodr linkage.
+ bool OutlineFromLinkOnceODRs = false;
+
+ /// The current repeat number of machine outlining.
+ unsigned OutlineRepeatedNum = 0;
+
+ /// Set to true if the outliner should run on all functions in the module
+ /// considered safe for outlining.
+ /// Set to true by default for compatibility with llc's -run-pass option.
+ /// Set when the pass is constructed in TargetPassConfig.
+ bool RunOnAllFunctions = true;
+
+ StringRef getPassName() const override { return "Machine Outliner"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+
+ MachineOutliner() : ModulePass(ID) {
+ initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Remark output explaining that not outlining a set of candidates would be
+ /// better than outlining that set.
+ void emitNotOutliningCheaperRemark(
+ unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
+ OutlinedFunction &OF);
+
+ /// Remark output explaining that a function was outlined.
+ void emitOutlinedFunctionRemark(OutlinedFunction &OF);
+
+ /// Find all repeated substrings that satisfy the outlining cost model by
+ /// constructing a suffix tree.
+ ///
+ /// If a substring appears at least twice, then it must be represented by
+ /// an internal node which appears in at least two suffixes. Each suffix
+ /// is represented by a leaf node. To do this, we visit each internal node
+ /// in the tree, using the leaf children of each internal node. If an
+ /// internal node represents a beneficial substring, then we use each of
+ /// its leaf children to find the locations of its substring.
+ ///
+ /// \param Mapper Contains outlining mapping information.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
+ /// for each type of candidate.
+ void findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList);
+
+ /// Replace the sequences of instructions represented by \p OutlinedFunctions
+ /// with calls to functions.
+ ///
+ /// \param M The module we are outlining from.
+ /// \param FunctionList A list of functions to be inserted into the module.
+ /// \param Mapper Contains the instruction mappings for the module.
+ bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
+
+ /// Creates a function for \p OF and inserts it into the module.
+ MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name);
+
+ /// Calls 'doOutline()' 1 + OutlinerReruns times.
+ bool runOnModule(Module &M) override;
+
+ /// Construct a suffix tree on the instructions in \p M and outline repeated
+ /// strings from that tree.
+ bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
+
+ /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
+ /// function for remark emission.
+ DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
+ for (const Candidate &C : OF.Candidates)
+ if (MachineFunction *MF = C.getMF())
+ if (DISubprogram *SP = MF->getFunction().getSubprogram())
+ return SP;
+ return nullptr;
+ }
+
+  /// Populate an \p InstructionMapper with instruction-to-integer mappings.
+ /// These are used to construct a suffix tree.
+ void populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI);
+
+ /// Initialize information necessary to output a size remark.
+ /// FIXME: This should be handled by the pass manager, not the outliner.
+ /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
+ /// pass manager.
+ void initSizeRemarkInfo(const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount);
+
+ /// Emit the remark.
+ // FIXME: This should be handled by the pass manager, not the outliner.
+ void
+ emitInstrCountChangedRemark(const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount);
+};
+} // Anonymous namespace.
+
+char MachineOutliner::ID = 0;
+
+namespace llvm {
+ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
+ MachineOutliner *OL = new MachineOutliner();
+ OL->RunOnAllFunctions = RunOnAllFunctions;
+ return OL;
+}
+
+} // namespace llvm
+
+INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
+ false)
+
+void MachineOutliner::emitNotOutliningCheaperRemark(
+ unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
+ OutlinedFunction &OF) {
+ // FIXME: Right now, we arbitrarily choose some Candidate from the
+ // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
+ // We should probably sort these by function name or something to make sure
+ // the remarks are stable.
+ Candidate &C = CandidatesForRepeatedSeq.front();
+ MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
+ C.front()->getDebugLoc(), C.getMBB());
+ R << "Did not outline " << NV("Length", StringLen) << " instructions"
+ << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
+ << " locations."
+ << " Bytes from outlining all occurrences ("
+ << NV("OutliningCost", OF.getOutliningCost()) << ")"
+ << " >= Unoutlined instruction bytes ("
+ << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
+ << " (Also found at: ";
+
+ // Tell the user the other places the candidate was found.
+ for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
+ R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
+ CandidatesForRepeatedSeq[i].front()->getDebugLoc());
+ if (i != e - 1)
+ R << ", ";
+ }
+
+ R << ")";
+ return R;
+ });
+}
+
+void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
+ MachineBasicBlock *MBB = &*OF.MF->begin();
+ MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
+ MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
+ MBB->findDebugLoc(MBB->begin()), MBB);
+ R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
+ << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
+ << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
+ << " locations. "
+ << "(Found at: ";
+
+ // Tell the user the other places the candidate was found.
+ for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
+
+ R << NV((Twine("StartLoc") + Twine(i)).str(),
+ OF.Candidates[i].front()->getDebugLoc());
+ if (i != e - 1)
+ R << ", ";
+ }
+
+ R << ")";
+
+ MORE.emit(R);
+}
+
+void MachineOutliner::findCandidates(
+ InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
+ FunctionList.clear();
+ SuffixTree ST(Mapper.UnsignedVec);
+
+ // First, find all of the repeated substrings in the tree of minimum length
+ // 2.
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+ LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
+ LLVM_DEBUG(
+ dbgs() << "Searching for overlaps in all repeated sequences...\n");
+ for (const SuffixTree::RepeatedSubstring &RS : ST) {
+ CandidatesForRepeatedSeq.clear();
+ unsigned StringLen = RS.Length;
+ LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
+ // Debug code to keep track of how many candidates we removed.
+#ifndef NDEBUG
+ unsigned NumDiscarded = 0;
+ unsigned NumKept = 0;
+#endif
+ for (const unsigned &StartIdx : RS.StartIndices) {
+ // Trick: Discard some candidates that would be incompatible with the
+ // ones we've already found for this sequence. This will save us some
+ // work in candidate selection.
+ //
+ // If two candidates overlap, then we can't outline them both. This
+ // happens when we have candidates that look like, say
+ //
+ // AA (where each "A" is an instruction).
+ //
+ // We might have some portion of the module that looks like this:
+ // AAAAAA (6 A's)
+ //
+ // In this case, there are 5 different copies of "AA" in this range, but
+ // at most 3 can be outlined. If only outlining 3 of these is going to
+ // be unbeneficial, then we ought to not bother.
+ //
+ // Note that two things DON'T overlap when they look like this:
+ // start1...end1 .... start2...end2
+ // That is, one must either
+ // * End before the other starts
+ // * Start after the other ends
+ unsigned EndIdx = StartIdx + StringLen - 1;
+ auto FirstOverlap = find_if(
+ CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) {
+ return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx();
+ });
+ if (FirstOverlap != CandidatesForRepeatedSeq.end()) {
+#ifndef NDEBUG
+ ++NumDiscarded;
+ LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx
+ << ", " << EndIdx << "]; overlaps with candidate @ ["
+ << FirstOverlap->getStartIdx() << ", "
+ << FirstOverlap->getEndIdx() << "]\n");
+#endif
+ continue;
+ }
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+#ifndef NDEBUG
+ ++NumKept;
+#endif
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ MachineBasicBlock *MBB = StartIt->getParent();
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, EndIt,
+ MBB, FunctionList.size(),
+ Mapper.MBBFlagsMap[MBB]);
+ }
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
+#endif
+
+ // We've found something we might want to outline.
+ // Create an OutlinedFunction to store it and check if it'd be beneficial
+ // to outline.
+ if (CandidatesForRepeatedSeq.size() < 2)
+ continue;
+
+ // Arbitrarily choose a TII from the first candidate.
+ // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
+ const TargetInstrInfo *TII =
+ CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
+
+ std::optional<OutlinedFunction> OF =
+ TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
+
+ // If we deleted too many candidates, then there's nothing worth outlining.
+ // FIXME: This should take target-specified instruction sizes into account.
+ if (!OF || OF->Candidates.size() < 2)
+ continue;
+
+ // Is it better to outline this candidate than not?
+ if (OF->getBenefit() < OutlinerBenefitThreshold) {
+ emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, *OF);
+ continue;
+ }
+
+ FunctionList.push_back(*OF);
+ }
+}
+
+MachineFunction *MachineOutliner::createOutlinedFunction(
+ Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
+
+ // Create the function name. This should be unique.
+ // FIXME: We should have a better naming scheme. This should be stable,
+ // regardless of changes to the outliner's cost model/traversal order.
+ std::string FunctionName = "OUTLINED_FUNCTION_";
+ if (OutlineRepeatedNum > 0)
+ FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_";
+ FunctionName += std::to_string(Name);
+ LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
+
+ // Create the function using an IR-level function.
+ LLVMContext &C = M.getContext();
+ Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
+ Function::ExternalLinkage, FunctionName, M);
+
+ // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
+ // which gives us better results when we outline from linkonceodr functions.
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ // Set optsize/minsize, so we don't insert padding between outlined
+ // functions.
+ F->addFnAttr(Attribute::OptimizeForSize);
+ F->addFnAttr(Attribute::MinSize);
+
+ Candidate &FirstCand = OF.Candidates.front();
+ const TargetInstrInfo &TII =
+ *FirstCand.getMF()->getSubtarget().getInstrInfo();
+
+ TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
+
+ // Set uwtable, so we generate eh_frame.
+ UWTableKind UW = std::accumulate(
+ OF.Candidates.cbegin(), OF.Candidates.cend(), UWTableKind::None,
+ [](UWTableKind K, const outliner::Candidate &C) {
+ return std::max(K, C.getMF()->getFunction().getUWTableKind());
+ });
+ if (UW != UWTableKind::None)
+ F->setUWTableKind(UW);
+
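+  // The IR function only needs a placeholder body (a single 'ret void'); the
+  // actual outlined instructions are cloned directly into the MachineFunction
+  // created below.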
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ IRBuilder<> Builder(EntryBB);
+ Builder.CreateRetVoid();
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MF.setIsOutlined(true);
+ MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
+
+  // Insert the new basic block into the outlined function.
+ MF.insert(MF.begin(), &MBB);
+
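+  // CFI instructions in the candidate refer to indices into the original
+  // function's frame-instruction table, so fetch that table to remap the
+  // indices while cloning.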
+ MachineFunction *OriginalMF = FirstCand.front()->getMF();
+ const std::vector<MCCFIInstruction> &Instrs =
+ OriginalMF->getFrameInstructions();
+ for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
+ ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ // Don't keep debug information for outlined instructions.
+ auto DL = DebugLoc();
+ if (I->isCFIInstruction()) {
+ unsigned CFIIndex = I->getOperand(0).getCFIIndex();
+ MCCFIInstruction CFI = Instrs[CFIIndex];
+ BuildMI(MBB, MBB.end(), DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(MF.addFrameInst(CFI));
+ } else {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ NewMI->dropMemRefs(MF);
+ NewMI->setDebugLoc(DL);
+ MBB.insert(MBB.end(), NewMI);
+ }
+ }
+
+ // Set normal properties for a late MachineFunction.
+ MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
+ MF.getRegInfo().freezeReservedRegs(MF);
+
+  // Compute the live-in set for the outlined function.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LivePhysRegs LiveIns(TRI);
+ for (auto &Cand : OF.Candidates) {
+ // Figure out live-ins at the first instruction.
+ MachineBasicBlock &OutlineBB = *Cand.front()->getParent();
+ LivePhysRegs CandLiveIns(TRI);
+ CandLiveIns.addLiveOuts(OutlineBB);
+ for (const MachineInstr &MI :
+ reverse(make_range(Cand.front(), OutlineBB.end())))
+ CandLiveIns.stepBackward(MI);
+
+ // The live-in set for the outlined function is the union of the live-ins
+ // from all the outlining points.
+ for (MCPhysReg Reg : CandLiveIns)
+ LiveIns.addReg(Reg);
+ }
+ addLiveIns(MBB, LiveIns);
+
+ TII.buildOutlinedFrame(MBB, MF, OF);
+
+ // If there's a DISubprogram associated with this outlined function, then
+ // emit debug info for the outlined function.
+ if (DISubprogram *SP = getSubprogramOrNull(OF)) {
+ // We have a DISubprogram. Get its DICompileUnit.
+ DICompileUnit *CU = SP->getUnit();
+ DIBuilder DB(M, true, CU);
+ DIFile *Unit = SP->getFile();
+ Mangler Mg;
+ // Get the mangled name of the function for the linkage name.
+ std::string Dummy;
+ raw_string_ostream MangledNameStream(Dummy);
+ Mg.getNameWithPrefix(MangledNameStream, F, false);
+
+ DISubprogram *OutlinedSP = DB.createFunction(
+ Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
+ Unit /* File */,
+ 0 /* Line 0 is reserved for compiler-generated code. */,
+ DB.createSubroutineType(
+ DB.getOrCreateTypeArray(std::nullopt)), /* void type */
+ 0, /* Line 0 is reserved for compiler-generated code. */
+ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
+ /* Outlined code is optimized code by definition. */
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
+
+ // Don't add any new variables to the subprogram.
+ DB.finalizeSubprogram(OutlinedSP);
+
+ // Attach subprogram to the function.
+ F->setSubprogram(OutlinedSP);
+ // We're done with the DIBuilder.
+ DB.finalize();
+ }
+
+ return &MF;
+}
+
+bool MachineOutliner::outline(Module &M,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper,
+ unsigned &OutlinedFunctionNum) {
+ LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
+ LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
+ << "\n");
+ bool OutlinedSomething = false;
+
+ // Sort by benefit. The most beneficial functions should be outlined first.
+ stable_sort(FunctionList,
+ [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
+
+ // Walk over each function, outlining them as we go along. Functions are
+ // outlined greedily, based off the sort above.
+ auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
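+  // Entries in the instruction mapping are set to -1 once their instructions
+  // have been outlined, so candidates that touch such entries can be pruned.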
+ LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
+ for (OutlinedFunction &OF : FunctionList) {
+#ifndef NDEBUG
+ auto NumCandidatesBefore = OF.Candidates.size();
+#endif
+ // If we outlined something that overlapped with a candidate in a previous
+ // step, then we can't outline from it.
+ erase_if(OF.Candidates, [&UnsignedVecBegin](Candidate &C) {
+ return std::any_of(UnsignedVecBegin + C.getStartIdx(),
+ UnsignedVecBegin + C.getEndIdx() + 1, [](unsigned I) {
+ return I == static_cast<unsigned>(-1);
+ });
+ });
+
+#ifndef NDEBUG
+ auto NumCandidatesAfter = OF.Candidates.size();
+ LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
+ << "/" << NumCandidatesBefore << " candidates\n");
+#endif
+
+ // If we made it unbeneficial to outline this function, skip it.
+ if (OF.getBenefit() < OutlinerBenefitThreshold) {
+ LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF.getBenefit()
+ << " B) < threshold (" << OutlinerBenefitThreshold
+ << " B)\n");
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF.getBenefit()
+ << " B) > threshold (" << OutlinerBenefitThreshold
+ << " B)\n");
+
+ // It's beneficial. Create the function and outline its sequence's
+ // occurrences.
+ OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
+ emitOutlinedFunctionRemark(OF);
+ FunctionsCreated++;
+ OutlinedFunctionNum++; // Created a function, move to the next name.
+ MachineFunction *MF = OF.MF;
+ const TargetSubtargetInfo &STI = MF->getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Replace occurrences of the sequence with calls to the new function.
+ LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
+ for (Candidate &C : OF.Candidates) {
+ MachineBasicBlock &MBB = *C.getMBB();
+ MachineBasicBlock::iterator StartIt = C.front();
+ MachineBasicBlock::iterator EndIt = C.back();
+
+ // Insert the call.
+ auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
+#ifndef NDEBUG
+ auto MBBBeingOutlinedFromName =
+ MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
+ auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
+ ? "<unknown>"
+ : MBB.getParent()->getName().str();
+ LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
+ << MFBeingOutlinedFromName << ":"
+ << MBBBeingOutlinedFromName << "\n");
+ LLVM_DEBUG(dbgs() << " .. " << *CallInst);
+#endif
+
+ // If the caller tracks liveness, then we need to make sure that
+ // anything we outline doesn't break liveness assumptions. The outlined
+ // functions themselves currently don't track liveness, but we should
+ // make sure that the ranges we yank things out of aren't wrong.
+ if (MBB.getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness)) {
+ // The following code is to add implicit def operands to the call
+ // instruction. It also updates call site information for moved
+ // code.
+ SmallSet<Register, 2> UseRegs, DefRegs;
+        // Copy over the defs in the outlined range.
+        //
+        //   First inst in outlined range  <-- Anything that's defined in this
+        //   ...                               range has to be added as an
+        //   Last inst in outlined range   <-- implicit def to the call
+        //                                     instruction.
+        //
+        // Also remove call site information for the outlined block of code.
+        // The exposed uses need to be copied in the outlined range.
+ for (MachineBasicBlock::reverse_iterator
+ Iter = EndIt.getReverse(),
+ Last = std::next(CallInst.getReverse());
+ Iter != Last; Iter++) {
+ MachineInstr *MI = &*Iter;
+ SmallSet<Register, 2> InstrUseRegs;
+ for (MachineOperand &MOP : MI->operands()) {
+ // Skip over anything that isn't a register.
+ if (!MOP.isReg())
+ continue;
+
+ if (MOP.isDef()) {
+ // Introduce DefRegs set to skip the redundant register.
+ DefRegs.insert(MOP.getReg());
+ if (UseRegs.count(MOP.getReg()) &&
+ !InstrUseRegs.count(MOP.getReg()))
+                // Since the register is modeled as defined here,
+                // it does not need to stay in the use register set.
+ UseRegs.erase(MOP.getReg());
+ } else if (!MOP.isUndef()) {
+ // Any register which is not undefined should
+ // be put in the use register set.
+ UseRegs.insert(MOP.getReg());
+ InstrUseRegs.insert(MOP.getReg());
+ }
+ }
+ if (MI->isCandidateForCallSiteEntry())
+ MI->getMF()->eraseCallSiteInfo(MI);
+ }
+
+ for (const Register &I : DefRegs)
+ // If it's a def, add it to the call instruction.
+ CallInst->addOperand(
+ MachineOperand::CreateReg(I, true, /* isDef = true */
+ true /* isImp = true */));
+
+ for (const Register &I : UseRegs)
+          // If it's an exposed use, add it to the call instruction.
+ CallInst->addOperand(
+ MachineOperand::CreateReg(I, false, /* isDef = false */
+ true /* isImp = true */));
+ }
+
+ // Erase from the point after where the call was inserted up to, and
+ // including, the final instruction in the sequence.
+ // Erase needs one past the end, so we need std::next there too.
+ MBB.erase(std::next(StartIt), std::next(EndIt));
+
+ // Keep track of what we removed by marking them all as -1.
+ for (unsigned &I : make_range(UnsignedVecBegin + C.getStartIdx(),
+ UnsignedVecBegin + C.getEndIdx() + 1))
+ I = static_cast<unsigned>(-1);
+ OutlinedSomething = true;
+
+ // Statistics.
+ NumOutlined++;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
+ return OutlinedSomething;
+}
+
+void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI) {
+ // Build instruction mappings for each function in the module. Start by
+ // iterating over each Function in M.
+ LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
+ for (Function &F : M) {
+ LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
+
+ if (F.hasFnAttribute("nooutline")) {
+ LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
+ continue;
+ }
+
+ // There's something in F. Check if it has a MachineFunction associated with
+ // it.
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // If it doesn't, then there's nothing to outline from. Move to the next
+ // Function.
+ if (!MF) {
+ LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
+ continue;
+ }
+
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
+ LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
+ "function by default\n");
+ continue;
+ }
+
+ // We have a MachineFunction. Ask the target if it's suitable for outlining.
+ // If it isn't, then move on to the next Function in the module.
+ if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs)) {
+ LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
+ << ": unsafe to outline from\n");
+ continue;
+ }
+
+ // We have a function suitable for outlining. Iterate over every
+ // MachineBasicBlock in MF and try to map its instructions to a list of
+ // unsigned integers.
+ const unsigned MinMBBSize = 2;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
+ // If there isn't anything in MBB, then there's no point in outlining from
+ // it.
+ // If there are fewer than 2 instructions in the MBB, then it can't ever
+ // contain something worth outlining.
+ // FIXME: This should be based off of the maximum size in B of an outlined
+ // call versus the size in B of the MBB.
+ if (MBB.size() < MinMBBSize) {
+ LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
+ << MinMBBSize << "\n");
+ continue;
+ }
+
+ // Check if MBB could be the target of an indirect branch. If it is, then
+ // we don't want to outline from it.
+ if (MBB.hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
+ continue;
+ }
+
+ // MBB is suitable for outlining. Map it to a list of unsigneds.
+ Mapper.convertToUnsignedVec(MBB, *TII);
+ }
+ }
+ // Statistics.
+ UnsignedVecSize = Mapper.UnsignedVec.size();
+}
+
+void MachineOutliner::initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount) {
+ // Collect instruction counts for every function. We'll use this to emit
+ // per-function size remarks later.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // We only care about MI counts here. If there's no MachineFunction at this
+ // point, then there won't be after the outliner runs, so let's move on.
+ if (!MF)
+ continue;
+ FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
+ }
+}
+
+void MachineOutliner::emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount) {
+ // Iterate over each function in the module and emit remarks.
+ // Note that we won't miss anything by doing this, because the outliner never
+ // deletes functions.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // The outliner never deletes functions. If we don't have a MF here, then we
+ // didn't have one prior to outlining either.
+ if (!MF)
+ continue;
+
+ std::string Fname = std::string(F.getName());
+ unsigned FnCountAfter = MF->getInstructionCount();
+ unsigned FnCountBefore = 0;
+
+ // Check if the function was recorded before.
+ auto It = FunctionToInstrCount.find(Fname);
+
+ // Did we have a previously-recorded size? If yes, then set FnCountBefore
+ // to that.
+ if (It != FunctionToInstrCount.end())
+ FnCountBefore = It->second;
+
+ // Compute the delta and emit a remark if there was a change.
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
+ static_cast<int64_t>(FnCountBefore);
+ if (FnDelta == 0)
+ continue;
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ DiagnosticLocation(), &MF->front());
+ R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
+ << ": Function: "
+ << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
+ << ": MI instruction count changed from "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
+ FnCountBefore)
+ << " to "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
+ FnCountAfter)
+ << "; Delta: "
+ << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
+ return R;
+ });
+ }
+}
+
+bool MachineOutliner::runOnModule(Module &M) {
+ // Check if there's anything in the module. If it's empty, then there's
+ // nothing to outline.
+ if (M.empty())
+ return false;
+
+ // Number to append to the current outlined function.
+ unsigned OutlinedFunctionNum = 0;
+
+ OutlineRepeatedNum = 0;
+ if (!doOutline(M, OutlinedFunctionNum))
+ return false;
+
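+  // Optionally rerun the outliner; later passes over the module may find new
+  // repeats, e.g. among the call sequences created by earlier iterations.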
+ for (unsigned I = 0; I < OutlinerReruns; ++I) {
+ OutlinedFunctionNum = 0;
+ OutlineRepeatedNum++;
+ if (!doOutline(M, OutlinedFunctionNum)) {
+ LLVM_DEBUG({
+ dbgs() << "Did not outline on iteration " << I + 2 << " out of "
+ << OutlinerReruns + 1 << "\n";
+ });
+ break;
+ }
+ }
+
+ return true;
+}
+
+bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ // If the user passed -enable-machine-outliner=always or
+ // -enable-machine-outliner, the pass will run on all functions in the module.
+ // Otherwise, if the target supports default outlining, it will run on all
+ // functions deemed by the target to be worth outlining from by default. Tell
+ // the user how the outliner is running.
+ LLVM_DEBUG({
+ dbgs() << "Machine Outliner: Running on ";
+ if (RunOnAllFunctions)
+ dbgs() << "all functions";
+ else
+ dbgs() << "target-default functions";
+ dbgs() << "\n";
+ });
+
+ // If the user specifies that they want to outline from linkonceodrs, set
+ // it here.
+ OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
+ InstructionMapper Mapper;
+
+ // Prepare instruction mappings for the suffix tree.
+ populateMapper(Mapper, M, MMI);
+ std::vector<OutlinedFunction> FunctionList;
+
+ // Find all of the outlining candidates.
+ findCandidates(Mapper, FunctionList);
+
+ // If we've requested size remarks, then collect the MI counts of every
+ // function before outlining, and the MI counts after outlining.
+ // FIXME: This shouldn't be in the outliner at all; it should ultimately be
+ // the pass manager's responsibility.
+ // This could pretty easily be placed in outline instead, but because we
+ // really ultimately *don't* want this here, it's done like this for now
+ // instead.
+
+ // Check if we want size remarks.
+ bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
+ StringMap<unsigned> FunctionToInstrCount;
+ if (ShouldEmitSizeRemarks)
+ initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
+
+ // Outline each of the candidates and return true if something was outlined.
+ bool OutlinedSomething =
+ outline(M, FunctionList, Mapper, OutlinedFunctionNum);
+
+ // If we outlined something, we definitely changed the MI count of the
+ // module. If we've asked for size remarks, then output them.
+ // FIXME: This should be in the pass manager.
+ if (ShouldEmitSizeRemarks && OutlinedSomething)
+ emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
+
+ LLVM_DEBUG({
+ if (!OutlinedSomething)
+ dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
+ << " because no changes were found.\n";
+ });
+
+ return OutlinedSomething;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
new file mode 100644
index 000000000000..439ff8babcc6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -0,0 +1,108 @@
+//===---------- MachinePassManager.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass management machinery for machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/PassManagerImpl.h"
+
+using namespace llvm;
+
+namespace llvm {
+template class AllAnalysesOn<MachineFunction>;
+template class AnalysisManager<MachineFunction>;
+template class PassManager<MachineFunction>;
+
+Error MachineFunctionPassManager::run(Module &M,
+ MachineFunctionAnalysisManager &MFAM) {
+  // MachineModuleAnalysis is a module analysis pass that is never invalidated
+  // because we don't run any module passes in the codegen pipeline. This is
+  // very important because the codegen state is stored in MMI, which is the
+  // analysis result of MachineModuleAnalysis. MMI should not be recomputed.
+ auto &MMI = MFAM.getResult<MachineModuleAnalysis>(M);
+
+ (void)RequireCodeGenSCCOrder;
+ assert(!RequireCodeGenSCCOrder && "not implemented");
+
+  // Add a pass instrumentation callback (PIC) to verify machine functions.
+ if (VerifyMachineFunction) {
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M);
+
+    // No need to pop this callback later since the MIR pipeline is flat, which
+    // means the current pipeline is the top-level pipeline. Callbacks are not
+    // used after the current pipeline finishes.
+ PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) {
+ assert(any_cast<const MachineFunction *>(&IR));
+ const MachineFunction *MF = any_cast<const MachineFunction *>(IR);
+ assert(MF && "Machine function should be valid for printing");
+ std::string Banner = std::string("After ") + std::string(PassID);
+ verifyMachineFunction(&MFAM, Banner, *MF);
+ });
+ }
+
+ for (auto &F : InitializationFuncs) {
+ if (auto Err = F(M, MFAM))
+ return Err;
+ }
+
+ unsigned Idx = 0;
+ size_t Size = Passes.size();
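+  // All passes live in a single list; MachineModulePasses records the indices
+  // of the module-level passes so module and function passes can be run in
+  // their original interleaved order.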
+ do {
+ // Run machine module passes
+ for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) {
+ if (auto Err = MachineModulePasses.at(Idx)(M, MFAM))
+ return Err;
+ }
+
+ // Finish running all passes.
+ if (Idx == Size)
+ break;
+
+ // Run machine function passes
+
+ // Get index range of machine function passes.
+ unsigned Begin = Idx;
+ for (; !MachineModulePasses.count(Idx) && Idx != Size; ++Idx)
+ ;
+
+ for (Function &F : M) {
+      // Do not codegen any 'available_externally' functions at all; they have
+      // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ continue;
+
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF);
+
+ for (unsigned I = Begin, E = Idx; I != E; ++I) {
+ auto *P = Passes[I].get();
+
+ if (!PI.runBeforePass<MachineFunction>(*P, MF))
+ continue;
+
+ // TODO: EmitSizeRemarks
+ PreservedAnalyses PassPA = P->run(MF, MFAM);
+ MFAM.invalidate(MF, PassPA);
+ PI.runAfterPass(*P, MF, PassPA);
+ }
+ }
+ } while (true);
+
+ for (auto &F : FinalizationFuncs) {
+ if (auto Err = F(M, MFAM))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
new file mode 100644
index 000000000000..c7e7497dab36
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -0,0 +1,3276 @@
+//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
+//
+// This SMS implementation is a target-independent back-end pass. When enabled,
+// the pass runs just prior to the register allocation pass, while the machine
+// IR is in SSA form. If software pipelining is successful, then the original
+// loop is replaced by the optimized loop. The optimized loop contains one or
+// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
+// the instructions cannot be scheduled in a given MII, we increase the MII by
+// one and try again.
+//
+// The SMS implementation is an extension of the ScheduleDAGInstrs class. We
+// represent loop carried dependences in the DAG as order edges to the Phi
+// nodes. We also perform several passes over the DAG to eliminate unnecessary
+// edges that inhibit the ability to pipeline. The implementation uses the
+// DFAPacketizer class to compute the minimum initiation interval and to check
+// where an instruction may be inserted in the pipelined schedule.
+//
+// In order for the SMS pass to work, several target specific hooks need to be
+// implemented to get information about the loop structure and to rewrite
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePipeliner.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <iomanip>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pipeliner"
+
+STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
+STATISTIC(NumPipelined, "Number of loops software pipelined");
+STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
+STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
+STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
+STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
+STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
+STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
+STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
+STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
+STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
+
+/// A command line option to turn software pipelining on or off.
+static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
+ cl::desc("Enable Software Pipelining"));
+
+/// A command line option to enable SWP at -Os.
+static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
+ cl::desc("Enable SWP at Os."), cl::Hidden,
+ cl::init(false));
+
+/// A command line argument to limit the minimum initiation interval (MII) for
+/// pipelining.
+static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
+ cl::desc("Size limit for the MII."),
+ cl::Hidden, cl::init(27));
+
+/// A command line argument to force the pipeliner to use a specified
+/// initiation interval.
+static cl::opt<int> SwpForceII("pipeliner-force-ii",
+ cl::desc("Force pipeliner to use specified II."),
+ cl::Hidden, cl::init(-1));
+
+/// A command line argument to limit the number of stages in the pipeline.
+static cl::opt<int>
+ SwpMaxStages("pipeliner-max-stages",
+                 cl::desc("Maximum stages allowed in the generated schedule."),
+ cl::Hidden, cl::init(3));
+
+/// A command line option to disable the pruning of chain dependences due to
+/// an unrelated Phi.
+static cl::opt<bool>
+ SwpPruneDeps("pipeliner-prune-deps",
+ cl::desc("Prune dependences between unrelated Phi nodes."),
+ cl::Hidden, cl::init(true));
+
+/// A command line option to disable the pruning of loop carried order
+/// dependences.
+static cl::opt<bool>
+ SwpPruneLoopCarried("pipeliner-prune-loop-carried",
+ cl::desc("Prune loop carried order dependences."),
+ cl::Hidden, cl::init(true));
+
+#ifndef NDEBUG
+static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
+#endif
+
+static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
+ cl::ReallyHidden,
+ cl::desc("Ignore RecMII"));
+
+static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> EmitTestAnnotations(
+ "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
+ cl::desc("Instead of emitting the pipelined code, annotate instructions "
+ "with the generated schedule for feeding into the "
+ "-modulo-schedule-test pass"));
+
+static cl::opt<bool> ExperimentalCodeGen(
+ "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Use the experimental peeling code generator for software pipelining"));
+
+namespace llvm {
+
+// A command line option to enable the CopyToPhi DAG mutation.
+cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true),
+ cl::desc("Enable CopyToPhi DAG Mutation"));
+
+/// A command line argument to force pipeliner to use specified issue
+/// width.
+cl::opt<int> SwpForceIssueWidth(
+ "pipeliner-force-issue-width",
+ cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
+ cl::init(-1));
+
+} // end namespace llvm
+
+unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
+char MachinePipeliner::ID = 0;
+#ifndef NDEBUG
+int MachinePipeliner::NumTries = 0;
+#endif
+char &llvm::MachinePipelinerID = MachinePipeliner::ID;
+
+INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
+ "Modulo Software Pipelining", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
+ "Modulo Software Pipelining", false, false)
+
+/// The "main" function for implementing Swing Modulo Scheduling.
+bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+
+ if (!EnableSWP)
+ return false;
+
+ if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
+ !EnableSWPOptSize.getPosition())
+ return false;
+
+ if (!mf.getSubtarget().enableMachinePipeliner())
+ return false;
+
+ // Cannot pipeline loops without instruction itineraries if we are using
+ // DFA for the pipeliner.
+ if (mf.getSubtarget().useDFAforSMS() &&
+ (!mf.getSubtarget().getInstrItineraryData() ||
+ mf.getSubtarget().getInstrItineraryData()->isEmpty()))
+ return false;
+
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+ TII = MF->getSubtarget().getInstrInfo();
+ RegClassInfo.runOnMachineFunction(*MF);
+
+ for (const auto &L : *MLI)
+ scheduleLoop(*L);
+
+ return false;
+}
+
+/// Attempt to perform the SMS algorithm on the specified loop. This function is
+/// the main entry point for the algorithm. The function identifies candidate
+/// loops, calculates the minimum initiation interval, and attempts to schedule
+/// the loop.
+bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
+ bool Changed = false;
+ for (const auto &InnerLoop : L)
+ Changed |= scheduleLoop(*InnerLoop);
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = SwpLoopLimit;
+ if (Limit >= 0) {
+ if (NumTries >= SwpLoopLimit)
+ return Changed;
+ NumTries++;
+ }
+#endif
+
+ setPragmaPipelineOptions(L);
+ if (!canPipelineLoop(L)) {
+ LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Failed to pipeline loop";
+ });
+
+ LI.LoopPipelinerInfo.reset();
+ return Changed;
+ }
+
+ ++NumTrytoPipeline;
+
+ Changed = swingModuloScheduler(L);
+
+ LI.LoopPipelinerInfo.reset();
+ return Changed;
+}
+
+void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+  // Reset the pragma state for the next loop iteration.
+ disabledByPragma = false;
+ II_setByPragma = 0;
+
+ MachineBasicBlock *LBLK = L.getTopBlock();
+
+ if (LBLK == nullptr)
+ return;
+
+ const BasicBlock *BBLK = LBLK->getBasicBlock();
+ if (BBLK == nullptr)
+ return;
+
+ const Instruction *TI = BBLK->getTerminator();
+ if (TI == nullptr)
+ return;
+
+ MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
+ if (LoopID == nullptr)
+ return;
+
+  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+
+ if (MD == nullptr)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+
+ if (S == nullptr)
+ continue;
+
+ if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
+ assert(MD->getNumOperands() == 2 &&
+ "Pipeline initiation interval hint metadata should have two operands.");
+ II_setByPragma =
+ mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+ } else if (S->getString() == "llvm.loop.pipeline.disable") {
+ disabledByPragma = true;
+ }
+ }
+}
+
+/// Return true if the loop can be software pipelined. The algorithm is
+/// restricted to loops with a single basic block. Make sure that the
+/// branch in the loop can be analyzed.
+bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
+ if (L.getNumBlocks() != 1) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Not a single basic block: "
+ << ore::NV("NumBlocks", L.getNumBlocks());
+ });
+ return false;
+ }
+
+ if (disabledByPragma) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Disabled by Pragma.";
+ });
+ return false;
+ }
+
+  // Check that the branch can be analyzed; we can't do pipelining
+  // if that's not the case.
+ LI.TBB = nullptr;
+ LI.FBB = nullptr;
+ LI.BrCond.clear();
+ if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
+ LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
+ NumFailBranch++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "The branch can't be understood";
+ });
+ return false;
+ }
+
+ LI.LoopInductionVar = nullptr;
+ LI.LoopCompare = nullptr;
+ LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
+ if (!LI.LoopPipelinerInfo) {
+ LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
+ NumFailLoop++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "The loop structure is not supported";
+ });
+ return false;
+ }
+
+ if (!L.getLoopPreheader()) {
+ LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
+ NumFailPreheader++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "No loop preheader found";
+ });
+ return false;
+ }
+
+ // Remove any subregisters from inputs to phi nodes.
+ preprocessPhiNodes(*L.getHeader());
+ return true;
+}
+
+void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
+
+ for (MachineInstr &PI : B.phis()) {
+ MachineOperand &DefOp = PI.getOperand(0);
+ assert(DefOp.getSubReg() == 0);
+ auto *RC = MRI.getRegClass(DefOp.getReg());
+
+ for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
+ MachineOperand &RegOp = PI.getOperand(i);
+ if (RegOp.getSubReg() == 0)
+ continue;
+
+ // If the operand uses a subregister, replace it with a new register
+ // without subregisters, and generate a copy to the new register.
+ Register NewReg = MRI.createVirtualRegister(RC);
+ MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
+ MachineBasicBlock::iterator At = PredB.getFirstTerminator();
+ const DebugLoc &DL = PredB.findDebugLoc(At);
+ auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(RegOp.getReg(), getRegState(RegOp),
+ RegOp.getSubReg());
+ Slots.insertMachineInstrInMaps(*Copy);
+ RegOp.setReg(NewReg);
+ RegOp.setSubReg(0);
+ }
+ }
+}
+
+/// The SMS algorithm consists of the following main steps:
+/// 1. Computation and analysis of the dependence graph.
+/// 2. Ordering of the nodes (instructions).
+/// 3. Attempt to schedule the loop.
+bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
+ assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
+
+ SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
+ II_setByPragma, LI.LoopPipelinerInfo.get());
+
+ MachineBasicBlock *MBB = L.getHeader();
+ // The kernel should not include any terminator instructions. These
+ // will be added back later.
+ SMS.startBlock(MBB);
+
+ // Compute the number of 'real' instructions in the basic block by
+ // ignoring terminators.
+ unsigned size = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
+ E = MBB->instr_end();
+ I != E; ++I, --size)
+ ;
+
+ SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
+ SMS.schedule();
+ SMS.exitRegion();
+
+ SMS.finishBlock();
+ return SMS.hasNewSchedule();
+}
+
+void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
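+/// Set the minimum initiation interval, preferring a value forced on the
+/// command line or by pragma over the computed resource/recurrence bound.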
+void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
+ if (SwpForceII > 0)
+ MII = SwpForceII;
+ else if (II_setByPragma > 0)
+ MII = II_setByPragma;
+ else
+ MII = std::max(ResMII, RecMII);
+}
+
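+/// Set the largest initiation interval the scheduler is allowed to try;
+/// unless an interval is forced, this is the MII plus 10.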
+void SwingSchedulerDAG::setMAX_II() {
+ if (SwpForceII > 0)
+ MAX_II = SwpForceII;
+ else if (II_setByPragma > 0)
+ MAX_II = II_setByPragma;
+ else
+ MAX_II = MII + 10;
+}
+
+/// We override the schedule function in ScheduleDAGInstrs to implement the
+/// scheduling part of the Swing Modulo Scheduling algorithm.
+void SwingSchedulerDAG::schedule() {
+ AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
+ buildSchedGraph(AA);
+ addLoopCarriedDependences(AA);
+ updatePhiDependences();
+ Topo.InitDAGTopologicalSorting();
+ changeDependences();
+ postProcessDAG();
+ LLVM_DEBUG(dump());
+
+ NodeSetType NodeSets;
+ findCircuits(NodeSets);
+ NodeSetType Circuits = NodeSets;
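+  // Keep a copy of the recurrence node sets; the final node order is verified
+  // against them later.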
+
+ // Calculate the MII.
+ unsigned ResMII = calculateResMII();
+ unsigned RecMII = calculateRecMII(NodeSets);
+
+ fuseRecs(NodeSets);
+
+ // This flag is used for testing and can cause correctness problems.
+ if (SwpIgnoreRecMII)
+ RecMII = 0;
+
+ setMII(ResMII, RecMII);
+ setMAX_II();
+
+ LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
+ << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
+
+ // Can't schedule a loop without a valid MII.
+ if (MII == 0) {
+ LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
+ NumFailZeroMII++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Invalid Minimal Initiation Interval: 0";
+ });
+ return;
+ }
+
+ // Don't pipeline large loops.
+ if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
+ LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
+ << ", we don't pipeline large loops\n");
+ NumFailLargeMaxMII++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Minimal Initiation Interval too large: "
+ << ore::NV("MII", (int)MII) << " > "
+                 << ore::NV("SwpMaxMii", SwpMaxMii)
+                 << ". Refer to -pipeliner-max-mii.";
+ });
+ return;
+ }
+
+ computeNodeFunctions(NodeSets);
+
+ registerPressureFilter(NodeSets);
+
+ colocateNodeSets(NodeSets);
+
+ checkNodeSets(NodeSets);
+
+ LLVM_DEBUG({
+ for (auto &I : NodeSets) {
+ dbgs() << " Rec NodeSet ";
+ I.dump();
+ }
+ });
+
+ llvm::stable_sort(NodeSets, std::greater<NodeSet>());
+
+ groupRemainingNodes(NodeSets);
+
+ removeDuplicateNodes(NodeSets);
+
+ LLVM_DEBUG({
+ for (auto &I : NodeSets) {
+ dbgs() << " NodeSet ";
+ I.dump();
+ }
+ });
+
+ computeNodeOrder(NodeSets);
+
+ // check for node order issues
+ checkValidNodeOrder(Circuits);
+
+ SMSchedule Schedule(Pass.MF, this);
+ Scheduled = schedulePipeline(Schedule);
+
+  if (!Scheduled) {
+ LLVM_DEBUG(dbgs() << "No schedule found, return\n");
+ NumFailNoSchedule++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Unable to find schedule";
+ });
+ return;
+ }
+
+ unsigned numStages = Schedule.getMaxStageCount();
+ // No need to generate pipeline if there are no overlapped iterations.
+ if (numStages == 0) {
+ LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
+ NumFailZeroStage++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "No need to pipeline - no overlapped iterations in schedule.";
+ });
+ return;
+ }
+ // Check that the maximum stage count is less than user-defined limit.
+ if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
+ LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
+ << " : too many stages, abort\n");
+ NumFailLargeMaxStage++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Too many stages in schedule: "
+ << ore::NV("numStages", (int)numStages) << " > "
+ << ore::NV("SwpMaxStages", SwpMaxStages)
+ << ". Refer to -pipeliner-max-stages.";
+ });
+ return;
+ }
+
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(),
+ Loop.getHeader())
+           << "Pipelined successfully!";
+ });
+
+ // Generate the schedule as a ModuloSchedule.
+ DenseMap<MachineInstr *, int> Cycles, Stages;
+ std::vector<MachineInstr *> OrderedInsts;
+ for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
+ ++Cycle) {
+ for (SUnit *SU : Schedule.getInstructions(Cycle)) {
+ OrderedInsts.push_back(SU->getInstr());
+ Cycles[SU->getInstr()] = Cycle;
+ Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
+ }
+ }
+ DenseMap<MachineInstr *, std::pair<unsigned, int64_t>> NewInstrChanges;
+ for (auto &KV : NewMIs) {
+ Cycles[KV.first] = Cycles[KV.second];
+ Stages[KV.first] = Stages[KV.second];
+ NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
+ }
+
+ ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
+ std::move(Stages));
+ if (EmitTestAnnotations) {
+ assert(NewInstrChanges.empty() &&
+ "Cannot serialize a schedule with InstrChanges!");
+ ModuloScheduleTestAnnotater MSTI(MF, MS);
+ MSTI.annotate();
+ return;
+ }
+ // The experimental code generator can't work if there are InstChanges.
+ if (ExperimentalCodeGen && NewInstrChanges.empty()) {
+ PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
+ MSE.expand();
+ } else {
+ ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
+ MSE.expand();
+ MSE.cleanup();
+ }
+ ++NumPipelined;
+}
+
+/// Clean up after the software pipeliner runs.
+void SwingSchedulerDAG::finishBlock() {
+ for (auto &KV : NewMIs)
+ MF.deleteMachineInstr(KV.second);
+ NewMIs.clear();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// Return the register values for the operands of a Phi instruction.
+/// This function assumes the instruction is a Phi.
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
+ unsigned &InitVal, unsigned &LoopVal) {
+ assert(Phi.isPHI() && "Expecting a Phi.");
+
+ InitVal = 0;
+ LoopVal = 0;
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != Loop)
+ InitVal = Phi.getOperand(i).getReg();
+ else
+ LoopVal = Phi.getOperand(i).getReg();
+
+ assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
+}
+
+/// Return the Phi register value that comes from the loop block.
+static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() == LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return true if SUb can be reached from SUa following the chain edges.
+static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SUnit *, 8> Worklist;
+ Worklist.push_back(SUa);
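+  // Worklist search over order (chain) successor edges, stopping as soon as
+  // SUb is reached.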
+ while (!Worklist.empty()) {
+ const SUnit *SU = Worklist.pop_back_val();
+ for (const auto &SI : SU->Succs) {
+ SUnit *SuccSU = SI.getSUnit();
+ if (SI.getKind() == SDep::Order) {
+ if (Visited.count(SuccSU))
+ continue;
+ if (SuccSU == SUb)
+ return true;
+ Worklist.push_back(SuccSU);
+ Visited.insert(SuccSU);
+ }
+ }
+ }
+ return false;
+}
+
+/// Return true if the instruction causes a chain between memory
+/// references before and after it.
+static bool isDependenceBarrier(MachineInstr &MI) {
+ return MI.isCall() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects() ||
+ (MI.hasOrderedMemoryRef() &&
+ (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
+}
+
+/// Return the underlying objects for the memory references of an instruction.
+/// This function calls the code in ValueTracking, but first checks that the
+/// instruction has a memory operand.
+static void getUnderlyingObjects(const MachineInstr *MI,
+ SmallVectorImpl<const Value *> &Objs) {
+ if (!MI->hasOneMemOperand())
+ return;
+ MachineMemOperand *MM = *MI->memoperands_begin();
+ if (!MM->getValue())
+ return;
+  // Collect the objects into a temporary so Objs isn't modified while it is
+  // being iterated.
+  SmallVector<const Value *, 2> Objects;
+  getUnderlyingObjects(MM->getValue(), Objects);
+  for (const Value *V : Objects) {
+    if (!isIdentifiedObject(V)) {
+      Objs.clear();
+      return;
+    }
+    Objs.push_back(V);
+  }
+}
+
+/// Add a chain edge between a load and store if the store can be an
+/// alias of the load on a subsequent iteration, i.e., a loop carried
+/// dependence. This code is very similar to the code in ScheduleDAGInstrs
+/// but that code doesn't create loop carried dependences.
+void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
+ MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ Value *UnknownValue =
+ UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
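+  // Memory accesses whose underlying object cannot be identified are grouped
+  // under this sentinel value.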
+ for (auto &SU : SUnits) {
+ MachineInstr &MI = *SU.getInstr();
+ if (isDependenceBarrier(MI))
+ PendingLoads.clear();
+ else if (MI.mayLoad()) {
+ SmallVector<const Value *, 4> Objs;
+ ::getUnderlyingObjects(&MI, Objs);
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
+ for (const auto *V : Objs) {
+ SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
+ SUs.push_back(&SU);
+ }
+ } else if (MI.mayStore()) {
+ SmallVector<const Value *, 4> Objs;
+ ::getUnderlyingObjects(&MI, Objs);
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
+ for (const auto *V : Objs) {
+ MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
+ PendingLoads.find(V);
+ if (I == PendingLoads.end())
+ continue;
+ for (auto *Load : I->second) {
+ if (isSuccOrder(Load, &SU))
+ continue;
+ MachineInstr &LdMI = *Load->getInstr();
+          // First, perform the cheaper check that compares the base register.
+          // If they are the same and the load offset is less than the store
+          // offset, then mark the dependence as potentially loop carried.
+ const MachineOperand *BaseOp1, *BaseOp2;
+ int64_t Offset1, Offset2;
+ bool Offset1IsScalable, Offset2IsScalable;
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
+ Offset1IsScalable, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
+ Offset2IsScalable, TRI)) {
+ if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ Offset1IsScalable == Offset2IsScalable &&
+ (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
+ "What happened to the chain edge?");
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+ }
+          // Second, the more expensive check that uses alias analysis on the
+          // base registers. If they alias, and the load offset is less than
+          // the store offset, then mark the dependence as loop carried.
+ if (!AA) {
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+ MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
+ MachineMemOperand *MMO2 = *MI.memoperands_begin();
+ if (!MMO1->getValue() || !MMO2->getValue()) {
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+ if (MMO1->getValue() == MMO2->getValue() &&
+ MMO1->getOffset() <= MMO2->getOffset()) {
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+ if (!AA->isNoAlias(
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(),
+ MMO2->getAAInfo()))) {
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ }
+ }
+ }
+ }
+ }
+}
+
+/// Update the phi dependences in the DAG because ScheduleDAGInstrs no longer
+/// processes dependences for PHIs. This function adds true dependences
+/// from a PHI to a use, and a loop carried dependence from the use to the
+/// PHI. The loop carried dependence is represented as an anti dependence
+/// edge. This function also removes chain dependences between unrelated
+/// PHIs.
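+///
+/// Hypothetical example (for illustration only):
+///   %v1 = PHI %init, %bb.ph, %v2, %bb.loop
+///   %v2 = ADD %v1, 1
+/// A Data edge is added from the Phi to the ADD for the use of %v1, and the
+/// loop carried dependence from the ADD back to the Phi for %v2 is modeled
+/// as an Anti edge with latency 1.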
+void SwingSchedulerDAG::updatePhiDependences() {
+ SmallVector<SDep, 4> RemoveDeps;
+ const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
+
+ // Iterate over each DAG node.
+ for (SUnit &I : SUnits) {
+ RemoveDeps.clear();
+    // Set to the register if the instruction has an operand defined by or
+    // used by a Phi.
+    unsigned HasPhiUse = 0;
+    unsigned HasPhiDef = 0;
+ MachineInstr *MI = I.getInstr();
+    // Iterate over each operand and process the definitions and uses.
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MO.isDef()) {
+ // If the register is used by a Phi, then create an anti dependence.
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = MRI.use_instr_begin(Reg),
+ UE = MRI.use_instr_end();
+ UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SUnit *SU = getSUnit(UseMI);
+ if (SU != nullptr && UseMI->isPHI()) {
+ if (!MI->isPHI()) {
+ SDep Dep(SU, SDep::Anti, Reg);
+ Dep.setLatency(1);
+ I.addPred(Dep);
+ } else {
+ HasPhiDef = Reg;
+ // Add a chain edge to a dependent Phi that isn't an existing
+ // predecessor.
+ if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
+ I.addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ } else if (MO.isUse()) {
+ // If the register is defined by a Phi, then create a true dependence.
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
+ if (DefMI == nullptr)
+ continue;
+ SUnit *SU = getSUnit(DefMI);
+ if (SU != nullptr && DefMI->isPHI()) {
+ if (!MI->isPHI()) {
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(0);
+ ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep);
+ I.addPred(Dep);
+ } else {
+ HasPhiUse = Reg;
+ // Add a chain edge to a dependent Phi that isn't an existing
+ // predecessor.
+ if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
+ I.addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ }
+ // Remove order dependences from an unrelated Phi.
+ if (!SwpPruneDeps)
+ continue;
+ for (auto &PI : I.Preds) {
+ MachineInstr *PMI = PI.getSUnit()->getInstr();
+ if (PMI->isPHI() && PI.getKind() == SDep::Order) {
+ if (I.getInstr()->isPHI()) {
+ if (PMI->getOperand(0).getReg() == HasPhiUse)
+ continue;
+ if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
+ continue;
+ }
+ RemoveDeps.push_back(PI);
+ }
+ }
+ for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
+ I.removePred(RemoveDeps[i]);
+ }
+}
+
+/// Iterate over each DAG node and see if we can change any dependences
+/// in order to reduce the recurrence MII.
+void SwingSchedulerDAG::changeDependences() {
+ // See if an instruction can use a value from the previous iteration.
+ // If so, we update the base and offset of the instruction and change
+ // the dependences.
+ for (SUnit &I : SUnits) {
+ unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
+ int64_t NewOffset = 0;
+ if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
+ NewOffset))
+ continue;
+
+ // Get the MI and SUnit for the instruction that defines the original base.
+ Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
+ if (!DefMI)
+ continue;
+ SUnit *DefSU = getSUnit(DefMI);
+ if (!DefSU)
+ continue;
+    // Get the MI and SUnit for the instruction that defines the new base.
+ MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
+ if (!LastMI)
+ continue;
+ SUnit *LastSU = getSUnit(LastMI);
+ if (!LastSU)
+ continue;
+
+ if (Topo.IsReachable(&I, LastSU))
+ continue;
+
+ // Remove the dependence. The value now depends on a prior iteration.
+ SmallVector<SDep, 4> Deps;
+ for (const SDep &P : I.Preds)
+ if (P.getSUnit() == DefSU)
+ Deps.push_back(P);
+ for (int i = 0, e = Deps.size(); i != e; i++) {
+ Topo.RemovePred(&I, Deps[i].getSUnit());
+ I.removePred(Deps[i]);
+ }
+ // Remove the chain dependence between the instructions.
+ Deps.clear();
+ for (auto &P : LastSU->Preds)
+ if (P.getSUnit() == &I && P.getKind() == SDep::Order)
+ Deps.push_back(P);
+ for (int i = 0, e = Deps.size(); i != e; i++) {
+ Topo.RemovePred(LastSU, Deps[i].getSUnit());
+ LastSU->removePred(Deps[i]);
+ }
+
+ // Add a dependence between the new instruction and the instruction
+ // that defines the new base.
+ SDep Dep(&I, SDep::Anti, NewBase);
+ Topo.AddPred(LastSU, &I);
+ LastSU->addPred(Dep);
+
+ // Remember the base and offset information so that we can update the
+ // instruction during code generation.
+ InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
+ }
+}
+
+namespace {
+
+// FuncUnitSorter - Comparison operator used to sort instructions by
+// the number of functional unit choices.
+struct FuncUnitSorter {
+ const InstrItineraryData *InstrItins;
+ const MCSubtargetInfo *STI;
+ DenseMap<InstrStage::FuncUnits, unsigned> Resources;
+
+ FuncUnitSorter(const TargetSubtargetInfo &TSI)
+ : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
+
+ // Compute the number of functional unit alternatives needed
+ // at each stage, and take the minimum value. We prioritize the
+ // instructions by the least number of choices first.
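+  // For example (illustration only), an instruction that can issue on either
+  // of two ALUs has two alternatives, while one restricted to a single
+  // multiply unit has one alternative and is therefore given higher priority.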
+ unsigned minFuncUnits(const MachineInstr *Inst,
+ InstrStage::FuncUnits &F) const {
+ unsigned SchedClass = Inst->getDesc().getSchedClass();
+ unsigned min = UINT_MAX;
+ if (InstrItins && !InstrItins->isEmpty()) {
+ for (const InstrStage &IS :
+ make_range(InstrItins->beginStage(SchedClass),
+ InstrItins->endStage(SchedClass))) {
+ InstrStage::FuncUnits funcUnits = IS.getUnits();
+ unsigned numAlternatives = llvm::popcount(funcUnits);
+ if (numAlternatives < min) {
+ min = numAlternatives;
+ F = funcUnits;
+ }
+ }
+ return min;
+ }
+ if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc =
+ STI->getSchedModel().getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ // No valid Schedule Class Desc for schedClass, should be
+ // Pseudo/PostRAPseudo
+ return min;
+
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ const MCProcResourceDesc *ProcResource =
+ STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
+ unsigned NumUnits = ProcResource->NumUnits;
+ if (NumUnits < min) {
+ min = NumUnits;
+ F = PRE.ProcResourceIdx;
+ }
+ }
+ return min;
+ }
+ llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
+ }
+
+ // Compute the critical resources needed by the instruction. This
+ // function records the functional units needed by instructions that
+ // must use only one functional unit. We use this as a tie breaker
+  // for computing the resource MII. The instructions that require
+ // the same, highly used, functional unit have high priority.
+ void calcCriticalResources(MachineInstr &MI) {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
+ if (InstrItins && !InstrItins->isEmpty()) {
+ for (const InstrStage &IS :
+ make_range(InstrItins->beginStage(SchedClass),
+ InstrItins->endStage(SchedClass))) {
+ InstrStage::FuncUnits FuncUnits = IS.getUnits();
+ if (llvm::popcount(FuncUnits) == 1)
+ Resources[FuncUnits]++;
+ }
+ return;
+ }
+ if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc =
+ STI->getSchedModel().getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ // No valid Schedule Class Desc for schedClass, should be
+ // Pseudo/PostRAPseudo
+ return;
+
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ Resources[PRE.ProcResourceIdx]++;
+ }
+ return;
+ }
+ llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
+ }
+
+  /// Return true if IS1 has lower priority than IS2.
+ bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
+ InstrStage::FuncUnits F1 = 0, F2 = 0;
+ unsigned MFUs1 = minFuncUnits(IS1, F1);
+ unsigned MFUs2 = minFuncUnits(IS2, F2);
+ if (MFUs1 == MFUs2)
+ return Resources.lookup(F1) < Resources.lookup(F2);
+ return MFUs1 > MFUs2;
+ }
+};
+
+} // end anonymous namespace
+
+/// Calculate the resource constrained minimum initiation interval for the
+/// specified loop. We use the DFA to model the resources needed for
+/// each instruction, and we ignore dependences. A different DFA is created
+/// for each cycle that is required. When adding a new instruction, we attempt
+/// to add it to each existing DFA, until a legal space is found. If the
+/// instruction cannot be reserved in an existing DFA, we create a new one.
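+///
+/// For example (hypothetical numbers), if the loop body contains three memory
+/// operations and the target models two load/store units per cycle, at least
+/// ceil(3 / 2) = 2 cycles are required, so the ResMII is at least 2.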
+unsigned SwingSchedulerDAG::calculateResMII() {
+ LLVM_DEBUG(dbgs() << "calculateResMII:\n");
+ ResourceManager RM(&MF.getSubtarget(), this);
+ return RM.calculateResMII();
+}
+
+/// Calculate the recurrence-constrained minimum initiation interval.
+/// Iterate over each circuit. Compute the delay(c) and distance(c)
+/// for each circuit. The II needs to satisfy the inequality
+/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
+/// II that satisfies the inequality, and the RecMII is the maximum
+/// of those values.
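+///
+/// Worked example (hypothetical numbers): a circuit with delay(c) = 3 and
+/// distance(c) = 1 requires 3 - II * 1 <= 0, i.e. II >= 3, so that circuit
+/// contributes ceil(3 / 1) = 3 to the RecMII.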
+unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
+ unsigned RecMII = 0;
+
+ for (NodeSet &Nodes : NodeSets) {
+ if (Nodes.empty())
+ continue;
+
+ unsigned Delay = Nodes.getLatency();
+ unsigned Distance = 1;
+
+ // ii = ceil(delay / distance)
+ unsigned CurMII = (Delay + Distance - 1) / Distance;
+ Nodes.setRecMII(CurMII);
+ if (CurMII > RecMII)
+ RecMII = CurMII;
+ }
+
+ return RecMII;
+}
+
+/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
+/// but we do this to find the circuits, and then change them back.
+static void swapAntiDependences(std::vector<SUnit> &SUnits) {
+ SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
+ for (SUnit &SU : SUnits) {
+ for (SDep &Pred : SU.Preds)
+ if (Pred.getKind() == SDep::Anti)
+ DepsAdded.push_back(std::make_pair(&SU, Pred));
+ }
+ for (std::pair<SUnit *, SDep> &P : DepsAdded) {
+ // Remove this anti dependency and add one in the reverse direction.
+ SUnit *SU = P.first;
+ SDep &D = P.second;
+ SUnit *TargetSU = D.getSUnit();
+ unsigned Reg = D.getReg();
+ unsigned Lat = D.getLatency();
+ SU->removePred(D);
+ SDep Dep(SU, SDep::Anti, Reg);
+ Dep.setLatency(Lat);
+ TargetSU->addPred(Dep);
+ }
+}
+
+/// Create the adjacency structure of the nodes in the graph.
+void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
+ SwingSchedulerDAG *DAG) {
+ BitVector Added(SUnits.size());
+ DenseMap<int, int> OutputDeps;
+ for (int i = 0, e = SUnits.size(); i != e; ++i) {
+ Added.reset();
+ // Add any successor to the adjacency matrix and exclude duplicates.
+ for (auto &SI : SUnits[i].Succs) {
+ // Only create a back-edge on the first and last nodes of a dependence
+ // chain. This records any chains and adds them later.
+ if (SI.getKind() == SDep::Output) {
+ int N = SI.getSUnit()->NodeNum;
+ int BackEdge = i;
+ auto Dep = OutputDeps.find(BackEdge);
+ if (Dep != OutputDeps.end()) {
+ BackEdge = Dep->second;
+ OutputDeps.erase(Dep);
+ }
+ OutputDeps[N] = BackEdge;
+ }
+      // Do not process a boundary node or an artificial node.
+ // A back-edge is processed only if it goes to a Phi.
+ if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
+ (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
+ continue;
+ int N = SI.getSUnit()->NodeNum;
+ if (!Added.test(N)) {
+ AdjK[i].push_back(N);
+ Added.set(N);
+ }
+ }
+ // A chain edge between a store and a load is treated as a back-edge in the
+ // adjacency matrix.
+ for (auto &PI : SUnits[i].Preds) {
+ if (!SUnits[i].getInstr()->mayStore() ||
+ !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
+ continue;
+ if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
+ int N = PI.getSUnit()->NodeNum;
+ if (!Added.test(N)) {
+ AdjK[i].push_back(N);
+ Added.set(N);
+ }
+ }
+ }
+ }
+ // Add back-edges in the adjacency matrix for the output dependences.
+ for (auto &OD : OutputDeps)
+ if (!Added.test(OD.second)) {
+ AdjK[OD.first].push_back(OD.second);
+ Added.set(OD.second);
+ }
+}
+
+/// Identify an elementary circuit in the dependence graph starting at the
+/// specified node.
+bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
+ bool HasBackedge) {
+ SUnit *SV = &SUnits[V];
+ bool F = false;
+ Stack.insert(SV);
+ Blocked.set(V);
+
+ for (auto W : AdjK[V]) {
+ if (NumPaths > MaxPaths)
+ break;
+ if (W < S)
+ continue;
+ if (W == S) {
+ if (!HasBackedge)
+ NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
+ F = true;
+ ++NumPaths;
+ break;
+ } else if (!Blocked.test(W)) {
+ if (circuit(W, S, NodeSets,
+ Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
+ F = true;
+ }
+ }
+
+ if (F)
+ unblock(V);
+ else {
+ for (auto W : AdjK[V]) {
+ if (W < S)
+ continue;
+ B[W].insert(SV);
+ }
+ }
+ Stack.pop_back();
+ return F;
+}
+
+/// Unblock a node in the circuit finding algorithm.
+void SwingSchedulerDAG::Circuits::unblock(int U) {
+ Blocked.reset(U);
+ SmallPtrSet<SUnit *, 4> &BU = B[U];
+ while (!BU.empty()) {
+ SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
+ assert(SI != BU.end() && "Invalid B set.");
+ SUnit *W = *SI;
+ BU.erase(W);
+ if (Blocked.test(W->NodeNum))
+ unblock(W->NodeNum);
+ }
+}
+
+/// Identify all the elementary circuits in the dependence graph using
+/// Johnson's circuit algorithm.
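+///
+/// Sketch (illustration only): for a graph with edges A->B, B->C, and C->A,
+/// the search starts at A, extends the path A-B-C while blocking visited
+/// nodes, and records the elementary circuit {A, B, C} when the edge back to
+/// the start node is found. Nodes with a lower index than the start node are
+/// skipped, so each circuit is enumerated only once.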
+void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
+ // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
+ // but we do this to find the circuits, and then change them back.
+ swapAntiDependences(SUnits);
+
+ Circuits Cir(SUnits, Topo);
+ // Create the adjacency structure.
+ Cir.createAdjacencyStructure(this);
+ for (int i = 0, e = SUnits.size(); i != e; ++i) {
+ Cir.reset();
+ Cir.circuit(i, i, NodeSets);
+ }
+
+ // Change the dependences back so that we've created a DAG again.
+ swapAntiDependences(SUnits);
+}
+
+// Create artificial dependencies between the source of a COPY/REG_SEQUENCE
+// that is loop-carried to the USE in the next iteration. This helps the
+// pipeliner avoid additional copies that are needed across iterations. An
+// artificial dependence edge is added from USE to the SOURCE of the
+// COPY/REG_SEQUENCE.
+
+// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
+// SRCOfCopy------True-Dep---> COPY/REG_SEQUENCE
+// PHI-------True-Dep------> USEOfPhi
+
+// The mutation creates
+// USEOfPHI -------Artificial-Dep---> SRCOfCopy
+
+// Overall this ensures that USEOfPHI is scheduled before SRCOfCopy (since USE
+// is a predecessor of SRCOfCopy), which implies that the COPY/REG_SEQUENCE is
+// scheduled late enough to avoid additional copies across iterations. The
+// possible scheduling order would be
+// USEOfPHI --- SRCOfCopy --- COPY/REG_SEQUENCE.
+
+void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
+ for (SUnit &SU : DAG->SUnits) {
+ // Find the COPY/REG_SEQUENCE instruction.
+ if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
+ continue;
+
+ // Record the loop carried PHIs.
+ SmallVector<SUnit *, 4> PHISUs;
+ // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
+ SmallVector<SUnit *, 4> SrcSUs;
+
+ for (auto &Dep : SU.Preds) {
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ SDep::Kind DepKind = Dep.getKind();
+ // Save the loop carried PHI.
+ if (DepKind == SDep::Anti && TmpMI->isPHI())
+ PHISUs.push_back(TmpSU);
+ // Save the source of COPY/REG_SEQUENCE.
+      // If the source has no predecessors, we will end up creating cycles.
+ else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
+ SrcSUs.push_back(TmpSU);
+ }
+
+ if (PHISUs.size() == 0 || SrcSUs.size() == 0)
+ continue;
+
+ // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
+ // SUnit to the container.
+ SmallVector<SUnit *, 8> UseSUs;
+ // Do not use iterator based loop here as we are updating the container.
+ for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
+ for (auto &Dep : PHISUs[Index]->Succs) {
+ if (Dep.getKind() != SDep::Data)
+ continue;
+
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
+ PHISUs.push_back(TmpSU);
+ continue;
+ }
+ UseSUs.push_back(TmpSU);
+ }
+ }
+
+ if (UseSUs.size() == 0)
+ continue;
+
+ SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
+ // Add the artificial dependencies if it does not form a cycle.
+ for (auto *I : UseSUs) {
+ for (auto *Src : SrcSUs) {
+ if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
+ Src->addPred(SDep(I, SDep::Artificial));
+ SDAG->Topo.AddPred(Src, I);
+ }
+ }
+ }
+ }
+}
+
+/// Return true for dependences that we ignore when computing the cost
+/// functions.
+/// We ignore the back-edge recurrence in order to avoid unbounded recursion
+/// in the calculation of the ASAP, ALAP, etc functions.
+static bool ignoreDependence(const SDep &D, bool isPred) {
+ if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
+ return true;
+ return D.getKind() == SDep::Anti && isPred;
+}
+
+/// Compute several functions needed to order the nodes for scheduling.
+/// ASAP - Earliest time to schedule a node.
+/// ALAP - Latest time to schedule a node.
+/// MOV - Mobility function, difference between ALAP and ASAP.
+/// D - Depth of each node.
+/// H - Height of each node.
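+///
+/// Roughly, as computed below:
+///   ASAP(v) = max over preds u of ASAP(u) + lat(u,v) - dist(u,v) * MII
+///   ALAP(v) = min over succs w of ALAP(w) - lat(v,w) + dist(v,w) * MII
+///   MOV(v)  = ALAP(v) - ASAP(v)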
+void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
+ ScheduleInfo.resize(SUnits.size());
+
+ LLVM_DEBUG({
+ for (int I : Topo) {
+ const SUnit &SU = SUnits[I];
+ dumpNode(SU);
+ }
+ });
+
+ int maxASAP = 0;
+ // Compute ASAP and ZeroLatencyDepth.
+ for (int I : Topo) {
+ int asap = 0;
+ int zeroLatencyDepth = 0;
+ SUnit *SU = &SUnits[I];
+ for (const SDep &P : SU->Preds) {
+ SUnit *pred = P.getSUnit();
+ if (P.getLatency() == 0)
+ zeroLatencyDepth =
+ std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
+ if (ignoreDependence(P, true))
+ continue;
+ asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
+ getDistance(pred, SU, P) * MII));
+ }
+ maxASAP = std::max(maxASAP, asap);
+ ScheduleInfo[I].ASAP = asap;
+ ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
+ }
+
+ // Compute ALAP, ZeroLatencyHeight, and MOV.
+ for (int I : llvm::reverse(Topo)) {
+ int alap = maxASAP;
+ int zeroLatencyHeight = 0;
+ SUnit *SU = &SUnits[I];
+ for (const SDep &S : SU->Succs) {
+ SUnit *succ = S.getSUnit();
+ if (succ->isBoundaryNode())
+ continue;
+ if (S.getLatency() == 0)
+ zeroLatencyHeight =
+ std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
+ if (ignoreDependence(S, true))
+ continue;
+ alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
+ getDistance(SU, succ, S) * MII));
+ }
+
+ ScheduleInfo[I].ALAP = alap;
+ ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
+ }
+
+ // After computing the node functions, compute the summary for each node set.
+ for (NodeSet &I : NodeSets)
+ I.computeNodeSetInfo(this);
+
+ LLVM_DEBUG({
+ for (unsigned i = 0; i < SUnits.size(); i++) {
+ dbgs() << "\tNode " << i << ":\n";
+ dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
+ dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
+ dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
+ dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
+ dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
+ dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
+ dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
+ }
+ });
+}
+
+/// Compute the Pred_L(O) set, as defined in the paper. The set is defined
+/// as the predecessors of the elements of NodeOrder that are not also in
+/// NodeOrder.
+static bool pred_L(SetVector<SUnit *> &NodeOrder,
+ SmallSetVector<SUnit *, 8> &Preds,
+ const NodeSet *S = nullptr) {
+ Preds.clear();
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Pred : SU->Preds) {
+ if (S && S->count(Pred.getSUnit()) == 0)
+ continue;
+ if (ignoreDependence(Pred, true))
+ continue;
+ if (NodeOrder.count(Pred.getSUnit()) == 0)
+ Preds.insert(Pred.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.getKind() != SDep::Anti)
+ continue;
+ if (S && S->count(Succ.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(Succ.getSUnit()) == 0)
+ Preds.insert(Succ.getSUnit());
+ }
+ }
+ return !Preds.empty();
+}
+
+/// Compute the Succ_L(O) set, as defined in the paper. The set is defined
+/// as the successors of the elements of NodeOrder that are not also in
+/// NodeOrder.
+static bool succ_L(SetVector<SUnit *> &NodeOrder,
+ SmallSetVector<SUnit *, 8> &Succs,
+ const NodeSet *S = nullptr) {
+ Succs.clear();
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Succ : SU->Succs) {
+ if (S && S->count(Succ.getSUnit()) == 0)
+ continue;
+ if (ignoreDependence(Succ, false))
+ continue;
+ if (NodeOrder.count(Succ.getSUnit()) == 0)
+ Succs.insert(Succ.getSUnit());
+ }
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.getKind() != SDep::Anti)
+ continue;
+ if (S && S->count(Pred.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(Pred.getSUnit()) == 0)
+ Succs.insert(Pred.getSUnit());
+ }
+ }
+ return !Succs.empty();
+}
+
+/// Return true if there is a path from the specified node to any of the nodes
+/// in DestNodes. Keep track and return the nodes in any path.
+static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
+ SetVector<SUnit *> &DestNodes,
+ SetVector<SUnit *> &Exclude,
+ SmallPtrSet<SUnit *, 8> &Visited) {
+ if (Cur->isBoundaryNode())
+ return false;
+ if (Exclude.contains(Cur))
+ return false;
+ if (DestNodes.contains(Cur))
+ return true;
+ if (!Visited.insert(Cur).second)
+ return Path.contains(Cur);
+ bool FoundPath = false;
+ for (auto &SI : Cur->Succs)
+ if (!ignoreDependence(SI, false))
+ FoundPath |=
+ computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ for (auto &PI : Cur->Preds)
+ if (PI.getKind() == SDep::Anti)
+ FoundPath |=
+ computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ if (FoundPath)
+ Path.insert(Cur);
+ return FoundPath;
+}
+
+/// Compute the live-out registers for the instructions in a node-set.
+/// The live-out registers are those that are defined in the node-set,
+/// but not used within the set; uses in Phi instructions are not counted.
+static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
+ NodeSet &NS) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<RegisterMaskPair, 8> LiveOutRegs;
+ SmallSet<unsigned, 4> Uses;
+ for (SUnit *SU : NS) {
+ const MachineInstr *MI = SU->getInstr();
+ if (MI->isPHI())
+ continue;
+ for (const MachineOperand &MO : MI->all_uses()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual())
+ Uses.insert(Reg);
+ else if (MRI.isAllocatable(Reg))
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ Uses.insert(Unit);
+ }
+ }
+ for (SUnit *SU : NS)
+ for (const MachineOperand &MO : SU->getInstr()->all_defs())
+ if (!MO.isDead()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ if (!Uses.count(Reg))
+ LiveOutRegs.push_back(RegisterMaskPair(Reg,
+ LaneBitmask::getNone()));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ if (!Uses.count(Unit))
+ LiveOutRegs.push_back(
+ RegisterMaskPair(Unit, LaneBitmask::getNone()));
+ }
+ }
+ RPTracker.addLiveRegs(LiveOutRegs);
+}
+
+/// A heuristic to filter nodes in recurrent node-sets if the register
+/// pressure of a set is too high.
+void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
+ for (auto &NS : NodeSets) {
+ // Skip small node-sets since they won't cause register pressure problems.
+ if (NS.size() <= 2)
+ continue;
+ IntervalPressure RecRegPressure;
+ RegPressureTracker RecRPTracker(RecRegPressure);
+ RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
+ computeLiveOuts(MF, RecRPTracker, NS);
+ RecRPTracker.closeBottom();
+
+ std::vector<SUnit *> SUnits(NS.begin(), NS.end());
+ llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
+ return A->NodeNum > B->NodeNum;
+ });
+
+ for (auto &SU : SUnits) {
+ // Since we're computing the register pressure for a subset of the
+ // instructions in a block, we need to set the tracker for each
+ // instruction in the node-set. The tracker is set to the instruction
+ // just after the one we're interested in.
+ MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
+ RecRPTracker.setPos(std::next(CurInstI));
+
+ RegPressureDelta RPDelta;
+ ArrayRef<PressureChange> CriticalPSets;
+ RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
+ CriticalPSets,
+ RecRegPressure.MaxSetPressure);
+ if (RPDelta.Excess.isValid()) {
+ LLVM_DEBUG(
+ dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
+ << ":" << RPDelta.Excess.getUnitInc() << "\n");
+ NS.setExceedPressure(SU);
+ break;
+ }
+ RecRPTracker.recede();
+ }
+ }
+}
+
+/// A heuristic to colocate node sets that have the same set of
+/// successors.
+void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
+ unsigned Colocate = 0;
+ for (int i = 0, e = NodeSets.size(); i < e; ++i) {
+ NodeSet &N1 = NodeSets[i];
+ SmallSetVector<SUnit *, 8> S1;
+ if (N1.empty() || !succ_L(N1, S1))
+ continue;
+ for (int j = i + 1; j < e; ++j) {
+ NodeSet &N2 = NodeSets[j];
+ if (N1.compareRecMII(N2) != 0)
+ continue;
+ SmallSetVector<SUnit *, 8> S2;
+ if (N2.empty() || !succ_L(N2, S2))
+ continue;
+ if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
+ N1.setColocate(++Colocate);
+ N2.setColocate(Colocate);
+ break;
+ }
+ }
+ }
+}
+
+/// Check if the existing node-sets are profitable. If not, then ignore the
+/// recurrent node-sets, and attempt to schedule all nodes together. This is
+/// a heuristic. If the MII is large and all the recurrent node-sets are small,
+/// then it's best to try to schedule all instructions together instead of
+/// starting with the recurrent node-sets.
+void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
+ // Look for loops with a large MII.
+ if (MII < 17)
+ return;
+ // Check if the node-set contains only a simple add recurrence.
+ for (auto &NS : NodeSets) {
+ if (NS.getRecMII() > 2)
+ return;
+ if (NS.getMaxDepth() > MII)
+ return;
+ }
+ NodeSets.clear();
+ LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
+}
+
+/// Add the nodes that do not belong to a recurrence set into groups
+/// based upon connected components.
+void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
+ SetVector<SUnit *> NodesAdded;
+ SmallPtrSet<SUnit *, 8> Visited;
+ // Add the nodes that are on a path between the previous node sets and
+ // the current node set.
+ for (NodeSet &I : NodeSets) {
+ SmallSetVector<SUnit *, 8> N;
+ // Add the nodes from the current node set to the previous node set.
+ if (succ_L(I, N)) {
+ SetVector<SUnit *> Path;
+ for (SUnit *NI : N) {
+ Visited.clear();
+ computePath(NI, Path, NodesAdded, I, Visited);
+ }
+ if (!Path.empty())
+ I.insert(Path.begin(), Path.end());
+ }
+ // Add the nodes from the previous node set to the current node set.
+ N.clear();
+ if (succ_L(NodesAdded, N)) {
+ SetVector<SUnit *> Path;
+ for (SUnit *NI : N) {
+ Visited.clear();
+ computePath(NI, Path, I, NodesAdded, Visited);
+ }
+ if (!Path.empty())
+ I.insert(Path.begin(), Path.end());
+ }
+ NodesAdded.insert(I.begin(), I.end());
+ }
+
+ // Create a new node set with the connected nodes of any successor of a node
+ // in a recurrent set.
+ NodeSet NewSet;
+ SmallSetVector<SUnit *, 8> N;
+ if (succ_L(NodesAdded, N))
+ for (SUnit *I : N)
+ addConnectedNodes(I, NewSet, NodesAdded);
+ if (!NewSet.empty())
+ NodeSets.push_back(NewSet);
+
+ // Create a new node set with the connected nodes of any predecessor of a node
+ // in a recurrent set.
+ NewSet.clear();
+ if (pred_L(NodesAdded, N))
+ for (SUnit *I : N)
+ addConnectedNodes(I, NewSet, NodesAdded);
+ if (!NewSet.empty())
+ NodeSets.push_back(NewSet);
+
+  // Create new node sets with the connected nodes of any remaining node that
+  // has not yet been added to a node set.
+ for (SUnit &SU : SUnits) {
+ if (NodesAdded.count(&SU) == 0) {
+ NewSet.clear();
+ addConnectedNodes(&SU, NewSet, NodesAdded);
+ if (!NewSet.empty())
+ NodeSets.push_back(NewSet);
+ }
+ }
+}
+
+/// Add the node to the set, and add all of its connected nodes to the set.
+void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded) {
+ NewSet.insert(SU);
+ NodesAdded.insert(SU);
+ for (auto &SI : SU->Succs) {
+ SUnit *Successor = SI.getSUnit();
+ if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
+ NodesAdded.count(Successor) == 0)
+ addConnectedNodes(Successor, NewSet, NodesAdded);
+ }
+ for (auto &PI : SU->Preds) {
+ SUnit *Predecessor = PI.getSUnit();
+ if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
+ addConnectedNodes(Predecessor, NewSet, NodesAdded);
+ }
+}
+
+/// Return true if Set1 and Set2 have elements in common. The elements in
+/// common are returned in a separate container.
+static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
+ SmallSetVector<SUnit *, 8> &Result) {
+ Result.clear();
+ for (SUnit *SU : Set1) {
+ if (Set2.count(SU) != 0)
+ Result.insert(SU);
+ }
+ return !Result.empty();
+}
+
+/// Merge the recurrence node sets that have the same initial node.
+void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
+ for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
+ ++I) {
+ NodeSet &NI = *I;
+ for (NodeSetType::iterator J = I + 1; J != E;) {
+ NodeSet &NJ = *J;
+ if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
+ if (NJ.compareRecMII(NI) > 0)
+ NI.setRecMII(NJ.getRecMII());
+ for (SUnit *SU : *J)
+ I->insert(SU);
+ NodeSets.erase(J);
+ E = NodeSets.end();
+ } else {
+ ++J;
+ }
+ }
+ }
+}
+
+/// Remove nodes that have been scheduled in previous NodeSets.
+void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
+ for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
+ ++I)
+ for (NodeSetType::iterator J = I + 1; J != E;) {
+ J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
+
+ if (J->empty()) {
+ NodeSets.erase(J);
+ E = NodeSets.end();
+ } else {
+ ++J;
+ }
+ }
+}
+
+/// Compute an ordered list of the dependence graph nodes, which
+/// indicates the order that the nodes will be scheduled. This is a
+/// two-level algorithm. First, a partial order is created, which
+/// consists of a list of sets ordered from highest to lowest priority.
+/// Second, the nodes within each set are ordered by alternating between
+/// bottom-up and top-down passes over the set.
+void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
+ SmallSetVector<SUnit *, 8> R;
+ NodeOrder.clear();
+
+ for (auto &Nodes : NodeSets) {
+ LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
+ OrderKind Order;
+ SmallSetVector<SUnit *, 8> N;
+ if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
+ R.insert(N.begin(), N.end());
+ Order = BottomUp;
+ LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
+ } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
+ R.insert(N.begin(), N.end());
+ Order = TopDown;
+ LLVM_DEBUG(dbgs() << " Top down (succs) ");
+ } else if (isIntersect(N, Nodes, R)) {
+ // If some of the successors are in the existing node-set, then use the
+ // top-down ordering.
+ Order = TopDown;
+ LLVM_DEBUG(dbgs() << " Top down (intersect) ");
+ } else if (NodeSets.size() == 1) {
+ for (const auto &N : Nodes)
+ if (N->Succs.size() == 0)
+ R.insert(N);
+ Order = BottomUp;
+ LLVM_DEBUG(dbgs() << " Bottom up (all) ");
+ } else {
+ // Find the node with the highest ASAP.
+ SUnit *maxASAP = nullptr;
+ for (SUnit *SU : Nodes) {
+ if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
+ (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
+ maxASAP = SU;
+ }
+ R.insert(maxASAP);
+ Order = BottomUp;
+ LLVM_DEBUG(dbgs() << " Bottom up (default) ");
+ }
+
+ while (!R.empty()) {
+ if (Order == TopDown) {
+ // Choose the node with the maximum height. If more than one, choose
+        // the node with the maximum ZeroLatencyHeight. If still more than one,
+ // choose the node with the lowest MOV.
+ while (!R.empty()) {
+ SUnit *maxHeight = nullptr;
+ for (SUnit *I : R) {
+ if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
+ maxHeight = I;
+ else if (getHeight(I) == getHeight(maxHeight) &&
+ getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
+ maxHeight = I;
+ else if (getHeight(I) == getHeight(maxHeight) &&
+ getZeroLatencyHeight(I) ==
+ getZeroLatencyHeight(maxHeight) &&
+ getMOV(I) < getMOV(maxHeight))
+ maxHeight = I;
+ }
+ NodeOrder.insert(maxHeight);
+ LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
+ R.remove(maxHeight);
+ for (const auto &I : maxHeight->Succs) {
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.contains(I.getSUnit()))
+ continue;
+ if (ignoreDependence(I, false))
+ continue;
+ R.insert(I.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const auto &I : maxHeight->Preds) {
+ if (I.getKind() != SDep::Anti)
+ continue;
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.contains(I.getSUnit()))
+ continue;
+ R.insert(I.getSUnit());
+ }
+ }
+ Order = BottomUp;
+ LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
+ SmallSetVector<SUnit *, 8> N;
+ if (pred_L(NodeOrder, N, &Nodes))
+ R.insert(N.begin(), N.end());
+ } else {
+ // Choose the node with the maximum depth. If more than one, choose
+ // the node with the maximum ZeroLatencyDepth. If still more than one,
+ // choose the node with the lowest MOV.
+ while (!R.empty()) {
+ SUnit *maxDepth = nullptr;
+ for (SUnit *I : R) {
+ if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
+ maxDepth = I;
+ else if (getDepth(I) == getDepth(maxDepth) &&
+ getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
+ maxDepth = I;
+ else if (getDepth(I) == getDepth(maxDepth) &&
+ getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
+ getMOV(I) < getMOV(maxDepth))
+ maxDepth = I;
+ }
+ NodeOrder.insert(maxDepth);
+ LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
+ R.remove(maxDepth);
+ if (Nodes.isExceedSU(maxDepth)) {
+ Order = TopDown;
+ R.clear();
+ R.insert(Nodes.getNode(0));
+ break;
+ }
+ for (const auto &I : maxDepth->Preds) {
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.contains(I.getSUnit()))
+ continue;
+ R.insert(I.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const auto &I : maxDepth->Succs) {
+ if (I.getKind() != SDep::Anti)
+ continue;
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.contains(I.getSUnit()))
+ continue;
+ R.insert(I.getSUnit());
+ }
+ }
+ Order = TopDown;
+ LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
+ SmallSetVector<SUnit *, 8> N;
+ if (succ_L(NodeOrder, N, &Nodes))
+ R.insert(N.begin(), N.end());
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Node order: ";
+ for (SUnit *I : NodeOrder)
+ dbgs() << " " << I->NodeNum << " ";
+ dbgs() << "\n";
+ });
+}
+
+/// Process the nodes in the computed order and create the pipelined schedule
+/// of the instructions, if possible. Return true if a schedule is found.
+bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
+
+  if (NodeOrder.empty()) {
+    LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n");
+    return false;
+  }
+
+ bool scheduleFound = false;
+ // Keep increasing II until a valid schedule is found.
+ for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
+ Schedule.reset();
+ Schedule.setInitiationInterval(II);
+ LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
+
+ SetVector<SUnit *>::iterator NI = NodeOrder.begin();
+ SetVector<SUnit *>::iterator NE = NodeOrder.end();
+ do {
+ SUnit *SU = *NI;
+
+ // Compute the schedule time for the instruction, which is based
+ // upon the scheduled time for any predecessors/successors.
+ int EarlyStart = INT_MIN;
+ int LateStart = INT_MAX;
+ // These values are set when the size of the schedule window is limited
+ // due to chain dependences.
+ int SchedEnd = INT_MAX;
+ int SchedStart = INT_MIN;
+ Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
+ II, this);
+ LLVM_DEBUG({
+ dbgs() << "\n";
+ dbgs() << "Inst (" << SU->NodeNum << ") ";
+ SU->getInstr()->dump();
+ dbgs() << "\n";
+ });
+ LLVM_DEBUG({
+ dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
+ LateStart, SchedEnd, SchedStart);
+ });
+
+ if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
+ SchedStart > LateStart)
+ scheduleFound = false;
+ else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
+ SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
+ scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
+ SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
+ scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
+ } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
+ SchedEnd =
+ std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
+ // When scheduling a Phi it is better to start at the late cycle and go
+ // backwards. The default order may insert the Phi too far away from
+ // its first dependence.
+ if (SU->getInstr()->isPHI())
+ scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
+ else
+ scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ } else {
+ int FirstCycle = Schedule.getFirstCycle();
+ scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
+ FirstCycle + getASAP(SU) + II - 1, II);
+ }
+ // Even if we find a schedule, make sure the schedule doesn't exceed the
+ // allowable number of stages. We keep trying if this happens.
+ if (scheduleFound)
+ if (SwpMaxStages > -1 &&
+ Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
+ scheduleFound = false;
+
+ LLVM_DEBUG({
+ if (!scheduleFound)
+ dbgs() << "\tCan't schedule\n";
+ });
+ } while (++NI != NE && scheduleFound);
+
+ // If a schedule is found, ensure non-pipelined instructions are in stage 0
+ if (scheduleFound)
+ scheduleFound =
+ Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
+
+ // If a schedule is found, check if it is a valid schedule too.
+ if (scheduleFound)
+ scheduleFound = Schedule.isValidSchedule(this);
+ }
+
+ LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
+ << " (II=" << Schedule.getInitiationInterval()
+ << ")\n");
+
+ if (scheduleFound) {
+ scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
+ if (!scheduleFound)
+ LLVM_DEBUG(dbgs() << "Target rejected schedule\n");
+ }
+
+ if (scheduleFound) {
+ Schedule.finalizeSchedule(this);
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Schedule found with Initiation Interval: "
+ << ore::NV("II", Schedule.getInitiationInterval())
+ << ", MaxStageCount: "
+ << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
+ });
+ } else
+ Schedule.reset();
+
+ return scheduleFound && Schedule.getMaxStageCount() > 0;
+}
+
+/// Return true if we can compute the amount by which the address accessed by
+/// the instruction changes during each iteration. Set Delta to the amount of
+/// the change.
+bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineOperand *BaseOp;
+ int64_t Offset;
+ bool OffsetIsScalable;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+ return false;
+
+ // FIXME: This algorithm assumes instructions have fixed-size offsets.
+ if (OffsetIsScalable)
+ return false;
+
+ if (!BaseOp->isReg())
+ return false;
+
+ Register BaseReg = BaseOp->getReg();
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Check if there is a Phi. If so, get the definition in the loop.
+ MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+ if (BaseDef && BaseDef->isPHI()) {
+ BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+ BaseDef = MRI.getVRegDef(BaseReg);
+ }
+ if (!BaseDef)
+ return false;
+
+ int D = 0;
+ if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+ return false;
+
+ Delta = D;
+ return true;
+}
+
+/// Check if we can change the instruction to use an offset value from the
+/// previous iteration. If so, return true and set the base and offset values
+/// so that we can rewrite the load, if necessary.
+/// v1 = Phi(v0, v3)
+/// v2 = load v1, 0
+/// v3 = post_store v1, 4, x
+/// This function enables the load to be rewritten as v2 = load v3, 4.
+bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
+ unsigned &BasePos,
+ unsigned &OffsetPos,
+ unsigned &NewBase,
+ int64_t &Offset) {
+ // Get the load instruction.
+ if (TII->isPostIncrement(*MI))
+ return false;
+ unsigned BasePosLd, OffsetPosLd;
+ if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
+ return false;
+ Register BaseReg = MI->getOperand(BasePosLd).getReg();
+
+ // Look for the Phi instruction.
+ MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+ MachineInstr *Phi = MRI.getVRegDef(BaseReg);
+ if (!Phi || !Phi->isPHI())
+ return false;
+ // Get the register defined in the loop block.
+ unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
+ if (!PrevReg)
+ return false;
+
+ // Check for the post-increment load/store instruction.
+ MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
+ if (!PrevDef || PrevDef == MI)
+ return false;
+
+ if (!TII->isPostIncrement(*PrevDef))
+ return false;
+
+ unsigned BasePos1 = 0, OffsetPos1 = 0;
+ if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
+ return false;
+
+ // Make sure that the instructions do not access the same memory location in
+ // the next iteration.
+ int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
+ int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
+ bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
+ MF.deleteMachineInstr(NewMI);
+ if (!Disjoint)
+ return false;
+
+ // Set the return value once we determine that we return true.
+ BasePos = BasePosLd;
+ OffsetPos = OffsetPosLd;
+ NewBase = PrevReg;
+ Offset = StoreOffset;
+ return true;
+}
+
+/// Apply changes to the instruction if needed. The changes are needed
+/// to improve the scheduling and depend upon the final schedule.
+void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+ SMSchedule &Schedule) {
+ SUnit *SU = getSUnit(MI);
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
+ return;
+ Register BaseReg = MI->getOperand(BasePos).getReg();
+ MachineInstr *LoopDef = findDefInLoop(BaseReg);
+ int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
+ int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
+ int BaseStageNum = Schedule.stageScheduled(SU);
+ int BaseCycleNum = Schedule.cycleScheduled(SU);
+ if (BaseStageNum < DefStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ int OffsetDiff = DefStageNum - BaseStageNum;
+ if (DefCycleNum < BaseCycleNum) {
+ NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
+ if (OffsetDiff > 0)
+ --OffsetDiff;
+ }
+ int64_t NewOffset =
+ MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
+ NewMIs[MI] = NewMI;
+ }
+ }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
+/// Return true for an order or output dependence that is potentially loop
+/// carried. A dependence is loop carried if the destination defines a value
+/// that may be used or defined by the source in a subsequent iteration.
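+///
+/// Worked example (hypothetical values): with a per-iteration increment of
+/// DeltaS = DeltaD = 8, 4-byte accesses, OffsetS = 0, and OffsetD = 4, the
+/// final test 0 + 4 < 4 + 4 holds, so the dependence is conservatively
+/// treated as loop carried.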
+bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
+ bool isSucc) {
+ if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
+ Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
+ return false;
+
+ if (!SwpPruneLoopCarried)
+ return true;
+
+ if (Dep.getKind() == SDep::Output)
+ return true;
+
+ MachineInstr *SI = Source->getInstr();
+ MachineInstr *DI = Dep.getSUnit()->getInstr();
+ if (!isSucc)
+ std::swap(SI, DI);
+ assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
+
+ // Assume ordered loads and stores may have a loop carried dependence.
+ if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+ SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
+ SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
+ return true;
+
+ // Only chain dependences between a load and store can be loop carried.
+ if (!DI->mayStore() || !SI->mayLoad())
+ return false;
+
+ unsigned DeltaS, DeltaD;
+ if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
+ return true;
+
+ const MachineOperand *BaseOpS, *BaseOpD;
+ int64_t OffsetS, OffsetD;
+ bool OffsetSIsScalable, OffsetDIsScalable;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
+ TRI) ||
+ !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
+ TRI))
+ return true;
+
+ assert(!OffsetSIsScalable && !OffsetDIsScalable &&
+ "Expected offsets to be byte offsets");
+
+ MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg());
+ MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg());
+ if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
+ return true;
+
+ unsigned InitValS = 0;
+ unsigned LoopValS = 0;
+ unsigned InitValD = 0;
+ unsigned LoopValD = 0;
+ getPhiRegs(*DefS, BB, InitValS, LoopValS);
+ getPhiRegs(*DefD, BB, InitValD, LoopValD);
+ MachineInstr *InitDefS = MRI.getVRegDef(InitValS);
+ MachineInstr *InitDefD = MRI.getVRegDef(InitValD);
+
+ if (!InitDefS->isIdenticalTo(*InitDefD))
+ return true;
+
+ // Check that the base register is incremented by a constant value for each
+ // iteration.
+ MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS);
+ int D = 0;
+ if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
+ return true;
+
+ uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
+ uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
+
+ // This is the main test, which checks the offset values and the loop
+ // increment value to determine if the accesses may be loop carried.
+ if (AccessSizeS == MemoryLocation::UnknownSize ||
+ AccessSizeD == MemoryLocation::UnknownSize)
+ return true;
+
+ if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
+ return true;
+
+ return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
+}
+
+void SwingSchedulerDAG::postProcessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+/// Try to schedule the node at the specified StartCycle and continue
+/// until the node is scheduled or the EndCycle is reached. This function
+/// returns true if the node is scheduled. This routine may search either
+/// forward or backward for a place to insert the instruction based upon
+/// the relative values of StartCycle and EndCycle.
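+///
+/// For example (hypothetical call), insert(SU, 4, 1, II) searches backward
+/// over cycles 4, 3, 2, 1, while insert(SU, 1, 4, II) searches forward over
+/// cycles 1, 2, 3, 4, stopping at the first cycle where the resources for SU
+/// can be reserved.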
+bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
+ bool forward = true;
+ LLVM_DEBUG({
+ dbgs() << "Trying to insert node between " << StartCycle << " and "
+ << EndCycle << " II: " << II << "\n";
+ });
+ if (StartCycle > EndCycle)
+ forward = false;
+
+ // The terminating condition depends on the direction.
+ int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
+ for (int curCycle = StartCycle; curCycle != termCycle;
+ forward ? ++curCycle : --curCycle) {
+
+ if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
+ ProcItinResources.canReserveResources(*SU, curCycle)) {
+ LLVM_DEBUG({
+ dbgs() << "\tinsert at cycle " << curCycle << " ";
+ SU->getInstr()->dump();
+ });
+
+ if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ ProcItinResources.reserveResources(*SU, curCycle);
+ ScheduledInstrs[curCycle].push_back(SU);
+ InstrToCycle.insert(std::make_pair(SU, curCycle));
+ if (curCycle > LastCycle)
+ LastCycle = curCycle;
+ if (curCycle < FirstCycle)
+ FirstCycle = curCycle;
+ return true;
+ }
+ LLVM_DEBUG({
+ dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
+ SU->getInstr()->dump();
+ });
+ }
+ return false;
+}
+
+// Return the cycle of the earliest scheduled instruction in the chain.
+int SMSchedule::earliestCycleInChain(const SDep &Dep) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SDep, 8> Worklist;
+ Worklist.push_back(Dep);
+ int EarlyCycle = INT_MAX;
+ while (!Worklist.empty()) {
+ const SDep &Cur = Worklist.pop_back_val();
+ SUnit *PrevSU = Cur.getSUnit();
+ if (Visited.count(PrevSU))
+ continue;
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
+ if (it == InstrToCycle.end())
+ continue;
+ EarlyCycle = std::min(EarlyCycle, it->second);
+ for (const auto &PI : PrevSU->Preds)
+ if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
+ Worklist.push_back(PI);
+ Visited.insert(PrevSU);
+ }
+ return EarlyCycle;
+}
+
+// Return the cycle of the latest scheduled instruction in the chain.
+int SMSchedule::latestCycleInChain(const SDep &Dep) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SDep, 8> Worklist;
+ Worklist.push_back(Dep);
+ int LateCycle = INT_MIN;
+ while (!Worklist.empty()) {
+ const SDep &Cur = Worklist.pop_back_val();
+ SUnit *SuccSU = Cur.getSUnit();
+ if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
+ continue;
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
+ if (it == InstrToCycle.end())
+ continue;
+ LateCycle = std::max(LateCycle, it->second);
+ for (const auto &SI : SuccSU->Succs)
+ if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
+ Worklist.push_back(SI);
+ Visited.insert(SuccSU);
+ }
+ return LateCycle;
+}
+
+/// If an instruction has a use that spans multiple iterations, then
+/// return the Phi that feeds it; otherwise return nullptr. These instructions
+/// are characterized by having a back-edge to a Phi, which contains a
+/// reference to another Phi.
+static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
+ for (auto &P : SU->Preds)
+ if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
+ for (auto &S : P.getSUnit()->Succs)
+ if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
+ return P.getSUnit();
+ return nullptr;
+}
+
+/// Compute the scheduling start slot for the instruction. The start slot
+/// depends on any predecessor or successor nodes scheduled already.
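+///
+/// The window bounds follow the usual modulo scheduling relations; for a
+/// dependence involving an instruction already placed at 'cycle':
+///   EarlyStart = cycle + latency - distance * II
+///   LateStart  = cycle - latency + distance * II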
+void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II,
+ SwingSchedulerDAG *DAG) {
+ // Iterate over each instruction that has been scheduled already. The start
+ // slot computation depends on whether the previously scheduled instruction
+ // is a predecessor or successor of the specified instruction.
+ for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
+
+ // Iterate over each instruction in the current cycle.
+ for (SUnit *I : getInstructions(cycle)) {
+ // Because we're processing a DAG for the dependences, we recognize
+ // the back-edge in recurrences by anti dependences.
+ for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
+ const SDep &Dep = SU->Preds[i];
+ if (Dep.getSUnit() == I) {
+ if (!DAG->isBackedge(SU, Dep)) {
+ int EarlyStart = cycle + Dep.getLatency() -
+ DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
+ if (DAG->isLoopCarriedDep(SU, Dep, false)) {
+ int End = earliestCycleInChain(Dep) + (II - 1);
+ *MinEnd = std::min(*MinEnd, End);
+ }
+ } else {
+ int LateStart = cycle - Dep.getLatency() +
+ DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ }
+ }
+        // For an instruction that requires multiple iterations, make sure that
+ // the dependent instruction is not scheduled past the definition.
+ SUnit *BE = multipleIterations(I, DAG);
+ if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
+ !SU->isPred(I))
+ *MinLateStart = std::min(*MinLateStart, cycle);
+ }
+ for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
+ if (SU->Succs[i].getSUnit() == I) {
+ const SDep &Dep = SU->Succs[i];
+ if (!DAG->isBackedge(SU, Dep)) {
+ int LateStart = cycle - Dep.getLatency() +
+ DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ if (DAG->isLoopCarriedDep(SU, Dep)) {
+ int Start = latestCycleInChain(Dep) + 1 - II;
+ *MaxStart = std::max(*MaxStart, Start);
+ }
+ } else {
+ int EarlyStart = cycle + Dep.getLatency() -
+ DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
+ }
+ }
+ }
+ }
+ }
+}
+
+/// Order the instructions within a cycle so that the definitions occur
+/// before the uses. The instruction is added to the start of the list if
+/// there is a use it must precede; otherwise it is added to the end.
+void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts) {
+ MachineInstr *MI = SU->getInstr();
+ bool OrderBeforeUse = false;
+ bool OrderAfterDef = false;
+ bool OrderBeforeDef = false;
+ unsigned MoveDef = 0;
+ unsigned MoveUse = 0;
+ int StageInst1 = stageScheduled(SU);
+
+ unsigned Pos = 0;
+ for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
+ ++I, ++Pos) {
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+
+ Register Reg = MO.getReg();
+ unsigned BasePos, OffsetPos;
+ if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
+ if (MI->getOperand(BasePos).getReg() == Reg)
+ if (unsigned NewReg = SSD->getInstrBaseReg(SU))
+ Reg = NewReg;
+ bool Reads, Writes;
+ std::tie(Reads, Writes) =
+ (*I)->getInstr()->readsWritesVirtualRegister(Reg);
+ if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
+ OrderBeforeUse = true;
+ if (MoveUse == 0)
+ MoveUse = Pos;
+ } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
+ // Add the instruction after the scheduled instruction.
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
+ if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
+ OrderBeforeUse = true;
+ if (MoveUse == 0)
+ MoveUse = Pos;
+ } else {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
+ OrderBeforeUse = true;
+ if (MoveUse == 0)
+ MoveUse = Pos;
+ if (MoveUse != 0) {
+ OrderAfterDef = true;
+ MoveDef = Pos - 1;
+ }
+ } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
+ // Add the instruction before the scheduled instruction.
+ OrderBeforeUse = true;
+ if (MoveUse == 0)
+ MoveUse = Pos;
+ } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
+ isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
+ if (MoveUse == 0) {
+ OrderBeforeDef = true;
+ MoveUse = Pos;
+ }
+ }
+ }
+ // Check for order dependences between instructions. Make sure the source
+ // is ordered before the destination.
+ for (auto &S : SU->Succs) {
+ if (S.getSUnit() != *I)
+ continue;
+ if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ if (Pos < MoveUse)
+ MoveUse = Pos;
+ }
+ // We did not handle hardware dependences in the previous loop, and we
+ // normally set Latency = 0 for Anti dependences, so nodes in the same
+ // cycle may have Anti dependences on hardware registers.
+ else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ if ((MoveUse == 0) || (Pos < MoveUse))
+ MoveUse = Pos;
+ }
+ }
+ for (auto &P : SU->Preds) {
+ if (P.getSUnit() != *I)
+ continue;
+ if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ }
+ }
+
+ // A circular dependence.
+ if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
+ OrderBeforeUse = false;
+
+ // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
+ // to a loop-carried dependence.
+ if (OrderBeforeDef)
+ OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
+
+ // The uncommon case when the instruction order needs to be updated because
+ // there is both a use and a def.
+ if (OrderBeforeUse && OrderAfterDef) {
+ SUnit *UseSU = Insts.at(MoveUse);
+ SUnit *DefSU = Insts.at(MoveDef);
+ if (MoveUse > MoveDef) {
+ Insts.erase(Insts.begin() + MoveUse);
+ Insts.erase(Insts.begin() + MoveDef);
+ } else {
+ Insts.erase(Insts.begin() + MoveDef);
+ Insts.erase(Insts.begin() + MoveUse);
+ }
+ orderDependence(SSD, UseSU, Insts);
+ orderDependence(SSD, SU, Insts);
+ orderDependence(SSD, DefSU, Insts);
+ return;
+ }
+ // Put the new instruction first if there is a use in the list. Otherwise,
+ // put it at the end of the list.
+ if (OrderBeforeUse)
+ Insts.push_front(SU);
+ else
+ Insts.push_back(SU);
+}
+
+/// Return true if the scheduled Phi has a loop carried operand.
+bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
+ if (!Phi.isPHI())
+ return false;
+ SUnit *DefSU = SSD->getSUnit(&Phi);
+ unsigned DefCycle = cycleScheduled(DefSU);
+ int DefStage = stageScheduled(DefSU);
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+ SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
+ if (!UseSU)
+ return true;
+ if (UseSU->getInstr()->isPHI())
+ return true;
+ unsigned LoopCycle = cycleScheduled(UseSU);
+ int LoopStage = stageScheduled(UseSU);
+ return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+/// Return true if the instruction is a definition that is loop carried
+/// and defines the use on the next iteration.
+/// v1 = phi(v2, v3)
+/// (Def) v3 = op v1
+/// (MO) = v1
+/// If MO appears before Def, then v1 and v3 may get assigned to the same
+/// register.
+bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
+ MachineInstr *Def, MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ if (Def->isPHI())
+ return false;
+ MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
+ if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
+ return false;
+ if (!isLoopCarried(SSD, *Phi))
+ return false;
+ unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
+ for (MachineOperand &DMO : Def->all_defs()) {
+ if (DMO.getReg() == LoopReg)
+ return true;
+ }
+ return false;
+}
+
+/// Determine transitive dependences of unpipelineable instructions
+SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
+ SmallSet<SUnit *, 8> DoNotPipeline;
+ SmallVector<SUnit *, 8> Worklist;
+
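+ // Seed the worklist with the instructions the target refuses to pipeline,
+ // then walk predecessor edges (and the anti successors of PHIs) so that
+ // everything they transitively depend on is also kept out of later stages.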
+ for (auto &SU : SSD->SUnits)
+ if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
+ Worklist.push_back(&SU);
+
+ while (!Worklist.empty()) {
+ auto SU = Worklist.pop_back_val();
+ if (DoNotPipeline.count(SU))
+ continue;
+ LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
+ DoNotPipeline.insert(SU);
+ for (auto &Dep : SU->Preds)
+ Worklist.push_back(Dep.getSUnit());
+ if (SU->getInstr()->isPHI())
+ for (auto &Dep : SU->Succs)
+ if (Dep.getKind() == SDep::Anti)
+ Worklist.push_back(Dep.getSUnit());
+ }
+ return DoNotPipeline;
+}
+
+// Determine all instructions upon which any unpipelineable instruction depends
+// and ensure that they are in stage 0. If unable to do so, return false.
+bool SMSchedule::normalizeNonPipelinedInstructions(
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
+ SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI);
+
+ int NewLastCycle = INT_MIN;
+ for (SUnit &SU : SSD->SUnits) {
+ if (!SU.isInstr())
+ continue;
+ if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
+ NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
+ continue;
+ }
+
+ // Put the non-pipelined instruction as early as possible in the schedule
+ int NewCycle = getFirstCycle();
+ for (auto &Dep : SU.Preds)
+ NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
+
+ int OldCycle = InstrToCycle[&SU];
+ if (OldCycle != NewCycle) {
+ InstrToCycle[&SU] = NewCycle;
+ auto &OldS = getInstructions(OldCycle);
+ llvm::erase_value(OldS, &SU);
+ getInstructions(NewCycle).emplace_back(&SU);
+ LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
+ << ") is not pipelined; moving from cycle " << OldCycle
+ << " to " << NewCycle << " Instr:" << *SU.getInstr());
+ }
+ NewLastCycle = std::max(NewLastCycle, NewCycle);
+ }
+ LastCycle = NewLastCycle;
+ return true;
+}
+
+// Check if the generated schedule is valid. This function checks if
+// an instruction that uses a physical register is scheduled in a
+// different stage than the definition. The pipeliner does not handle
+// physical register values that may cross a basic block boundary.
+// Furthermore, if a physical def/use pair is assigned to the same
+// cycle, orderDependence does not guarantee def/use ordering, so that
+// case should be considered invalid. (The test checks for both
+// earlier and same-cycle use to be more robust.)
+bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
+ for (SUnit &SU : SSD->SUnits) {
+ if (!SU.hasPhysRegDefs)
+ continue;
+ int StageDef = stageScheduled(&SU);
+ int CycleDef = InstrToCycle[&SU];
+ assert(StageDef != -1 && "Instruction should have been scheduled.");
+ for (auto &SI : SU.Succs)
+ if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
+ if (Register::isPhysicalRegister(SI.getReg())) {
+ if (stageScheduled(SI.getSUnit()) != StageDef)
+ return false;
+ if (InstrToCycle[SI.getSUnit()] <= CycleDef)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// A property of the node order in swing-modulo-scheduling is
+/// that for nodes outside circuits the following holds:
+/// none of them is scheduled after both a successor and a
+/// predecessor.
+/// The method below checks whether the property is met.
+/// If not, debug information is printed and statistics information updated.
+/// Note that we do not use an assert statement.
+/// The reason is that although an invalid node order may prevent
+/// the pipeliner from finding a pipelined schedule for arbitrary II,
+/// it does not lead to the generation of incorrect code.
+void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
+
+ // a sorted vector that maps each SUnit to its index in the NodeOrder
+ typedef std::pair<SUnit *, unsigned> UnitIndex;
+ std::vector<UnitIndex> Indices;
+ Indices.reserve(NodeOrder.size());
+
+ for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
+ Indices.push_back(std::make_pair(NodeOrder[i], i));
+
+ auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
+ return std::get<0>(i1) < std::get<0>(i2);
+ };
+
+ // sort, so that we can perform a binary search
+ llvm::sort(Indices, CompareKey);
+
+ bool Valid = true;
+ (void)Valid;
+ // for each SUnit in the NodeOrder, check whether
+ // it appears after both a successor and a predecessor
+ // of the SUnit. If this is the case, and the SUnit
+ // is not part of a circuit, then the NodeOrder is not
+ // valid.
+ for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
+ SUnit *SU = NodeOrder[i];
+ unsigned Index = i;
+
+ bool PredBefore = false;
+ bool SuccBefore = false;
+
+ SUnit *Succ;
+ SUnit *Pred;
+ (void)Succ;
+ (void)Pred;
+
+ for (SDep &PredEdge : SU->Preds) {
+ SUnit *PredSU = PredEdge.getSUnit();
+ unsigned PredIndex = std::get<1>(
+ *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
+ if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
+ PredBefore = true;
+ Pred = PredSU;
+ break;
+ }
+ }
+
+ for (SDep &SuccEdge : SU->Succs) {
+ SUnit *SuccSU = SuccEdge.getSUnit();
+ // Do not process a boundary node: it was not included in NodeOrder and
+ // hence not in Indices either, so the call to std::lower_bound() below
+ // would return Indices.end().
+ if (SuccSU->isBoundaryNode())
+ continue;
+ unsigned SuccIndex = std::get<1>(
+ *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
+ if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
+ SuccBefore = true;
+ Succ = SuccSU;
+ break;
+ }
+ }
+
+ if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
+ // instructions in circuits are allowed to be scheduled
+ // after both a successor and predecessor.
+ bool InCircuit = llvm::any_of(
+ Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+ if (InCircuit)
+ LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
+ else {
+ Valid = false;
+ NumNodeOrderIssues++;
+ LLVM_DEBUG(dbgs() << "Predecessor ";);
+ }
+ LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
+ << " are scheduled before node " << SU->NodeNum
+ << "\n";);
+ }
+ }
+
+ LLVM_DEBUG({
+ if (!Valid)
+ dbgs() << "Invalid node order found!\n";
+ });
+}
+
+/// Attempt to fix the degenerate cases when the instruction serialization
+/// causes the register lifetimes to overlap. For example,
+/// p' = store_pi(p, b)
+/// = load p, offset
+/// In this case p and p' overlap, which means that two registers are needed.
+/// Instead, this function changes the load to use p' and updates the offset.
+void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
+ unsigned OverlapReg = 0;
+ unsigned NewBaseReg = 0;
+ for (SUnit *SU : Instrs) {
+ MachineInstr *MI = SU->getInstr();
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Look for an instruction that uses p. The instruction occurs in the
+ // same cycle but occurs later in the serialized order.
+ if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
+ // Check that the instruction appears in the InstrChanges structure,
+ // which contains instructions that can have the offset updated.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end()) {
+ unsigned BasePos, OffsetPos;
+ // Update the base register and adjust the offset.
+ if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ NewMI->getOperand(BasePos).setReg(NewBaseReg);
+ int64_t NewOffset =
+ MI->getOperand(OffsetPos).getImm() - It->second.second;
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
+ NewMIs[MI] = NewMI;
+ }
+ }
+ OverlapReg = 0;
+ NewBaseReg = 0;
+ break;
+ }
+ // Look for an instruction of the form p' = op(p), which uses and defines
+ // two virtual registers that get allocated to the same physical register.
+ unsigned TiedUseIdx = 0;
+ if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
+ // OverlapReg is p in the example above.
+ OverlapReg = MI->getOperand(TiedUseIdx).getReg();
+ // NewBaseReg is p' in the example above.
+ NewBaseReg = MI->getOperand(i).getReg();
+ break;
+ }
+ }
+ }
+}
+
+/// After the schedule has been formed, call this function to combine
+/// the instructions from the different stages/cycles. That is, this
+/// function creates a schedule that represents a single iteration.
+void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
+ // Move all instructions to the first stage from later stages.
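+ // An instruction placed in stage s of cycle c is stored at index
+ // c + s * InitiationInterval in ScheduledInstrs; folding those entries back
+ // onto cycle c produces the single-iteration view of the schedule.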
+ for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
+ for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
+ ++stage) {
+ std::deque<SUnit *> &cycleInstrs =
+ ScheduledInstrs[cycle + (stage * InitiationInterval)];
+ for (SUnit *SU : llvm::reverse(cycleInstrs))
+ ScheduledInstrs[cycle].push_front(SU);
+ }
+ }
+
+ // Erase all the elements in the later stages. Only one iteration should
+ // remain in the scheduled list, and it contains all the instructions.
+ for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
+ ScheduledInstrs.erase(cycle);
+
+ // Change the registers in the instructions as specified by the
+ // InstrChanges map. We need to use the new registers to create the
+ // correct order.
+ for (const SUnit &SU : SSD->SUnits)
+ SSD->applyInstrChange(SU.getInstr(), *this);
+
+ // Reorder the instructions in each cycle to fix and improve the
+ // generated code.
+ for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
+ std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
+ std::deque<SUnit *> newOrderPhi;
+ for (SUnit *SU : cycleInstrs) {
+ if (SU->getInstr()->isPHI())
+ newOrderPhi.push_back(SU);
+ }
+ std::deque<SUnit *> newOrderI;
+ for (SUnit *SU : cycleInstrs) {
+ if (!SU->getInstr()->isPHI())
+ orderDependence(SSD, SU, newOrderI);
+ }
+ // Replace the old order with the new order.
+ cycleInstrs.swap(newOrderPhi);
+ llvm::append_range(cycleInstrs, newOrderI);
+ SSD->fixupRegisterOverlaps(cycleInstrs);
+ }
+
+ LLVM_DEBUG(dump(););
+}
+
+void NodeSet::print(raw_ostream &os) const {
+ os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
+ << " depth " << MaxDepth << " col " << Colocate << "\n";
+ for (const auto &I : Nodes)
+ os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
+ os << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Print the schedule information to the given output.
+void SMSchedule::print(raw_ostream &os) const {
+ // Iterate over each cycle.
+ for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
+ // Iterate over each instruction in the cycle.
+ const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
+ for (SUnit *CI : cycleInstrs->second) {
+ os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
+ os << "(" << CI->NodeNum << ") ";
+ CI->getInstr()->print(os);
+ os << "\n";
+ }
+ }
+}
+
+/// Utility function used for debugging to print the schedule.
+LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
+
+void ResourceManager::dumpMRT() const {
+ LLVM_DEBUG({
+ if (UseDFA)
+ return;
+ std::stringstream SS;
+ SS << "MRT:\n";
+ SS << std::setw(4) << "Slot";
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
+ SS << std::setw(3) << I;
+ SS << std::setw(7) << "#Mops"
+ << "\n";
+ for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
+ SS << std::setw(4) << Slot;
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
+ SS << std::setw(3) << MRT[Slot][I];
+ SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
+ }
+ dbgs() << SS.str();
+ });
+}
+#endif
+
+void ResourceManager::initProcResourceVectors(
+ const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
+ unsigned ProcResourceID = 0;
+
+ // We currently limit the resource kinds to 64 and below so that we can use
+ // uint64_t for Masks
+ assert(SM.getNumProcResourceKinds() < 64 &&
+ "Too many kinds of resources, unsupported");
+ // Create a unique bitmask for every processor resource unit.
+ // Skip resource at index 0, since it always references 'InvalidUnit'.
+ Masks.resize(SM.getNumProcResourceKinds());
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ ProcResourceID++;
+ }
+ // Create a unique bitmask for every processor resource group.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (!Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ for (unsigned U = 0; U < Desc.NumUnits; ++U)
+ Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
+ ProcResourceID++;
+ }
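+ // Illustrative example (not taken from any real target): if unit A has mask
+ // 0b001 and unit B has mask 0b010, a group containing both gets its own bit
+ // plus the union of its members, e.g. 0b100 | 0b001 | 0b010 = 0b111.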
+ LLVM_DEBUG({
+ if (SwpShowResMask) {
+ dbgs() << "ProcResourceDesc:\n";
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
+ dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
+ ProcResource->Name, I, Masks[I],
+ ProcResource->NumUnits);
+ }
+ dbgs() << " -----------------\n";
+ }
+ });
+}
+
+bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) {
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "canReserveResources:\n";
+ });
+ if (UseDFA)
+ return DFAResources[positiveModulo(Cycle, InitiationInterval)]
+ ->canReserveResources(&SU.getInstr()->getDesc());
+
+ const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
+ if (!SCDesc->isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+ dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
+ });
+ return true;
+ }
+
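+ // Tentatively book the resources for this cycle, test for overbooking, and
+ // then roll the booking back so the query leaves the MRT unchanged.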
+ reserveResources(SCDesc, Cycle);
+ bool Result = !isOverbooked();
+ unreserveResources(SCDesc, Cycle);
+
+ LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";);
+ return Result;
+}
+
+void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "reserveResources:\n";
+ });
+ if (UseDFA)
+ return DFAResources[positiveModulo(Cycle, InitiationInterval)]
+ ->reserveResources(&SU.getInstr()->getDesc());
+
+ const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
+ if (!SCDesc->isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+ dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
+ });
+ return;
+ }
+
+ reserveResources(SCDesc, Cycle);
+
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ dumpMRT();
+ dbgs() << "reserveResources: done!\n\n";
+ }
+ });
+}
+
+void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
+ int Cycle) {
+ assert(!UseDFA);
+ for (const MCWriteProcResEntry &PRE : make_range(
+ STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
+ for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+ ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
+
+ for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
+ ++NumScheduledMops[positiveModulo(C, InitiationInterval)];
+}
+
+void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
+ int Cycle) {
+ assert(!UseDFA);
+ for (const MCWriteProcResEntry &PRE : make_range(
+ STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
+ for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+ --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
+
+ for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
+ --NumScheduledMops[positiveModulo(C, InitiationInterval)];
+}
+
+bool ResourceManager::isOverbooked() const {
+ assert(!UseDFA);
+ for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *Desc = SM.getProcResource(I);
+ if (MRT[Slot][I] > Desc->NumUnits)
+ return true;
+ }
+ if (NumScheduledMops[Slot] > IssueWidth)
+ return true;
+ }
+ return false;
+}
+
+int ResourceManager::calculateResMIIDFA() const {
+ assert(UseDFA);
+
+ // Sort the instructions by the number of available choices for scheduling,
+ // least to most. Use the number of critical resources as the tie breaker.
+ FuncUnitSorter FUS = FuncUnitSorter(*ST);
+ for (SUnit &SU : DAG->SUnits)
+ FUS.calcCriticalResources(*SU.getInstr());
+ PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
+ FuncUnitOrder(FUS);
+
+ for (SUnit &SU : DAG->SUnits)
+ FuncUnitOrder.push(SU.getInstr());
+
+ SmallVector<std::unique_ptr<DFAPacketizer>, 8> Resources;
+ Resources.push_back(
+ std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));
+
+ while (!FuncUnitOrder.empty()) {
+ MachineInstr *MI = FuncUnitOrder.top();
+ FuncUnitOrder.pop();
+ if (TII->isZeroCost(MI->getOpcode()))
+ continue;
+
+ // Attempt to reserve the instruction in an existing DFA. At least one
+ // DFA is needed for each cycle.
+ unsigned NumCycles = DAG->getSUnit(MI)->Latency;
+ unsigned ReservedCycles = 0;
+ auto *RI = Resources.begin();
+ auto *RE = Resources.end();
+ LLVM_DEBUG({
+ dbgs() << "Trying to reserve resource for " << NumCycles
+ << " cycles for \n";
+ MI->dump();
+ });
+ for (unsigned C = 0; C < NumCycles; ++C)
+ while (RI != RE) {
+ if ((*RI)->canReserveResources(*MI)) {
+ (*RI)->reserveResources(*MI);
+ ++ReservedCycles;
+ break;
+ }
+ RI++;
+ }
+ LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+ << ", NumCycles:" << NumCycles << "\n");
+ // Add new DFAs, if needed, to reserve resources.
+ for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
+ LLVM_DEBUG(if (SwpDebugResource) dbgs()
+ << "NewResource created to reserve resources"
+ << "\n");
+ auto *NewResource = TII->CreateTargetScheduleState(*ST);
+ assert(NewResource->canReserveResources(*MI) && "Reserve error.");
+ NewResource->reserveResources(*MI);
+ Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
+ }
+ }
+
+ int Resmii = Resources.size();
+ LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
+ return Resmii;
+}
+
+int ResourceManager::calculateResMII() const {
+ if (UseDFA)
+ return calculateResMIIDFA();
+
+ // Count each resource consumption and divide it by the number of units.
+ // ResMII is the max value among them.
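+ //
+ // A purely illustrative example (numbers are not from any real target): with
+ // 10 micro-ops and IssueWidth 4 the micro-op bound is ceil(10/4) = 3 cycles;
+ // if one resource with 2 units is consumed for 8 cycles in total, its bound
+ // is ceil(8/2) = 4, so ResMII would be 4.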
+
+ int NumMops = 0;
+ SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds());
+ for (SUnit &SU : DAG->SUnits) {
+ if (TII->isZeroCost(SU.getInstr()->getOpcode()))
+ continue;
+
+ const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
+ if (!SCDesc->isValid())
+ continue;
+
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ DAG->dumpNode(SU);
+ dbgs() << " #Mops: " << SCDesc->NumMicroOps << "\n"
+ << " WriteProcRes: ";
+ }
+ });
+ NumMops += SCDesc->NumMicroOps;
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ const MCProcResourceDesc *Desc =
+ SM.getProcResource(PRE.ProcResourceIdx);
+ dbgs() << Desc->Name << ": " << PRE.Cycles << ", ";
+ }
+ });
+ ResourceCount[PRE.ProcResourceIdx] += PRE.Cycles;
+ }
+ LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");
+ }
+
+ int Result = (NumMops + IssueWidth - 1) / IssueWidth;
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "#Mops: " << NumMops << ", "
+ << "IssueWidth: " << IssueWidth << ", "
+ << "Cycles: " << Result << "\n";
+ });
+
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ std::stringstream SS;
+ SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
+ << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"
+ << "\n";
+ dbgs() << SS.str();
+ }
+ });
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *Desc = SM.getProcResource(I);
+ int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ std::stringstream SS;
+ SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
+ << Desc->NumUnits << std::setw(10) << ResourceCount[I]
+ << std::setw(10) << Cycles << "\n";
+ dbgs() << SS.str();
+ }
+ });
+ if (Cycles > Result)
+ Result = Cycles;
+ }
+ return Result;
+}
+
+void ResourceManager::init(int II) {
+ InitiationInterval = II;
+ DFAResources.clear();
+ DFAResources.resize(II);
+ for (auto &I : DFAResources)
+ I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+ MRT.clear();
+ MRT.resize(II, SmallVector<uint64_t>(SM.getNumProcResourceKinds()));
+ NumScheduledMops.clear();
+ NumScheduledMops.resize(II);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 000000000000..fb96d0efa4d4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,79 @@
+//===- MachinePostDominators.cpp - Machine Post Dominator Calculation -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+namespace llvm {
+template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
+
+extern bool VerifyMachineDomInfo;
+} // namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+
+// Declare initializeMachinePostDominatorTreePass.
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree()
+ : MachineFunctionPass(ID), PDT(nullptr) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+}
+
+FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+ return new MachinePostDominatorTree();
+}
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ PDT = std::make_unique<PostDomTreeT>();
+ PDT->recalculate(F);
+ return false;
+}
+
+void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator(
+ ArrayRef<MachineBasicBlock *> Blocks) const {
+ assert(!Blocks.empty());
+
+ MachineBasicBlock *NCD = Blocks.front();
+ for (MachineBasicBlock *BB : Blocks.drop_front()) {
+ NCD = PDT->findNearestCommonDominator(NCD, BB);
+
+ // Stop when the root is reached.
+ if (PDT->isVirtualRoot(PDT->getNode(NCD)))
+ return nullptr;
+ }
+
+ return NCD;
+}
+
+void MachinePostDominatorTree::verifyAnalysis() const {
+ if (PDT && VerifyMachineDomInfo)
+ if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) {
+ errs() << "MachinePostDominatorTree verification failed\n";
+
+ abort();
+ }
+}
+
+void MachinePostDominatorTree::print(llvm::raw_ostream &OS,
+ const Module *M) const {
+ PDT->print(OS);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
new file mode 100644
index 000000000000..45cdcbfeab9f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -0,0 +1,149 @@
+//===- lib/CodeGen/MachineRegionInfo.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "machine-region-info"
+
+using namespace llvm;
+
+STATISTIC(numMachineRegions, "The # of machine regions");
+STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
+
+namespace llvm {
+
+template class RegionBase<RegionTraits<MachineFunction>>;
+template class RegionNodeBase<RegionTraits<MachineFunction>>;
+template class RegionInfoBase<RegionTraits<MachineFunction>>;
+
+} // end namespace llvm
+
+//===----------------------------------------------------------------------===//
+// MachineRegion implementation
+
+MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
+ MachineRegionInfo* RI,
+ MachineDominatorTree *DT, MachineRegion *Parent) :
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {}
+
+MachineRegion::~MachineRegion() = default;
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfo implementation
+
+MachineRegionInfo::MachineRegionInfo() = default;
+
+MachineRegionInfo::~MachineRegionInfo() = default;
+
+void MachineRegionInfo::updateStatistics(MachineRegion *R) {
+ ++numMachineRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple())
+ ++numMachineSimpleRegions;
+}
+
+void MachineRegionInfo::recalculate(MachineFunction &F,
+ MachineDominatorTree *DT_,
+ MachinePostDominatorTree *PDT_,
+ MachineDominanceFrontier *DF_) {
+ DT = DT_;
+ PDT = PDT_;
+ DF = DF_;
+
+ MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F);
+
+ TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr);
+ updateStatistics(TopLevelRegion);
+ calculate(F);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfoPass implementation
+//
+
+MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
+ initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+MachineRegionInfoPass::~MachineRegionInfoPass() = default;
+
+bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
+ releaseMemory();
+
+ auto DT = &getAnalysis<MachineDominatorTree>();
+ auto PDT = &getAnalysis<MachinePostDominatorTree>();
+ auto DF = &getAnalysis<MachineDominanceFrontier>();
+
+ RI.recalculate(F, DT, PDT, DF);
+
+ LLVM_DEBUG(RI.dump());
+
+ return false;
+}
+
+void MachineRegionInfoPass::releaseMemory() {
+ RI.releaseMemory();
+}
+
+void MachineRegionInfoPass::verifyAnalysis() const {
+ // Only do verification when the user wants to; otherwise this expensive
+ // check will be invoked by PMDataManager::verifyPreservedAnalysis when
+ // a region pass (marked PreservedAll) finishes.
+ if (MachineRegionInfo::VerifyRegionInfo)
+ RI.verifyAnalysis();
+}
+
+void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
+ RI.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
+ RI.dump();
+}
+#endif
+
+char MachineRegionInfoPass::ID = 0;
+char &MachineRegionInfoPassID = MachineRegionInfoPass::ID;
+
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, so that they can be
+// referenced from "include/llvm/LinkAllPasses.h". Otherwise the pass would be
+// removed by link-time optimization.
+
+namespace llvm {
+
+FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 000000000000..0048918fc53b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,667 @@
+//===- lib/CodeGen/MachineRegisterInfo.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
+ cl::init(true), cl::desc("Enable subregister liveness tracking."));
+
+// Pin the vtable to this file.
+void MachineRegisterInfo::Delegate::anchor() {}
+
+MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
+ : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness) {
+ unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ UsedPhysRegMask.resize(NumRegs);
+ PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
+ TheDelegates.clear();
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(Register Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
+ VRegInfo[Reg].first = RC;
+}
+
+void MachineRegisterInfo::setRegBank(Register Reg,
+ const RegisterBank &RegBank) {
+ VRegInfo[Reg].first = &RegBank;
+}
+
+static const TargetRegisterClass *
+constrainRegClass(MachineRegisterInfo &MRI, Register Reg,
+ const TargetRegisterClass *OldRC,
+ const TargetRegisterClass *RC, unsigned MinNumRegs) {
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC =
+ MRI.getTargetRegisterInfo()->getCommonSubClass(OldRC, RC);
+ if (!NewRC || NewRC == OldRC)
+ return NewRC;
+ if (NewRC->getNumRegs() < MinNumRegs)
+ return nullptr;
+ MRI.setRegClass(Reg, NewRC);
+ return NewRC;
+}
+
+const TargetRegisterClass *MachineRegisterInfo::constrainRegClass(
+ Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) {
+ if (Reg.isPhysical())
+ return nullptr;
+ return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs);
+}
+
+bool
+MachineRegisterInfo::constrainRegAttrs(Register Reg,
+ Register ConstrainingReg,
+ unsigned MinNumRegs) {
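+ // Reconcile the LLT and the register class or bank of Reg with those of
+ // ConstrainingReg; return false rather than leave Reg with an incompatible
+ // type, class, or bank.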
+ const LLT RegTy = getType(Reg);
+ const LLT ConstrainingRegTy = getType(ConstrainingReg);
+ if (RegTy.isValid() && ConstrainingRegTy.isValid() &&
+ RegTy != ConstrainingRegTy)
+ return false;
+ const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg);
+ if (!ConstrainingRegCB.isNull()) {
+ const auto RegCB = getRegClassOrRegBank(Reg);
+ if (RegCB.isNull())
+ setRegClassOrRegBank(Reg, ConstrainingRegCB);
+ else if (isa<const TargetRegisterClass *>(RegCB) !=
+ isa<const TargetRegisterClass *>(ConstrainingRegCB))
+ return false;
+ else if (isa<const TargetRegisterClass *>(RegCB)) {
+ if (!::constrainRegClass(
+ *this, Reg, cast<const TargetRegisterClass *>(RegCB),
+ cast<const TargetRegisterClass *>(ConstrainingRegCB), MinNumRegs))
+ return false;
+ } else if (RegCB != ConstrainingRegCB)
+ return false;
+ }
+ if (ConstrainingRegTy.isValid())
+ setType(Reg, ConstrainingRegTy);
+ return true;
+}
+
+bool
+MachineRegisterInfo::recomputeRegClass(Register Reg) {
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC =
+ getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (MachineOperand &MO : reg_nodbg_operands(Reg)) {
+ // Apply the effect of the given operand to NewRC.
+ MachineInstr *MI = MO.getParent();
+ unsigned OpNo = &MO - &MI->getOperand(0);
+ NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII,
+ getTargetRegisterInfo());
+ if (!NewRC || NewRC == OldRC)
+ return false;
+ }
+ setRegClass(Reg, NewRC);
+ return true;
+}
+
+Register MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
+ Register Reg = Register::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ RegAllocHints.grow(Reg);
+ insertVRegByName(Name, Reg);
+ return Reg;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+Register
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
+ StringRef Name) {
+ assert(RegClass && "Cannot create register without RegClass!");
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
+
+ // New virtual register number.
+ Register Reg = createIncompleteVirtualRegister(Name);
+ VRegInfo[Reg].first = RegClass;
+ noteNewVirtualRegister(Reg);
+ return Reg;
+}
+
+Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
+ StringRef Name) {
+ Register Reg = createIncompleteVirtualRegister(Name);
+ VRegInfo[Reg].first = VRegInfo[VReg].first;
+ setType(Reg, getType(VReg));
+ noteCloneVirtualRegister(Reg, VReg);
+ return Reg;
+}
+
+void MachineRegisterInfo::setType(Register VReg, LLT Ty) {
+ VRegToType.grow(VReg);
+ VRegToType[VReg] = Ty;
+}
+
+Register
+MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
+ // New virtual register number.
+ Register Reg = createIncompleteVirtualRegister(Name);
+ // FIXME: Should we use a dummy register class?
+ VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr);
+ setType(Reg, Ty);
+ noteNewVirtualRegister(Reg);
+ return Reg;
+}
+
+void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); }
+
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (!VRegInfo[Reg].second)
+ continue;
+ verifyUseList(Reg);
+ errs() << "Remaining virtual register "
+ << printReg(Reg, getTargetRegisterInfo()) << "...\n";
+ for (MachineInstr &MI : reg_instructions(Reg))
+ errs() << "...in instruction: " << MI << "\n";
+ std::abort();
+ }
+#endif
+ VRegInfo.clear();
+ for (auto &I : LiveIns)
+ I.second = 0;
+}
+
+void MachineRegisterInfo::verifyUseList(Register Reg) const {
+#ifndef NDEBUG
+ bool Valid = true;
+ for (MachineOperand &M : reg_operands(Reg)) {
+ MachineOperand *MO = &M;
+ MachineInstr *MI = MO->getParent();
+ if (!MI) {
+ errs() << printReg(Reg, getTargetRegisterInfo())
+ << " use list MachineOperand " << MO
+ << " has no parent instruction.\n";
+ Valid = false;
+ continue;
+ }
+ MachineOperand *MO0 = &MI->getOperand(0);
+ unsigned NumOps = MI->getNumOperands();
+ if (!(MO >= MO0 && MO < MO0+NumOps)) {
+ errs() << printReg(Reg, getTargetRegisterInfo())
+ << " use list MachineOperand " << MO
+ << " doesn't belong to parent MI: " << *MI;
+ Valid = false;
+ }
+ if (!MO->isReg()) {
+ errs() << printReg(Reg, getTargetRegisterInfo())
+ << " MachineOperand " << MO << ": " << *MO
+ << " is not a register\n";
+ Valid = false;
+ }
+ if (MO->getReg() != Reg) {
+ errs() << printReg(Reg, getTargetRegisterInfo())
+ << " use-list MachineOperand " << MO << ": "
+ << *MO << " is the wrong register\n";
+ Valid = false;
+ }
+ }
+ assert(Valid && "Invalid use list");
+#endif
+}
+
+void MachineRegisterInfo::verifyUseLists() const {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ verifyUseList(Register::index2VirtReg(i));
+ for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i)
+ verifyUseList(i);
+#endif
+}
+
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
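+ //
+ // Sketch of a three-operand list:
+ // Next: Head -> MO1 -> MO2 -> MO3 -> null
+ // Prev: MO1 -> MO3, MO2 -> MO1, MO3 -> MO2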
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = nullptr;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = nullptr;
+ Last->Contents.Reg.Next = MO;
+ }
+}
+
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = nullptr;
+ MO->Contents.Reg.Next = nullptr;
+}
+
+/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
+///
+/// The Dst range is assumed to be uninitialized memory. (Or it may contain
+/// operands that won't be destroyed, which is OK because the MO destructor is
+/// trivial anyway).
+///
+/// The Src and Dst ranges may overlap.
+void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
+ MachineOperand *Src,
+ unsigned NumOps) {
+ assert(Src != Dst && NumOps && "Noop moveOperands");
+
+ // Copy backwards if Dst is within the Src range.
+ int Stride = 1;
+ if (Dst >= Src && Dst < Src + NumOps) {
+ Stride = -1;
+ Dst += NumOps - 1;
+ Src += NumOps - 1;
+ }
+
+ // Copy one operand at a time.
+ do {
+ new (Dst) MachineOperand(*Src);
+
+ // Dst takes Src's place in the use-def chain.
+ if (Src->isReg()) {
+ MachineOperand *&Head = getRegUseDefListHead(Src->getReg());
+ MachineOperand *Prev = Src->Contents.Reg.Prev;
+ MachineOperand *Next = Src->Contents.Reg.Next;
+ assert(Head && "List empty, but operand is chained");
+ assert(Prev && "Operand was not on use-def list");
+
+ // Prev links are circular, next link is NULL instead of looping back to
+ // Head.
+ if (Src == Head)
+ Head = Dst;
+ else
+ Prev->Contents.Reg.Next = Dst;
+
+ // Update Prev pointer. This also works when Src was pointing to itself
+ // in a 1-element list. In that case Head == Dst.
+ (Next ? Next : Head)->Contents.Reg.Prev = Dst;
+ }
+
+ Dst += Stride;
+ Src += Stride;
+ } while (--NumOps);
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+/// If ToReg is a physical register we apply the sub register to obtain the
+/// final/proper physical register.
+void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {
+ if (ToReg.isPhysical()) {
+ O.substPhysReg(ToReg, *TRI);
+ } else {
+ O.setReg(ToReg);
+ }
+ }
+}
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(Register Reg) const {
+ // Since we are in SSA form, we can use the first definition.
+ def_instr_iterator I = def_instr_begin(Reg);
+ assert((I.atEnd() || std::next(I) == def_instr_end()) &&
+ "getVRegDef assumes a single definition or no definition");
+ return !I.atEnd() ? &*I : nullptr;
+}
+
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register or null if none is found. If there are
+/// multiple definitions or no definition, return null.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const {
+ if (def_empty(Reg)) return nullptr;
+ def_instr_iterator I = def_instr_begin(Reg);
+ if (std::next(I) != def_instr_end())
+ return nullptr;
+ return &*I;
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const {
+ return hasSingleElement(use_nodbg_operands(RegNo));
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
+ return hasSingleElement(use_nodbg_instructions(RegNo));
+}
+
+bool MachineRegisterInfo::hasAtMostUserInstrs(Register Reg,
+ unsigned MaxUsers) const {
+ return hasNItemsOrLess(use_instr_nodbg_begin(Reg), use_instr_nodbg_end(),
+ MaxUsers);
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(Register Reg) const {
+ for (MachineOperand &MO : use_operands(Reg))
+ MO.setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(Register Reg) const {
+ for (const std::pair<MCRegister, Register> &LI : liveins())
+ if ((Register)LI.first == Reg || LI.second == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+MCRegister MachineRegisterInfo::getLiveInPhysReg(Register VReg) const {
+ for (const std::pair<MCRegister, Register> &LI : liveins())
+ if (LI.second == VReg)
+ return LI.first;
+ return MCRegister();
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+Register MachineRegisterInfo::getLiveInVirtReg(MCRegister PReg) const {
+ for (const std::pair<MCRegister, Register> &LI : liveins())
+ if (LI.first == PReg)
+ return LI.second;
+ return Register();
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_nodbg_empty(LiveIns[i].second)) {
+ // The livein has no non-dbg uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+}
+
+LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(Register Reg) const {
+ // Lane masks are only defined for vregs.
+ assert(Reg.isVirtual());
+ const TargetRegisterClass &TRC = *getRegClass(Reg);
+ return TRC.getLaneMask();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(Register Reg) const {
+ for (MachineInstr &I : use_instructions(Reg))
+ I.dump();
+}
+#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF);
+ assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
+ assert(Register::isPhysicalRegister(PhysReg));
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ if (TRI->isConstantPhysReg(PhysReg))
+ return true;
+
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
+ for (MCRegAliasIterator AI(PhysReg, TRI, true);
+ AI.isValid(); ++AI)
+ if (!def_empty(*AI) || isAllocatable(*AI))
+ return false;
+ return true;
+}
+
+/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
+/// specified register as undefined, which causes the DBG_VALUE to be
+/// deleted during LiveDebugVariables analysis.
+void MachineRegisterInfo::markUsesInDebugValueAsUndef(Register Reg) const {
+ // Mark any DBG_VALUE* that uses Reg as undef (but don't delete it.)
+ // We use make_early_inc_range because setReg invalidates the iterator.
+ for (MachineInstr &UseMI : llvm::make_early_inc_range(use_instructions(Reg))) {
+ if (UseMI.isDebugValue() && UseMI.hasDebugOperandForReg(Reg))
+ UseMI.setDebugValueUndef();
+ }
+}
+
+static const Function *getCalledFunction(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isGlobal())
+ continue;
+ const Function *Func = dyn_cast<Function>(MO.getGlobal());
+ if (Func != nullptr)
+ return Func;
+ }
+ return nullptr;
+}
+
+static bool isNoReturnDef(const MachineOperand &MO) {
+ // Anything which is not a noreturn function is a real def.
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.isCall())
+ return false;
+ const MachineBasicBlock &MBB = *MI.getParent();
+ if (!MBB.succ_empty())
+ return false;
+ const MachineFunction &MF = *MBB.getParent();
+ // We need to keep correct unwind information even if the function will
+ // not return, since the runtime may need it.
+ if (MF.getFunction().hasFnAttribute(Attribute::UWTable))
+ return false;
+ const Function *Called = getCalledFunction(MI);
+ return Called != nullptr && Called->hasFnAttribute(Attribute::NoReturn) &&
+ Called->hasFnAttribute(Attribute::NoUnwind);
+}
+
+bool MachineRegisterInfo::isPhysRegModified(MCRegister PhysReg,
+ bool SkipNoReturnDef) const {
+ if (UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) {
+ for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) {
+ if (!SkipNoReturnDef && isNoReturnDef(MO))
+ continue;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg,
+ bool SkipRegMaskTest) const {
+ if (!SkipRegMaskTest && UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid();
+ ++AliasReg) {
+ if (!reg_nodbg_empty(*AliasReg))
+ return true;
+ }
+ return false;
+}
+
+void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) {
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ assert(Reg && (Reg < TRI->getNumRegs()) &&
+ "Trying to disable an invalid register");
+
+ if (!IsUpdatedCSRsInitialized) {
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+
+ IsUpdatedCSRsInitialized = true;
+ }
+
+ // Remove the register (and its aliases from the list).
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ llvm::erase_value(UpdatedCSRs, *AI);
+}
+
+const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
+ if (IsUpdatedCSRsInitialized)
+ return UpdatedCSRs.data();
+
+ return getTargetRegisterInfo()->getCalleeSavedRegs(MF);
+}
+
+void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
+ if (IsUpdatedCSRsInitialized)
+ UpdatedCSRs.clear();
+
+ append_range(UpdatedCSRs, CSRs);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+ IsUpdatedCSRsInitialized = true;
+}
+
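+/// Return true if the given register unit is considered reserved, i.e. for
+/// some root register of the unit, the root and all of its super-registers
+/// are reserved.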
+bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ if (all_of(TRI->superregs_inclusive(*Root),
+ [&](MCPhysReg Super) { return isReserved(Super); }))
+ return true;
+ }
+ return false;
+}
+
+bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isArgumentRegister(MF, Reg);
+}
+
+bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isFixedRegister(MF, Reg);
+}
+
+bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
new file mode 100644
index 000000000000..324084fb9c32
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -0,0 +1,82 @@
+//===- MachineSSAContext.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a specialization of the GenericSSAContext<X>
+/// template class for Machine IR.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MachineSSAContext::setFunction(MachineFunction &Fn) {
+ MF = &Fn;
+ RegInfo = &MF->getRegInfo();
+}
+
+MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) {
+ return &F.front();
+}
+
+void MachineSSAContext::appendBlockTerms(
+ SmallVectorImpl<const MachineInstr *> &terms,
+ const MachineBasicBlock &block) {
+ for (auto &T : block.terminators())
+ terms.push_back(&T);
+}
+
+void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs,
+ const MachineBasicBlock &block) {
+ for (const MachineInstr &instr : block.instrs()) {
+ for (const MachineOperand &op : instr.all_defs())
+ defs.push_back(op.getReg());
+ }
+}
+
+/// Get the defining block of a value.
+MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
+ if (!value)
+ return nullptr;
+ return RegInfo->getVRegDef(value)->getParent();
+}
+
+bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
+ return Phi.isConstantValuePHI();
+}
+
+Printable MachineSSAContext::print(const MachineBasicBlock *Block) const {
+ if (!Block)
+ return Printable([](raw_ostream &Out) { Out << "<nullptr>"; });
+ return Printable([Block](raw_ostream &Out) { Block->printName(Out); });
+}
+
+Printable MachineSSAContext::print(const MachineInstr *I) const {
+ return Printable([I](raw_ostream &Out) { I->print(Out); });
+}
+
+Printable MachineSSAContext::print(Register Value) const {
+ auto *MRI = RegInfo;
+ return Printable([MRI, Value](raw_ostream &Out) {
+ Out << printReg(Value, MRI->getTargetRegisterInfo(), 0, MRI);
+
+ if (Value) {
+ // Try to print the definition.
+ if (auto *Instr = MRI->getUniqueVRegDef(Value)) {
+ Out << ": ";
+ Instr->print(Out);
+ }
+ }
+ });
+}
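
A short illustration of how the Printable helpers above compose with raw_ostream; dumpSSAValue is a hypothetical debugging helper and assumes setFunction() has already been called on the context.

#include "llvm/CodeGen/MachineSSAContext.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: print a virtual register and its defining block using
// the MachineSSAContext printers defined in this file.
static void dumpSSAValue(const MachineSSAContext &Ctx, Register V) {
  errs() << "value " << Ctx.print(V) << "\n  defined in "
         << Ctx.print(Ctx.getDefBlock(V)) << "\n";
}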
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 000000000000..48076663ddf5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,373 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on SSAUpdater
+// class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-ssaupdater"
+
+using AvailableValsTy = DenseMap<MachineBasicBlock *, Register>;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : InsertedPHIs(NewPHI), TII(MF.getSubtarget().getInstrInfo()),
+ MRI(&MF.getRegInfo()) {}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates.
+void MachineSSAUpdater::Initialize(const TargetRegisterClass *RC) {
+ if (!AV)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ VRC = RC;
+}
+
+void MachineSSAUpdater::Initialize(Register V) {
+ Initialize(MRI->getRegClass(V));
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, Register V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Register MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+Register LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, Register>> &PredValues) {
+ if (BB->empty())
+ return Register();
+
+ MachineBasicBlock::iterator I = BB->begin();
+ if (!I->isPHI())
+ return Register();
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ Register SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return Register();
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which defines
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ Register NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block. If ExistingValueOnly is
+/// true then this will only return an existing value or $noreg; otherwise new
+/// instructions may be inserted to materialize a value.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
+ bool ExistingValueOnly) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlockInternal(BB, ExistingValueOnly);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // If we cannot insert new instructions, just return $noreg.
+ if (ExistingValueOnly)
+ return Register();
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, Register>, 8> PredValues;
+ Register SingularValue;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock *PredBB : BB->predecessors()) {
+ Register PredVal = GetValueAtEndOfBlockInternal(PredBB, ExistingValueOnly);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = Register();
+ }
+
+ // If all the merged values are the same, just use it.
+ if (SingularValue)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ Register DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // If we cannot create new instructions, return $noreg now.
+ if (ExistingValueOnly)
+ return Register();
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI.getReg(0);
+}
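
A hedged usage sketch of this updater, for the common case where duplication has produced a second definition NewReg of a value originally defined as OldReg; every name below is a placeholder rather than code from this file.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"

using namespace llvm;

static void rewriteAfterDuplication(MachineFunction &MF, Register OldReg,
                                    MachineBasicBlock &OrigBB, Register NewReg,
                                    MachineBasicBlock &DupBB) {
  MachineSSAUpdater SSAUpdate(MF);
  SSAUpdate.Initialize(OldReg); // adopt OldReg's register class
  // Record which value is live out of each defining block.
  SSAUpdate.AddAvailableValue(&OrigBB, OldReg);
  SSAUpdate.AddAvailableValue(&DupBB, NewReg);
  // Retarget every use of OldReg; PHIs are inserted on demand by
  // GetValueInMiddleOfBlock() / GetValueAtEndOfBlockInternal().
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (MachineOperand &MO : make_early_inc_range(MRI.use_operands(OldReg)))
    SSAUpdate.RewriteUse(MO);
}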
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ Register NewVR;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+namespace llvm {
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ using BlkT = MachineBasicBlock;
+ using ValT = Register;
+ using PhiT = MachineInstr;
+ using BlkSucc_iterator = MachineBasicBlock::succ_iterator;
+
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ PHI_iterator(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ PHI_iterator &operator++() { idx += 2; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ append_range(*Preds, BB->predecessors());
+ }
+
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static Register GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstNonPHI(),
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static Register CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, Register Val,
+ MachineBasicBlock *Pred) {
+ MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return nullptr;
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(Register Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(Register Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return nullptr;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static Register GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
+ }
+};
+
+} // end namespace llvm
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Register
+MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB,
+ bool ExistingValueOnly) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ Register ExistingVal = AvailableVals.lookup(BB);
+ if (ExistingVal || ExistingValueOnly)
+ return ExistingVal;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..ba5432459d12
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,4332 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+STATISTIC(NumClustered, "Number of load/store pairs clustered");
+
+namespace llvm {
+
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+cl::opt<bool>
+DumpCriticalPathLength("misched-dcpl", cl::Hidden,
+ cl::desc("Print critical path length to stdout"));
+
+cl::opt<bool> VerifyScheduling(
+ "verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
+#ifndef NDEBUG
+cl::opt<bool> ViewMISchedDAGs(
+ "view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
+cl::opt<bool> MISchedDumpReservedCycles(
+ "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Dump resource usage at schedule boundary."));
+cl::opt<bool> MischedDetailResourceBooking(
+ "misched-detail-resource-booking", cl::Hidden, cl::init(false),
+ cl::desc("Show details of invoking getNextResoufceCycle."));
+#else
+const bool ViewMISchedDAGs = false;
+const bool PrintDAGs = false;
+const bool MischedDetailResourceBooking = false;
+#ifdef LLVM_ENABLE_DUMP
+const bool MISchedDumpReservedCycles = false;
+#endif // LLVM_ENABLE_DUMP
+#endif // NDEBUG
+
+} // end namespace llvm
+
+#ifndef NDEBUG
+/// In some situations a few uninteresting nodes depend on nearly all other
+/// nodes in the graph; provide a cutoff to hide them.
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
+ cl::desc("Hide nodes with more predecessor/successor than cutoff"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+
+static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
+ cl::desc("Only schedule this function"));
+static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
+ cl::desc("Only schedule this MBB#"));
+#endif // NDEBUG
+
+/// Avoid quadratic complexity in unusually large basic blocks by limiting the
+/// size of the ready lists.
+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
+ cl::desc("Limit ready list to N instructions"), cl::init(256));
+
+static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
+ cl::desc("Enable register pressure scheduling."), cl::init(true));
+
+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
+ cl::desc("Enable cyclic critical path analysis."), cl::init(true));
+
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable memop clustering."),
+ cl::init(true));
+static cl::opt<bool>
+ ForceFastCluster("force-fast-cluster", cl::Hidden,
+ cl::desc("Switch to fast cluster algorithm with the lost "
+ "of some fusion opportunities"),
+ cl::init(false));
+static cl::opt<unsigned>
+ FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
+ cl::desc("The threshold for fast cluster"),
+ cl::init(1000));
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static cl::opt<bool> MISchedDumpScheduleTrace(
+ "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
+ cl::desc("Dump resource usage at schedule boundary."));
+static cl::opt<unsigned>
+ HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
+ cl::desc("Set width of the columns with "
+ "the resources and schedule units"),
+ cl::init(19));
+static cl::opt<unsigned>
+ ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
+ cl::desc("Set width of the columns showing resource booking."),
+ cl::init(5));
+static cl::opt<bool> MISchedSortResourcesInTrace(
+ "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
+ cl::desc("Sort the resources printed in the dump trace"));
+#endif
+
+static cl::opt<unsigned>
+ MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
+ cl::desc("Number of intervals to track"), cl::init(10));
+
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
+
+// Pin the vtables to this file.
+void MachineSchedStrategy::anchor() {}
+
+void ScheduleDAGMutation::anchor() {}
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+MachineSchedContext::MachineSchedContext() {
+ RegClassInfo = new RegisterClassInfo();
+}
+
+MachineSchedContext::~MachineSchedContext() {
+ delete RegClassInfo;
+}
+
+namespace {
+
+/// Base class for a machine scheduler class that can run at any point.
+class MachineSchedulerBase : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
+
+ void print(raw_ostream &O, const Module* = nullptr) const override;
+
+protected:
+ void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
+};
+
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedulerBase {
+public:
+ MachineScheduler();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ static char ID; // Class identification, replacement for typeinfo
+
+protected:
+ ScheduleDAGInstrs *createMachineScheduler();
+};
+
+/// PostMachineScheduler runs shortly before code emission.
+class PostMachineScheduler : public MachineSchedulerBase {
+public:
+ PostMachineScheduler();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ static char ID; // Class identification, replacement for typeinfo
+
+protected:
+ ScheduleDAGInstrs *createPostMachineScheduler();
+};
+
+} // end anonymous namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+char PostMachineScheduler::ID = 0;
+
+char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
+
+PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
+ initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
+ MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return nullptr;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry>>
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
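
For illustration, an out-of-tree scheduler can hook into this registry and become selectable through -misched=<name>; the factory below is a placeholder that simply defers to the generic pre-RA scheduler.

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Placeholder factory: a real target would build a ScheduleDAGMILive with its
// own MachineSchedStrategy here.
static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
  return createGenericSchedLive(C);
}

// Registering the factory makes it selectable via -misched=my-custom.
static MachineSchedRegistry
    MyCustomSchedRegistry("my-custom", "Hypothetical custom scheduler.",
                          createMyCustomSched);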
+
+static cl::opt<bool> EnableMachineSched(
+ "enable-misched",
+ cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool> EnablePostRAMachineSched(
+ "enable-post-misched",
+ cl::desc("Enable the post-ra machine instruction scheduling pass."),
+ cl::init(true), cl::Hidden);
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::const_iterator
+priorNonDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugOrPseudoInstr())
+ break;
+ }
+ return I;
+}
+
+/// Non-const version.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator Beg) {
+ return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
+ .getNonConstIterator();
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::const_iterator
+nextIfDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugOrPseudoInstr())
+ break;
+ }
+ return I;
+}
+
+/// Non-const version.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator End) {
+ return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
+ .getNonConstIterator();
+}
+
+/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
+ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor != useDefaultMachineSched)
+ return Ctor(this);
+
+ // Get the default scheduler set by the target for this function.
+ ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
+ if (Scheduler)
+ return Scheduler;
+
+ // Default to GenericScheduler.
+ return createGenericSchedLive(this);
+}
+
+/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
+/// the caller. We don't have a command line option to override the postRA
+/// scheduler. The Target must configure it.
+ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
+ // Get the postRA scheduler set by the target for this function.
+ ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
+ if (Scheduler)
+ return Scheduler;
+
+ // Default to GenericScheduler.
+ return createGenericSchedPostRA(this);
+}
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+
+ if (EnableMachineSched.getNumOccurrences()) {
+ if (!EnableMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enableMachineScheduler())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
+
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ if (VerifyScheduling) {
+ LLVM_DEBUG(LIS->dump());
+ MF->verify(this, "Before machine scheduling.");
+ }
+ RegClassInfo->runOnMachineFunction(*MF);
+
+ // Instantiate the selected scheduler for this target, function, and
+ // optimization level.
+ std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+ scheduleRegions(*Scheduler, false);
+
+ LLVM_DEBUG(LIS->dump());
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
+ return true;
+}
+
+bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+
+ if (EnablePostRAMachineSched.getNumOccurrences()) {
+ if (!EnablePostRAMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
+ LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
+
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ if (VerifyScheduling)
+ MF->verify(this, "Before post machine scheduling.");
+
+ // Instantiate the selected scheduler for this target, function, and
+ // optimization level.
+ std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
+ scheduleRegions(*Scheduler, true);
+
+ if (VerifyScheduling)
+ MF->verify(this, "After post machine scheduling.");
+ return true;
+}
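
When the -enable-misched / -enable-post-misched flags are not passed, both drivers above defer to subtarget hooks; a hypothetical subtarget opts in or out roughly as follows (sketch only, never instantiated here).

#include "llvm/CodeGen/TargetSubtargetInfo.h"

// Illustrative overrides of the hooks consulted by runOnMachineFunction above.
class MySubtarget : public llvm::TargetSubtargetInfo {
public:
  // Run the pre-RA MachineScheduler pass for this subtarget.
  bool enableMachineScheduler() const override { return true; }
  // Skip the PostMachineScheduler pass for this subtarget.
  bool enablePostRAMachineScheduler() const override { return false; }
};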
+
+/// Return true if the given instruction should not be included in a scheduling
+/// region.
+///
+/// MachineScheduler does not currently support scheduling across calls. To
+/// handle calls, the DAG builder needs to be modified to create register
+/// anti/output dependencies on the registers clobbered by the call's regmask
+/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
+/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
+/// the boundary, but there would be no benefit to postRA scheduling across
+/// calls this late anyway.
+static bool isSchedBoundary(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB,
+ MachineFunction *MF,
+ const TargetInstrInfo *TII) {
+ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
+}
+
+/// A region of an MBB for scheduling.
+namespace {
+struct SchedRegion {
+ /// RegionBegin is the first instruction in the scheduling region, and
+ /// RegionEnd is either MBB->end() or the scheduling boundary after the
+ /// last instruction in the scheduling region. These iterators cannot refer
+ /// to instructions outside of the identified scheduling region because
+ /// those may be reordered before scheduling this region.
+ MachineBasicBlock::iterator RegionBegin;
+ MachineBasicBlock::iterator RegionEnd;
+ unsigned NumRegionInstrs;
+
+ SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
+ unsigned N) :
+ RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
+};
+} // end anonymous namespace
+
+using MBBRegionsVector = SmallVector<SchedRegion, 16>;
+
+static void
+getSchedRegions(MachineBasicBlock *MBB,
+ MBBRegionsVector &Regions,
+ bool RegionsTopDown) {
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ MachineBasicBlock::iterator I = nullptr;
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = I) {
+
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end() ||
+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
+ --RegionEnd;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ unsigned NumRegionInstrs = 0;
+ I = RegionEnd;
+ for (;I != MBB->begin(); --I) {
+ MachineInstr &MI = *std::prev(I);
+ if (isSchedBoundary(&MI, &*MBB, MF, TII))
+ break;
+ if (!MI.isDebugOrPseudoInstr()) {
+ // MBB::size() uses instr_iterator to count. Here we need a bundle to
+ // count as a single instruction.
+ ++NumRegionInstrs;
+ }
+ }
+
+ // It's possible we found a scheduling region that only has debug
+ // instructions. Don't bother scheduling these.
+ if (NumRegionInstrs != 0)
+ Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
+ }
+
+ if (RegionsTopDown)
+ std::reverse(Regions.begin(), Regions.end());
+}
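
Targets can widen the set of boundaries that getSchedRegions() honors by overriding TargetInstrInfo::isSchedulingBoundary; the class below is a simplified, hypothetical sketch (real targets derive from their generated <Target>GenInstrInfo instead).

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Hypothetical hook: also break regions at inline asm, in addition to the
// default boundaries (calls are handled separately by isSchedBoundary above).
class MyTargetInstrInfo : public TargetInstrInfo {
public:
  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override {
    if (MI.isInlineAsm())
      return true;
    return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
  }
};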
+
+/// Main driver for both MachineScheduler and PostMachineScheduler.
+void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
+ bool FixKillFlags) {
+ // Visit all machine basic blocks.
+ //
+ // TODO: Visit blocks in global postorder or postorder within the bottom-up
+ // loop tree. Then we can optionally compute global RegPressure.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler.startBlock(&*MBB);
+
+#ifndef NDEBUG
+ if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
+ continue;
+ if (SchedOnlyBlock.getNumOccurrences()
+ && (int)SchedOnlyBlock != MBB->getNumber())
+ continue;
+#endif
+
+ // Break the block into scheduling regions [I, RegionEnd). RegionEnd
+ // points to the scheduling boundary at the bottom of the region. The DAG
+ // does not include RegionEnd, but the region does (i.e. the next
+ // RegionEnd is above the previous RegionBegin). If the current block has
+ // no terminator then RegionEnd == MBB->end() for the bottom region.
+ //
+ // All the regions of MBB are first found and stored in MBBRegions, which
+ // are processed top-down within the block when the scheduler requests it.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls. Instructions must not be
+ // added to other regions than the current one without updating MBBRegions.
+
+ MBBRegionsVector MBBRegions;
+ getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
+ for (const SchedRegion &R : MBBRegions) {
+ MachineBasicBlock::iterator I = R.RegionBegin;
+ MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
+ unsigned NumRegionInstrs = R.NumRegionInstrs;
+
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == std::prev(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler.exitRegion();
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
+ << " " << MBB->getName() << "\n From: " << *I
+ << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End\n";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << MF->getName();
+ errs() << ":%bb. " << MBB->getNumber();
+ errs() << " " << MBB->getName() << " \n";
+ }
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates the original region iterators.
+ Scheduler.schedule();
+
+ // Close the current region.
+ Scheduler.exitRegion();
+ }
+ Scheduler.finishBlock();
+ // FIXME: Ideally, no further passes should rely on kill flags. However,
+ // thumb2 size reduction is currently an exception, so the PostMIScheduler
+ // needs to do this.
+ if (FixKillFlags)
+ Scheduler.fixupKills(*MBB);
+ }
+ Scheduler.finalizeSchedule();
+}
+
+void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ReadyQueue::dump() const {
+ dbgs() << "Queue " << Name << ": ";
+ for (const SUnit *SU : Queue)
+ dbgs() << SU->NodeNum << " ";
+ dbgs() << "\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Basic machine instruction scheduling. This is
+// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
+// virtual registers.
+//===----------------------------------------------------------------------===//
+
+// Provide a vtable anchor.
+ScheduleDAGMI::~ScheduleDAGMI() = default;
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*SuccSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
+ SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
+
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SDep &Succ : SU->Succs)
+ releaseSucc(SU, &Succ);
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*PredSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
+ PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
+
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SDep &Pred : SU->Preds)
+ releasePred(SU, &Pred);
+}
+
+void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
+ ScheduleDAGInstrs::startBlock(bb);
+ SchedImpl->enterMBB(bb);
+}
+
+void ScheduleDAGMI::finishBlock() {
+ SchedImpl->leaveMBB();
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+
+ SchedImpl->initPolicy(begin, end, regioninstrs);
+}
+
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
+void ScheduleDAGMI::moveInstruction(
+ MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
+ // Advance RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ ++RegionBegin;
+
+ // Update the instruction stream.
+ BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
+ if (LIS)
+ LIS->handleMove(*MI, /*UpdateFlags=*/true);
+
+ // Recede RegionBegin if an instruction moves above the first.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// Per-region scheduling driver, called back from
+/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
+/// does not consider liveness or register pressure. It is useful for PostRA
+/// scheduling and potentially other custom schedulers.
+void ScheduleDAGMI::schedule() {
+ LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+ LLVM_DEBUG(SchedImpl->dumpPolicy());
+
+ // Build the DAG.
+ buildSchedGraph(AA);
+
+ postProcessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ MachineInstr *MI = SU->getInstr();
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else
+ moveInstruction(MI, CurrentTop);
+ } else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ }
+ // Notify the scheduling strategy before updating the DAG.
+ // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
+ // runs, it can then use the accurate ReadyCycle time to determine whether
+ // newly released nodes can move to the readyQ.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postProcessDAG() {
+ for (auto &m : Mutations)
+ m->apply(this);
+}
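
postProcessDAG() simply walks a list of ScheduleDAGMutation objects; a hypothetical mutation of that kind looks like the sketch below (targets typically attach these via ScheduleDAGMI::addMutation() or their createMachineScheduler hook).

#include "llvm/CodeGen/ScheduleDAGMutation.h"

// Hypothetical mutation: the body is where cluster or artificial edges would
// be added to the DAG before scheduling starts.
struct MyClusterMutation : llvm::ScheduleDAGMutation {
  void apply(llvm::ScheduleDAGInstrs *DAG) override {
    // e.g. inspect DAG->SUnits and add edges for pairs of nodes that should
    // stay adjacent in the final schedule.
    (void)DAG;
  }
};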
+
+void ScheduleDAGMI::
+findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots) {
+ for (SUnit &SU : SUnits) {
+ assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
+
+ // Order predecessors so DFSResult follows the critical path.
+ SU.biasCriticalPath();
+
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (!SU.NumPredsLeft)
+ TopRoots.push_back(&SU);
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (!SU.NumSuccsLeft)
+ BotRoots.push_back(&SU);
+ }
+ ExitSU.biasCriticalPath();
+}
+
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ NextClusterSucc = nullptr;
+ NextClusterPred = nullptr;
+
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ //
+ // Nodes with unreleased weak edges can still be roots.
+ // Release top roots in forward order.
+ for (SUnit *SU : TopRoots)
+ SchedImpl->releaseTopNode(SU);
+
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+ SchedImpl->releaseBottomNode(*I);
+ }
+
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ SchedImpl->registerRoots();
+
+ // Advance past initial DebugValues.
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+ CurrentBottom = RegionEnd;
+}
+
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+ // Release dependent instructions for scheduling.
+ if (IsTopNode)
+ releaseSuccessors(SU);
+ else
+ releasePredecessors(SU);
+
+ SU->isScheduled = true;
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ if (&*RegionBegin == DbgValue)
+ ++RegionBegin;
+ BB->splice(std::next(OrigPrevMI), BB, DbgValue);
+ if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
+ RegionEnd = DbgValue;
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static const char *scheduleTableLegend = " i: issue\n x: resource booked";
+
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
+ // Bail out when there is no schedule model to query.
+ if (!SchedModel.hasInstrSchedModel())
+ return;
+
+ // Nothing to show if there is at most one instruction.
+ if (BB->size() < 2)
+ return;
+
+ dbgs() << " * Schedule table (TopDown):\n";
+ dbgs() << scheduleTableLegend << "\n";
+ const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
+ unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU)
+ continue;
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
+ LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
+ }
+ }
+ // Print the header with the cycles
+ dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+ for (unsigned C = FirstCycle; C <= LastCycle; ++C)
+ dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+ dbgs() << "|\n";
+
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU) {
+ dbgs() << "Missing SUnit\n";
+ continue;
+ }
+ std::string NodeName("SU(");
+ NodeName += std::to_string(SU->NodeNum) + ")";
+ dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+ unsigned C = FirstCycle;
+ for (; C <= LastCycle; ++C) {
+ if (C == SU->TopReadyCycle)
+ dbgs() << llvm::left_justify("| i", ColWidth);
+ else
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ dbgs() << "|\n";
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+
+ SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC)));
+
+ if (MISchedSortResourcesInTrace)
+ llvm::stable_sort(ResourcesIt,
+ [](const MCWriteProcResEntry &LHS,
+ const MCWriteProcResEntry &RHS) -> bool {
+ return LHS.StartAtCycle < RHS.StartAtCycle ||
+ (LHS.StartAtCycle == RHS.StartAtCycle &&
+ LHS.Cycles < RHS.Cycles);
+ });
+ for (const MCWriteProcResEntry &PI : ResourcesIt) {
+ C = FirstCycle;
+ const std::string ResName =
+ SchedModel.getResourceName(PI.ProcResourceIdx);
+ dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
+ for (; C < SU->TopReadyCycle + PI.StartAtCycle; ++C) {
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, ++C)
+ dbgs() << llvm::left_justify("| x", ColWidth);
+ while (C++ <= LastCycle)
+ dbgs() << llvm::left_justify("|", ColWidth);
+ // Place end char
+ dbgs() << "| \n";
+ }
+ }
+}
+
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
+ // Bail out when there is no schedule model to query.
+ if (!SchedModel.hasInstrSchedModel())
+ return;
+
+ // Nothing to show if there is at most one instruction.
+ if (BB->size() < 2)
+ return;
+
+ dbgs() << " * Schedule table (BottomUp):\n";
+ dbgs() << scheduleTableLegend << "\n";
+
+ const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
+ int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU)
+ continue;
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
+ LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
+ }
+ }
+ // Print the header with the cycles
+ dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+ for (int C = FirstCycle; C >= LastCycle; --C)
+ dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+ dbgs() << "|\n";
+
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU) {
+ dbgs() << "Missing SUnit\n";
+ continue;
+ }
+ std::string NodeName("SU(");
+ NodeName += std::to_string(SU->NodeNum) + ")";
+ dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+ int C = FirstCycle;
+ for (; C >= LastCycle; --C) {
+ if (C == (int)SU->BotReadyCycle)
+ dbgs() << llvm::left_justify("| i", ColWidth);
+ else
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ dbgs() << "|\n";
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC)));
+
+ if (MISchedSortResourcesInTrace)
+ llvm::stable_sort(ResourcesIt,
+ [](const MCWriteProcResEntry &LHS,
+ const MCWriteProcResEntry &RHS) -> bool {
+ return LHS.StartAtCycle < RHS.StartAtCycle ||
+ (LHS.StartAtCycle == RHS.StartAtCycle &&
+ LHS.Cycles < RHS.Cycles);
+ });
+ for (const MCWriteProcResEntry &PI : ResourcesIt) {
+ C = FirstCycle;
+ const std::string ResName =
+ SchedModel.getResourceName(PI.ProcResourceIdx);
+ dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
+ for (; C > ((int)SU->BotReadyCycle - (int)PI.StartAtCycle); --C) {
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, --C)
+ dbgs() << llvm::left_justify("| x", ColWidth);
+ while (C-- >= LastCycle)
+ dbgs() << llvm::left_justify("|", ColWidth);
+ // Place end char
+ dbgs() << "| \n";
+ }
+ }
+}
+#endif
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
+ if (MISchedDumpScheduleTrace) {
+ if (ForceTopDown)
+ dumpScheduleTraceTopDown();
+ else if (ForceBottomUp)
+ dumpScheduleTraceBottomUp();
+ else {
+ dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+ }
+ }
+
+ for (MachineInstr &MI : *this) {
+ if (SUnit *SU = getSUnit(&MI))
+ dumpNode(*SU);
+ else
+ dbgs() << "Missing SUnit\n";
+ }
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGMILive::~ScheduleDAGMILive() {
+ delete DFSResult;
+}
+
+void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
+ const MachineInstr &MI = *SU.getInstr();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (TrackLaneMasks && !MO.isUse())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+
+ // Ignore re-defs.
+ if (TrackLaneMasks) {
+ bool FoundDef = false;
+ for (const MachineOperand &MO2 : MI.all_defs()) {
+ if (MO2.getReg() == Reg && !MO2.isDead()) {
+ FoundDef = true;
+ break;
+ }
+ }
+ if (FoundDef)
+ continue;
+ }
+
+ // Record this local VReg use.
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == &SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
+ }
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs)
+{
+ // ScheduleDAGMI initializes SchedImpl's per-region policy.
+ ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
+
+ SUPressureDiffs.clear();
+
+ ShouldTrackPressure = SchedImpl->shouldTrackPressure();
+ ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
+
+ assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
+ "ShouldTrackLaneMasks requires ShouldTrackPressure");
+}
+
+// Set up the register pressure trackers for the top scheduled and bottom
+// scheduled regions.
+void ScheduleDAGMILive::initRegPressure() {
+ VRegUses.clear();
+ VRegUses.setUniverse(MRI.getNumVirtRegs());
+ for (SUnit &SU : SUnits)
+ collectVRegUses(SU);
+
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
+ ShouldTrackLaneMasks, false);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, false);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ LLVM_DEBUG(RPTracker.dump());
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ BotRPTracker.initLiveThru(RPTracker);
+ if (!BotRPTracker.getLiveThru().empty()) {
+ TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
+ LLVM_DEBUG(dbgs() << "Live Thru: ";
+ dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
+  }
+
+ // For each live out vreg reduce the pressure change associated with other
+ // uses of the same vreg below the live-out reaching def.
+ updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd) {
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(&LiveUses);
+ updatePressureDiffs(LiveUses);
+ }
+
+ LLVM_DEBUG(dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
+
+ assert((BotRPTracker.getPos() == RegionEnd ||
+ (RegionEnd->isDebugInstr() &&
+ BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
+ "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ const std::vector<unsigned> &RegionPressure =
+ RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
+ if (RegionPressure[i] > Limit) {
+ LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
+ RegionCriticalPSets.push_back(PressureChange(i));
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Excess PSets: ";
+ for (const PressureChange &RCPS
+ : RegionCriticalPSets) dbgs()
+ << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
+ dbgs() << "\n");
+}
+
+void ScheduleDAGMILive::
+updateScheduledPressure(const SUnit *SU,
+ const std::vector<unsigned> &NewMaxPressure) {
+ const PressureDiff &PDiff = getPressureDiff(SU);
+ unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
+ for (const PressureChange &PC : PDiff) {
+ if (!PC.isValid())
+ break;
+ unsigned ID = PC.getPSet();
+ while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
+ ++CritIdx;
+ if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
+ if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
+ && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
+ RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
+ }
+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
+ if (NewMaxPressure[ID] >= Limit - 2) {
+ LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
+ << NewMaxPressure[ID]
+ << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
+ << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
+ << " livethru)\n");
+ }
+ }
+}
+
+/// Update the PressureDiff array for liveness after scheduling this
+/// instruction.
+void ScheduleDAGMILive::updatePressureDiffs(
+ ArrayRef<RegisterMaskPair> LiveUses) {
+ for (const RegisterMaskPair &P : LiveUses) {
+ Register Reg = P.RegUnit;
+ /// FIXME: Currently assuming single-use physregs.
+ if (!Reg.isVirtual())
+ continue;
+
+ if (ShouldTrackLaneMasks) {
+ // If the register has just become live then other uses won't change
+ // this fact anymore => decrement pressure.
+ // If the register has just become dead then other uses make it come
+ // back to life => increment pressure.
+ bool Decrement = P.LaneMask.any();
+
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit &SU = *V2SU.SU;
+ if (SU.isScheduled || &SU == &ExitSU)
+ continue;
+
+ PressureDiff &PDiff = getPressureDiff(&SU);
+ PDiff.addPressureChange(Reg, Decrement, &MRI);
+ LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
+ << printReg(Reg, TRI) << ':'
+ << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
+ dbgs() << " to "; PDiff.dump(*TRI););
+ }
+ } else {
+ assert(P.LaneMask.any());
+ LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
+ // This may be called before CurrentBottom has been initialized. However,
+ // BotRPTracker must have a valid position. We want the value live into the
+ // instruction or live out of the block, so ask for the previous
+ // instruction's live-out.
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI;
+ MachineBasicBlock::const_iterator I =
+ nextIfDebug(BotRPTracker.getPos(), BB->end());
+ if (I == BB->end())
+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ else {
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
+ VNI = LRQ.valueIn();
+ }
+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+ assert(VNI && "No live value at use.");
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ // If this use comes before the reaching def, it cannot be a last use,
+ // so decrease its pressure change.
+ if (!SU->isScheduled && SU != &ExitSU) {
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to "; PDiff.dump(*TRI););
+ }
+ }
+ }
+ }
+ }
+}
+
+void ScheduleDAGMILive::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits) {
+ dumpNodeAll(SU);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
+ dbgs() << '\n';
+ }
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easily extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMILive then it will want to override this virtual method in order
+/// to update any specialized state.
+void ScheduleDAGMILive::schedule() {
+ LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+ LLVM_DEBUG(SchedImpl->dumpPolicy());
+ buildDAGWithRegPressure();
+
+ postProcessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ if (DFSResult) {
+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+ if (!ScheduledTrees.test(SubtreeID)) {
+ ScheduledTrees.set(SubtreeID);
+ DFSResult->scheduleTree(SubtreeID);
+ SchedImpl->scheduleTree(SubtreeID);
+ }
+ }
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Build the DAG and set up three register pressure trackers.
+void ScheduleDAGMILive::buildDAGWithRegPressure() {
+ if (!ShouldTrackPressure) {
+ RPTracker.reset();
+ RegionCriticalPSets.clear();
+ buildSchedGraph(AA);
+ return;
+ }
+
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
+
+  // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+}
+
+void ScheduleDAGMILive::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
+/// Compute the max cyclic critical path through the DAG. The scheduling DAG
+/// only provides the critical path for single block loops. To handle loops that
+/// span blocks, we could use the vreg path latencies provided by
+/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
+/// available for use in the scheduler.
+///
+/// The cyclic path estimation identifies a def-use pair that crosses the back
+/// edge and considers the depth and height of the nodes. For example, consider
+/// the following instruction sequence where each instruction has unit latency
+/// and defines an eponymous virtual register:
+///
+/// a->b(a,c)->c(b)->d(c)->exit
+///
+/// The cyclic critical path is two cycles: b->c->b
+/// The acyclic critical path is four cycles: a->b->c->d->exit
+/// LiveOutHeight = height(c) = len(c->d->exit) = 2
+/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
+/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
+/// LiveInDepth = depth(b) = len(a->b) = 1
+///
+/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
+/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
+/// CyclicCriticalPath = min(2, 2) = 2
+///
+/// This could be relevant to PostRA scheduling, but is currently implemented
+/// assuming LiveIntervals.
+unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
+ // This only applies to single block loop.
+ if (!BB->isSuccessor(BB))
+ return 0;
+
+ unsigned MaxCyclicLatency = 0;
+ // Visit each live out vreg def to find def/use pairs that cross iterations.
+ for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
+ Register Reg = P.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ if (!DefVNI)
+ continue;
+
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
+ const SUnit *DefSU = getSUnit(DefMI);
+ if (!DefSU)
+ continue;
+
+ unsigned LiveOutHeight = DefSU->getHeight();
+ unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
+ // Visit all local users of the vreg def.
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ if (SU == &ExitSU)
+ continue;
+
+ // Only consider uses of the phi.
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (!LRQ.valueIn()->isPHIDef())
+ continue;
+
+ // Assume that a path spanning two iterations is a cycle, which could
+ // overestimate in strange cases. This allows cyclic latency to be
+ // estimated as the minimum slack of the vreg's depth or height.
+ unsigned CyclicLatency = 0;
+ if (LiveOutDepth > SU->getDepth())
+ CyclicLatency = LiveOutDepth - SU->getDepth();
+
+ unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
+ if (LiveInHeight > LiveOutHeight) {
+ if (LiveInHeight - LiveOutHeight < CyclicLatency)
+ CyclicLatency = LiveInHeight - LiveOutHeight;
+ } else
+ CyclicLatency = 0;
+
+ LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
+ << SU->NodeNum << ") = " << CyclicLatency << "c\n");
+ if (CyclicLatency > MaxCyclicLatency)
+ MaxCyclicLatency = CyclicLatency;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
+ return MaxCyclicLatency;
+}
+
+/// Release ExitSU predecessors and set up scheduler queues. Re-position
+/// the Top RP tracker in case the region beginning has changed.
+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ ScheduleDAGMI::initQueues(TopRoots, BotRoots);
+ if (ShouldTrackPressure) {
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+ }
+}
+
+/// Move an instruction and update register pressure.
+void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ if (ShouldTrackPressure) {
+ // Update top scheduled pressure.
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ TopRPTracker.advance(RegOpers);
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
+ TopRPTracker.getRegSetPressureAtPos(), TRI););
+
+ updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
+ }
+ } else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ BotRPTracker.setPos(CurrentBottom);
+ }
+ if (ShouldTrackPressure) {
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ if (BotRPTracker.getPos() != CurrentBottom)
+ BotRPTracker.recedeSkipDebugValues();
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(RegOpers, &LiveUses);
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
+ BotRPTracker.getRegSetPressureAtPos(), TRI););
+
+ updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
+ updatePressureDiffs(LiveUses);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Post-process the DAG to create cluster edges between neighboring
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+ struct MemOpInfo {
+ SUnit *SU;
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ int64_t Offset;
+ unsigned Width;
+
+ MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
+ int64_t Offset, unsigned Width)
+ : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
+ Width(Width) {}
+
+ static bool Compare(const MachineOperand *const &A,
+ const MachineOperand *const &B) {
+ if (A->getType() != B->getType())
+ return A->getType() < B->getType();
+ if (A->isReg())
+ return A->getReg() < B->getReg();
+ if (A->isFI()) {
+ const MachineFunction &MF = *A->getParent()->getParent()->getParent();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ bool StackGrowsDown = TFI.getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown;
+ return StackGrowsDown ? A->getIndex() > B->getIndex()
+ : A->getIndex() < B->getIndex();
+ }
+
+ llvm_unreachable("MemOpClusterMutation only supports register or frame "
+ "index bases.");
+ }
+
+ bool operator<(const MemOpInfo &RHS) const {
+ // FIXME: Don't compare everything twice. Maybe use C++20 three way
+ // comparison instead when it's available.
+ if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
+ RHS.BaseOps.begin(), RHS.BaseOps.end(),
+ Compare))
+ return true;
+ if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
+ BaseOps.begin(), BaseOps.end(), Compare))
+ return false;
+ if (Offset != RHS.Offset)
+ return Offset < RHS.Offset;
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
+ };
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ bool IsLoad;
+
+public:
+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, bool IsLoad)
+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+protected:
+ void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
+ ScheduleDAGInstrs *DAG);
+ void collectMemOpRecords(std::vector<SUnit> &SUnits,
+ SmallVectorImpl<MemOpInfo> &MemOpRecords);
+ bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+ StoreClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, true) {}
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
+ : nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
+ : nullptr;
+}
+
+} // end namespace llvm
+
+// Sort all the loads/stores first. Then, for each load/store, check the
+// following loads/stores one by one until the first non-dependent one is
+// reached, and call the target hook to see whether they can be clustered.
+// If FastCluster is enabled, we assume that all the loads/stores have already
+// been preprocessed and have no dependencies on each other.
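+//
+// Illustrative example (hypothetical widths, not taken from a real target):
+// if MemOpa already belongs to a cluster recorded in SUnit2ClusterInfo as
+// (length 2, 16 bytes) and the candidate MemOpb has Width 8, the proposed
+// cluster becomes length 3 with 16 + 8 = 24 bytes, and shouldClusterMemOps
+// decides whether that is still profitable.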
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+ ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
+ ScheduleDAGInstrs *DAG) {
+ // Keep track of the current cluster length and bytes for each SUnit.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
+
+  // At this point, `MemOpRecords` must hold at least two mem ops. Try to
+ // cluster mem ops collected within `MemOpRecords` array.
+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+ // Decision to cluster mem ops is taken based on target dependent logic
+ auto MemOpa = MemOpRecords[Idx];
+
+ // Seek for the next load/store to do the cluster.
+ unsigned NextIdx = Idx + 1;
+ for (; NextIdx < End; ++NextIdx)
+ // Skip if MemOpb has been clustered already or has dependency with
+ // MemOpa.
+ if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
+ (FastCluster ||
+ (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
+ !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
+ break;
+ if (NextIdx == End)
+ continue;
+
+ auto MemOpb = MemOpRecords[NextIdx];
+ unsigned ClusterLength = 2;
+ unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
+ if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
+ ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
+ CurrentClusterBytes =
+ SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
+ }
+
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+ CurrentClusterBytes))
+ continue;
+
+ SUnit *SUa = MemOpa.SU;
+ SUnit *SUb = MemOpb.SU;
+ if (SUa->NodeNum > SUb->NodeNum)
+ std::swap(SUa, SUb);
+
+ // FIXME: Is this check really required?
+ if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ ++NumClustered;
+
+ if (IsLoad) {
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since
+ // nearby loads should have effectively the same inputs.
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ } else {
+      // Copy predecessor edges from SUb to SUa so that the SUnits SUb
+      // depends on cannot be scheduled in between SUb and SUa. Successor
+      // edges do not need to be copied from SUa to SUb since nothing will
+      // depend on stores.
+      // Note that we don't need to worry about memory dependencies: we
+      // never try to cluster mem ops that have a memory dependency.
+ for (const SDep &Pred : SUb->Preds) {
+ if (Pred.getSUnit() == SUa)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
+ }
+ }
+
+ SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
+ CurrentClusterBytes};
+
+ LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
+ << ", Curr cluster bytes: " << CurrentClusterBytes
+ << "\n");
+ }
+}
+
+void BaseMemOpClusterMutation::collectMemOpRecords(
+ std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
+ for (auto &SU : SUnits) {
+ if ((IsLoad && !SU.getInstr()->mayLoad()) ||
+ (!IsLoad && !SU.getInstr()->mayStore()))
+ continue;
+
+ const MachineInstr &MI = *SU.getInstr();
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ int64_t Offset;
+ bool OffsetIsScalable;
+ unsigned Width;
+ if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+ OffsetIsScalable, Width, TRI)) {
+ MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+
+ LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+ << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+ << ", Width: " << Width << "\n");
+ }
+#ifndef NDEBUG
+ for (const auto *Op : BaseOps)
+ assert(Op);
+#endif
+ }
+}
+
+bool BaseMemOpClusterMutation::groupMemOps(
+ ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
+ bool FastCluster =
+ ForceFastCluster ||
+ MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
+
+ for (const auto &MemOp : MemOps) {
+ unsigned ChainPredID = DAG->SUnits.size();
+ if (FastCluster) {
+ for (const SDep &Pred : MemOp.SU->Preds) {
+        // We only want to cluster mem ops that have the same ctrl (non-data)
+        // pred, so that they have no ctrl dependency on each other. But for
+        // store instrs, we can still cluster them if the pred is a load instr.
+ if ((Pred.isCtrl() &&
+ (IsLoad ||
+ (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
+ !Pred.isArtificial()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
+ break;
+ }
+ }
+ } else
+ ChainPredID = 0;
+
+ Groups[ChainPredID].push_back(MemOp);
+ }
+ return FastCluster;
+}
+
+/// Callback from DAG postProcessing to create cluster edges for loads/stores.
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
+ // Collect all the clusterable loads/stores
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ collectMemOpRecords(DAG->SUnits, MemOpRecords);
+
+ if (MemOpRecords.size() < 2)
+ return;
+
+  // If the DAG is too complex, use a heuristic to put independent
+  // loads/stores into the same group, to avoid a compile-time blow-up.
+  // Note that some fusion pairs may be lost this way.
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
+ bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
+
+ for (auto &Group : Groups) {
+    // Sort the loads/stores so that we can stop clustering as early as
+    // possible.
+ llvm::sort(Group.second);
+
+    // Try to cluster all the neighboring loads/stores.
+ clusterNeighboringMemOps(Group.second, FastCluster, DAG);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+ // Transient state.
+ SlotIndex RegionBeginIdx;
+
+ // RegionEndIdx is the slot index of the last non-debug instruction in the
+ // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+ SlotIndex RegionEndIdx;
+
+public:
+ CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+protected:
+ void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return std::make_unique<CopyConstrain>(TII, TRI);
+}
+
+} // end namespace llvm
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+/// I0: = dst
+/// I1: src = ...
+/// I2: = dst
+/// I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+/// I0: dst = src (copy)
+/// I1: = dst
+/// I2: src = ...
+/// I3: = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
+ LiveIntervals *LIS = DAG->getLIS();
+ MachineInstr *Copy = CopySU->getInstr();
+
+ // Check for pure vreg copies.
+ const MachineOperand &SrcOp = Copy->getOperand(1);
+ Register SrcReg = SrcOp.getReg();
+ if (!SrcReg.isVirtual() || !SrcOp.readsReg())
+ return;
+
+ const MachineOperand &DstOp = Copy->getOperand(0);
+ Register DstReg = DstOp.getReg();
+ if (!DstReg.isVirtual() || DstOp.isDead())
+ return;
+
+ // Check if either the dest or source is local. If it's live across a back
+ // edge, it's not local. Note that if both vregs are live across the back
+  // edge, we cannot successfully constrain the copy without cyclic scheduling.
+ // If both the copy's source and dest are local live intervals, then we
+ // should treat the dest as the global for the purpose of adding
+ // constraints. This adds edges from source's other uses to the copy.
+ unsigned LocalReg = SrcReg;
+ unsigned GlobalReg = DstReg;
+ LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+ LocalReg = DstReg;
+ GlobalReg = SrcReg;
+ LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+ return;
+ }
+ LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+ // Find the global segment after the start of the local LI.
+ LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+ // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+ // local live range. We could create edges from other global uses to the local
+ // start, but the coalescer should have already eliminated these cases, so
+ // don't bother dealing with it.
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // If GlobalSegment is killed at the LocalLI->start, the call to find()
+ // returned the next global segment. But if GlobalSegment overlaps with
+ // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+ // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+ if (GlobalSegment->contains(LocalLI->beginIndex()))
+ ++GlobalSegment;
+
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+ if (GlobalSegment != GlobalLI->begin()) {
+ // Two address defs have no hole.
+ if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
+ GlobalSegment->start)) {
+ return;
+ }
+ // If the prior global segment may be defined by the same two-address
+    // instruction that also defines LocalLI, then we can't make a hole here.
+ if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
+ LocalLI->beginIndex())) {
+ return;
+ }
+ // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+ // it would be a disconnected component in the live range.
+ assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
+ "Disconnected LRG within the scheduling region.");
+ }
+ MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+ if (!GlobalDef)
+ return;
+
+ SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+ if (!GlobalSU)
+ return;
+
+ // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+ // constraining the uses of the last local def to precede GlobalDef.
+ SmallVector<SUnit*,8> LocalUses;
+ const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+ MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+ SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+ for (const SDep &Succ : LastLocalSU->Succs) {
+ if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
+ continue;
+ if (Succ.getSUnit() == GlobalSU)
+ continue;
+ if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
+ return;
+ LocalUses.push_back(Succ.getSUnit());
+ }
+ // Open the top of the GlobalLI hole by constraining any earlier global uses
+ // to precede the start of LocalLI.
+ SmallVector<SUnit*,8> GlobalUses;
+ MachineInstr *FirstLocalDef =
+ LIS->getInstructionFromIndex(LocalLI->beginIndex());
+ SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+ for (const SDep &Pred : GlobalSU->Preds) {
+ if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
+ continue;
+ if (Pred.getSUnit() == FirstLocalSU)
+ continue;
+ if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
+ return;
+ GlobalUses.push_back(Pred.getSUnit());
+ }
+ LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+ // Add the weak edges.
+ for (SUnit *LU : LocalUses) {
+ LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
+ << GlobalSU->NodeNum << ")\n");
+ DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
+ }
+ for (SUnit *GU : GlobalUses) {
+ LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
+ << FirstLocalSU->NodeNum << ")\n");
+ DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
+ }
+}
+
+/// Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+ assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
+
+ MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+ if (FirstPos == DAG->end())
+ return;
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
+ RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+ *priorNonDebug(DAG->end(), DAG->begin()));
+
+ for (SUnit &SU : DAG->SUnits) {
+ if (!SU.getInstr()->isCopy())
+ continue;
+
+ constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
+// and possibly other custom schedulers.
+//===----------------------------------------------------------------------===//
+
+static const unsigned InvalidCycle = ~0U;
+
+SchedBoundary::~SchedBoundary() { delete HazardRec; }
+
+/// Given a Count of resource usage and a Latency value, return true if a
+/// SchedBoundary becomes resource limited.
+/// If we are checking after scheduling a node, we should return true when
+/// we just reach the resource limit.
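+/// For example (illustrative numbers): with LFactor == 2, Latency == 3 and
+/// Count == 8, ResCntFactor = 8 - 3 * 2 = 2, which equals LFactor; so the
+/// check after scheduling a node (>=) reports resource limited, while the
+/// check before scheduling (>) does not.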
+static bool checkResourceLimit(unsigned LFactor, unsigned Count,
+ unsigned Latency, bool AfterSchedNode) {
+ int ResCntFactor = (int)(Count - (Latency * LFactor));
+ if (AfterSchedNode)
+ return ResCntFactor >= (int)LFactor;
+ else
+ return ResCntFactor > (int)LFactor;
+}
+
+void SchedBoundary::reset() {
+ // A new HazardRec is created for each DAG and owned by SchedBoundary.
+ // Destroying and reconstructing it is very expensive though. So keep
+ // invalid, placeholder HazardRecs.
+ if (HazardRec && HazardRec->isEnabled()) {
+ delete HazardRec;
+ HazardRec = nullptr;
+ }
+ Available.clear();
+ Pending.clear();
+ CheckPending = false;
+ CurrCycle = 0;
+ CurrMOps = 0;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
+ ExpectedLatency = 0;
+ DependentLatency = 0;
+ RetiredMOps = 0;
+ MaxExecutedResCount = 0;
+ ZoneCritResIdx = 0;
+ IsResourceLimited = false;
+ ReservedCycles.clear();
+ ReservedResourceSegments.clear();
+ ReservedCyclesIndex.clear();
+ ResourceGroupSubUnitMasks.clear();
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ // Track the maximum number of stall cycles that could arise either from the
+ // latency of a DAG edge or the number of cycles that a processor resource is
+ // reserved (SchedBoundary::ReservedCycles).
+ MaxObservedStall = 0;
+#endif
+ // Reserve a zero-count for invalid CritResIdx.
+ ExecutedResCounts.resize(1);
+ assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
+}
+
+void SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+ reset();
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+ for (SUnit &SU : DAG->SUnits) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
+ RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
+ * SchedModel->getMicroOpFactor();
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ assert(PI->Cycles >= PI->StartAtCycle);
+ RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle));
+ }
+ }
+}
+
+void SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+ reset();
+ DAG = dag;
+ SchedModel = smodel;
+ Rem = rem;
+ if (SchedModel->hasInstrSchedModel()) {
+ unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+ ReservedCyclesIndex.resize(ResourceCount);
+ ExecutedResCounts.resize(ResourceCount);
+ ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
+ unsigned NumUnits = 0;
+
+ for (unsigned i = 0; i < ResourceCount; ++i) {
+ ReservedCyclesIndex[i] = NumUnits;
+ NumUnits += SchedModel->getProcResource(i)->NumUnits;
+ if (isUnbufferedGroup(i)) {
+ auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
+ for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
+ U != UE; ++U)
+ ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
+ }
+ }
+
+ ReservedCycles.resize(NumUnits, InvalidCycle);
+ }
+}
+
+/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
+/// these "soft stalls" differently than the hard stall cycles based on CPU
+/// resources and computed by checkHazard(). A fully in-order model
+/// (MicroOpBufferSize==0) will not make use of this since instructions are not
+/// available for scheduling until they are ready. However, a weaker in-order
+/// model may use this for heuristics. For example, if a processor has in-order
+/// behavior when reading certain resources, this may come into play.
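+///
+/// For example (hypothetical cycles): for an unbuffered SU in the bottom zone
+/// with BotReadyCycle == 7 while CurrCycle == 5, the soft stall reported here
+/// is 7 - 5 = 2 cycles.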
+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
+ if (!SU->isUnbuffered)
+ return 0;
+
+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+ if (ReadyCycle > CurrCycle)
+ return ReadyCycle - CurrCycle;
+ return 0;
+}
+
+/// Compute the next cycle at which the given processor resource unit
+/// can be scheduled.
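+///
+/// For example (hypothetical cycles, non-interval ReservedCycles path):
+/// scheduling bottom-up with ReservedCycles[InstanceIdx] == 6, CurrCycle == 4
+/// and Cycles == 3, the unit next becomes available at max(4, 6 + 3) = 9.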
+unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
+ unsigned Cycles,
+ unsigned StartAtCycle) {
+ if (SchedModel && SchedModel->enableIntervals()) {
+ if (isTop())
+ return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
+ CurrCycle, StartAtCycle, Cycles);
+
+ return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
+ CurrCycle, StartAtCycle, Cycles);
+ }
+
+ unsigned NextUnreserved = ReservedCycles[InstanceIdx];
+ // If this resource has never been used, always return cycle zero.
+ if (NextUnreserved == InvalidCycle)
+ return CurrCycle;
+ // For bottom-up scheduling add the cycles needed for the current operation.
+ if (!isTop())
+ NextUnreserved = std::max(CurrCycle, NextUnreserved + Cycles);
+ return NextUnreserved;
+}
+
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled. Returns the next cycle and the index of the processor resource
+/// instance in the reserved cycles vector.
+std::pair<unsigned, unsigned>
+SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
+ unsigned Cycles, unsigned StartAtCycle) {
+ if (MischedDetailResourceBooking) {
+ LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
+ LLVM_DEBUG(dumpReservedCycles());
+ LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
+ }
+ unsigned MinNextUnreserved = InvalidCycle;
+ unsigned InstanceIdx = 0;
+ unsigned StartIndex = ReservedCyclesIndex[PIdx];
+ unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+ assert(NumberOfInstances > 0 &&
+ "Cannot have zero instances of a ProcResource");
+
+ if (isUnbufferedGroup(PIdx)) {
+ // If any subunits are used by the instruction, report that the resource
+ // group is available at 0, effectively removing the group record from
+ // hazarding and basing the hazarding decisions on the subunit records.
+ // Otherwise, choose the first available instance from among the subunits.
+ // Specifications which assign cycles to both the subunits and the group or
+ // which use an unbuffered group with buffered subunits will appear to
+ // schedule strangely. In the first case, the additional cycles for the
+ // group will be ignored. In the second, the group will be ignored
+ // entirely.
+ for (const MCWriteProcResEntry &PE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC)))
+ if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
+ return std::make_pair(0u, StartIndex);
+
+ auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
+ for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
+ unsigned NextUnreserved, NextInstanceIdx;
+ std::tie(NextUnreserved, NextInstanceIdx) =
+ getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle);
+ if (MinNextUnreserved > NextUnreserved) {
+ InstanceIdx = NextInstanceIdx;
+ MinNextUnreserved = NextUnreserved;
+ }
+ }
+ return std::make_pair(MinNextUnreserved, InstanceIdx);
+ }
+
+ for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
+ ++I) {
+ unsigned NextUnreserved =
+ getNextResourceCycleByInstance(I, Cycles, StartAtCycle);
+ if (MischedDetailResourceBooking)
+ LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
+ << NextUnreserved << "c\n");
+ if (MinNextUnreserved > NextUnreserved) {
+ InstanceIdx = I;
+ MinNextUnreserved = NextUnreserved;
+ }
+ }
+ if (MischedDetailResourceBooking)
+ LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
+ << "[" << InstanceIdx - StartIndex << "]"
+ << " available @" << MinNextUnreserved << "c"
+ << "\n");
+ return std::make_pair(MinNextUnreserved, InstanceIdx);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
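+///
+/// For example (hypothetical widths): with an issue width of 4 and
+/// CurrMOps == 3, an SU that decodes to 2 micro-ops would bring the group to
+/// 5 > 4, so this returns true and the node stays pending for now.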
+bool SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled()
+ && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
+ return true;
+ }
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+ return true;
+ }
+
+ if (CurrMOps > 0 &&
+ ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
+ LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
+ << (isTop() ? "begin" : "end") << " group\n");
+ return true;
+ }
+
+ if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (const MCWriteProcResEntry &PE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC))) {
+ unsigned ResIdx = PE.ProcResourceIdx;
+ unsigned Cycles = PE.Cycles;
+ unsigned StartAtCycle = PE.StartAtCycle;
+ unsigned NRCycle, InstanceIdx;
+ std::tie(NRCycle, InstanceIdx) =
+ getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle);
+ if (NRCycle > CurrCycle) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ MaxObservedStall = std::max(Cycles, MaxObservedStall);
+#endif
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
+ << SchedModel->getResourceName(ResIdx)
+ << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
+ << "=" << NRCycle << "c\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// Find the unscheduled node in ReadySUs with the highest latency.
+unsigned SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+ SUnit *LateSU = nullptr;
+ unsigned RemLatency = 0;
+ for (SUnit *SU : ReadySUs) {
+ unsigned L = getUnscheduledLatency(SU);
+ if (L > RemLatency) {
+ RemLatency = L;
+ LateSU = SU;
+ }
+ }
+ if (LateSU) {
+ LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+ << LateSU->NodeNum << ") " << RemLatency << "c\n");
+ }
+ return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
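+//
+// Illustrative example (hypothetical scaled units): with RemIssueCount == 12
+// and RetiredMOps * MicroOpFactor == 8, the issue baseline is 20; a resource
+// with 10 units already counted in this zone and 14 still remaining yields
+// 24 > 20, so it becomes OtherCritIdx and 24 is returned.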
+unsigned SchedBoundary::
+getOtherResourceCount(unsigned &OtherCritIdx) {
+ OtherCritIdx = 0;
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ unsigned OtherCritCount = Rem->RemIssueCount
+ + (RetiredMOps * SchedModel->getMicroOpFactor());
+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
+ << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+ for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
+ if (OtherCount > OtherCritCount) {
+ OtherCritCount = OtherCount;
+ OtherCritIdx = PIdx;
+ }
+ }
+ if (OtherCritIdx) {
+ LLVM_DEBUG(
+ dbgs() << " " << Available.getName() << " + Remain CritRes: "
+ << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+ << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
+ }
+ return OtherCritCount;
+}
+
+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
+ unsigned Idx) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+  // ReadyCycle was bumped up to CurrCycle when this node was
+ // scheduled, but CurrCycle may have been eagerly advanced immediately after
+ // scheduling, so may now be greater than ReadyCycle.
+ if (ReadyCycle > CurrCycle)
+ MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+#endif
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
+ bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
+ checkHazard(SU) || (Available.size() >= ReadyListLimit);
+
+ if (!HazardDetected) {
+ Available.push(SU);
+
+ if (InPQueue)
+ Pending.remove(Pending.begin() + Idx);
+ return;
+ }
+
+ if (!InPQueue)
+ Pending.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
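+///
+/// For example (hypothetical width): with an issue width of 4, advancing the
+/// boundary by one cycle clears up to 4 micro-ops of the current issue group,
+/// so CurrMOps == 3 drops to 0.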
+void SchedBoundary::bumpCycle(unsigned NextCycle) {
+ if (SchedModel->getMicroOpBufferSize() == 0) {
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
+ if (MinReadyCycle > NextCycle)
+ NextCycle = MinReadyCycle;
+ }
+ // Update the current micro-ops, which will issue in the next cycle.
+ unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
+ CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
+
+ // Decrement DependentLatency based on the next cycle.
+ if ((NextCycle - CurrCycle) > DependentLatency)
+ DependentLatency = 0;
+ else
+ DependentLatency -= (NextCycle - CurrCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+ IsResourceLimited =
+ checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
+ getScheduledLatency(), true);
+
+ LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
+ << '\n');
+}
+
+void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
+ ExecutedResCounts[PIdx] += Count;
+ if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
+ MaxExecutedResCount = ExecutedResCounts[PIdx];
+}
+
+/// Add the given processor resource to this scheduled zone.
+///
+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
+/// during which this resource is consumed.
+///
+/// \return the next cycle at which the instruction may execute without
+/// oversubscribing resources.
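+///
+/// For example (hypothetical model values): with a resource factor of 2,
+/// Cycles == 3 and StartAtCycle == 1, the executed count for PIdx grows by
+/// 2 * (3 - 1) = 4 units, and the same amount is subtracted from the
+/// remaining counts tracked in SchedRemainder.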
+unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
+ unsigned Cycles, unsigned NextCycle,
+ unsigned StartAtCycle) {
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ unsigned Count = Factor * (Cycles - StartAtCycle);
+ LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
+ << Cycles << "x" << Factor << "u\n");
+
+ // Update Executed resources counts.
+ incExecutedResources(PIdx, Count);
+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+ Rem->RemainingCounts[PIdx] -= Count;
+
+ // Check if this resource exceeds the current critical resource. If so, it
+ // becomes the critical resource.
+ if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
+ ZoneCritResIdx = PIdx;
+ LLVM_DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getResourceName(PIdx) << ": "
+ << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
+ << "c\n");
+ }
+ // For reserved resources, record the highest cycle using the resource.
+ unsigned NextAvailable, InstanceIdx;
+ std::tie(NextAvailable, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle);
+ if (NextAvailable > CurrCycle) {
+ LLVM_DEBUG(dbgs() << " Resource conflict: "
+ << SchedModel->getResourceName(PIdx)
+ << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
+ << " reserved until @" << NextAvailable << "\n");
+ }
+ return NextAvailable;
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void SchedBoundary::bumpNode(SUnit *SU) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ // Scheduling an instruction may have made pending instructions available.
+ CheckPending = true;
+ }
+ // checkHazard should prevent scheduling multiple instructions per cycle that
+ // exceed the issue width.
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
+ assert(
+ (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
+ "Cannot schedule this instruction's MicroOps in the current cycle.");
+
+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+ LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
+
+ unsigned NextCycle = CurrCycle;
+ switch (SchedModel->getMicroOpBufferSize()) {
+ case 0:
+ assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+ break;
+ case 1:
+ if (ReadyCycle > NextCycle) {
+ NextCycle = ReadyCycle;
+ LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
+ }
+ break;
+ default:
+ // We don't currently model the OOO reorder buffer, so consider all
+ // scheduled MOps to be "retired". We do loosely model in-order resource
+ // latency. If this instruction uses an in-order resource, account for any
+ // likely stall cycles.
+ if (SU->isUnbuffered && ReadyCycle > NextCycle)
+ NextCycle = ReadyCycle;
+ break;
+ }
+ RetiredMOps += IncMOps;
+
+ // Update resource counts and critical resource.
+ if (SchedModel->hasInstrSchedModel()) {
+ unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
+ assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
+ Rem->RemIssueCount -= DecRemIssue;
+ if (ZoneCritResIdx) {
+ // Scale scheduled micro-ops for comparing with the critical resource.
+ unsigned ScaledMOps =
+ RetiredMOps * SchedModel->getMicroOpFactor();
+
+ // If scaled micro-ops are now more than the previous critical resource by
+ // a full cycle, then micro-ops issue becomes critical.
+ if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
+ >= (int)SchedModel->getLatencyFactor()) {
+ ZoneCritResIdx = 0;
+ LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
+ << ScaledMOps / SchedModel->getLatencyFactor()
+ << "c\n");
+ }
+ }
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles,
+ NextCycle, PI->StartAtCycle);
+ if (RCycle > NextCycle)
+ NextCycle = RCycle;
+ }
+ if (SU->hasReservedResource) {
+ // For reserved resources, record the highest cycle using the resource.
+ // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operation reserves the
+      // resource. For bottom-up it is simply the instruction's cycle.
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+
+ if (SchedModel && SchedModel->enableIntervals()) {
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ if (isTop()) {
+ ReservedResourceSegments[InstanceIdx].add(
+ ResourceSegments::getResourceIntervalTop(
+ NextCycle, PI->StartAtCycle, PI->Cycles),
+ MIResourceCutOff);
+ } else {
+ ReservedResourceSegments[InstanceIdx].add(
+ ResourceSegments::getResourceIntervalBottom(
+ NextCycle, PI->StartAtCycle, PI->Cycles),
+ MIResourceCutOff);
+ }
+ } else {
+
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ if (isTop()) {
+ ReservedCycles[InstanceIdx] =
+ std::max(ReservedUntil, NextCycle + PI->Cycles);
+ } else
+ ReservedCycles[InstanceIdx] = NextCycle;
+ }
+ }
+ }
+ }
+ }
+ // Update ExpectedLatency and DependentLatency.
+ unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
+ unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
+ if (SU->getDepth() > TopLatency) {
+ TopLatency = SU->getDepth();
+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
+ << SU->NodeNum << ") " << TopLatency << "c\n");
+ }
+ if (SU->getHeight() > BotLatency) {
+ BotLatency = SU->getHeight();
+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
+ << SU->NodeNum << ") " << BotLatency << "c\n");
+ }
+ // If we stall for any reason, bump the cycle.
+ if (NextCycle > CurrCycle)
+ bumpCycle(NextCycle);
+ else
+ // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+ // resource limited. If a stall occurred, bumpCycle does this.
+ IsResourceLimited =
+ checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
+ getScheduledLatency(), true);
+
+ // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+ // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+ // one cycle. Since we commonly reach the max MOps here, opportunistically
+ // bump the cycle to avoid uselessly checking everything in the readyQ.
+ CurrMOps += IncMOps;
+
+ // Bump the cycle count for issue group constraints.
+  // This must be done after NextCycle has been adjusted for all other stalls.
+  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
+  // CurrCycle to X.
+ if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
+ LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
+ << " group\n");
+ bumpCycle(++NextCycle);
+ }
+
+ while (CurrMOps >= SchedModel->getIssueWidth()) {
+ LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
+ << CurrCycle << '\n');
+ bumpCycle(++NextCycle);
+ }
+ LLVM_DEBUG(dumpScheduledState());
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
+ SUnit *SU = *(Pending.begin() + I);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (Available.size() >= ReadyListLimit)
+ break;
+
+ releaseNode(SU, ReadyCycle, true, I);
+ if (E != Pending.size()) {
+ --I;
+ --E;
+ }
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
+SUnit *SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
+ }
+ ++I;
+ }
+ for (unsigned i = 0; Available.empty(); ++i) {
+// FIXME: Re-enable assert once PR20057 is resolved.
+// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
+// "permanent hazard");
+ (void)i;
+ bumpCycle(CurrCycle + 1);
+ releasePending();
+ }
+
+ LLVM_DEBUG(Pending.dump());
+ LLVM_DEBUG(Available.dump());
+
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+/// Dump the content of the \ref ReservedCycles vector for the
+/// resources that are used in the basic block.
+///
+LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+
+ unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+ unsigned StartIdx = 0;
+
+ for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
+ const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
+ std::string ResName = SchedModel->getResourceName(ResIdx);
+ for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
+ dbgs() << ResName << "(" << UnitIdx << ") = ";
+ if (SchedModel && SchedModel->enableIntervals()) {
+ if (ReservedResourceSegments.count(StartIdx + UnitIdx))
+ dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
+ else
+ dbgs() << "{ }\n";
+ } else
+ dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
+ }
+ StartIdx += NumUnits;
+ }
+}
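+// Illustrative output (hypothetical resource names and values): with intervals
+// disabled, the dump above prints one line per resource unit of the form
+//   ALU(0) = 7
+//   ALU(1) = 5
+// i.e. the cycle up to which each unit is currently reserved; with intervals
+// enabled it prints the reserved segments instead, or "{ }" when none exist.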
+
+// This is useful information to dump after bumpNode.
+// Note that the Queue contents are more useful before pickNodeFromQueue.
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
+ unsigned ResFactor;
+ unsigned ResCount;
+ if (ZoneCritResIdx) {
+ ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
+ ResCount = getResourceCount(ZoneCritResIdx);
+ } else {
+ ResFactor = SchedModel->getMicroOpFactor();
+ ResCount = RetiredMOps * ResFactor;
+ }
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
+ << " Retired: " << RetiredMOps;
+ dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
+ dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
+ << ResCount / ResFactor << " "
+ << SchedModel->getResourceName(ZoneCritResIdx)
+ << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
+ << (IsResourceLimited ? " - Resource" : " - Latency")
+ << " limited.\n";
+ if (MISchedDumpReservedCycles)
+ dumpReservedCycles();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// GenericScheduler - Generic implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void GenericSchedulerBase::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel) {
+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+ return;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+ ResDelta.CritResources += PI->Cycles;
+ if (PI->ProcResourceIdx == Policy.DemandResIdx)
+ ResDelta.DemandedResources += PI->Cycles;
+ }
+}
+
+/// Compute remaining latency. We need this both to determine whether the
+/// overall schedule has become latency-limited and whether the instructions
+/// outside this zone are resource or latency limited.
+///
+/// The "dependent" latency is updated incrementally during scheduling as the
+/// max height/depth of scheduled nodes minus the cycles since it was
+/// scheduled:
+/// DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
+///
+/// The "independent" latency is the max ready queue depth:
+/// ILat = max N.depth for N in Available|Pending
+///
+/// RemainingLatency is the greater of independent and dependent latency.
+///
+/// These computations are expensive, especially in DAGs with many edges, so
+/// only do them if necessary.
+static unsigned computeRemLatency(SchedBoundary &CurrZone) {
+ unsigned RemLatency = CurrZone.getDependentLatency();
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Available.elements()));
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+ return RemLatency;
+}
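+// Worked example (hypothetical numbers): if the zone's dependent latency is 4,
+// the largest remaining latency found among the Available nodes is 7, and the
+// largest among the Pending nodes is 5, computeRemLatency returns
+//   max(4, 7, 5) = 7
+// and the zone becomes latency limited once CurrCycle + 7 exceeds the region's
+// critical path (see shouldReduceLatency below).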
+
+/// Returns true if the current cycle plus remaining latency is greater than
+/// the critical path in the scheduling region.
+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
+ SchedBoundary &CurrZone,
+ bool ComputeRemLatency,
+ unsigned &RemLatency) const {
+ // The current cycle is already greater than the critical path, so we are
+ // already latency limited and don't need to compute the remaining latency.
+ if (CurrZone.getCurrCycle() > Rem.CriticalPath)
+ return true;
+
+ // If we haven't scheduled anything yet, then we aren't latency limited.
+ if (CurrZone.getCurrCycle() == 0)
+ return false;
+
+ if (ComputeRemLatency)
+ RemLatency = computeRemLatency(CurrZone);
+
+ return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
+}
+
+/// Set the CandPolicy for a scheduling zone, given the current resources and
+/// latencies inside and outside the zone.
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
+ SchedBoundary &CurrZone,
+ SchedBoundary *OtherZone) {
+ // Apply preemptive heuristics based on the total latency and resources
+ // inside and outside this zone. Potential stalls should be considered before
+ // following this policy.
+
+ // Compute the critical resource outside the zone.
+ unsigned OtherCritIdx = 0;
+ unsigned OtherCount =
+ OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
+
+ bool OtherResLimited = false;
+ unsigned RemLatency = 0;
+ bool RemLatencyComputed = false;
+ if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
+ RemLatency = computeRemLatency(CurrZone);
+ RemLatencyComputed = true;
+ OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
+ OtherCount, RemLatency, false);
+ }
+
+ // Schedule aggressively for latency in PostRA mode. We don't check for
+ // acyclic latency during PostRA, and highly out-of-order processors will
+ // skip PostRA scheduling.
+ if (!OtherResLimited &&
+ (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
+ RemLatency))) {
+ Policy.ReduceLatency |= true;
+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
+ }
+ // If the same resource is limiting inside and outside the zone, do nothing.
+ if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
+ return;
+
+ LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
+ dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
+ << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
+ } if (OtherResLimited) dbgs()
+ << " RemainingLimit: "
+ << SchedModel->getResourceName(OtherCritIdx) << "\n";
+ if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
+ << " Latency limited both directions.\n");
+
+ if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
+ Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
+
+ if (OtherResLimited)
+ Policy.DemandResIdx = OtherCritIdx;
+}
+
+#ifndef NDEBUG
+const char *GenericSchedulerBase::getReasonStr(
+ GenericSchedulerBase::CandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND ";
+ case Only1: return "ONLY1 ";
+ case PhysReg: return "PHYS-REG ";
+ case RegExcess: return "REG-EXCESS";
+ case RegCritical: return "REG-CRIT ";
+ case Stall: return "STALL ";
+ case Cluster: return "CLUSTER ";
+ case Weak: return "WEAK ";
+ case RegMax: return "REG-MAX ";
+ case ResourceReduce: return "RES-REDUCE";
+ case ResourceDemand: return "RES-DEMAND";
+ case TopDepthReduce: return "TOP-DEPTH ";
+ case TopPathReduce: return "TOP-PATH ";
+ case BotHeightReduce:return "BOT-HEIGHT";
+ case BotPathReduce: return "BOT-PATH ";
+ case NextDefUse: return "DEF-USE ";
+ case NodeOrder: return "ORDER ";
+ };
+ llvm_unreachable("Unknown reason!");
+}
+
+void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
+ PressureChange P;
+ unsigned ResIdx = 0;
+ unsigned Latency = 0;
+ switch (Cand.Reason) {
+ default:
+ break;
+ case RegExcess:
+ P = Cand.RPDelta.Excess;
+ break;
+ case RegCritical:
+ P = Cand.RPDelta.CriticalMax;
+ break;
+ case RegMax:
+ P = Cand.RPDelta.CurrentMax;
+ break;
+ case ResourceReduce:
+ ResIdx = Cand.Policy.ReduceResIdx;
+ break;
+ case ResourceDemand:
+ ResIdx = Cand.Policy.DemandResIdx;
+ break;
+ case TopDepthReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ case TopPathReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotHeightReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotPathReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ }
+ dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ if (P.isValid())
+ dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
+ << ":" << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ if (ResIdx)
+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
+ else
+ dbgs() << " ";
+ if (Latency)
+ dbgs() << " " << Latency << " cycles ";
+ else
+ dbgs() << " ";
+ dbgs() << '\n';
+}
+#endif
+
+namespace llvm {
+/// Return true if this heuristic determines order.
+/// TODO: Consider refactor return type of these functions as integer or enum,
+/// as we may need to differentiate whether TryCand is better than Cand.
+bool tryLess(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+bool tryGreater(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
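+// Usage sketch (descriptive, mirroring the Stall check in
+// GenericScheduler::tryCandidate below): callers compare one feature at a
+// time, e.g.
+//   if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+//               Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+//     return TryCand.Reason != NoCand;
+// A true return means this feature decided the comparison: either TryCand won
+// and its Reason was set, or Cand won and Cand.Reason was strengthened to this
+// Reason so that debug traces report why; the caller then stops evaluating
+// weaker heuristics.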
+
+bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ SchedBoundary &Zone) {
+ if (Zone.isTop()) {
+ // Prefer the candidate with the lesser depth, but only if one of them has
+ // depth greater than the total latency scheduled so far, otherwise either
+ // of them could be scheduled now with no stall.
+ if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
+ Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericSchedulerBase::TopPathReduce))
+ return true;
+ } else {
+ // Prefer the candidate with the lesser height, but only if one of them has
+ // height greater than the total latency scheduled so far, otherwise either
+ // of them could be scheduled now with no stall.
+ if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
+ Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericSchedulerBase::BotPathReduce))
+ return true;
+ }
+ return false;
+}
+} // end namespace llvm
+
+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
+ LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
+ tracePick(Cand.Reason, Cand.AtTop);
+}
+
+void GenericScheduler::initialize(ScheduleDAGMI *dag) {
+ assert(dag->hasVRegLiveness() &&
+ "(PreRA)GenericScheduler needs vreg liveness");
+ DAG = static_cast<ScheduleDAGMILive*>(dag);
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ if (RegionPolicy.ComputeDFSResult)
+ DAG->computeDFSResult();
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize resource counts.
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+ if (!Bot.HazardRec) {
+ Bot.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+ TopCand.SU = nullptr;
+ BotCand.SU = nullptr;
+}
+
+/// Initialize the per-region scheduling policy.
+void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ const MachineFunction &MF = *Begin->getMF();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+
+ // Avoid setting up the register pressure tracker for small regions to save
+ // compile time. As a rough heuristic, only track pressure when the number of
+ // schedulable instructions exceeds half the integer register file.
+ RegionPolicy.ShouldTrackPressure = true;
+ for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
+ MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
+ if (TLI->isTypeLegal(LegalIntVT)) {
+ unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
+ TLI->getRegClassFor(LegalIntVT));
+ RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+ }
+ }
+
+ // For generic targets, we default to bottom-up, because it's simpler and more
+ // compile-time optimizations have been implemented in that direction.
+ RegionPolicy.OnlyBottomUp = true;
+
+ // Allow the subtarget to override default policy.
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
+
+ // After subtarget overrides, apply command line options.
+ if (!EnableRegPressure) {
+ RegionPolicy.ShouldTrackPressure = false;
+ RegionPolicy.ShouldTrackLaneMasks = false;
+ }
+
+ // Check -misched-topdown/bottomup can force or unforce scheduling direction.
+ // e.g. -misched-bottomup=false allows scheduling in both directions.
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ if (ForceBottomUp.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyBottomUp = ForceBottomUp;
+ if (RegionPolicy.OnlyBottomUp)
+ RegionPolicy.OnlyTopDown = false;
+ }
+ if (ForceTopDown.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyTopDown = ForceTopDown;
+ if (RegionPolicy.OnlyTopDown)
+ RegionPolicy.OnlyBottomUp = false;
+ }
+}
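+// Worked example (hypothetical numbers): if the last legal integer type
+// checked in the loop above has 32 allocatable registers, pressure tracking
+// remains enabled only for regions with more than 32 / 2 = 16 schedulable
+// instructions, before the subtarget override and the EnableRegPressure
+// option are applied; smaller regions skip the tracker to save compile time.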
+
+void GenericScheduler::dumpPolicy() const {
+ // Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << "GenericScheduler RegionPolicy: "
+ << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
+ << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
+ << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
+ << "\n";
+#endif
+}
+
+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
+/// critical path by more cycles than it takes to drain the instruction buffer.
+/// We estimate an upper bound on in-flight instructions as:
+///
+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
+/// InFlightIterations = AcyclicPath / CyclesPerIteration
+/// InFlightResources = InFlightIterations * LoopResources
+///
+/// TODO: Check execution resources in addition to IssueCount.
+void GenericScheduler::checkAcyclicLatency() {
+ if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+ return;
+
+ // Scaled number of cycles per loop iteration.
+ unsigned IterCount =
+ std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
+ Rem.RemIssueCount);
+ // Scaled acyclic critical path.
+ unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
+ // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
+ unsigned InFlightCount =
+ (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
+ unsigned BufferLimit =
+ SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+
+ Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
+
+ LLVM_DEBUG(
+ dbgs() << "IssueCycles="
+ << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
+ << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
+ << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
+ << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
+ << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
+ if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
+}
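+// Worked example using the formulas in the comment above checkAcyclicLatency
+// (hypothetical numbers, ignoring the latency/micro-op scale factors): with an
+// acyclic path of 40 cycles, a cyclic critical path of 10 cycles, a loop
+// resource height of 12 cycles and 24 micro-ops issued per iteration,
+//   CyclesPerIteration = max(10, 12)  = 12
+//   InFlightIterations = 40 / 12     ~= 3.3
+//   InFlightResources  = 3.3 * 24    ~= 80 micro-ops
+// so the loop is flagged acyclic-latency limited whenever roughly 80 micro-ops
+// exceeds the machine model's micro-op buffer size.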
+
+void GenericScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (const SUnit *SU : Bot.Available) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
+ }
+ LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
+ }
+
+ if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
+ Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+ checkAcyclicLatency();
+ }
+}
+
+namespace llvm {
+bool tryPressure(const PressureChange &TryP,
+ const PressureChange &CandP,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) {
+ // If one candidate decreases and the other increases, go with it.
+ // Invalid candidates have UnitInc==0.
+ if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+ Reason)) {
+ return true;
+ }
+ // Do not compare the magnitude of pressure changes between top and bottom
+ // boundary.
+ if (Cand.AtTop != TryCand.AtTop)
+ return false;
+
+ // If both candidates affect the same set in the same boundary, go with the
+ // smallest increase.
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
+ if (TryPSet == CandPSet) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+ Reason);
+ }
+
+ int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
+ std::numeric_limits<int>::max();
+
+ int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
+ std::numeric_limits<int>::max();
+
+ // If the candidates are decreasing pressure, reverse priority.
+ if (TryP.getUnitInc() < 0)
+ std::swap(TryRank, CandRank);
+ return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
+}
+
+unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+int biasPhysReg(const SUnit *SU, bool isTop) {
+ const MachineInstr *MI = SU->getInstr();
+
+ if (MI->isCopy()) {
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+    // If we have already scheduled the physreg producer/consumer, immediately
+ // schedule the copy.
+ if (MI->getOperand(ScheduledOper).getReg().isPhysical())
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
+ return AtBoundary ? -1 : 1;
+ }
+
+ if (MI->isMoveImmediate()) {
+ // If we have a move immediate and all successors have been assigned, bias
+ // towards scheduling this later. Make sure all register defs are to
+ // physical registers.
+ bool DoBias = true;
+ for (const MachineOperand &Op : MI->defs()) {
+ if (Op.isReg() && !Op.getReg().isPhysical()) {
+ DoBias = false;
+ break;
+ }
+ }
+
+ if (DoBias)
+ return isTop ? -1 : 1;
+ }
+
+ return 0;
+}
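+// Illustrative example (hypothetical registers and register class): for a copy
+// such as
+//   %1:gr32 = COPY $edi
+// evaluated at the top boundary, the scheduled-side operand (the $edi source)
+// is physical, so biasPhysReg returns 1 and the copy is scheduled immediately,
+// keeping it adjacent to its physreg def/use; the symmetric checks handle the
+// bottom boundary, and a physreg on the not-yet-scheduled side can yield -1 to
+// defer a copy that sits at the region boundary.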
+} // end namespace llvm
+
+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
+ if (DAG->isTrackingPressure()) {
+ if (AtTop) {
+ TempTracker.getMaxDownwardPressureDelta(
+ Cand.SU->getInstr(),
+ Cand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ } else {
+ if (VerifyScheduling) {
+ TempTracker.getMaxUpwardPressureDelta(
+ Cand.SU->getInstr(),
+ &DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ } else {
+ RPTracker.getUpwardPressureDelta(
+ Cand.SU->getInstr(),
+ DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ }
+ }
+ LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
+ << " Try SU(" << Cand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
+ << Cand.RPDelta.Excess.getUnitInc() << "\n");
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+/// if Cand is from a different zone than TryCand.
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+ return TryCand.Reason != NoCand;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
+ Cand.RPDelta.Excess,
+ TryCand, Cand, RegExcess, TRI,
+ DAG->MF))
+ return TryCand.Reason != NoCand;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+ Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical, TRI,
+ DAG->MF))
+ return TryCand.Reason != NoCand;
+
+  // We only compare a subset of features when comparing nodes between the
+  // Top and Bottom boundaries. Some properties are simply incomparable; in
+  // many other instances we should only override the other boundary if
+  // something is a clear good pick on one boundary. Skip heuristics that are
+  // more "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+ // heuristics to take precedence.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return TryCand.Reason != NoCand;
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return TryCand.Reason != NoCand;
+ }
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU,
+ TryCand, Cand, Cluster))
+ return TryCand.Reason != NoCand;
+
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop),
+ TryCand, Cand, Weak))
+ return TryCand.Reason != NoCand;
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
+ Cand.RPDelta.CurrentMax,
+ TryCand, Cand, RegMax, TRI,
+ DAG->MF))
+ return TryCand.Reason != NoCand;
+
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return TryCand.Reason != NoCand;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return TryCand.Reason != NoCand;
+
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return TryCand.Reason != NoCand;
+
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+ }
+
+ return false;
+}
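+// Descriptive note (derived from the code above): the heuristics run strictly
+// in order: physreg bias, excess pressure, critical-set pressure, then (same
+// boundary only) acyclic-latency and stall checks, clustering, (same boundary
+// only) weak edges, max region pressure, and finally (same boundary only)
+// resource reduction/demand, latency, and original node order as the last
+// tie-breaker.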
+
+/// Pick the best candidate from the queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ ReadyQueue &Q = Zone.Available;
+ for (SUnit *SU : Q) {
+
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ if (tryCandidate(Cand, TryCand, ZoneArg)) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(DAG, SchedModel);
+ Cand.setBest(TryCand);
+ LLVM_DEBUG(traceCandidate(Cand));
+ }
+ }
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ tracePick(Only1, false);
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ tracePick(Only1, true);
+ return SU;
+ }
+ // Set the bottom-up policy based on the state of the current bottom zone and
+ // the instructions outside the zone, including the top zone.
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ LLVM_DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
+
+ // Check if the top Q has a better candidate.
+ LLVM_DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ LLVM_DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
+
+ // Pick best from BotCand and TopCand.
+ assert(BotCand.isValid());
+ assert(TopCand.isValid());
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ if (tryCandidate(Cand, TopCand, nullptr)) {
+ Cand.setBest(TopCand);
+ LLVM_DEBUG(traceCandidate(Cand));
+ }
+
+ IsTopNode = Cand.AtTop;
+ tracePick(Cand);
+ return Cand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ do {
+ if (RegionPolicy.OnlyTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(TopCand);
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (RegionPolicy.OnlyBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(BotCand);
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
+ return SU;
+}
+
+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
+ MachineBasicBlock::iterator InsertPos = SU->getInstr();
+ if (!isTop)
+ ++InsertPos;
+ SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+ // Find already scheduled copies with a single physreg dependence and move
+ // them just above the scheduled instruction.
+ for (SDep &Dep : Deps) {
+ if (Dep.getKind() != SDep::Data ||
+ !Register::isPhysicalRegister(Dep.getReg()))
+ continue;
+ SUnit *DepSU = Dep.getSUnit();
+ if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+ continue;
+ MachineInstr *Copy = DepSU->getInstr();
+ if (!Copy->isCopy() && !Copy->isMoveImmediate())
+ continue;
+ LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
+ DAG->dumpNode(*Dep.getSUnit()));
+ DAG->moveInstruction(Copy, InsertPos);
+ }
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysReg.
+void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+ Top.bumpNode(SU);
+ if (SU->hasPhysRegUses)
+ reschedulePhysReg(SU, true);
+ } else {
+ SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
+ Bot.bumpNode(SU);
+ if (SU->hasPhysRegDefs)
+ reschedulePhysReg(SU, false);
+ }
+}
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
+ // Register DAG post-processors.
+ //
+ // FIXME: extend the mutation API to allow earlier mutations to instantiate
+ // data and pass it to later mutations. Have a single mutation that gathers
+ // the interesting nodes in one pass.
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+}
+
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ return createGenericSchedLive(C);
+}
+
+static MachineSchedRegistry
+GenericSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
+
+//===----------------------------------------------------------------------===//
+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+ DAG = Dag;
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ BotRoots.clear();
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+ // or are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+}
+
+void PostGenericScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (const SUnit *SU : BotRoots) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
+ }
+ LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
+ }
+}
+
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+ Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return TryCand.Reason != NoCand;
+
+ // Keep clustered nodes together.
+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+ Cand.SU == DAG->getNextClusterSucc(),
+ TryCand, Cand, Cluster))
+ return TryCand.Reason != NoCand;
+
+ // Avoid critical resource consumption and balance the schedule.
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return TryCand.Reason != NoCand;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return TryCand.Reason != NoCand;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+ return TryCand.Reason != NoCand;
+ }
+
+ // Fall through to original instruction order.
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ return false;
+}
+
+void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
+ ReadyQueue &Q = Top.Available;
+ for (SUnit *SU : Q) {
+ SchedCandidate TryCand(Cand.Policy);
+ TryCand.SU = SU;
+ TryCand.AtTop = true;
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryCandidate(Cand, TryCand)) {
+ Cand.setBest(TryCand);
+ LLVM_DEBUG(traceCandidate(Cand));
+ }
+ }
+}
+
+/// Pick the next node to schedule.
+SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ do {
+ SU = Top.pickOnlyChoice();
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
+ CandPolicy NoPolicy;
+ SchedCandidate TopCand(NoPolicy);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
+ pickNodeFromQueue(TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(TopCand);
+ SU = TopCand.SU;
+ }
+ } while (SU->isScheduled);
+
+ IsTopNode = true;
+ Top.removeReady(SU);
+
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
+ return SU;
+}
+
+/// Called after ScheduleDAGMI has scheduled an instruction and updated
+/// scheduled/remaining flags in the DAG nodes.
+void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+ Top.bumpNode(SU);
+}
+
+ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
+ /*RemoveKillFlags=*/true);
+}
+
+//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Order nodes by the ILP metric.
+struct ILPOrder {
+ const SchedDFSResult *DFSResult = nullptr;
+ const BitVector *ScheduledTrees = nullptr;
+ bool MaximizeILP;
+
+ ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
+
+ /// Apply a less-than relation on node priority.
+ ///
+ /// (Return true if A comes after B in the Q.)
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+ if (SchedTreeA != SchedTreeB) {
+ // Unscheduled trees have lower priority.
+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+ return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA)
+ != DFSResult->getSubtreeLevel(SchedTreeB)) {
+ return DFSResult->getSubtreeLevel(SchedTreeA)
+ < DFSResult->getSubtreeLevel(SchedTreeB);
+ }
+ }
+ if (MaximizeILP)
+ return DFSResult->getILP(A) < DFSResult->getILP(B);
+ else
+ return DFSResult->getILP(A) > DFSResult->getILP(B);
+ }
+};
+
+/// Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ ScheduleDAGMILive *DAG = nullptr;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+
+public:
+ ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
+
+ void initialize(ScheduleDAGMI *dag) override {
+ assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
+ DAG = static_cast<ScheduleDAGMILive*>(dag);
+ DAG->computeDFSResult();
+ Cmp.DFSResult = DAG->getDFSResult();
+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();
+ ReadyQ.clear();
+ }
+
+ void registerRoots() override {
+ // Restore the heap in ReadyQ with the updated DFS results.
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ /// Callback to select the highest priority node from the ready Q.
+ SUnit *pickNode(bool &IsTopNode) override {
+ if (ReadyQ.empty()) return nullptr;
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ LLVM_DEBUG(dbgs() << "Pick node "
+ << "SU(" << SU->NodeNum << ") "
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
+ << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU))
+ << '\n'
+ << "Scheduling " << *SU->getInstr());
+ return SU;
+ }
+
+ /// Scheduler callback to notify that a new subtree is scheduled.
+ void scheduleTree(unsigned SubtreeID) override {
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+ /// DFSResults, and resort the priority Q.
+ void schedNode(SUnit *SU, bool IsTopNode) override {
+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+ }
+
+ void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
+
+ void releaseBottomNode(SUnit *SU) override {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+
+} // end anonymous namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
+}
+
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+  // gives nodes with a higher number higher priority, causing the latest
+ // instructions to be scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
+ TopQ;
+
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
+ BottomQ;
+
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ void initialize(ScheduleDAGMI*) override {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ SUnit *pickNode(bool &IsTopNode) override {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return nullptr;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ } else {
+ do {
+ if (BottomQ.empty()) return nullptr;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ void schedNode(SUnit *SU, bool IsTopNode) override {}
+
+ void releaseTopNode(SUnit *SU) override {
+ TopQ.push(SU);
+ }
+ void releaseBottomNode(SUnit *SU) override {
+ BottomQ.push(SU);
+ }
+};
+
+} // end anonymous namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMILive(
+ C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
+}
+
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMILive.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+ ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return std::string(G->MF.getName());
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
+ if (ViewMISchedCutoff == 0)
+ return false;
+ return (Node->Preds.size() > ViewMISchedCutoff
+ || Node->Succs.size() > ViewMISchedCutoff);
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+ static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+ std::string Str;
+ raw_string_ostream SS(Str);
+ const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
+ const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
+ SS << "SU:" << SU->NodeNum;
+ if (DFS)
+ SS << " I:" << DFS->getNumInstrs(SU);
+ return SS.str();
+ }
+
+ static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+ }
+
+ static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
+ std::string Str("shape=Mrecord");
+ const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
+ const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
+ if (DFS) {
+ Str += ",style=filled,fillcolor=\"#";
+ Str += DOT::getColorString(DFS->getSubtreeID(N));
+ Str += '"';
+ }
+ return Str;
+ }
+};
+
+} // end namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
+
+/// Sort predicate for the intervals stored in an instance of
+/// ResourceSegments. Intervals are always disjoint (no intersection
+/// for any pair of intervals), so we can sort the totality of
+/// the intervals by looking only at the left boundary.
+static bool sortIntervals(const ResourceSegments::IntervalTy &A,
+ const ResourceSegments::IntervalTy &B) {
+ return A.first < B.first;
+}
+
+unsigned ResourceSegments::getFirstAvailableAt(
+ unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle,
+ std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
+ IntervalBuilder) const {
+ assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
+ sortIntervals) &&
+ "Cannot execute on an un-sorted set of intervals.");
+ unsigned RetCycle = CurrCycle;
+ ResourceSegments::IntervalTy NewInterval =
+ IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ for (auto &Interval : _Intervals) {
+ if (!intersects(NewInterval, Interval))
+ continue;
+
+ // Move the interval right next to the top of the one it
+ // intersects.
+ assert(Interval.second > NewInterval.first &&
+ "Invalid intervals configuration.");
+ RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
+ NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ }
+ return RetCycle;
+}
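+// Worked example (hypothetical values): assume the builder maps cycle C to the
+// interval [C, C+3) and _Intervals already holds [2, 5). Starting from
+// CurrCycle = 3, the candidate [3, 6) intersects [2, 5), so RetCycle advances
+// by 5 - 3 = 2 and the candidate is rebuilt as [5, 8), which is disjoint from
+// the stored interval; the function therefore returns 5.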
+
+void ResourceSegments::add(ResourceSegments::IntervalTy A,
+ const unsigned CutOff) {
+ assert(A.first < A.second && "Cannot add empty resource usage");
+ assert(CutOff > 0 && "0-size interval history has no use.");
+ assert(all_of(_Intervals,
+ [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
+ return !intersects(A, Interval);
+ }) &&
+ "A resource is being overwritten");
+ _Intervals.push_back(A);
+
+ sortAndMerge();
+
+ // Do not keep the full history of the intervals, just the
+ // latest #CutOff.
+ while (_Intervals.size() > CutOff)
+ _Intervals.pop_front();
+}
+
+bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
+ ResourceSegments::IntervalTy B) {
+ assert(A.first <= A.second && "Invalid interval");
+ assert(B.first <= B.second && "Invalid interval");
+
+ // Share one boundary.
+ if ((A.first == B.first) || (A.second == B.second))
+ return true;
+
+  // full intersect:        [    ***      )   B
+ // [***) A
+ if ((A.first > B.first) && (A.second < B.second))
+ return true;
+
+ // right intersect: [ ***) B
+ // [*** ) A
+ if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
+ return true;
+
+ // left intersect: [*** ) B
+ // [ ***) A
+ if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
+ return true;
+
+ return false;
+}
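+// Illustrative cases (hypothetical intervals): intersects([2,5), [4,7)) and
+// intersects([2,5), [2,3)) both return true (an overlap and a shared left
+// boundary, respectively), while intersects([2,5), [5,8)) returns false
+// because half-open intervals that merely touch are treated as disjoint.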
+
+void ResourceSegments::sortAndMerge() {
+ if (_Intervals.size() <= 1)
+ return;
+
+ // First sort the collection.
+ _Intervals.sort(sortIntervals);
+
+  // We can use std::next because the list has at least 2 elements.
+ auto next = std::next(std::begin(_Intervals));
+ auto E = std::end(_Intervals);
+ for (; next != E; ++next) {
+ if (std::prev(next)->second >= next->first) {
+ next->first = std::prev(next)->first;
+ _Intervals.erase(std::prev(next));
+ continue;
+ }
+ }
+}
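+// Worked example (hypothetical intervals): starting from [0, 3), [5, 7) and
+// [3, 4), sortAndMerge first sorts them to [0, 3), [3, 4), [5, 7); since the
+// first interval's right boundary (3) reaches the next one's left boundary,
+// the two are merged into [0, 4), leaving [0, 4), [5, 7).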
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 000000000000..8da97dc7e742
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,1892 @@
+//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-sink"
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+UseBlockFreqInfo("machine-sink-bfi",
+ cl::desc("Use block frequency info to find successors to sink"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
+ "machine-sink-split-probability-threshold",
+ cl::desc(
+ "Percentage threshold for splitting single-instruction critical edge. "
+ "If the branch threshold is higher than this threshold, we allow "
+ "speculative execution of up to 1 instruction to avoid branching to "
+ "splitted critical edge"),
+ cl::init(40), cl::Hidden);
+
+static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
+ "machine-sink-load-instrs-threshold",
+ cl::desc("Do not try to find alias store for a load if there is a in-path "
+ "block whose instruction number is higher than this threshold."),
+ cl::init(2000), cl::Hidden);
+
+static cl::opt<unsigned> SinkLoadBlocksThreshold(
+ "machine-sink-load-blocks-threshold",
+ cl::desc("Do not try to find alias store for a load if the block number in "
+ "the straight line is higher than this threshold."),
+ cl::init(20), cl::Hidden);
+
+static cl::opt<bool>
+ SinkInstsIntoCycle("sink-insts-to-avoid-spills",
+ cl::desc("Sink instructions into cycles to avoid "
+ "register spills"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> SinkIntoCycleLimit(
+ "machine-sink-cycle-limit",
+ cl::desc("The maximum number of instructions considered for cycle sinking."),
+ cl::init(50), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
+
+namespace {
+
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree
+ MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
+ MachineCycleInfo *CI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
+ CEBCandidates;
+ // Remember which edges we are about to split.
+ // Unlike CEBCandidates, the edges recorded here will actually
+ // be split.
+ SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
+
+ DenseSet<Register> RegsToClearKillFlags;
+
+ using AllSuccsCache =
+ std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
+
+ /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
+ /// post-dominated by another DBG_VALUE of the same variable location.
+ /// This is necessary to detect sequences such as:
+ /// %0 = someinst
+ /// DBG_VALUE %0, !123, !DIExpression()
+ /// %1 = anotherinst
+ /// DBG_VALUE %1, !123, !DIExpression()
+ /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
+ /// would re-order assignments.
+ using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
+
+ /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
+ /// debug instructions to sink.
+ SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
+
+ /// Record of debug variables that have had their locations set in the
+ /// current block.
+ DenseSet<DebugVariable> SeenDbgVars;
+
+ std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
+ HasStoreCache;
+ std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
+ std::vector<MachineInstr *>>
+ StoreInstrCache;
+
+ /// Cached BB's register pressure.
+ std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
+
+ public:
+ static char ID; // Pass identification
+
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addPreserved<MachineCycleInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfo>();
+ if (UseBlockFreqInfo)
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ }
+
+ void releaseMemory() override {
+ CEBCandidates.clear();
+ }
+
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ void ProcessDbgInst(MachineInstr &MI);
+ bool isWorthBreakingCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+
+ bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
+ MachineInstr &MI);
+
+ /// Postpone the splitting of the given critical
+ /// edge (\p From, \p To).
+ ///
+ /// We do not split the edges on the fly. Indeed, this invalidates
+ /// the dominance information and thus triggers a lot of updates
+ /// of that information underneath.
+ /// Instead, we postpone all the splits after each iteration of
+ /// the main loop. That way, the information is at least valid
+ /// for the lifetime of an iteration.
+ ///
+ /// \return True if the edge is marked as toSplit, false otherwise.
+ /// False can be returned if, for instance, this is not profitable.
+ bool PostponeSplitCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
+ bool SinkInstruction(MachineInstr &MI, bool &SawStore,
+ AllSuccsCache &AllSuccessors);
+
+ /// If we sink a COPY inst, some debug users of its destination may no
+ /// longer be dominated by the COPY, and will eventually be dropped.
+ /// This is easily rectified by forwarding the non-dominated debug uses
+ /// to the copy source.
+ void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
+ MachineBasicBlock *TargetBlock);
+ bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
+ bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
+
+ void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
+ SmallVectorImpl<MachineInstr *> &Candidates);
+ bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
+
+ bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo,
+ AllSuccsCache &AllSuccessors);
+
+ bool PerformTrivialForwardCoalescing(MachineInstr &MI,
+ MachineBasicBlock *MBB);
+
+ SmallVector<MachineBasicBlock *, 4> &
+ GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
+ AllSuccsCache &AllSuccessors) const;
+
+ std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
+ };
+
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+
+char &llvm::MachineSinkingID = MachineSinking::ID;
+
+INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
+ "Machine code sinking", false, false)
+
+/// Return true if a target defined block prologue instruction interferes
+/// with a sink candidate.
+static bool blockPrologueInterferes(const MachineBasicBlock *BB,
+ MachineBasicBlock::const_iterator End,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
+ ++PI) {
+ // Only check target defined prologue instructions
+ if (!TII->isBasicBlockPrologue(*PI))
+ continue;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
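+ // Interference: a prologue instruction modifies a register MI uses, or it
+ // reads or has a non-dead def of a register MI defines.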
+ if (MO.isUse()) {
+ if (Reg.isPhysical() && MRI && MRI->isConstantPhysReg(Reg))
+ continue;
+ if (PI->modifiesRegister(Reg, TRI))
+ return true;
+ } else {
+ if (PI->readsRegister(Reg, TRI))
+ return true;
+ // Check for interference with non-dead defs
+ auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+ if (DefOp && !DefOp->isDead())
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
+ MachineBasicBlock *MBB) {
+ if (!MI.isCopy())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
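+ // Require matching register classes so replacing DstReg with SrcReg keeps
+ // every use legal.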
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ LLVM_DEBUG(dbgs() << "*** to: " << MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI.eraseFromParent();
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MRI->clearKillFlags(SrcReg);
+
+ ++NumCoalesces;
+ return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
+ assert(Reg.isVirtual() && "Only makes sense for vregs");
+
+ // Ignore debug uses because debug info doesn't affect the code.
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
+ // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // %bb.1:
+ // Predecessors according to CFG: %bb.0
+ // ...
+ // %def = DEC64_32r %x, implicit-def dead %eflags
+ // ...
+ // JE_4 <%bb.37>, implicit %eflags
+ // Successors according to CFG: %bb.37 %bb.2
+ //
+ // %bb.2:
+ // %p = PHI %y, %bb.0, %def, %bb.1
+ if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
+ MachineInstr *UseInst = MO.getParent();
+ unsigned OpNo = MO.getOperandNo();
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ return UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
+ })) {
+ BreakPHIEdge = true;
+ return true;
+ }
+
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = MO.getParent();
+ unsigned OpNo = &MO - &UseInst->getOperand(0);
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(OpNo+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
+ }
+
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+
+ return true;
+}
+
+/// Return true if this machine instruction loads from global offset table or
+/// constant pool.
+static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert(MI.mayLoad() && "Expected MI that loads!");
+
+ // If we lost memory operands, conservatively assume that the instruction
+ // reads from everything.
+ if (MI.memoperands_empty())
+ return true;
+
+ for (MachineMemOperand *MemOp : MI.memoperands())
+ if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
+ if (PSV->isGOT() || PSV->isConstantPool())
+ return true;
+
+ return false;
+}
+
+void MachineSinking::FindCycleSinkCandidates(
+ MachineCycle *Cycle, MachineBasicBlock *BB,
+ SmallVectorImpl<MachineInstr *> &Candidates) {
+ for (auto &MI : *BB) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
+ if (!TII->shouldSink(MI)) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
+ "target\n");
+ continue;
+ }
+ if (!isCycleInvariant(Cycle, MI)) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
+ continue;
+ }
+ bool DontMoveAcrossStore = true;
+ if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
+ continue;
+ }
+ if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n");
+ continue;
+ }
+ if (MI.isConvergent())
+ continue;
+
+ const MachineOperand &MO = MI.getOperand(0);
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
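+ // Require a single definition of the result register so the value sunk
+ // into the cycle is unambiguous.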
+ if (!MRI->hasOneDef(MO.getReg()))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
+ Candidates.push_back(&MI);
+ }
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
+ CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ RegClassInfo.runOnMachineFunction(MF);
+
+ bool EverMadeChange = false;
+
+ while (true) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ CEBCandidates.clear();
+ ToSplit.clear();
+ for (auto &MBB: MF)
+ MadeChange |= ProcessBlock(MBB);
+
+ // If we have anything we marked as toSplit, split it now.
+ for (const auto &Pair : ToSplit) {
+ auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
+ if (NewSucc != nullptr) {
+ LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
+ << printMBBReference(*Pair.first) << " -- "
+ << printMBBReference(*NewSucc) << " -- "
+ << printMBBReference(*Pair.second) << '\n');
+ if (MBFI)
+ MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);
+
+ MadeChange = true;
+ ++NumSplit;
+ } else
+ LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
+ }
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+
+ if (SinkInstsIntoCycle) {
+ SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
+ CI->toplevel_end());
+ for (auto *Cycle : Cycles) {
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ if (!Preheader) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
+ continue;
+ }
+ SmallVector<MachineInstr *, 8> Candidates;
+ FindCycleSinkCandidates(Cycle, Preheader, Candidates);
+
+ // Walk the candidates in reverse order so that we start with the use
+ // of a def-use chain, if there is any.
+ // TODO: Sort the candidates using a cost-model.
+ unsigned i = 0;
+ for (MachineInstr *I : llvm::reverse(Candidates)) {
+ if (i++ == SinkIntoCycleLimit) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
+ "be analysed.");
+ break;
+ }
+
+ if (!SinkIntoCycle(Cycle, *I))
+ break;
+ EverMadeChange = true;
+ ++NumCycleSunk;
+ }
+ }
+ }
+
+ HasStoreCache.clear();
+ StoreInstrCache.clear();
+
+ // Now clear any kill flags for recorded registers.
+ for (auto I : RegsToClearKillFlags)
+ MRI->clearKillFlags(I);
+ RegsToClearKillFlags.clear();
+
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+ // Can't sink anything out of a block that has fewer than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable cycle there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&MBB)) return false;
+
+ bool MadeChange = false;
+
+ // Cache all successors, sorted by frequency info and cycle depth.
+ AllSuccsCache AllSuccessors;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr &MI = *I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (MI.isDebugOrPseudoInstr()) {
+ if (MI.isDebugValue())
+ ProcessDbgInst(MI);
+ continue;
+ }
+
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
+ if (SinkInstruction(MI, SawStore, AllSuccessors)) {
+ ++NumSunk;
+ MadeChange = true;
+ }
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ SeenDbgUsers.clear();
+ SeenDbgVars.clear();
+ // Clear the cache so BB register pressure is recalculated after sinking from this BB.
+ CachedRegisterPressure.clear();
+
+ return MadeChange;
+}
+
+void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
+ // When we see DBG_VALUEs for registers, record any vreg it reads, so that
+ // we know what to sink if the vreg def sinks.
+ assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
+
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ bool SeenBefore = SeenDbgVars.contains(Var);
+
+ for (MachineOperand &MO : MI.debug_operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual())
+ SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
+ }
+
+ // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
+ SeenDbgVars.insert(Var);
+}
+
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)).second)
+ return true;
+
+ if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
+ return true;
+
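+ // Splitting is also worth it when the edge is taken rarely enough, i.e. its
+ // probability is at most SplitEdgeProbabilityThreshold percent.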
+ if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
+ BranchProbability(SplitEdgeProbabilityThreshold, 100))
+ return true;
+
+ // MI is cheap, we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
+ for (const MachineOperand &MO : MI.all_uses()) {
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // We don't move live definitions of physical registers,
+ // so sinking their uses won't enable any opportunities.
+ if (Reg.isPhysical())
+ continue;
+
+ // If this instruction is the only user of a virtual register,
+ // check if breaking the edge will enable sinking
+ // both this instruction and the defining instruction.
+ if (MRI->hasOneNonDBGUse(Reg)) {
+ // If the definition resides in the same MBB,
+ // claim it's likely we can sink these together.
+ // If the definition resides elsewhere, we aren't
+ // blocking it from being sunk so don't break the edge.
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() == MI.getParent())
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return false;
+
+ // Avoid breaking a back edge. From == To means a backedge for a single-BB cycle.
+ if (!SplitEdges || FromBB == ToBB)
+ return false;
+
+ MachineCycle *FromCycle = CI->getCycle(FromBB);
+ MachineCycle *ToCycle = CI->getCycle(ToBB);
+
+ // Check for backedges of more "complex" cycles.
+ if (FromCycle == ToCycle && FromCycle &&
+ (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
+ return false;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // %bb.1:
+ // v1024
+ // Beq %bb.3
+ // <fallthrough>
+ // %bb.2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // %bb.3:
+ // ...
+ // = v1024
+ //
+ // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
+ //
+ // %bb.1:
+ // ...
+ // Bne %bb.2
+ // %bb.4:
+ // v1024 =
+ // B %bb.3
+ // %bb.2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // %bb.3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock *Pred : ToBB->predecessors())
+ if (Pred != FromBB && !DT->dominates(ToBB, Pred))
+ return false;
+ }
+
+ ToSplit.insert(std::make_pair(FromBB, ToBB));
+
+ return true;
+}
+
+std::vector<unsigned> &
+MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
+ // Currently, to save compile time, an MBB's register pressure will not change
+ // within one ProcessBlock iteration because of CachedRegisterPressure, even
+ // though the real register pressure changes after sinking any instruction into it.
+ // FIXME: need an accurate and cheap register pressure estimation model here.
+ auto RP = CachedRegisterPressure.find(&MBB);
+ if (RP != CachedRegisterPressure.end())
+ return RP->second;
+
+ RegionPressure Pressure;
+ RegPressureTracker RPTracker(Pressure);
+
+ // Initialize the register pressure tracker.
+ RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
+ /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
+
+ for (MachineBasicBlock::iterator MII = MBB.instr_end(),
+ MIE = MBB.instr_begin();
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (MI.isDebugInstr() || MI.isPseudoProbe())
+ continue;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, false, false);
+ RPTracker.recedeSkipDebugValues();
+ assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
+ RPTracker.recede(RegOpers);
+ }
+
+ RPTracker.closeRegion();
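+ // Cache the per-pressure-set maximums; the cache is invalidated after each
+ // ProcessBlock call.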
+ auto It = CachedRegisterPressure.insert(
+ std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
+ return It.first->second;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo,
+ AllSuccsCache &AllSuccessors) {
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
+ return false;
+
+ // It is profitable if SuccToSinkTo does not post-dominate the current block.
+ if (!PDT->dominates(SuccToSinkTo, MBB))
+ return true;
+
+ // It is profitable to sink an instruction from a deeper cycle to a shallower
+ // cycle, even if the latter post-dominates the former (PR21115).
+ if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
+ return true;
+
+ // Check if the only use in the post-dominated block is a PHI instruction.
+ bool NonPHIUse = false;
+ for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
+ MachineBasicBlock *UseBlock = UseInst.getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
+ // if MI can be sunk further into another block in the next round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding successor is compile time expensive then cache results.
+ if (MachineBasicBlock *MBB2 =
+ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
+
+ MachineCycle *MCycle = CI->getCycle(MBB);
+
+ // If the instruction is not inside a cycle, it is not profitable to sink MI to
+ // a post-dominating block SuccToSinkTo.
+ if (!MCycle)
+ return false;
+
+ auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ // Get register pressure for block SuccToSinkTo.
+ std::vector<unsigned> BBRegisterPressure =
+ getBBRegisterPressure(*SuccToSinkTo);
+ for (; *PS != -1; PS++)
+ // check if any register pressure set exceeds limit in block SuccToSinkTo
+ // after sinking.
+ if (Weight + BBRegisterPressure[*PS] >=
+ TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
+ return true;
+ return false;
+ };
+
+ // If this instruction is inside a cycle and sinking it can shorten the live
+ // ranges of more registers, it is still profitable.
+ for (const MachineOperand &MO : MI.operands()) {
+ // Ignore non-register operands.
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (Reg.isPhysical()) {
+ // Don't handle non-constant and non-ignorable physical register uses.
+ if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+ return false;
+ continue;
+ }
+
+ // Users for the defs are all dominated by SuccToSinkTo.
+ if (MO.isDef()) {
+ // This def register's live range is shortened after sinking.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
+ LocalUse))
+ return false;
+ } else {
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI)
+ continue;
+ MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
+ // DefMI is defined outside of the cycle. There should be no live range
+ // impact for this operand. Definition outside of the cycle means either:
+ // 1: the definition is outside of the cycle, or
+ // 2: the definition is in this cycle, but it is a PHI in the cycle header.
+ if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
+ Cycle->getHeader() == DefMI->getParent()))
+ continue;
+ // The DefMI is defined inside the cycle.
+ // If sinking this operand makes some register pressure set exceed limit,
+ // it is not profitable.
+ if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
+ LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
+ return false;
+ }
+ }
+ }
+
+ // If MI is in a cycle and all its operands are live across the whole cycle, or
+ // if sinking does not make any register pressure set exceed its limit, it is
+ // profitable to sink MI.
+ return true;
+}
+
+/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
+/// computing it if it was not already cached.
+SmallVector<MachineBasicBlock *, 4> &
+MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
+ AllSuccsCache &AllSuccessors) const {
+ // Do we have the sorted successors in the cache?
+ auto Succs = AllSuccessors.find(MBB);
+ if (Succs != AllSuccessors.end())
+ return Succs->second;
+
+ SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());
+
+ // Handle cases where sinking can happen but where the sink point isn't a
+ // successor. For example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
+ // DomTree children of MBB that have MBB as immediate dominator are added.
+ if (DTChild->getIDom()->getBlock() == MI.getParent() &&
+ // Skip MBBs already added to the AllSuccs vector above.
+ !MBB->isSuccessor(DTChild->getBlock()))
+ AllSuccs.push_back(DTChild->getBlock());
+ }
+
+ // Sort Successors according to their cycle depth or block frequency info.
+ llvm::stable_sort(
+ AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
+ uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
+ bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+ return HasBlockFreq ? LHSFreq < RHSFreq
+ : CI->getCycleDepth(L) < CI->getCycleDepth(R);
+ });
+
+ auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
+
+ return it.first->second;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *
+MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge,
+ AllSuccsCache &AllSuccessors) {
+ assert (MBB && "Invalid MachineBasicBlock!");
+
+ // loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = nullptr;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (Reg.isPhysical()) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+ return nullptr;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return nullptr;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+ return nullptr;
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
+ BreakPHIEdge, LocalUse))
+ return nullptr;
+
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to. If we have reliable block frequency information
+ // (frequency != 0) available, give successors with smaller frequencies
+ // higher priority, otherwise prioritize smaller cycle depths.
+ for (MachineBasicBlock *SuccBlock :
+ GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
+ BreakPHIEdge, LocalUse)) {
+ SuccToSinkTo = SuccBlock;
+ break;
+ }
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return nullptr;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (!SuccToSinkTo)
+ return nullptr;
+ if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
+ return nullptr;
+ }
+ }
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with cycles.
+ if (MBB == SuccToSinkTo)
+ return nullptr;
+
+ if (!SuccToSinkTo)
+ return nullptr;
+
+ // It's not safe to sink instructions to EH landing pad. Control flow into
+ // landing pad is implicitly defined.
+ if (SuccToSinkTo->isEHPad())
+ return nullptr;
+
+ // It ought to be okay to sink instructions into an INLINEASM_BR target, but
+ // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
+ // the source block (which this code does not yet do). So for now, forbid
+ // doing so.
+ if (SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ return nullptr;
+
+ MachineBasicBlock::const_iterator InsertPos =
+ SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI))
+ return nullptr;
+
+ return SuccToSinkTo;
+}
+
+/// Return true if MI is likely to be usable as a memory operation by the
+/// implicit null check optimization.
+///
+/// This is a "best effort" heuristic, and should not be relied upon for
+/// correctness. This returning true does not guarantee that the implicit null
+/// check optimization is legal over MI, and this returning false does not
+/// guarantee MI cannot possibly be used to do a null check.
+static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
+
+ auto *MBB = MI.getParent();
+ if (MBB->pred_size() != 1)
+ return false;
+
+ auto *PredMBB = *MBB->pred_begin();
+ auto *PredBB = PredMBB->getBasicBlock();
+
+ // Frontends that don't use implicit null checks have no reason to emit
+ // branches with make.implicit metadata, and this function should always
+ // return false for them.
+ if (!PredBB ||
+ !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
+ return false;
+
+ const MachineOperand *BaseOp;
+ int64_t Offset;
+ bool OffsetIsScalable;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+ return false;
+
+ if (!BaseOp->isReg())
+ return false;
+
+ if (!(MI.mayLoad() && !MI.isPredicable()))
+ return false;
+
+ MachineBranchPredicate MBP;
+ if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
+ return false;
+
+ return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+ (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+ MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
+ MBP.LHS.getReg() == BaseOp->getReg();
+}
+
+/// If the sunk instruction is a copy, try to forward the copy instead of
+/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
+/// there's any subregister weirdness involved. Returns true if copy
+/// propagation occurred.
+static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
+ Register Reg) {
+ const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
+ const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
+
+ // Copy DBG_VALUE operand and set the original to undef. We then check to
+ // see whether this is something that can be copy-forwarded. If it isn't,
+ // continue around the loop.
+
+ const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
+ auto CopyOperands = TII.isCopyInstr(SinkInst);
+ if (!CopyOperands)
+ return false;
+ SrcMO = CopyOperands->Source;
+ DstMO = CopyOperands->Destination;
+
+ // Check validity of forwarding this copy.
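+ // With no virtual registers left in the function, we must be running after
+ // register allocation.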
+ bool PostRA = MRI.getNumVirtRegs() == 0;
+
+ // Trying to forward between physical and virtual registers is too hard.
+ if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
+ return false;
+
+ // Only try virtual register copy-forwarding before regalloc, and physical
+ // register copy-forwarding after regalloc.
+ bool arePhysRegs = !Reg.isVirtual();
+ if (arePhysRegs != PostRA)
+ return false;
+
+ // Pre-regalloc, only forward if all subregisters agree (or there are no
+ // subregs at all). More analysis might recover some forwardable copies.
+ if (!PostRA)
+ for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
+ if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
+ DbgMO.getSubReg() != DstMO->getSubReg())
+ return false;
+
+ // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
+ // of this copy. Only forward the copy if the DBG_VALUE operand exactly
+ // matches the copy destination.
+ if (PostRA && Reg != DstMO->getReg())
+ return false;
+
+ for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
+ DbgMO.setReg(SrcMO->getReg());
+ DbgMO.setSubReg(SrcMO->getSubReg());
+ }
+ return true;
+}
+
+using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
+/// Sink an instruction and its associated debug instructions.
+static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
+ MachineBasicBlock::iterator InsertPos,
+ ArrayRef<MIRegs> DbgValuesToSink) {
+ // If we cannot find a location to use (merge with), then we erase the debug
+ // location to prevent debug-info driven tools from potentially reporting
+ // wrong location information.
+ if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
+ MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
+ InsertPos->getDebugLoc()));
+ else
+ MI.setDebugLoc(DebugLoc());
+
+ // Move the instruction.
+ MachineBasicBlock *ParentBlock = MI.getParent();
+ SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Sink a copy of debug users to the insert position. Mark the original
+ // DBG_VALUE location as 'undef', indicating that any earlier variable
+ // location should be terminated as we've optimised away the value at this
+ // point.
+ for (const auto &DbgValueToSink : DbgValuesToSink) {
+ MachineInstr *DbgMI = DbgValueToSink.first;
+ MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
+ SuccToSinkTo.insert(InsertPos, NewDbgMI);
+
+ bool PropagatedAllSunkOps = true;
+ for (unsigned Reg : DbgValueToSink.second) {
+ if (DbgMI->hasDebugOperandForReg(Reg)) {
+ if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
+ PropagatedAllSunkOps = false;
+ break;
+ }
+ }
+ }
+ if (!PropagatedAllSunkOps)
+ DbgMI->setDebugValueUndef();
+ }
+}
+
+/// hasStoreBetween - check if there is a store between straight line blocks From
+/// and To.
+bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
+ MachineBasicBlock *To, MachineInstr &MI) {
+ // Make sure From and To are in straight line which means From dominates To
+ // and To post dominates From.
+ if (!DT->dominates(From, To) || !PDT->dominates(To, From))
+ return true;
+
+ auto BlockPair = std::make_pair(From, To);
+
+ // Has this pair of blocks been queried before, with a definite cached
+ // result?
+ if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
+ return HasStoreCache[BlockPair];
+
+ if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
+ return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
+ return I->mayAlias(AA, MI, false);
+ });
+
+ bool SawStore = false;
+ bool HasAliasedStore = false;
+ DenseSet<MachineBasicBlock *> HandledBlocks;
+ DenseSet<MachineBasicBlock *> HandledDomBlocks;
+ // Go through all reachable blocks from From.
+ for (MachineBasicBlock *BB : depth_first(From)) {
+ // We insert the instruction at the start of block To, so no need to worry
+ // about stores inside To.
+ // Stores in block From have already been considered on entry to
+ // SinkInstruction.
+ if (BB == To || BB == From)
+ continue;
+
+ // We already handled this BB in a previous iteration.
+ if (HandledBlocks.count(BB))
+ continue;
+
+ HandledBlocks.insert(BB);
+ // If To post-dominates BB, BB must lie on a path from block From to To.
+ if (PDT->dominates(To, BB)) {
+ if (!HandledDomBlocks.count(BB))
+ HandledDomBlocks.insert(BB);
+
+ // If this BB is too big, or there are too many blocks on the straight-line
+ // path between From and To, stop searching to save compile time.
+ if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
+ HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
+ for (auto *DomBB : HandledDomBlocks) {
+ if (DomBB != BB && DT->dominates(DomBB, BB))
+ HasStoreCache[std::make_pair(DomBB, To)] = true;
+ else if(DomBB != BB && DT->dominates(BB, DomBB))
+ HasStoreCache[std::make_pair(From, DomBB)] = true;
+ }
+ HasStoreCache[BlockPair] = true;
+ return true;
+ }
+
+ for (MachineInstr &I : *BB) {
+ // Treat as alias conservatively for a call or an ordered memory
+ // operation.
+ if (I.isCall() || I.hasOrderedMemoryRef()) {
+ for (auto *DomBB : HandledDomBlocks) {
+ if (DomBB != BB && DT->dominates(DomBB, BB))
+ HasStoreCache[std::make_pair(DomBB, To)] = true;
+ else if(DomBB != BB && DT->dominates(BB, DomBB))
+ HasStoreCache[std::make_pair(From, DomBB)] = true;
+ }
+ HasStoreCache[BlockPair] = true;
+ return true;
+ }
+
+ if (I.mayStore()) {
+ SawStore = true;
+ // We still have a chance to sink MI if none of the stores in
+ // between alias MI.
+ // Cache all store instructions, so that we don't need to go through
+ // all blocks reachable from From for the next load instruction.
+ if (I.mayAlias(AA, MI, false))
+ HasAliasedStore = true;
+ StoreInstrCache[BlockPair].push_back(&I);
+ }
+ }
+ }
+ }
+ // If there is no store at all, cache the result.
+ if (!SawStore)
+ HasStoreCache[BlockPair] = false;
+ return HasAliasedStore;
+}
+
+/// Sink instructions into cycles if profitable. This especially tries to
+/// prevent register spills caused by register pressure if there is little to no
+/// overhead moving instructions into cycles.
+bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ assert(Preheader && "Cycle sink needs a preheader block");
+ MachineBasicBlock *SinkBlock = nullptr;
+ bool CanSink = true;
+ const MachineOperand &MO = I.getOperand(0);
+
+ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
+ if (!Cycle->contains(MI.getParent())) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
+ CanSink = false;
+ break;
+ }
+
+ // FIXME: Come up with a proper cost model that estimates whether sinking
+ // the instruction (and thus possibly executing it on every cycle
+ // iteration) is more expensive than a register.
+ // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
+ CanSink = false;
+ break;
+ }
+ if (!SinkBlock) {
+ SinkBlock = MI.getParent();
+ LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
+ << printMBBReference(*SinkBlock) << "\n");
+ continue;
+ }
+ SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
+ CanSink = false;
+ break;
+ }
+ LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " <<
+ printMBBReference(*SinkBlock) << "\n");
+ }
+
+ if (!CanSink) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
+ return false;
+ }
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
+ return false;
+ }
+ if (SinkBlock == Preheader) {
+ LLVM_DEBUG(
+ dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
+ return false;
+ }
+ if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
+ LLVM_DEBUG(
+ dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
+ SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
+ I);
+
+ // Conservatively clear any kill flags on uses of sunk instruction
+ for (MachineOperand &MO : I.operands()) {
+ if (MO.isReg() && MO.readsReg())
+ RegsToClearKillFlags.insert(MO.getReg());
+ }
+
+ // The instruction is moved from its basic block, so do not retain the
+ // debug information.
+ assert(!I.isDebugInstr() && "Should not sink debug inst");
+ I.setDebugLoc(DebugLoc());
+ return true;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
+ AllSuccsCache &AllSuccessors) {
+ // Don't sink instructions that the target prefers not to sink.
+ if (!TII->shouldSink(MI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ if (!MI.isSafeToMove(AA, SawStore))
+ return false;
+
+ // Convergent operations may not be made control-dependent on additional
+ // values.
+ if (MI.isConvergent())
+ return false;
+
+ // Don't break implicit null checks. This is a performance heuristic, and not
+ // required for correctness.
+ if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI.getParent();
+ MachineBasicBlock *SuccToSinkTo =
+ FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
+
+ // If there are no outputs, it must have side-effects.
+ if (!SuccToSinkTo)
+ return false;
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (const MachineOperand &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg == 0 || !Reg.isPhysical())
+ continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, this is a critical edge.
+ // Decide if we can sink along it or need to break the edge.
+ if (SuccToSinkTo->pred_size() > 1) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool TryBreak = false;
+ bool Store =
+ MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
+ if (!MI.isSafeToMove(AA, Store)) {
+ LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
+ }
+
+ // Don't sink instructions into a cycle.
+ if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
+ (!CI->getCycle(SuccToSinkTo)->isReducible() ||
+ CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
+ LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
+ TryBreak = true;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ if (!TryBreak)
+ LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ // Mark this edge as to be split.
+ // If the edge can actually be split, the next iteration of the main loop
+ // will sink MI in the newly created block.
+ bool Status =
+ PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
+ LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ // The instruction will not be sunk this time.
+ return false;
+ }
+ }
+
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+ // sunken into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
+ LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ // The instruction will not be sunk this time.
+ return false;
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ MachineBasicBlock::iterator InsertPos =
+ SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
+ LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
+ return false;
+ }
+
+ // Collect debug users of any vreg that this inst defines.
+ SmallVector<MIRegs, 4> DbgUsersToSink;
+ for (auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+ if (!SeenDbgUsers.count(MO.getReg()))
+ continue;
+
+ // Sink any users that don't pass any other DBG_VALUEs for this variable.
+ auto &Users = SeenDbgUsers[MO.getReg()];
+ for (auto &User : Users) {
+ MachineInstr *DbgMI = User.getPointer();
+ if (User.getInt()) {
+ // This DBG_VALUE would re-order assignments. If we can't copy-propagate
+ // it, it can't be recovered. Set it undef.
+ if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
+ DbgMI->setDebugValueUndef();
+ } else {
+ DbgUsersToSink.push_back(
+ {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
+ }
+ }
+ }
+
+ // After sinking, some debug users may not be dominated any more. If possible,
+ // copy-propagate their operands. As it's expensive, don't do this if there's
+ // no debuginfo in the program.
+ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
+ SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);
+
+ performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ // Note that we have to clear the kill flags for any register this instruction
+ // uses as we may sink over another instruction which currently kills the
+ // used registers.
+ for (MachineOperand &MO : MI.all_uses())
+ RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
+
+ return true;
+}
+
+void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
+ MachineInstr &MI, MachineBasicBlock *TargetBlock) {
+ assert(MI.isCopy());
+ assert(MI.getOperand(1).isReg());
+
+ // Enumerate all users of vreg operands that are def'd. Skip those that will
+ // be sunk. For the rest, if they are not dominated by the block we will sink
+ // MI into, propagate the copy source to them.
+ SmallVector<MachineInstr *, 4> DbgDefUsers;
+ SmallVector<Register, 4> DbgUseRegs;
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ for (auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+ DbgUseRegs.push_back(MO.getReg());
+ for (auto &User : MRI.use_instructions(MO.getReg())) {
+ if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
+ continue;
+
+ // If it is in the same block, it will either sink or be use-before-def.
+ if (User.getParent() == MI.getParent())
+ continue;
+
+ assert(User.hasDebugOperandForReg(MO.getReg()) &&
+ "DBG_VALUE user of vreg, but has no operand for it?");
+ DbgDefUsers.push_back(&User);
+ }
+ }
+
+ // Point the users of this copy that are no longer dominated, at the source
+ // of the copy.
+ for (auto *User : DbgDefUsers) {
+ for (auto &Reg : DbgUseRegs) {
+ for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
+ DbgOp.setReg(MI.getOperand(1).getReg());
+ DbgOp.setSubReg(MI.getOperand(1).getSubReg());
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// This pass is not intended to be a replacement or a complete alternative
+// for the pre-ra machine sink pass. It is only designed to sink COPY
+// instructions which should be handled after RA.
+//
+// This pass sinks COPY instructions into a successor block, if the COPY is not
+// used in the current block and the COPY is live-in to a single successor
+// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
+// copy on paths where its result isn't needed. This also exposes
+// additional opportunities for dead copy elimination and shrink wrapping.
+//
+// These copies were either not handled by or are inserted after the MachineSink
+// pass. As an example of the former case, the MachineSink pass cannot sink
+// COPY instructions with allocatable source registers; for AArch64 this type
+// of copy instruction is frequently used to move function parameters (PhyReg)
+// into virtual registers in the entry block.
+//
+// For the machine IR below, this pass will sink %w19 in the entry block into its
+// successor (%bb.1) because %w19 is only live-in in %bb.1.
+// %bb.0:
+// %wzr = SUBSWri %w1, 1
+// %w19 = COPY %w0
+// Bcc 11, %bb.2
+// %bb.1:
+// Live Ins: %w19
+// BL @fun
+// %w0 = ADDWrr %w0, %w19
+// RET %w0
+// %bb.2:
+// %w0 = COPY %wzr
+// RET %w0
+// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
+// able to see %bb.0 as a candidate.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class PostRAMachineSinking : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ PostRAMachineSinking() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override { return "PostRA Machine Sink"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ /// Track which register units have been modified and used.
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+ /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
+ /// entry in this map for each unit it touches. The DBG_VALUE's entry
+ /// consists of a pointer to the instruction itself, and a vector of registers
+ /// referred to by the instruction that overlap the key register unit.
+ DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;
+
+ /// Sink COPY instructions that are unused in their own block close to their
+ /// uses in successors.
+ bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
+ const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
+};
+} // namespace
+
+char PostRAMachineSinking::ID = 0;
+char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
+
+INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
+ "PostRA Machine Sink", false, false)
+
+static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ LiveRegUnits LiveInRegUnits(*TRI);
+ LiveInRegUnits.addLiveIns(MBB);
+ return !LiveInRegUnits.available(Reg);
+}
+
+static MachineBasicBlock *
+getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
+ const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
+ unsigned Reg, const TargetRegisterInfo *TRI) {
+ // Try to find a single sinkable successor in which Reg is live-in.
+ MachineBasicBlock *BB = nullptr;
+ for (auto *SI : SinkableBBs) {
+ if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
+ // If BB is set here, Reg is live-in to at least two sinkable successors,
+ // so quit.
+ if (BB)
+ return nullptr;
+ BB = SI;
+ }
+ }
+ // Reg is not live-in to any sinkable successors.
+ if (!BB)
+ return nullptr;
+
+ // Check if any register aliased with Reg is live-in to other successors.
+ for (auto *SI : CurBB.successors()) {
+ if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
+ return nullptr;
+ }
+ return BB;
+}
+
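+/// Return the single sinkable successor into which every register in
+/// DefedRegsInCopy is live-in, or nullptr if the registers do not agree on a
+/// single successor.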
+static MachineBasicBlock *
+getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
+ const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
+ ArrayRef<unsigned> DefedRegsInCopy,
+ const TargetRegisterInfo *TRI) {
+ MachineBasicBlock *SingleBB = nullptr;
+ for (auto DefReg : DefedRegsInCopy) {
+ MachineBasicBlock *BB =
+ getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
+ if (!BB || (SingleBB && SingleBB != BB))
+ return nullptr;
+ SingleBB = BB;
+ }
+ return SingleBB;
+}
+
+static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
+ SmallVectorImpl<unsigned> &UsedOpsInCopy,
+ LiveRegUnits &UsedRegUnits,
+ const TargetRegisterInfo *TRI) {
+ for (auto U : UsedOpsInCopy) {
+ MachineOperand &MO = MI->getOperand(U);
+ Register SrcReg = MO.getReg();
+ if (!UsedRegUnits.available(SrcReg)) {
+ MachineBasicBlock::iterator NI = std::next(MI->getIterator());
+ for (MachineInstr &UI : make_range(NI, CurBB.end())) {
+ if (UI.killsRegister(SrcReg, TRI)) {
+ UI.clearRegisterKills(SrcReg, TRI);
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+ }
+}
+
+static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
+ SmallVectorImpl<unsigned> &UsedOpsInCopy,
+ SmallVectorImpl<unsigned> &DefedRegsInCopy) {
+ MachineFunction &MF = *SuccBB->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
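+ // The sunk COPY now defines these registers inside SuccBB, so they (and
+ // their sub-registers) are no longer live-in.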
+ for (unsigned DefReg : DefedRegsInCopy)
+ for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
+ SuccBB->removeLiveIn(S);
+ for (auto U : UsedOpsInCopy) {
+ Register SrcReg = MI->getOperand(U).getReg();
+ LaneBitmask Mask;
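+ // Collect the lane mask of SrcReg covered by its register units; fall back
+ // to all lanes if nothing is recorded.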
+ for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
+ Mask |= (*S).second;
+ }
+ SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
+ }
+ SuccBB->sortUniqueLiveIns();
+}
+
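+/// Return true if sinking MI below the instructions already scanned (recorded
+/// in ModifiedRegUnits/UsedRegUnits) would violate a register dependency: a
+/// def of MI that is modified or used later in the block, or a use of MI that
+/// is redefined later in the block. Also records MI's use operand indexes in
+/// UsedOpsInCopy and its defined registers in DefedRegsInCopy.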
+static bool hasRegisterDependency(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &UsedOpsInCopy,
+ SmallVectorImpl<unsigned> &DefedRegsInCopy,
+ LiveRegUnits &ModifiedRegUnits,
+ LiveRegUnits &UsedRegUnits) {
+ bool HasRegDependency = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
+ HasRegDependency = true;
+ break;
+ }
+ DefedRegsInCopy.push_back(Reg);
+
+ // FIXME: instead of isUse(), readsReg() would be a better fix here.
+ // For example, we could ignore modifications of a reg with undef. However,
+ // it's not perfectly clear if skipping the internal read is safe in all
+ // other targets.
+ } else if (MO.isUse()) {
+ if (!ModifiedRegUnits.available(Reg)) {
+ HasRegDependency = true;
+ break;
+ }
+ UsedOpsInCopy.push_back(i);
+ }
+ }
+ return HasRegDependency;
+}
+
+bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
+ MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII) {
+ SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
+ // FIXME: For now, we sink only to a successor which has a single predecessor
+ // so that we can directly sink COPY instructions to the successor without
+ // adding any new block or branch instruction.
+ for (MachineBasicBlock *SI : CurBB.successors())
+ if (!SI->livein_empty() && SI->pred_size() == 1)
+ SinkableBBs.insert(SI);
+
+ if (SinkableBBs.empty())
+ return false;
+
+ bool Changed = false;
+
+ // Track which registers have been modified and used between the end of the
+ // block and the current instruction.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+ SeenDbgInstrs.clear();
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
+ // Track the operand index for use in Copy.
+ SmallVector<unsigned, 2> UsedOpsInCopy;
+ // Track the register number defed in Copy.
+ SmallVector<unsigned, 2> DefedRegsInCopy;
+
+ // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
+ // for DBG_VALUEs later, record them when they're encountered.
+ if (MI.isDebugValue() && !MI.isDebugRef()) {
+ SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
+ bool IsValid = true;
+ for (MachineOperand &MO : MI.debug_operands()) {
+ if (MO.isReg() && MO.getReg().isPhysical()) {
+ // Bail if we can already tell the sink would be rejected, rather
+ // than needlessly accumulating lots of DBG_VALUEs.
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits)) {
+ IsValid = false;
+ break;
+ }
+
+ // Record debug use of each reg unit.
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg()))
+ MIUnits[Unit].push_back(MO.getReg());
+ }
+ }
+ if (IsValid) {
+ for (auto &RegOps : MIUnits)
+ SeenDbgInstrs[RegOps.first].emplace_back(&MI,
+ std::move(RegOps.second));
+ }
+ continue;
+ }
+
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+
+ // Do not move any instruction across a function call.
+ if (MI.isCall())
+ return false;
+
+ if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ continue;
+ }
+
+ // Don't sink the COPY if it would violate a register dependency.
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits)) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ continue;
+ }
+ assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
+ "Unexpected SrcReg or DefReg");
+ MachineBasicBlock *SuccBB =
+ getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
+ // Don't sink if we cannot find a single sinkable successor in which Reg
+ // is live-in.
+ if (!SuccBB) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ continue;
+ }
+ assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
+ "Unexpected predecessor");
+
+ // Collect DBG_VALUEs that must sink with this copy. We previously recorded
+ // which reg units each DBG_VALUE reads; if this instruction writes any of
+ // those units, the corresponding DBG_VALUEs must sink as well.
+ MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
+ for (auto &MO : MI.all_defs()) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
+ for (const auto &MIRegs : SeenDbgInstrs.lookup(Unit)) {
+ auto &Regs = DbgValsToSinkMap[MIRegs.first];
+ for (unsigned Reg : MIRegs.second)
+ Regs.push_back(Reg);
+ }
+ }
+ }
+ auto DbgValsToSink = DbgValsToSinkMap.takeVector();
+
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
+
+ MachineBasicBlock::iterator InsertPos =
+ SuccBB->SkipPHIsAndLabels(SuccBB->begin());
+ if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
+ LLVM_DEBUG(
+ dbgs() << " *** Not sinking: prologue interference\n");
+ continue;
+ }
+
+ // Clear the kill flag if SrcReg is killed between MI and the end of the
+ // block.
+ clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
+ performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
+ updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
+
+ Changed = true;
+ ++NumPostRACopySink;
+ }
+ return Changed;
+}
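+
+// Editorial sketch (not part of the upstream pass): the core selection rule
+// above is "sink a COPY only into the unique sinkable successor where its
+// defined register is live-in". The toy code below illustrates just that
+// rule with hypothetical names (ToyBlock, pickSinkSucc); the real pass also
+// checks aliasing register units, register dependencies inside the block,
+// calls, and prologue interference before sinking.
+//
+// \code
+//   #include <optional>
+//   #include <set>
+//   #include <vector>
+//
+//   struct ToyBlock { std::set<unsigned> LiveIns; };
+//
+//   // Index of the unique successor with Reg live-in, or std::nullopt if
+//   // there are zero or several such successors.
+//   std::optional<size_t> pickSinkSucc(const std::vector<ToyBlock> &Succs,
+//                                      unsigned Reg) {
+//     std::optional<size_t> Found;
+//     for (size_t I = 0; I < Succs.size(); ++I) {
+//       if (!Succs[I].LiveIns.count(Reg))
+//         continue;
+//       if (Found)
+//         return std::nullopt; // live-in to more than one successor
+//       Found = I;
+//     }
+//     return Found;
+//   }
+// \endcode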
+
+bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ bool Changed = false;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+ for (auto &BB : MF)
+ Changed |= tryToSinkCopy(BB, MF, TRI, TII);
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
new file mode 100644
index 000000000000..53bed7397d09
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -0,0 +1,52 @@
+//===- MachineSizeOpts.cpp - code size optimization related code ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains shared helpers related to machine IR code size
+// optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+extern cl::opt<bool> EnablePGSO;
+extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
+extern cl::opt<bool> ForcePGSO;
+extern cl::opt<int> PgsoCutoffInstrProf;
+extern cl::opt<int> PgsoCutoffSampleProf;
+
+bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType) {
+ return shouldFuncOptimizeForSizeImpl(MF, PSI, MBFI, QueryType);
+}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType) {
+ assert(MBB);
+ return shouldOptimizeForSizeImpl(MBB, PSI, MBFI, QueryType);
+}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ MBFIWrapper *MBFIW,
+ PGSOQueryType QueryType) {
+ assert(MBB);
+ if (!PSI || !MBFIW)
+ return false;
+ BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
+ return shouldOptimizeForSizeImpl(BlockFreq, PSI, &MBFIW->getMBFI(),
+ QueryType);
+}
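+
+// Editorial note: a typical query from a machine pass looks roughly like the
+// sketch below. PSI and MBFI come from the pass's required analyses;
+// preferSmallerEncoding() is a hypothetical helper used only for
+// illustration.
+//
+// \code
+//   if (llvm::shouldOptimizeForSize(&MBB, PSI, MBFI, PGSOQueryType::IRPass))
+//     preferSmallerEncoding();
+// \endcode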
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
new file mode 100644
index 000000000000..9628e4c5aeb5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -0,0 +1,236 @@
+//===- lib/CodeGen/MachineStableHash.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Stable hashing for MachineInstr and MachineOperand. Useful for getting a
+// hash across runs, modules, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/StableHashing.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "machine-stable-hash"
+
+using namespace llvm;
+
+STATISTIC(StableHashBailingMachineBasicBlock,
+ "Number of encountered unsupported MachineOperands that were "
+ "MachineBasicBlocks while computing stable hashes");
+STATISTIC(StableHashBailingConstantPoolIndex,
+ "Number of encountered unsupported MachineOperands that were "
+ "ConstantPoolIndex while computing stable hashes");
+STATISTIC(StableHashBailingTargetIndexNoName,
+ "Number of encountered unsupported MachineOperands that were "
+ "TargetIndex with no name");
+STATISTIC(StableHashBailingGlobalAddress,
+ "Number of encountered unsupported MachineOperands that were "
+ "GlobalAddress while computing stable hashes");
+STATISTIC(StableHashBailingBlockAddress,
+ "Number of encountered unsupported MachineOperands that were "
+ "BlockAddress while computing stable hashes");
+STATISTIC(StableHashBailingMetadataUnsupported,
+ "Number of encountered unsupported MachineOperands that were "
+ "Metadata of an unsupported kind while computing stable hashes");
+
+stable_hash llvm::stableHashValue(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (MO.getReg().isVirtual()) {
+ const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo();
+ SmallVector<unsigned> DefOpcodes;
+ for (auto &Def : MRI.def_instructions(MO.getReg()))
+ DefOpcodes.push_back(Def.getOpcode());
+ return hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
+ }
+
+ // Register operands don't have target flags.
+ return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(),
+ MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate: {
+ auto Val = MO.isCImm() ? MO.getCImm()->getValue()
+ : MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ auto ValHash =
+ stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+ return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ StableHashBailingMachineBasicBlock++;
+ return 0;
+ case MachineOperand::MO_ConstantPoolIndex:
+ StableHashBailingConstantPoolIndex++;
+ return 0;
+ case MachineOperand::MO_BlockAddress:
+ StableHashBailingBlockAddress++;
+ return 0;
+ case MachineOperand::MO_Metadata:
+ StableHashBailingMetadataUnsupported++;
+ return 0;
+ case MachineOperand::MO_GlobalAddress:
+ StableHashBailingGlobalAddress++;
+ return 0;
+ case MachineOperand::MO_TargetIndex: {
+ if (const char *Name = MO.getTargetIndexName())
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_string(Name),
+ MO.getOffset());
+ StableHashBailingTargetIndexNoName++;
+ return 0;
+ }
+
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getIndex());
+
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ stable_hash_combine_string(MO.getSymbolName()));
+
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut: {
+ if (const MachineInstr *MI = MO.getParent()) {
+ if (const MachineBasicBlock *MBB = MI->getParent()) {
+ if (const MachineFunction *MF = MBB->getParent()) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize =
+ MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ const uint32_t *RegMask = MO.getRegMask();
+ std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
+ RegMask + RegMaskSize);
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(RegMaskHashes.data(),
+ RegMaskHashes.size()));
+ }
+ }
+ }
+
+ assert(0 && "MachineOperand not associated with any MachineFunction");
+ return hash_combine(MO.getType(), MO.getTargetFlags());
+ }
+
+ case MachineOperand::MO_ShuffleMask: {
+ std::vector<llvm::stable_hash> ShuffleMaskHashes;
+
+ llvm::transform(
+ MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
+ [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
+
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(ShuffleMaskHashes.data(),
+ ShuffleMaskHashes.size()));
+ }
+ case MachineOperand::MO_MCSymbol: {
+ auto SymbolName = MO.getMCSymbol()->getName();
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_string(SymbolName));
+ }
+ case MachineOperand::MO_CFIIndex:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getPredicate());
+ case MachineOperand::MO_DbgInstrRef:
+ return stable_hash_combine(MO.getType(), MO.getInstrRefInstrIndex(),
+ MO.getInstrRefOpIndex());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+/// A stable hash value for machine instructions.
+/// Returns 0 if no stable hash could be computed.
+/// The hashing and equality testing functions ignore definitions so this is
+/// useful for CSE, etc.
+stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
+ bool HashConstantPoolIndices,
+ bool HashMemOperands) {
+ // Build up a buffer of hash code components.
+ SmallVector<stable_hash, 16> HashComponents;
+ HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2);
+ HashComponents.push_back(MI.getOpcode());
+ HashComponents.push_back(MI.getFlags());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!HashVRegs && MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
+ continue; // Skip virtual register defs.
+
+ if (MO.isCPI()) {
+ HashComponents.push_back(stable_hash_combine(
+ MO.getType(), MO.getTargetFlags(), MO.getIndex()));
+ continue;
+ }
+
+ stable_hash StableHash = stableHashValue(MO);
+ if (!StableHash)
+ return 0;
+ HashComponents.push_back(StableHash);
+ }
+
+ for (const auto *Op : MI.memoperands()) {
+ if (!HashMemOperands)
+ break;
+ HashComponents.push_back(static_cast<unsigned>(Op->getSize()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getFlags()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getOffset()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getSuccessOrdering()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getAddrSpace()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getSyncScopeID()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getBaseAlign().value()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
+ }
+
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
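+
+// Editorial sketch: "stable" here means the value depends only on the hashed
+// components, never on pointers, seeds, or iteration order, so it can be
+// compared across runs, hosts, and modules. The real combining logic lives in
+// llvm/CodeGen/StableHashing.h; the standalone snippet below (an FNV-1a-style
+// word fold, names hypothetical) only illustrates the idea and is not the
+// exact algorithm used above.
+//
+// \code
+//   #include <cstdint>
+//   #include <vector>
+//
+//   uint64_t foldStable(const std::vector<uint64_t> &Parts) {
+//     uint64_t H = 0xcbf29ce484222325ULL; // FNV offset basis
+//     for (uint64_t P : Parts) {
+//       H ^= P;                // mix in the next component
+//       H *= 0x100000001b3ULL; // FNV prime
+//     }
+//     return H;
+//   }
+// \endcode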
+
+stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
+ SmallVector<stable_hash> HashComponents;
+ // TODO: Hash more stuff like block alignment and branch probabilities.
+ for (const auto &MI : MBB)
+ HashComponents.push_back(stableHashValue(MI));
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
+
+stable_hash llvm::stableHashValue(const MachineFunction &MF) {
+ SmallVector<stable_hash> HashComponents;
+ // TODO: Hash lots more stuff like function alignment and stack objects.
+ for (const auto &MBB : MF)
+ HashComponents.push_back(stableHashValue(MBB));
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
new file mode 100644
index 000000000000..6128248a028e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -0,0 +1,108 @@
+//===- MachineStripDebug.cpp - Strip debug info ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This removes debug info from everything. It can be used to ensure
+/// tests can be debugified without affecting the output MIR.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Debugify.h"
+
+#define DEBUG_TYPE "mir-strip-debug"
+
+using namespace llvm;
+
+namespace {
+cl::opt<bool>
+ OnlyDebugifiedDefault("mir-strip-debugify-only",
+ cl::desc("Should mir-strip-debug only strip debug "
+ "info from debugified modules by default"),
+ cl::init(true));
+
+struct StripDebugMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ if (OnlyDebugified) {
+ NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify");
+ if (!DebugifyMD) {
+ LLVM_DEBUG(dbgs() << "Not stripping debug info"
+ " (debugify metadata not found)\n");
+ return false;
+ }
+ }
+
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ bool Changed = false;
+ for (Function &F : M.functions()) {
+ MachineFunction *MaybeMF = MMI.getMachineFunction(F);
+ if (!MaybeMF)
+ continue;
+ MachineFunction &MF = *MaybeMF;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.isDebugInstr()) {
+ // FIXME: We should remove all of them. However, AArch64 emits an
+ // invalid `DBG_VALUE $lr` with only one operand instead of
+ // the usual three and has a test that depends on its
+ // preservation. Preserve it for now.
+ if (MI.getNumOperands() > 1) {
+ LLVM_DEBUG(dbgs() << "Removing debug instruction " << MI);
+ MBB.erase(&MI);
+ Changed |= true;
+ continue;
+ }
+ }
+ if (MI.getDebugLoc()) {
+ LLVM_DEBUG(dbgs() << "Removing location " << MI);
+ MI.setDebugLoc(DebugLoc());
+ Changed |= true;
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "Keeping " << MI);
+ }
+ }
+ }
+
+ Changed |= stripDebugifyMetadata(M);
+
+ return Changed;
+ }
+
+ StripDebugMachineModule() : StripDebugMachineModule(OnlyDebugifiedDefault) {}
+ StripDebugMachineModule(bool OnlyDebugified)
+ : ModulePass(ID), OnlyDebugified(OnlyDebugified) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ static char ID; // Pass identification.
+
+protected:
+ bool OnlyDebugified;
+};
+char StripDebugMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(StripDebugMachineModule, DEBUG_TYPE,
+ "Machine Strip Debug Module", false, false)
+INITIALIZE_PASS_END(StripDebugMachineModule, DEBUG_TYPE,
+ "Machine Strip Debug Module", false, false)
+
+ModulePass *llvm::createStripDebugMachineModulePass(bool OnlyDebugified) {
+ return new StripDebugMachineModule(OnlyDebugified);
+}
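+
+// Editorial sketch of creating the pass from C++; where it goes in the
+// pipeline (after machine functions exist in MachineModuleInfo) is what
+// matters, so treat this only as an illustration:
+//
+// \code
+//   llvm::legacy::PassManager PM;
+//   PM.add(llvm::createStripDebugMachineModulePass(/*OnlyDebugified=*/false));
+// \endcode
+//
+// With OnlyDebugified=true the pass is a no-op on modules that do not carry
+// the "llvm.debugify" named metadata.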
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 000000000000..4f66f2e672d1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1356 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-trace-metrics"
+
+char MachineTraceMetrics::ID = 0;
+
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
+ "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
+ "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
+ std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ SchedModel.init(&ST);
+ BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = nullptr;
+ BlockInfo.clear();
+ for (Ensemble *&E : Ensembles) {
+ delete E;
+ E = nullptr;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
+ for (const auto &MI : *MBB) {
+ if (MI.isTransient())
+ continue;
+ ++InstrCount;
+ if (MI.isCall())
+ FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
+ }
+ FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
+ return FBI;
+}
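+
+// Editorial worked example (assuming the factor is the LCM of all unit counts
+// divided by this resource's unit count): with resource kinds providing 1, 2,
+// and 3 units, the LCM is 6, so one cycle on the 1-unit kind becomes 6 scaled
+// cycles, one cycle on the 2-unit kind becomes 3, and one cycle on the 3-unit
+// kind becomes 2. After scaling, the per-kind totals in ProcResourceCycles
+// are directly comparable.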
+
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() = default;
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+ // successor is always computed first.
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : nullptr;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : nullptr;
+}
+
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either To or From may be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const override { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+
+/// Pick only the current basic block for the trace and do not choose any
+/// predecessors/successors.
+class LocalEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const override { return "Local"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
+ return nullptr;
+ };
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
+ return nullptr;
+ };
+
+public:
+ LocalEnsemble(MachineTraceMetrics *MTM)
+ : MachineTraceMetrics::Ensemble(MTM) {}
+};
+} // end anonymous namespace
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return nullptr;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return nullptr;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = nullptr;
+ unsigned BestDepth = 0;
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth) {
+ Best = Pred;
+ BestDepth = Depth;
+ }
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->succ_empty())
+ return nullptr;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = nullptr;
+ unsigned BestHeight = 0;
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight) {
+ Best = Succ;
+ BestHeight = Height;
+ }
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
+ assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
+ "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[static_cast<size_t>(strategy)];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case MachineTraceStrategy::TS_MinInstrCount:
+ return (E = new MinInstrCountEnsemble(this));
+ case MachineTraceStrategy::TS_Local:
+ return (E = new LocalEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
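+
+// Editorial sketch of how a client pass typically consumes this analysis
+// (variable names are illustrative):
+//
+// \code
+//   MachineTraceMetrics *Traces = &getAnalysis<MachineTraceMetrics>();
+//   MachineTraceMetrics::Ensemble *MinInstr =
+//       Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
+//   MachineTraceMetrics::Trace T = MinInstr->getTrace(MBB);
+//   unsigned CritPath = T.getCriticalPath();
+// \endcode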
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
+ << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (Ensemble *E : Ensembles)
+ if (E)
+ E->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (Ensemble *E : Ensembles)
+ if (E)
+ E->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward = false;
+
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
+};
+
+} // end anonymous namespace
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(std::optional<const MachineBasicBlock *> From,
+ const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : *From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To).second;
+ }
+};
+
+} // end namespace llvm
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
+ << printMBBReference(*MBB) << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
+ LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(I);
+ LLVM_DEBUG({
+ if (TBI.Pred)
+ dbgs() << printMBBReference(*TBI.Pred) << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ for (const auto *I : post_order_ext(MBB, Bounds)) {
+ LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(I);
+ LLVM_DEBUG({
+ if (TBI.Succ)
+ dbgs() << printMBBReference(*TBI.Succ) << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+ << getName() << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(Pred);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a successor of Pred.
+ assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+ << getName() << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(Succ);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a predecessor of Succ.
+ assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (const auto &I : *BadMBB)
+ Cycles.erase(&I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(Register::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = DefI->getParent();
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+
+} // end anonymous namespace
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr &UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ // Debug values should not be included in any calculations.
+ if (UseMI.isDebugInstr())
+ return false;
+
+ bool HasPhysRegs = false;
+ for (const MachineOperand &MO : UseMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg.isPhysical()) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO.readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr &UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
+ if (UseMI.getOperand(i + 1).getMBB() == Pred) {
+ Register Reg = UseMI.getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<MCRegister, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (const MachineOperand &MO : UseMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ // Track live defs and kills for updating RegUnits.
+ if (MO.isDef()) {
+ if (MO.isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO.isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO.readsReg())
+ continue;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (MCRegister Kill : Kills)
+ for (MCRegUnit Unit : TRI->regunits(Kill))
+ RegUnits.erase(Unit);
+
+ // Second, live defs.
+ for (unsigned DefOp : LiveDefOps) {
+ for (MCRegUnit Unit :
+ TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
+ LiveRegUnit &LRU = RegUnits[Unit];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
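+
+// Editorial example of why tracking is per register unit: on X86, $al, $ax,
+// and $eax share register units, so a def of $eax recorded here also provides
+// the dependency for a later read of $al, even though the operands name
+// different registers.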
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (const LiveInReg &LIR : TBI.LiveIns) {
+ if (!LIR.Reg.isVirtual())
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.isUsefulDominator(TBI))
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
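+
+// Editorial worked example: if a live-in virtual register has Height 7 in the
+// center block and its defining instruction higher up in the trace has Depth
+// 5, that live-in contributes a cross-block path of 5 + 7 = 12 cycles. The
+// trace critical path is the maximum of such sums and of Depth + Height over
+// the instructions in the center block itself.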
+
+void MachineTraceMetrics::Ensemble::
+updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
+ SparseSet<LiveRegUnit> &RegUnits) {
+ SmallVector<DataDep, 8> Deps;
+ // Collect all data dependencies.
+ if (UseMI.isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (const DataDep &Dep : Deps) {
+ const TraceBlockInfo &DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.isUsefulDominator(TBI))
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[&UseMI];
+ MICycles.Depth = Cycle;
+
+ if (TBI.HasValidInstrHeights) {
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+ } else {
+ LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
+ }
+}
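+
+// Editorial worked example of the loop above: with two dependencies, one
+// whose def has depth 3 and operand latency 2 and one whose def has depth 4
+// and operand latency 1, the issue depth of UseMI is max(3 + 2, 4 + 1) = 5.
+// Transient instructions such as COPY contribute latency 0, so they simply
+// propagate their operands' depths.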
+
+void MachineTraceMetrics::Ensemble::
+updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
+ SparseSet<LiveRegUnit> &RegUnits) {
+ updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
+}
+
+void MachineTraceMetrics::Ensemble::
+updateDepths(MachineBasicBlock::iterator Start,
+ MachineBasicBlock::iterator End,
+ SparseSet<LiveRegUnit> &RegUnits) {
+ for (; Start != End; Start++)
+ updateDepth(Start->getParent(), *Start, RegUnits);
+}
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Print out resource depths here as well.
+ LLVM_DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (const auto &UseMI : *MBB) {
+ updateDepth(TBI, UseMI, RegUnits);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
+ if (MO.readsReg())
+ ReadOps.push_back(MO.getOperandNo());
+ if (!MO.isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI.isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
+ I->MI, I->Op);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
+ MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ LiveRegUnit &LRU = RegUnits[Unit];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != &MI) {
+ LRU.Cycle = Height;
+ LRU.MI = &MI;
+ LRU.Op = ReadOps[I];
+ }
+ }
+ }
+
+ return Height;
+}
+
+using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
+ unsigned UseHeight, MIHeightMap &Heights,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
+ Dep.UseOp);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ Register Reg = DefMI->getOperand(DefOp).getReg();
+ assert(Reg.isVirtual());
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (LiveInReg &LI : TBI.LiveIns) {
+ if (LI.Reg.isVirtual()) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ LLVM_DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (const auto &PHI : *Succ) {
+ if (!PHI.isPHI())
+ break;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
+ LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
+ MTM.TII))
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (const MachineInstr &MI : reverse(*MBB)) {
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(&MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
+ MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (const DataDep &Dep : Deps)
+ if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
+
+ InstrCycles &MICycles = Cycles[&MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
+ for (LiveInReg &LIR : TBI.LiveIns) {
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (const LiveRegUnit &RU : RegUnits) {
+ TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
+ << RU.Cycle);
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+
+ if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
+ computeTrace(MBB);
+ if (!TBI.HasValidInstrDepths)
+ computeInstrDepths(MBB);
+ if (!TBI.HasValidInstrHeights)
+ computeInstrHeights(MBB);
+
+ return Trace(*this, TBI);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
+ assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ &PHI, Dep.UseOp);
+ return DepCycle;
+}
+
+/// When Bottom is set, include instructions in the current block in the
+/// estimate.
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned PRD : PRDepths)
+ PRMax = std::max(PRMax, PRD);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+  // All instructions before the current block,
+  unsigned Instrs = TBI.InstrDepth;
+  // plus instructions in the current block.
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceLength(
+ ArrayRef<const MachineBasicBlock *> Extrablocks,
+ ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
+ ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+
+  // Compute the cycles that the given extra instructions contribute to
+  // processor resource ResourceIdx.
+ auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
+ unsigned ResourceIdx)
+ ->unsigned {
+ unsigned Cycles = 0;
+ for (const MCSchedClassDesc *SC : Instrs) {
+ if (!SC->isValid())
+ continue;
+ for (TargetSchedModel::ProcResIter
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != ResourceIdx)
+ continue;
+ Cycles +=
+ (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
+ }
+ }
+ return Cycles;
+ };
+
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
+ PRCycles += extraCycles(ExtraInstrs, K);
+ PRCycles -= extraCycles(RemoveInstrs, K);
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ // Instrs: #instructions in current trace outside current block.
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ // Add instruction count from the extra blocks.
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ Instrs += TE.MTM.getResources(MBB)->InstrCount;
+ Instrs += ExtraInstrs.size();
+ Instrs -= RemoveInstrs.size();
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
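+// Return true if UseMI's dependency on DefMI stays within this trace: either
+// both instructions are in the same block, or DefMI's block is a useful
+// dominator of UseMI's block in the trace.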
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) const {
+ if (DefMI.getParent() == UseMI.getParent())
+ return true;
+
+ const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
+ const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
+
+ return DepTBI.isUsefulDominator(TBI);
+}
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " %bb." << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=" << printMBBReference(*Pred);
+ else
+ OS << " pred=null";
+ OS << " head=%bb." << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=" << printMBBReference(*Succ);
+ else
+ OS << " succ=null";
+ OS << " tail=%bb." << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
+ << " --> %bb." << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\n%bb." << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- " << printMBBReference(*Block->Pred);
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> " << printMBBReference(*Block->Succ);
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
new file mode 100644
index 000000000000..0e02c50284c6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -0,0 +1,264 @@
+//===- MachineUniformityAnalysis.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineUniformityAnalysis.h"
+#include "llvm/ADT/GenericUniformityImpl.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
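+// Return true if any register defined by \p I is currently marked divergent.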
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs(
+ const MachineInstr &I) const {
+ for (auto &op : I.all_defs()) {
+ if (isDivergent(op.getReg()))
+ return true;
+ }
+ return false;
+}
+
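+// Mark every virtual, non-uniform register defined by \p Instr as divergent.
+// Returns true if any register was newly marked.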
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::markDefsDivergent(
+ const MachineInstr &Instr) {
+ bool insertedDivergent = false;
+ const auto &MRI = F.getRegInfo();
+ const auto &RBI = *F.getSubtarget().getRegBankInfo();
+ const auto &TRI = *MRI.getTargetRegisterInfo();
+ for (auto &op : Instr.all_defs()) {
+ if (!op.getReg().isVirtual())
+ continue;
+ assert(!op.getSubReg());
+ if (TRI.isUniformReg(MRI, RBI, op.getReg()))
+ continue;
+ insertedDivergent |= markDivergent(op.getReg());
+ }
+ return insertedDivergent;
+}
+
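+// Seed the analysis from target information: instructions reported as
+// AlwaysUniform get a uniformity override, while NeverUniform instructions
+// are marked divergent.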
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
+ const auto &InstrInfo = *F.getSubtarget().getInstrInfo();
+
+ for (const MachineBasicBlock &block : F) {
+ for (const MachineInstr &instr : block) {
+ auto uniformity = InstrInfo.getInstructionUniformity(instr);
+ if (uniformity == InstructionUniformity::AlwaysUniform) {
+ addUniformOverride(instr);
+ continue;
+ }
+
+ if (uniformity == InstructionUniformity::NeverUniform) {
+ markDivergent(instr);
+ }
+ }
+ }
+}
+
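+// Mark every instruction that uses the divergent register \p Reg as divergent.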
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
+ Register Reg) {
+ assert(isDivergent(Reg));
+ const auto &RegInfo = F.getRegInfo();
+ for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
+ markDivergent(UserInstr);
+ }
+}
+
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
+ const MachineInstr &Instr) {
+ assert(!isAlwaysUniform(Instr));
+ if (Instr.isTerminator())
+ return;
+ for (const MachineOperand &op : Instr.all_defs()) {
+ auto Reg = op.getReg();
+ if (isDivergent(Reg))
+ pushUsers(Reg);
+ }
+}
+
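+// Return true if \p I reads a value defined inside \p DefCycle. Physical
+// register uses conservatively return true (see the FIXME below).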
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle(
+ const MachineInstr &I, const MachineCycle &DefCycle) const {
+ assert(!isAlwaysUniform(I));
+ for (auto &Op : I.operands()) {
+ if (!Op.isReg() || !Op.readsReg())
+ continue;
+ auto Reg = Op.getReg();
+
+ // FIXME: Physical registers need to be properly checked instead of always
+ // returning true
+ if (Reg.isPhysical())
+ return true;
+
+ auto *Def = F.getRegInfo().getVRegDef(Reg);
+ if (DefCycle.contains(Def->getParent()))
+ return true;
+ }
+ return false;
+}
+
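+// Values defined by \p I inside \p DefCycle become divergent when used
+// outside the cycle; mark such out-of-cycle users divergent.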
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::
+ propagateTemporalDivergence(const MachineInstr &I,
+ const MachineCycle &DefCycle) {
+ const auto &RegInfo = F.getRegInfo();
+ for (auto &Op : I.all_defs()) {
+ if (!Op.getReg().isVirtual())
+ continue;
+ auto Reg = Op.getReg();
+ if (isDivergent(Reg))
+ continue;
+ for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
+ if (DefCycle.contains(UserInstr.getParent()))
+ continue;
+ markDivergent(UserInstr);
+ }
+ }
+}
+
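+// A use is divergent if the register itself is divergent, if it has no
+// unique SSA def, or if the def is temporally divergent relative to the
+// use's block.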
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
+ const MachineOperand &U) const {
+ if (!U.isReg())
+ return false;
+
+ auto Reg = U.getReg();
+ if (isDivergent(Reg))
+ return true;
+
+ const auto &RegInfo = F.getRegInfo();
+ auto *Def = RegInfo.getOneDef(Reg);
+ if (!Def)
+ return true;
+
+ auto *DefInstr = Def->getParent();
+ auto *UseInstr = U.getParent();
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+}
+
+// This ensures explicit instantiation of
+// GenericUniformityAnalysisImpl::ImplDeleter::operator()
+template class llvm::GenericUniformityInfo<MachineSSAContext>;
+template struct llvm::GenericUniformityAnalysisImplDeleter<
+ llvm::GenericUniformityAnalysisImpl<MachineSSAContext>>;
+
+MachineUniformityInfo llvm::computeMachineUniformityInfo(
+ MachineFunction &F, const MachineCycleInfo &cycleInfo,
+ const MachineDomTree &domTree, bool HasBranchDivergence) {
+ assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
+ MachineUniformityInfo UI(F, domTree, cycleInfo);
+ if (HasBranchDivergence)
+ UI.compute();
+ return UI;
+}
+
+namespace {
+
+/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
+class MachineUniformityAnalysisPass : public MachineFunctionPass {
+ MachineUniformityInfo UI;
+
+public:
+ static char ID;
+
+ MachineUniformityAnalysisPass();
+
+ MachineUniformityInfo &getUniformityInfo() { return UI; }
+ const MachineUniformityInfo &getUniformityInfo() const { return UI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis
+};
+
+class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineUniformityInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // namespace
+
+char MachineUniformityAnalysisPass::ID = 0;
+
+MachineUniformityAnalysisPass::MachineUniformityAnalysisPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineUniformityAnalysisPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity",
+ "Machine Uniformity Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity",
+ "Machine Uniformity Info Analysis", true, true)
+
+void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) {
+ auto &DomTree = getAnalysis<MachineDominatorTree>().getBase();
+ auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ // FIXME: Query TTI::hasBranchDivergence. -run-pass seems to end up with a
+ // default NoTTI
+ UI = computeMachineUniformityInfo(MF, CI, DomTree, true);
+ return false;
+}
+
+void MachineUniformityAnalysisPass::print(raw_ostream &OS,
+ const Module *) const {
+ OS << "MachineUniformityInfo for function: " << UI.getFunction().getName()
+ << "\n";
+ UI.print(OS);
+}
+
+char MachineUniformityInfoPrinterPass::ID = 0;
+
+MachineUniformityInfoPrinterPass::MachineUniformityInfoPrinterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineUniformityInfoPrinterPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineUniformityInfoPrinterPass,
+ "print-machine-uniformity",
+ "Print Machine Uniformity Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
+INITIALIZE_PASS_END(MachineUniformityInfoPrinterPass,
+ "print-machine-uniformity",
+ "Print Machine Uniformity Info Analysis", true, true)
+
+void MachineUniformityInfoPrinterPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineUniformityAnalysisPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineUniformityInfoPrinterPass::runOnMachineFunction(
+ MachineFunction &F) {
+ auto &UI = getAnalysis<MachineUniformityAnalysisPass>();
+ UI.print(errs());
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 000000000000..7acd3c4039e8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,3465 @@
+//===- MachineVerifier.cpp - Machine Code Verifier ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled with the command-line option
+// -verify-machineinstrs.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ModRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+ struct MachineVerifier {
+ MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
+
+ unsigned verify(const MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const MachineFunction *MF = nullptr;
+ const TargetMachine *TM = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const RegisterBankInfo *RBI = nullptr;
+
+ unsigned foundErrors = 0;
+
+ // Avoid querying the MachineFunctionProperties for each operand.
+ bool isFunctionRegBankSelected = false;
+ bool isFunctionSelected = false;
+ bool isFunctionTracksDebugUserValues = false;
+
+ using RegVector = SmallVector<Register, 16>;
+ using RegMaskVector = SmallVector<const uint32_t *, 4>;
+ using RegSet = DenseSet<Register>;
+ using RegMap = DenseMap<Register, const MachineInstr *>;
+ using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
+
+ const MachineInstr *FirstNonPHI = nullptr;
+ const MachineInstr *FirstTerminator = nullptr;
+ BlockSet FunctionBlocks;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, Register Reg) {
+ RV.push_back(Reg);
+ if (Reg.isPhysical())
+ append_range(RV, TRI->subregs(Reg.asMCReg()));
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable = false;
+
+ // Vregs that must be live in because they are used without being
+      // defined. The map value is the user. vregsLiveIn doesn't include regs
+      // that are only used by PHI nodes.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
+ BBInfo() = default;
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(Register Reg) {
+ if (!Reg.isVirtual())
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool Changed = false;
+ for (Register Reg : RS)
+ Changed |= addRequired(Reg);
+ return Changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool Changed = false;
+ for (const auto &I : RM)
+ Changed |= addRequired(I.first);
+ return Changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(Register Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(Register Reg) {
+ return Reg.id() < regsReserved.size() && regsReserved.test(Reg.id());
+ }
+
+ bool isAllocatable(Register Reg) const {
+ return Reg.id() < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
+ !regsReserved.test(Reg.id());
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars = nullptr;
+ LiveIntervals *LiveInts = nullptr;
+ LiveStacks *LiveStks = nullptr;
+ SlotIndexes *Indexes = nullptr;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineBundleBefore(const MachineInstr *MI);
+
+ /// Verify that all of \p MI's virtual register operands are scalars.
+ /// \returns True if all virtual register operands are scalar. False
+ /// otherwise.
+ bool verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+ bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
+ void verifyPreISelGenericInstruction(const MachineInstr *MI);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineBundleAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum,
+ LLT MOVRegType = LLT{});
+ void report(const Twine &Msg, const MachineInstr *MI);
+
+ void report_context(const LiveInterval &LI) const;
+ void report_context(const LiveRange &LR, Register VRegUnit,
+ LaneBitmask LaneMask) const;
+ void report_context(const LiveRange::Segment &S) const;
+ void report_context(const VNInfo &VNI) const;
+ void report_context(SlotIndex Pos) const;
+ void report_context(MCPhysReg PhysReg) const;
+ void report_context_liverange(const LiveRange &LR) const;
+ void report_context_lanemask(LaneBitmask LaneMask) const;
+ void report_context_vreg(Register VReg) const;
+ void report_context_vreg_regunit(Register VRegOrUnit) const;
+
+ void verifyInlineAsm(const MachineInstr *MI);
+
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
+ void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
+ SlotIndex UseIdx, const LiveRange &LR,
+ Register VRegOrUnit,
+ LaneBitmask LaneMask = LaneBitmask::getNone());
+ void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
+ SlotIndex DefIdx, const LiveRange &LR,
+ Register VRegOrUnit, bool SubRangeCheck = false,
+ LaneBitmask LaneMask = LaneBitmask::getNone());
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock &MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveRangeValue(const LiveRange &, const VNInfo *, Register,
+ LaneBitmask);
+ void verifyLiveRangeSegment(const LiveRange &,
+ const LiveRange::const_iterator I, Register,
+ LaneBitmask);
+ void verifyLiveRange(const LiveRange &, Register,
+ LaneBitmask LaneMask = LaneBitmask::getNone());
+
+ void verifyStackFrame();
+
+ void verifySlotIndexes() const;
+ void verifyProperties(const MachineFunction &MF);
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ const std::string Banner;
+
+ MachineVerifierPass(std::string banner = std::string())
+ : MachineFunctionPass(ID), Banner(std::move(banner)) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addUsedIfAvailable<LiveStacks>();
+ AU.addUsedIfAvailable<LiveVariables>();
+ AU.addUsedIfAvailable<SlotIndexes>();
+ AU.addUsedIfAvailable<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Skip functions that have known verification problems.
+ // FIXME: Remove this mechanism when all problematic passes have been
+ // fixed.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification))
+ return false;
+
+ unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
+ return false;
+ }
+ };
+
+} // end anonymous namespace
+
+char MachineVerifierPass::ID = 0;
+
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
+ return new MachineVerifierPass(Banner);
+}
+
+void llvm::verifyMachineFunction(MachineFunctionAnalysisManager *,
+ const std::string &Banner,
+ const MachineFunction &MF) {
+ // TODO: Use MFAM after porting below analyses.
+ // LiveVariables *LiveVars;
+ // LiveIntervals *LiveInts;
+ // LiveStacks *LiveStks;
+ // SlotIndexes *Indexes;
+ unsigned FoundErrors = MachineVerifier(nullptr, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
+}
+
+bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
+ const {
+ MachineFunction &MF = const_cast<MachineFunction&>(*this);
+ unsigned FoundErrors = MachineVerifier(p, Banner).verify(MF);
+ if (AbortOnErrors && FoundErrors)
+ report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
+ return FoundErrors == 0;
+}
+
+void MachineVerifier::verifySlotIndexes() const {
+ if (Indexes == nullptr)
+ return;
+
+ // Ensure the IdxMBB list is sorted by slot indexes.
+ SlotIndex Last;
+ for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(),
+ E = Indexes->MBBIndexEnd(); I != E; ++I) {
+ assert(!Last.isValid() || I->first > Last);
+ Last = I->first;
+ }
+}
+
+void MachineVerifier::verifyProperties(const MachineFunction &MF) {
+ // If a pass has introduced virtual registers without clearing the
+ // NoVRegs property (or set it without allocating the vregs)
+ // then report an error.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs) &&
+ MRI->getNumVirtRegs())
+ report("Function has NoVRegs property but there are VReg operands", &MF);
+}
+
+unsigned MachineVerifier::verify(const MachineFunction &MF) {
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ RBI = MF.getSubtarget().getRegBankInfo();
+ MRI = &MF.getRegInfo();
+
+ const bool isFunctionFailedISel = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel);
+
+ // If we're mid-GlobalISel and we already triggered the fallback path then
+ // it's expected that the MIR is somewhat broken but that's ok since we'll
+ // reset it and clear the FailedISel attribute in ResetMachineFunctions.
+ if (isFunctionFailedISel)
+ return foundErrors;
+
+ isFunctionRegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ isFunctionSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
+ isFunctionTracksDebugUserValues = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
+
+ LiveVars = nullptr;
+ LiveInts = nullptr;
+ LiveStks = nullptr;
+ Indexes = nullptr;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ verifySlotIndexes();
+
+ verifyProperties(MF);
+
+ visitMachineFunctionBefore();
+ for (const MachineBasicBlock &MBB : MF) {
+ visitMachineBasicBlockBefore(&MBB);
+ // Keep track of the current bundle header.
+ const MachineInstr *CurBundle = nullptr;
+ // Do we expect the next instruction to be part of the same bundle?
+ bool InBundle = false;
+
+ for (const MachineInstr &MI : MBB.instrs()) {
+ if (MI.getParent() != &MBB) {
+ report("Bad instruction parent pointer", &MBB);
+ errs() << "Instruction: " << MI;
+ continue;
+ }
+
+ // Check for consistent bundle flags.
+ if (InBundle && !MI.isBundledWithPred())
+ report("Missing BundledPred flag, "
+ "BundledSucc was set on predecessor",
+ &MI);
+ if (!InBundle && MI.isBundledWithPred())
+ report("BundledPred flag is set, "
+ "but BundledSucc not set on predecessor",
+ &MI);
+
+ // Is this a bundle header?
+ if (!MI.isInsideBundle()) {
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ CurBundle = &MI;
+ visitMachineBundleBefore(CurBundle);
+ } else if (!CurBundle)
+ report("No bundle header", &MI);
+ visitMachineInstrBefore(&MI);
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &Op = MI.getOperand(I);
+ if (Op.getParent() != &MI) {
+ // Make sure to use correct addOperand / removeOperand / ChangeTo
+ // functions when replacing operands of a MachineInstr.
+ report("Instruction has operand with wrong parent set", &MI);
+ }
+
+ visitMachineOperand(&Op, I);
+ }
+
+ // Was this the last bundled instruction?
+ InBundle = MI.isBundledWithSucc();
+ }
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ if (InBundle)
+ report("BundledSucc flag set on last instruction in block", &MBB.back());
+ visitMachineBasicBlockAfter(&MBB);
+ }
+ visitMachineFunctionAfter();
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regMasks.clear();
+ MBBInfoMap.clear();
+
+ return foundErrors;
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ errs() << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ errs() << "# " << Banner << '\n';
+ if (LiveInts != nullptr)
+ LiveInts->print(errs());
+ else
+ MF->print(errs(), Indexes);
+ }
+ errs() << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getName() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ errs() << "- basic block: " << printMBBReference(*MBB) << ' '
+ << MBB->getName() << " (" << (const void *)MBB << ')';
+ if (Indexes)
+ errs() << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ errs() << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ errs() << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(*MI))
+ errs() << Indexes->getInstructionIndex(*MI) << '\t';
+ MI->print(errs(), /*IsStandalone=*/true);
+}
+
+void MachineVerifier::report(const char *msg, const MachineOperand *MO,
+ unsigned MONum, LLT MOVRegType) {
+ assert(MO);
+ report(msg, MO->getParent());
+ errs() << "- operand " << MONum << ": ";
+ MO->print(errs(), MOVRegType, TRI);
+ errs() << "\n";
+}
+
+void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) {
+ report(Msg.str().c_str(), MI);
+}
+
+void MachineVerifier::report_context(SlotIndex Pos) const {
+ errs() << "- at: " << Pos << '\n';
+}
+
+void MachineVerifier::report_context(const LiveInterval &LI) const {
+ errs() << "- interval: " << LI << '\n';
+}
+
+void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit,
+ LaneBitmask LaneMask) const {
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegUnit);
+ if (LaneMask.any())
+ report_context_lanemask(LaneMask);
+}
+
+void MachineVerifier::report_context(const LiveRange::Segment &S) const {
+ errs() << "- segment: " << S << '\n';
+}
+
+void MachineVerifier::report_context(const VNInfo &VNI) const {
+ errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n";
+}
+
+void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
+ errs() << "- liverange: " << LR << '\n';
+}
+
+void MachineVerifier::report_context(MCPhysReg PReg) const {
+ errs() << "- p. register: " << printReg(PReg, TRI) << '\n';
+}
+
+void MachineVerifier::report_context_vreg(Register VReg) const {
+ errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
+}
+
+void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const {
+ if (VRegOrUnit.isVirtual()) {
+ report_context_vreg(VRegOrUnit);
+ } else {
+ errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n';
+ }
+}
+
+void MachineVerifier::report_context_lanemask(LaneBitmask LaneMask) const {
+ errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n';
+}
+
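+// Recursively mark \p MBB and all blocks reachable from it as reachable.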
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ markReachable(Succ);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = MRI->reservedRegsFrozen() ? MRI->getReservedRegs()
+ : TRI->getReservedRegs(*MF);
+
+ if (!MF->empty())
+ markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (const auto &MBB : *MF) {
+ FunctionBlocks.insert(&MBB);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ MInfo.Preds.insert(MBB.pred_begin(), MBB.pred_end());
+ if (MInfo.Preds.size() != MBB.pred_size())
+ report("MBB has duplicate entries in its predecessor list.", &MBB);
+
+ MInfo.Succs.insert(MBB.succ_begin(), MBB.succ_end());
+ if (MInfo.Succs.size() != MBB.succ_size())
+ report("MBB has duplicate entries in its successor list.", &MBB);
+ }
+
+ // Check that the register use lists are sane.
+ MRI->verifyUseLists();
+
+ if (!MF->empty())
+ verifyStackFrame();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ FirstTerminator = nullptr;
+ FirstNonPHI = nullptr;
+
+ if (!MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
+ for (const auto &LI : MBB->liveins()) {
+ if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
+ MBB->getIterator() != MBB->getParent()->begin() &&
+ !MBB->isInlineAsmBrIndirectTarget()) {
+ report("MBB has allocatable live-in, but isn't entry, landing-pad, or "
+ "inlineasm-br-indirect-target.",
+ MBB);
+ report_context(LI.PhysReg);
+ }
+ }
+ }
+
+ if (MBB->isIRBlockAddressTaken()) {
+ if (!MBB->getAddressTakenIRBlock()->hasAddressTaken())
+ report("ir-block-address-taken is associated with basic block not used by "
+ "a blockaddress.",
+ MBB);
+ }
+
+ // Count the number of landing pad successors.
+ SmallPtrSet<const MachineBasicBlock*, 4> LandingPadSuccs;
+ for (const auto *succ : MBB->successors()) {
+ if (succ->isEHPad())
+ LandingPadSuccs.insert(succ);
+ if (!FunctionBlocks.count(succ))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[succ].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ errs() << "MBB is not in the predecessor list of the successor "
+ << printMBBReference(*succ) << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!FunctionBlocks.count(Pred))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[Pred].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ errs() << "MBB is not in the successor list of the predecessor "
+ << printMBBReference(*Pred) << ".\n";
+ }
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ const Function &F = MF->getFunction();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())) &&
+ !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ report("MBB has more than one landing pad successor", MBB);
+
+  // Call analyzeBranch. If it succeeds, there are several more conditions to
+  // check.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB,
+ Cond)) {
+ // Ok, analyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ if (!MBB->empty() && MBB->back().isBarrier() &&
+ !TII->isPredicated(MBB->back())) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditional branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("analyzeBranch returned invalid data!", MBB);
+ }
+
+ // Now check that the successors match up with the answers reported by
+ // analyzeBranch.
+ if (TBB && !MBB->isSuccessor(TBB))
+ report("MBB exits via jump or conditional branch, but its target isn't a "
+ "CFG successor!",
+ MBB);
+ if (FBB && !MBB->isSuccessor(FBB))
+ report("MBB exits via conditional branch, but its target isn't a CFG "
+ "successor!",
+ MBB);
+
+ // There might be a fallthrough to the next block if there's either no
+ // unconditional true branch, or if there's a condition, and one of the
+ // branches is missing.
+ bool Fallthrough = !TBB || (!Cond.empty() && !FBB);
+
+ // A conditional fallthrough must be an actual CFG successor, not
+ // unreachable. (Conversely, an unconditional fallthrough might not really
+ // be a successor, because the block might end in unreachable.)
+ if (!Cond.empty() && !FBB) {
+ MachineFunction::const_iterator MBBI = std::next(MBB->getIterator());
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (!MBB->isSuccessor(&*MBBI))
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!",
+ MBB);
+ }
+
+ // Verify that there aren't any extra un-accounted-for successors.
+ for (const MachineBasicBlock *SuccMBB : MBB->successors()) {
+ // If this successor is one of the branch targets, it's okay.
+ if (SuccMBB == TBB || SuccMBB == FBB)
+ continue;
+ // If we might have a fallthrough, and the successor is the fallthrough
+ // block, that's also ok.
+ if (Fallthrough && SuccMBB == MBB->getNextNode())
+ continue;
+ // Also accept successors which are for exception-handling or might be
+ // inlineasm_br targets.
+ if (SuccMBB->isEHPad() || SuccMBB->isInlineAsmBrIndirectTarget())
+ continue;
+ report("MBB has unexpected successors which are not branch targets, "
+ "fallthrough, EHPads, or inlineasm_br targets.",
+ MBB);
+ }
+ }
+
+ regsLive.clear();
+ if (MRI->tracksLiveness()) {
+ for (const auto &LI : MBB->liveins()) {
+ if (!Register::isPhysicalRegister(LI.PhysReg)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ for (const MCPhysReg &SubReg : TRI->subregs_inclusive(LI.PhysReg))
+ regsLive.insert(SubReg);
+ }
+ }
+
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ BitVector PR = MFI.getPristineRegs(*MF);
+ for (unsigned I : PR.set_bits()) {
+ for (const MCPhysReg &SubReg : TRI->subregs_inclusive(I))
+ regsLive.insert(SubReg);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+ if (Indexes && Indexes->hasIndex(*MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(*MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ errs() << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ if (MI->isTerminator()) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ // For GlobalISel, G_INVOKE_REGION_START is a terminator that we allow to
+ // precede non-terminators.
+ if (FirstTerminator->getOpcode() != TargetOpcode::G_INVOKE_REGION_START) {
+ report("Non-terminator instruction after the first terminator", MI);
+ errs() << "First terminator was:\t" << *FirstTerminator;
+ }
+ }
+}
+
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+ // The first two operands on INLINEASM are the asm string and global flags.
+ if (MI->getNumOperands() < 2) {
+ report("Too few operands on inline asm", MI);
+ return;
+ }
+ if (!MI->getOperand(0).isSymbol())
+ report("Asm string must be an external symbol", MI);
+ if (!MI->getOperand(1).isImm())
+ report("Asm flags must be an immediate", MI);
+  // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+  // Extra_AsmDialect = 4, Extra_MayLoad = 8, Extra_MayStore = 16,
+  // and Extra_IsConvergent = 32.
+ if (!isUInt<6>(MI->getOperand(1).getImm()))
+ report("Unknown asm flags", &MI->getOperand(1), 1);
+
+ static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
+
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ unsigned NumOps;
+ for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // There may be implicit ops after the fixed operands.
+ if (!MO.isImm())
+ break;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+ }
+
+ if (OpNo > MI->getNumOperands())
+ report("Missing operands in last group", MI);
+
+ // An optional MDNode follows the groups.
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata())
+ ++OpNo;
+
+ // All trailing operands must be implicit registers.
+ for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isReg() || !MO.isImplicit())
+ report("Expected implicit register after groups", &MO, OpNo);
+ }
+
+ if (MI->getOpcode() == TargetOpcode::INLINEASM_BR) {
+ const MachineBasicBlock *MBB = MI->getParent();
+
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isMBB())
+ continue;
+
+      // Check that the successor and predecessor lists look OK; assume they
+      // do not. Find the indirect target without going through the successors.
+ const MachineBasicBlock *IndirectTargetMBB = MO.getMBB();
+ if (!IndirectTargetMBB) {
+ report("INLINEASM_BR indirect target does not exist", &MO, i);
+ break;
+ }
+
+ if (!MBB->isSuccessor(IndirectTargetMBB))
+ report("INLINEASM_BR indirect target missing from successor list", &MO,
+ i);
+
+ if (!IndirectTargetMBB->isPredecessor(MBB))
+ report("INLINEASM_BR indirect target predecessor list missing parent",
+ &MO, i);
+ }
+ }
+}
+
+bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (none_of(MI.explicit_operands(), [&MRI](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ const auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return false;
+ return !MRI.getType(Reg).isScalar();
+ }))
+ return true;
+ report("All register operands must have scalar types", &MI);
+ return false;
+}
+
+/// Check that types are consistent when two operands need to have the same
+/// number of vector elements.
+/// \return true if the types are valid.
+bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
+ const MachineInstr *MI) {
+ if (Ty0.isVector() != Ty1.isVector()) {
+ report("operand types must be all-vector or all-scalar", MI);
+ // Generally we try to report as many issues as possible at once, but in
+    // this case it's not clear what we should be comparing the size of the
+    // scalar with: the size of the whole vector or its lane. Instead of
+    // making an arbitrary choice and emitting a not-so-helpful message, let's
+ // avoid the extra noise and stop here.
+ return false;
+ }
+
+ if (Ty0.isVector() && Ty0.getNumElements() != Ty1.getNumElements()) {
+ report("operand types must preserve number of vector elements", MI);
+ return false;
+ }
+
+ return true;
+}
+
+void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
+ if (isFunctionSelected)
+ report("Unexpected generic instruction in a Selected function", MI);
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MI->getNumOperands();
+
+ // Branches must reference a basic block if they are not indirect
+ if (MI->isBranch() && !MI->isIndirectBranch()) {
+ bool HasMBB = false;
+ for (const MachineOperand &Op : MI->operands()) {
+ if (Op.isMBB()) {
+ HasMBB = true;
+ break;
+ }
+ }
+
+ if (!HasMBB) {
+ report("Branch instruction is missing a basic block operand or "
+ "isIndirectBranch property",
+ MI);
+ }
+ }
+
+ // Check types.
+ SmallVector<LLT, 4> Types;
+ for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps);
+ I != E; ++I) {
+ if (!MCID.operands()[I].isGenericType())
+ continue;
+ // Generic instructions specify type equality constraints between some of
+ // their operands. Make sure these are consistent.
+ size_t TypeIdx = MCID.operands()[I].getGenericTypeIndex();
+ Types.resize(std::max(TypeIdx + 1, Types.size()));
+
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (!MO->isReg()) {
+ report("generic instruction must use register operands", MI);
+ continue;
+ }
+
+ LLT OpTy = MRI->getType(MO->getReg());
+ // Don't report a type mismatch if there is no actual mismatch, only a
+    // missing type, to reduce noise:
+ if (OpTy.isValid()) {
+ // Only the first valid type for a type index will be printed: don't
+ // overwrite it later so it's always clear which type was expected:
+ if (!Types[TypeIdx].isValid())
+ Types[TypeIdx] = OpTy;
+ else if (Types[TypeIdx] != OpTy)
+ report("Type mismatch in generic instruction", MO, I, OpTy);
+ } else {
+ // Generic instructions must have types attached to their operands.
+ report("Generic instruction is missing a virtual register type", MO, I);
+ }
+ }
+
+ // Generic opcodes must not have physical register operands.
+ for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (MO->isReg() && MO->getReg().isPhysical())
+ report("Generic instruction cannot have physical register", MO, I);
+ }
+
+  // Avoid out-of-bounds accesses in the checks below. This was already
+  // reported earlier.
+ if (MI->getNumOperands() < MCID.getNumOperands())
+ return;
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(*MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_ASSERT_SEXT:
+ case TargetOpcode::G_ASSERT_ZEXT: {
+ std::string OpcName =
+ Opc == TargetOpcode::G_ASSERT_ZEXT ? "G_ASSERT_ZEXT" : "G_ASSERT_SEXT";
+ if (!MI->getOperand(2).isImm()) {
+ report(Twine(OpcName, " expects an immediate operand #2"), MI);
+ break;
+ }
+
+ Register Dst = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+ LLT SrcTy = MRI->getType(Src);
+ int64_t Imm = MI->getOperand(2).getImm();
+ if (Imm <= 0) {
+ report(Twine(OpcName, " size must be >= 1"), MI);
+ break;
+ }
+
+ if (Imm >= SrcTy.getScalarSizeInBits()) {
+ report(Twine(OpcName, " size must be less than source bit width"), MI);
+ break;
+ }
+
+ const RegisterBank *SrcRB = RBI->getRegBank(Src, *MRI, *TRI);
+ const RegisterBank *DstRB = RBI->getRegBank(Dst, *MRI, *TRI);
+
+ // Allow only the source bank to be set.
+ if ((SrcRB && DstRB && SrcRB != DstRB) || (DstRB && !SrcRB)) {
+ report(Twine(OpcName, " cannot change register bank"), MI);
+ break;
+ }
+
+ // Don't allow a class change. Do allow member class->regbank.
+ const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Dst);
+ if (DstRC && DstRC != MRI->getRegClassOrNull(Src)) {
+ report(
+ Twine(OpcName, " source and destination register classes must match"),
+ MI);
+ break;
+ }
+
+ break;
+ }
+
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.isVector())
+ report("Instruction cannot use a vector result type", MI);
+
+ if (MI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ if (!MI->getOperand(1).isCImm()) {
+ report("G_CONSTANT operand must be cimm", MI);
+ break;
+ }
+
+ const ConstantInt *CI = MI->getOperand(1).getCImm();
+ if (CI->getBitWidth() != DstTy.getSizeInBits())
+ report("inconsistent constant size", MI);
+ } else {
+ if (!MI->getOperand(1).isFPImm()) {
+ report("G_FCONSTANT operand must be fpimm", MI);
+ break;
+ }
+ const ConstantFP *CF = MI->getOperand(1).getFPImm();
+
+ if (APFloat::getSizeInBits(CF->getValueAPF().getSemantics()) !=
+ DstTy.getSizeInBits()) {
+ report("inconsistent constant size", MI);
+ }
+ }
+
+ break;
+ }
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
+ LLT ValTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!PtrTy.isPointer())
+ report("Generic memory instruction must access a pointer", MI);
+
+ // Generic loads and stores must have a single MachineMemOperand
+ // describing that access.
+ if (!MI->hasOneMemOperand()) {
+ report("Generic instruction accessing memory must have one mem operand",
+ MI);
+ } else {
+ const MachineMemOperand &MMO = **MI->memoperands_begin();
+ if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD ||
+ MI->getOpcode() == TargetOpcode::G_SEXTLOAD) {
+ if (MMO.getSizeInBits() >= ValTy.getSizeInBits())
+ report("Generic extload must have a narrower memory type", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_LOAD) {
+ if (MMO.getSize() > ValTy.getSizeInBytes())
+ report("load memory size cannot exceed result size", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_STORE) {
+ if (ValTy.getSizeInBytes() < MMO.getSize())
+ report("store memory size cannot exceed value size", MI);
+ }
+
+ const AtomicOrdering Order = MMO.getSuccessOrdering();
+ if (Opc == TargetOpcode::G_STORE) {
+ if (Order == AtomicOrdering::Acquire ||
+ Order == AtomicOrdering::AcquireRelease)
+ report("atomic store cannot use acquire ordering", MI);
+
+ } else {
+ if (Order == AtomicOrdering::Release ||
+ Order == AtomicOrdering::AcquireRelease)
+ report("atomic load cannot use release ordering", MI);
+ }
+ }
+
+ break;
+ }
+ case TargetOpcode::G_PHI: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstTy.isValid() || !all_of(drop_begin(MI->operands()),
+ [this, &DstTy](const MachineOperand &MO) {
+ if (!MO.isReg())
+ return true;
+ LLT Ty = MRI->getType(MO.getReg());
+ if (!Ty.isValid() || (Ty != DstTy))
+ return false;
+ return true;
+ }))
+ report("Generic Instruction G_PHI has operands with incompatible/missing "
+ "types",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ if (SrcTy.isPointer() != DstTy.isPointer())
+ report("bitcast cannot convert between pointers and other types", MI);
+
+ if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ report("bitcast sizes must match", MI);
+
+ if (SrcTy == DstTy)
+ report("bitcast must change the type", MI);
+
+ break;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_ADDRSPACE_CAST: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+ DstTy = DstTy.getScalarType();
+ SrcTy = SrcTy.getScalarType();
+
+ if (MI->getOpcode() == TargetOpcode::G_INTTOPTR) {
+ if (!DstTy.isPointer())
+ report("inttoptr result type must be a pointer", MI);
+ if (SrcTy.isPointer())
+ report("inttoptr source type must not be a pointer", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_PTRTOINT) {
+ if (!SrcTy.isPointer())
+ report("ptrtoint source type must be a pointer", MI);
+ if (DstTy.isPointer())
+ report("ptrtoint result type must not be a pointer", MI);
+ } else {
+ assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST);
+ if (!SrcTy.isPointer() || !DstTy.isPointer())
+ report("addrspacecast types must be pointers", MI);
+ else {
+ if (SrcTy.getAddressSpace() == DstTy.getAddressSpace())
+ report("addrspacecast must convert different address spaces", MI);
+ }
+ }
+
+ break;
+ }
+ case TargetOpcode::G_PTR_ADD: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid())
+ break;
+
+ if (!PtrTy.getScalarType().isPointer())
+ report("gep first operand must be a pointer", MI);
+
+ if (OffsetTy.getScalarType().isPointer())
+ report("gep offset operand must not be a pointer", MI);
+
+ // TODO: Is the offset allowed to be a scalar with a vector?
+ break;
+ }
+ case TargetOpcode::G_PTRMASK: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT MaskTy = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid() || !MaskTy.isValid())
+ break;
+
+ if (!DstTy.getScalarType().isPointer())
+ report("ptrmask result type must be a pointer", MI);
+
+ if (!MaskTy.getScalarType().isScalar())
+ report("ptrmask mask type must be an integer", MI);
+
+ verifyVectorElementMatch(DstTy, MaskTy, MI);
+ break;
+ }
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC: {
+ // The number of operands and the presence of types have already been
+ // checked (and reported in case of any issues), so there is no need to
+ // report them again. However, since we try to report as many issues as
+ // possible at once, the instruction isn't guaranteed to have the right
+ // number of operands or types attached to it at this point.
+ assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}");
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ LLT DstElTy = DstTy.getScalarType();
+ LLT SrcElTy = SrcTy.getScalarType();
+ if (DstElTy.isPointer() || SrcElTy.isPointer())
+ report("Generic extend/truncate can not operate on pointers", MI);
+
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+ unsigned DstSize = DstElTy.getSizeInBits();
+ unsigned SrcSize = SrcElTy.getSizeInBits();
+ switch (MI->getOpcode()) {
+ default:
+ if (DstSize <= SrcSize)
+ report("Generic extend has destination type no larger than source", MI);
+ break;
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_FPTRUNC:
+ if (DstSize >= SrcSize)
+ report("Generic truncate has destination type no smaller than source",
+ MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ LLT SelTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT CondTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!SelTy.isValid() || !CondTy.isValid())
+ break;
+
+ // Scalar condition select on a vector is valid.
+ if (CondTy.isVector())
+ verifyVectorElementMatch(SelTy, CondTy, MI);
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ // G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
+ // e.g. s2N = MERGE sN, sN
+ // Merging multiple scalars into a vector is not allowed; use
+ // G_BUILD_VECTOR for that.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (DstTy.isVector() || SrcTy.isVector())
+ report("G_MERGE_VALUES cannot operate on vectors", MI);
+
+ const unsigned NumOps = MI->getNumOperands();
+ if (DstTy.getSizeInBits() != SrcTy.getSizeInBits() * (NumOps - 1))
+ report("G_MERGE_VALUES result size is inconsistent", MI);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ if (MRI->getType(MI->getOperand(I).getReg()) != SrcTy)
+ report("G_MERGE_VALUES source types do not match", MI);
+ }
+
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ unsigned NumDsts = MI->getNumOperands() - 1;
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ for (unsigned i = 1; i < NumDsts; ++i) {
+ if (MRI->getType(MI->getOperand(i).getReg()) != DstTy) {
+ report("G_UNMERGE_VALUES destination types do not match", MI);
+ break;
+ }
+ }
+
+ LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg());
+ if (DstTy.isVector()) {
+ // This case is the converse of G_CONCAT_VECTORS.
+ if (!SrcTy.isVector() || SrcTy.getScalarType() != DstTy.getScalarType() ||
+ SrcTy.getNumElements() != NumDsts * DstTy.getNumElements())
+ report("G_UNMERGE_VALUES source operand does not match vector "
+ "destination operands",
+ MI);
+ } else if (SrcTy.isVector()) {
+ // This case is the converse of G_BUILD_VECTOR, but relaxed to allow
+ // mismatched types as long as the total size matches:
+ // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<4 x s32>)
+ if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
+ report("G_UNMERGE_VALUES vector source operand does not match scalar "
+ "destination operands",
+ MI);
+ } else {
+ // This case is the converse of G_MERGE_VALUES.
+ if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) {
+ report("G_UNMERGE_VALUES scalar source operand does not match scalar "
+ "destination operands",
+ MI);
+ }
+ }
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // Source types must be scalars, dest type a vector. Total size of scalars
+ // must match the dest vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector()) {
+ report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+ break;
+ }
+
+ if (DstTy.getElementType() != SrcEltTy)
+ report("G_BUILD_VECTOR result element type must match source type", MI);
+
+ if (DstTy.getNumElements() != MI->getNumOperands() - 1)
+ report("G_BUILD_VECTOR must have an operand for each elemement", MI);
+
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
+ report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
+
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ // Source types must be scalars, dest type a vector. Scalar types must be
+ // larger than the dest vector elt type, as this is a truncating operation.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
+ MI);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
+ report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
+ MI);
+ if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
+ report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
+ "dest elt type",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ // Source types should be vectors, and total size should match the dest
+ // vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || !SrcTy.isVector())
+ report("G_CONCAT_VECTOR requires vector source and destination operands",
+ MI);
+
+ if (MI->getNumOperands() < 3)
+ report("G_CONCAT_VECTOR requires at least 2 source operands", MI);
+
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
+ report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
+ if (DstTy.getNumElements() !=
+ SrcTy.getNumElements() * (MI->getNumOperands() - 1))
+ report("G_CONCAT_VECTOR num dest and source elements should match", MI);
+ break;
+ }
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(2).getReg());
+
+ if ((DstTy.isVector() != SrcTy.isVector()) ||
+ (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()))
+ report("Generic vector icmp/fcmp must preserve number of lanes", MI);
+
+ break;
+ }
+ case TargetOpcode::G_EXTRACT: {
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ if (!SrcOp.isReg()) {
+ report("extract source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &OffsetOp = MI->getOperand(2);
+ if (!OffsetOp.isImm()) {
+ report("extract offset must be a constant", MI);
+ break;
+ }
+
+ unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+ if (SrcSize == DstSize)
+ report("extract source must be larger than result", MI);
+
+ if (DstSize + OffsetOp.getImm() > SrcSize)
+ report("extract reads past end of register", MI);
+ break;
+ }
+ case TargetOpcode::G_INSERT: {
+ const MachineOperand &SrcOp = MI->getOperand(2);
+ if (!SrcOp.isReg()) {
+ report("insert source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+ if (!OffsetOp.isImm()) {
+ report("insert offset must be a constant", MI);
+ break;
+ }
+
+ unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+
+ if (DstSize <= SrcSize)
+ report("inserted size must be smaller than total register", MI);
+
+ if (SrcSize + OffsetOp.getImm() > DstSize)
+ report("insert writes past end of register", MI);
+
+ break;
+ }
+ case TargetOpcode::G_JUMP_TABLE: {
+ if (!MI->getOperand(1).isJTI())
+ report("G_JUMP_TABLE source operand must be a jump table index", MI);
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstTy.isPointer())
+ report("G_JUMP_TABLE dest operand must have a pointer type", MI);
+ break;
+ }
+ case TargetOpcode::G_BRJT: {
+ if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+ report("G_BRJT src operand 0 must be a pointer type", MI);
+
+ if (!MI->getOperand(1).isJTI())
+ report("G_BRJT src operand 1 must be a jump table index", MI);
+
+ const auto &IdxOp = MI->getOperand(2);
+ if (!IdxOp.isReg() || MRI->getType(IdxOp.getReg()).isPointer())
+ report("G_BRJT src operand 2 must be a scalar reg type", MI);
+ break;
+ }
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
+ // TODO: Should verify number of def and use operands, but the current
+ // interface requires passing in IR types for mangling.
+ const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
+ if (!IntrIDOp.isIntrinsicID()) {
+ report("G_INTRINSIC first src operand must be an intrinsic ID", MI);
+ break;
+ }
+
+ bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
+ unsigned IntrID = IntrIDOp.getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs = Intrinsic::getAttributes(
+ MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
+ bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ if (NoSideEffects && DeclHasSideEffects) {
+ report("G_INTRINSIC used with intrinsic that accesses memory", MI);
+ break;
+ }
+ if (!NoSideEffects && !DeclHasSideEffects) {
+ report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
+ break;
+ }
+ }
+
+ break;
+ }
+ case TargetOpcode::G_SEXT_INREG: {
+ if (!MI->getOperand(2).isImm()) {
+ report("G_SEXT_INREG expects an immediate operand #2", MI);
+ break;
+ }
+
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ int64_t Imm = MI->getOperand(2).getImm();
+ if (Imm <= 0)
+ report("G_SEXT_INREG size must be >= 1", MI);
+ if (Imm >= SrcTy.getScalarSizeInBits())
+ report("G_SEXT_INREG size must be less than source bit width", MI);
+ break;
+ }
+ case TargetOpcode::G_SHUFFLE_VECTOR: {
+ const MachineOperand &MaskOp = MI->getOperand(3);
+ if (!MaskOp.isShuffleMask()) {
+ report("Incorrect mask operand type for G_SHUFFLE_VECTOR", MI);
+ break;
+ }
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg());
+
+ if (Src0Ty != Src1Ty)
+ report("Source operands must be the same type", MI);
+
+ if (Src0Ty.getScalarType() != DstTy.getScalarType())
+ report("G_SHUFFLE_VECTOR cannot change element type", MI);
+
+ // Don't check that all operands are vectors because scalars are used in
+ // place of 1-element vectors.
+ int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
+ int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+
+ ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask();
+
+ if (static_cast<int>(MaskIdxes.size()) != DstNumElts)
+ report("Wrong result type for shufflemask", MI);
+
+ for (int Idx : MaskIdxes) {
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= 2 * SrcNumElts)
+ report("Out of bounds shuffle index", MI);
+ }
+
+ break;
+ }
+ case TargetOpcode::G_DYN_STACKALLOC: {
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &AllocOp = MI->getOperand(1);
+ const MachineOperand &AlignOp = MI->getOperand(2);
+
+ if (!DstOp.isReg() || !MRI->getType(DstOp.getReg()).isPointer()) {
+ report("dst operand 0 must be a pointer type", MI);
+ break;
+ }
+
+ if (!AllocOp.isReg() || !MRI->getType(AllocOp.getReg()).isScalar()) {
+ report("src operand 1 must be a scalar reg type", MI);
+ break;
+ }
+
+ if (!AlignOp.isImm()) {
+ report("src operand 2 must be an immediate type", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_MEMCPY_INLINE:
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE: {
+ ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ if (MMOs.size() != 2) {
+ report("memcpy/memmove must have 2 memory operands", MI);
+ break;
+ }
+
+ if ((!MMOs[0]->isStore() || MMOs[0]->isLoad()) ||
+ (MMOs[1]->isStore() || !MMOs[1]->isLoad())) {
+ report("wrong memory operand types", MI);
+ break;
+ }
+
+ if (MMOs[0]->getSize() != MMOs[1]->getSize())
+ report("inconsistent memory operand sizes", MI);
+
+ LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcPtrTy = MRI->getType(MI->getOperand(1).getReg());
+
+ if (!DstPtrTy.isPointer() || !SrcPtrTy.isPointer()) {
+ report("memory instruction operand must be a pointer", MI);
+ break;
+ }
+
+ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
+ report("inconsistent store address space", MI);
+ if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
+ report("inconsistent load address space", MI);
+
+ if (Opc != TargetOpcode::G_MEMCPY_INLINE)
+ if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL))
+ report("'tail' flag (operand 3) must be an immediate 0 or 1", MI);
+
+ break;
+ }
+ case TargetOpcode::G_BZERO:
+ case TargetOpcode::G_MEMSET: {
+ ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ std::string Name = Opc == TargetOpcode::G_MEMSET ? "memset" : "bzero";
+ if (MMOs.size() != 1) {
+ report(Twine(Name, " must have 1 memory operand"), MI);
+ break;
+ }
+
+ if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) {
+ report(Twine(Name, " memory operand must be a store"), MI);
+ break;
+ }
+
+ LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstPtrTy.isPointer()) {
+ report(Twine(Name, " operand must be a pointer"), MI);
+ break;
+ }
+
+ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
+ report("inconsistent " + Twine(Name, " address space"), MI);
+
+ if (!MI->getOperand(MI->getNumOperands() - 1).isImm() ||
+ (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL))
+ report("'tail' flag (last operand) must be an immediate 0 or 1", MI);
+
+ break;
+ }
+ case TargetOpcode::G_VECREDUCE_SEQ_FADD:
+ case TargetOpcode::G_VECREDUCE_SEQ_FMUL: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isScalar())
+ report("Vector reduction requires a scalar destination type", MI);
+ if (!Src1Ty.isScalar())
+ report("Sequential FADD/FMUL vector reduction requires a scalar 1st operand", MI);
+ if (!Src2Ty.isVector())
+ report("Sequential FADD/FMUL vector reduction must have a vector 2nd operand", MI);
+ break;
+ }
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_MUL:
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ case TargetOpcode::G_VECREDUCE_UMIN: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstTy.isScalar())
+ report("Vector reduction requires a scalar destination type", MI);
+ break;
+ }
+
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.isVector()) {
+ report("Bitfield extraction is not supported on vectors", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_ROTR:
+ case TargetOpcode::G_ROTL: {
+ LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
+ if (Src1Ty.isVector() != Src2Ty.isVector()) {
+ report("Shifts and rotates require operands to be either all scalars or "
+ "all vectors",
+ MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_LROUND: {
+ verifyAllRegOpsScalar(*MI, *MRI);
+ break;
+ }
+ case TargetOpcode::G_IS_FPCLASS: {
+ LLT DestTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT DestEltTy = DestTy.getScalarType();
+ if (!DestEltTy.isScalar()) {
+ report("Destination must be a scalar or vector of scalars", MI);
+ break;
+ }
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT SrcEltTy = SrcTy.getScalarType();
+ if (!SrcEltTy.isScalar()) {
+ report("Source must be a scalar or vector of scalars", MI);
+ break;
+ }
+ if (!verifyVectorElementMatch(DestTy, SrcTy, MI))
+ break;
+ const MachineOperand &TestMO = MI->getOperand(2);
+ if (!TestMO.isImm()) {
+ report("floating-point class set (operand 2) must be an immediate", MI);
+ break;
+ }
+ int64_t Test = TestMO.getImm();
+ if (Test < 0 || Test > fcAllFlags) {
+ report("Incorrect floating-point class set (operand 2)", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_ASSERT_ALIGN: {
+ if (MI->getOperand(2).getImm() < 1)
+ report("alignment immediate must be >= 1", MI);
+ break;
+ }
+ case TargetOpcode::G_CONSTANT_POOL: {
+ if (!MI->getOperand(1).isCPI())
+ report("Src operand 1 must be a constant pool index", MI);
+ if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+ report("Dst operand 0 must be a pointer", MI);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ errs() << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumOperands() << " given.\n";
+ }
+
+ if (MI->isPHI()) {
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
+ if (FirstNonPHI)
+ report("Found PHI instruction after non-PHI", MI);
+ } else if (FirstNonPHI == nullptr)
+ FirstNonPHI = MI;
+
+ // Check the tied operands.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
+ // Check that unspillable terminators define a reg and have at most one use.
+ if (TII->isUnspillableTerminator(MI)) {
+ if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
+ report("Unspillable Terminator does not define a reg", MI);
+ Register Def = MI->getOperand(0).getReg();
+ if (Def.isVirtual() &&
+ !MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs) &&
+ std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
+ report("Unspillable Terminator expected to have at most one use!", MI);
+ }
+
+ // A fully-formed DBG_VALUE must have a location. Ignore partially formed
+ // DBG_VALUEs: these are convenient to use in tests, but should never get
+ // generated.
+ if (MI->isDebugValue() && MI->getNumOperands() == 4)
+ if (!MI->getDebugLoc())
+ report("Missing DebugLoc for debug instruction", MI);
+
+ // Meta instructions should never be the subject of debug value tracking;
+ // they don't create a value in the output program at all.
+ if (MI->isMetaInstruction() && MI->peekDebugInstrNum())
+ report("Metadata instruction should not have a value tracking number", MI);
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineMemOperand *Op : MI->memoperands()) {
+ if (Op->isLoad() && !MI->mayLoad())
+ report("Missing mayLoad flag", MI);
+ if (Op->isStore() && !MI->mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one, unless they are inside a bundle.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(*MI);
+ if (MI->isDebugOrPseudoInstr()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ unsigned Opc = MCID.getOpcode();
+ if (isPreISelGenericOpcode(Opc) || isPreISelGenericOptimizationHint(Opc)) {
+ verifyPreISelGenericInstruction(MI);
+ return;
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(*MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+ switch (MI->getOpcode()) {
+ case TargetOpcode::COPY: {
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ const Register SrcReg = SrcOp.getReg();
+ const Register DstReg = DstOp.getReg();
+
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
+ if (SrcTy.isValid() && DstTy.isValid()) {
+ // If both types are valid, check that the types are the same.
+ if (SrcTy != DstTy) {
+ report("Copy Instruction is illegal with mismatching types", MI);
+ errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n";
+ }
+
+ break;
+ }
+
+ if (!SrcTy.isValid() && !DstTy.isValid())
+ break;
+
+ // If we have only one valid type, this is likely a copy between a virtual
+ // and physical register.
+ unsigned SrcSize = 0;
+ unsigned DstSize = 0;
+ if (SrcReg.isPhysical() && DstTy.isValid()) {
+ const TargetRegisterClass *SrcRC =
+ TRI->getMinimalPhysRegClassLLT(SrcReg, DstTy);
+ if (SrcRC)
+ SrcSize = TRI->getRegSizeInBits(*SrcRC);
+ }
+
+ if (SrcSize == 0)
+ SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
+
+ if (DstReg.isPhysical() && SrcTy.isValid()) {
+ const TargetRegisterClass *DstRC =
+ TRI->getMinimalPhysRegClassLLT(DstReg, SrcTy);
+ if (DstRC)
+ DstSize = TRI->getRegSizeInBits(*DstRC);
+ }
+
+ if (DstSize == 0)
+ DstSize = TRI->getRegSizeInBits(DstReg, *MRI);
+
+ if (SrcSize != 0 && DstSize != 0 && SrcSize != DstSize) {
+ if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
+ report("Copy Instruction is illegal with mismatching sizes", MI);
+ errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize
+ << "\n";
+ }
+ }
+ break;
+ }
+ case TargetOpcode::STATEPOINT: {
+ StatepointOpers SO(MI);
+ if (!MI->getOperand(SO.getIDPos()).isImm() ||
+ !MI->getOperand(SO.getNBytesPos()).isImm() ||
+ !MI->getOperand(SO.getNCallArgsPos()).isImm()) {
+ report("meta operands to STATEPOINT not constant!", MI);
+ break;
+ }
+
+ auto VerifyStackMapConstant = [&](unsigned Offset) {
+ if (Offset >= MI->getNumOperands()) {
+ report("stack map constant to STATEPOINT is out of range!", MI);
+ return;
+ }
+ if (!MI->getOperand(Offset - 1).isImm() ||
+ MI->getOperand(Offset - 1).getImm() != StackMaps::ConstantOp ||
+ !MI->getOperand(Offset).isImm())
+ report("stack map constant to STATEPOINT not well formed!", MI);
+ };
+ VerifyStackMapConstant(SO.getCCIdx());
+ VerifyStackMapConstant(SO.getFlagsIdx());
+ VerifyStackMapConstant(SO.getNumDeoptArgsIdx());
+ VerifyStackMapConstant(SO.getNumGCPtrIdx());
+ VerifyStackMapConstant(SO.getNumAllocaIdx());
+ VerifyStackMapConstant(SO.getNumGcMapEntriesIdx());
+
+ // Verify that all explicit statepoint defs are tied to gc operands, as
+ // they are expected to be relocations of gc operands.
+ unsigned FirstGCPtrIdx = SO.getFirstGCPtrIdx();
+ unsigned LastGCPtrIdx = SO.getNumAllocaIdx() - 2;
+ for (unsigned Idx = 0; Idx < MI->getNumDefs(); Idx++) {
+ unsigned UseOpIdx;
+ if (!MI->isRegTiedToUseOperand(Idx, &UseOpIdx)) {
+ report("STATEPOINT defs expected to be tied", MI);
+ break;
+ }
+ if (UseOpIdx < FirstGCPtrIdx || UseOpIdx > LastGCPtrIdx) {
+ report("STATEPOINT def tied to non-gc operand", MI);
+ break;
+ }
+ }
+
+ // TODO: verify we have properly encoded deopt arguments
+ } break;
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned InsertedSize;
+ if (unsigned SubIdx = MI->getOperand(2).getSubReg())
+ InsertedSize = TRI->getSubRegIdxSize(SubIdx);
+ else
+ InsertedSize = TRI->getRegSizeInBits(MI->getOperand(2).getReg(), *MRI);
+ unsigned SubRegSize = TRI->getSubRegIdxSize(MI->getOperand(3).getImm());
+ if (SubRegSize < InsertedSize) {
+ report("INSERT_SUBREG expected inserted value to have equal or lesser "
+ "size than the subreg it was inserted into", MI);
+ break;
+ }
+ } break;
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned NumOps = MI->getNumOperands();
+ if (!(NumOps & 1)) {
+ report("Invalid number of operands for REG_SEQUENCE", MI);
+ break;
+ }
+
+ for (unsigned I = 1; I != NumOps; I += 2) {
+ const MachineOperand &RegOp = MI->getOperand(I);
+ const MachineOperand &SubRegOp = MI->getOperand(I + 1);
+
+ if (!RegOp.isReg())
+ report("Invalid register operand for REG_SEQUENCE", &RegOp, I);
+
+ if (!SubRegOp.isImm() || SubRegOp.getImm() == 0 ||
+ SubRegOp.getImm() >= TRI->getNumSubRegIndices()) {
+ report("Invalid subregister index operand for REG_SEQUENCE",
+ &SubRegOp, I + 1);
+ }
+ }
+
+ Register DstReg = MI->getOperand(0).getReg();
+ if (DstReg.isPhysical())
+ report("REG_SEQUENCE does not support physical register results", MI);
+
+ if (MI->getOperand(0).getSubReg())
+ report("Invalid subreg result for REG_SEQUENCE", MI);
+
+ break;
+ }
+ }
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumDefs = MCID.getNumDefs();
+ if (MCID.getOpcode() == TargetOpcode::PATCHPOINT)
+ NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0;
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < NumDefs) {
+ const MCOperandInfo &MCOI = MCID.operands()[MONum];
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.operands()[MONum];
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the ARM backend. Check non-variadic operands only.
+ bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1;
+ if (!IsOptional) {
+ if (MO->isReg()) {
+ if (MO->isDef() && !MCOI.isOptionalDef() && !MCID.variadicOpsAreDefs())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+
+ // Check that an instruction has register operands only as expected.
+ if (MCOI.OperandType == MCOI::OPERAND_REGISTER &&
+ !MO->isReg() && !MO->isFI())
+ report("Expected a register operand.", MO, MONum);
+ if (MO->isReg()) {
+ if (MCOI.OperandType == MCOI::OPERAND_IMMEDIATE ||
+ (MCOI.OperandType == MCOI::OPERAND_PCREL &&
+ !TII->isPCRelRegisterOperandLegal(*MO)))
+ report("Expected a non-register operand.", MO, MONum);
+ }
+ }
+
+ int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ if (!MO->isReg())
+ report("Tied use must be a register", MO, MONum);
+ else if (!MO->isTied())
+ report("Operand should be tied", MO, MONum);
+ else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
+ report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ else if (MO->getReg().isPhysical()) {
+ const MachineOperand &MOTied = MI->getOperand(TiedTo);
+ if (!MOTied.isReg())
+ report("Tied counterpart must be a register", &MOTied, TiedTo);
+ else if (MOTied.getReg().isPhysical() &&
+ MO->getReg() != MOTied.getReg())
+ report("Tied physical registers must match.", &MOTied, TiedTo);
+ }
+ } else if (MO->isReg() && MO->isTied())
+ report("Explicit operand should not be tied", MO, MONum);
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ // Verify debug flag on debug instructions. Check this first because reg0
+ // indicates an undefined debug value.
+ if (MI->isDebugInstr() && MO->isUse()) {
+ if (!MO->isDebug())
+ report("Register operand must be marked debug", MO, MONum);
+ } else if (MO->isDebug()) {
+ report("Register operand must not be marked debug", MO, MONum);
+ }
+
+ const Register Reg = MO->getReg();
+ if (!Reg)
+ return;
+ if (MRI->tracksLiveness() && !MI->isDebugInstr())
+ checkLiveness(MO, MONum);
+
+ if (MO->isDef() && MO->isUndef() && !MO->getSubReg() &&
+ MO->getReg().isVirtual()) // TODO: Apply to physregs too
+ report("Undef virtual register def operands require a subregister", MO, MONum);
+
+ // Verify the consistency of tied operands.
+ if (MO->isTied()) {
+ unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
+ const MachineOperand &OtherMO = MI->getOperand(OtherIdx);
+ if (!OtherMO.isReg())
+ report("Must be tied to a register", MO, MONum);
+ if (!OtherMO.isTied())
+ report("Missing tie flags on tied operand", MO, MONum);
+ if (MI->findTiedOperandIdx(OtherIdx) != MONum)
+ report("Inconsistent tie links", MO, MONum);
+ if (MONum < MCID.getNumDefs()) {
+ if (OtherIdx < MCID.getNumOperands()) {
+ if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO))
+ report("Explicit def tied to explicit use without tie constraint",
+ MO, MONum);
+ } else {
+ if (!OtherMO.isImplicit())
+ report("Explicit def should be tied to implicit use", MO, MONum);
+ }
+ }
+ }
+
+ // Verify two-address constraints after the twoaddressinstruction pass.
+ // Both the twoaddressinstruction pass and the phi-node-elimination pass
+ // call MRI->leaveSSA() to mark the MF as NoSSA, but this verification
+ // should run after the twoaddressinstruction pass, not after
+ // phi-node-elimination. So instead of using NoSSA as the condition, base
+ // the two-address verification on the TiedOpsRewritten property, which is
+ // set by the twoaddressinstruction pass.
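+ // Illustrative example: once TiedOpsRewritten is set, a tied use such as
+ // "%2 = INST %2(tied-def 0), %3" is expected, whereas
+ // "%2 = INST %1(tied-def 0), %3" is reported here.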
+ unsigned DefIdx;
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ MO->isUse() && MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
+
+ // Check register classes.
+ unsigned SubIdx = MO->getSubReg();
+
+ if (Reg.isPhysical()) {
+ if (SubIdx) {
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
+ }
+ if (MONum < MCID.getNumOperands()) {
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (!DRC->contains(Reg)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ errs() << printReg(Reg, TRI) << " is not a "
+ << TRI->getRegClassName(DRC) << " register.\n";
+ }
+ }
+ }
+ if (MO->isRenamable()) {
+ if (MRI->isReserved(Reg)) {
+ report("isRenamable set on reserved register", MO, MONum);
+ return;
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
+ if (!RC) {
+ // This is a generic virtual register.
+
+ // Do not allow undef uses for generic virtual registers. This ensures
+ // getVRegDef can never fail and return null on a generic register.
+ //
+ // FIXME: This restriction should probably be broadened to all SSA
+ // MIR. However, DetectDeadLanes/ProcessImplicitDefs technically still
+ // run on the SSA function just before phi elimination.
+ if (MO->isUndef())
+ report("Generic virtual register use cannot be undef", MO, MONum);
+
+ // Debug value instructions are permitted to use undefined vregs.
+ // This is a performance measure to skip the overhead of immediately
+ // pruning unused debug operands. The final undef substitution occurs
+ // when debug values are allocated in LDVImpl::handleDebugValue, so
+ // these verifications always apply after this pass.
+ if (isFunctionTracksDebugUserValues || !MO->isUse() ||
+ !MI->isDebugValue() || !MRI->def_empty(Reg)) {
+ // If we're post-Select, we can't have gvregs anymore.
+ if (isFunctionSelected) {
+ report("Generic virtual register invalid in a Selected function",
+ MO, MONum);
+ return;
+ }
+
+ // The gvreg must have a type and it must not have a SubIdx.
+ LLT Ty = MRI->getType(Reg);
+ if (!Ty.isValid()) {
+ report("Generic virtual register must have a valid type", MO,
+ MONum);
+ return;
+ }
+
+ const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ const RegisterBankInfo *RBI = MF->getSubtarget().getRegBankInfo();
+
+ // If we're post-RegBankSelect, the gvreg must have a bank.
+ if (!RegBank && isFunctionRegBankSelected) {
+ report("Generic virtual register must have a bank in a "
+ "RegBankSelected function",
+ MO, MONum);
+ return;
+ }
+
+ // Make sure the register fits into its register bank if any.
+ if (RegBank && Ty.isValid() &&
+ RBI->getMaximumSize(RegBank->getID()) < Ty.getSizeInBits()) {
+ report("Register bank is too small for virtual register", MO,
+ MONum);
+ errs() << "Register bank " << RegBank->getName() << " too small("
+ << RBI->getMaximumSize(RegBank->getID()) << ") to fit "
+ << Ty.getSizeInBits() << "-bits\n";
+ return;
+ }
+ }
+
+ if (SubIdx) {
+ report("Generic virtual register does not allow subregister index", MO,
+ MONum);
+ return;
+ }
+
+ // If this is a target specific instruction and this operand
+ // has register class constraint, the virtual register must
+ // comply to it.
+ if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
+ MONum < MCID.getNumOperands() &&
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ report("Virtual register does not match instruction constraint", MO,
+ MONum);
+ errs() << "Expect register class "
+ << TRI->getRegClassName(
+ TII->getRegClass(MCID, MONum, TRI, *MF))
+ << " but got nothing\n";
+ return;
+ }
+
+ break;
+ }
+ if (SubIdx) {
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ errs() << "Register class " << TRI->getRegClassName(RC)
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ errs() << "Register class " << TRI->getRegClassName(RC)
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
+ }
+ }
+ if (MONum < MCID.getNumOperands()) {
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (SubIdx) {
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(RC, *MF);
+ if (!SuperRC) {
+ report("No largest legal super class exists.", MO, MONum);
+ return;
+ }
+ DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx);
+ if (!DRC) {
+ report("No matching super-reg register class.", MO, MONum);
+ return;
+ }
+ }
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ errs() << "Expected a " << TRI->getRegClassName(DRC)
+ << " register, but got a " << TRI->getRegClassName(RC)
+ << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_RegisterMask:
+ regMasks.push_back(MO->getRegMask());
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ int FI = MO->getIndex();
+ LiveInterval &LI = LiveStks->getInterval(FI);
+ SlotIndex Idx = LiveInts->getInstructionIndex(*MI);
+
+ bool stores = MI->mayStore();
+ bool loads = MI->mayLoad();
+ // For a memory-to-memory move, we need to check if the frame
+ // index is used for storing or loading, by inspecting the
+ // memory operands.
+ if (stores && loads) {
+ for (auto *MMO : MI->memoperands()) {
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+ if (PSV == nullptr) continue;
+ const FixedStackPseudoSourceValue *Value =
+ dyn_cast<FixedStackPseudoSourceValue>(PSV);
+ if (Value == nullptr) continue;
+ if (Value->getFrameIndex() != FI) continue;
+
+ if (MMO->isStore())
+ loads = false;
+ else
+ stores = false;
+ break;
+ }
+ if (loads == stores)
+ report("Missing fixed stack memoperand.", MI);
+ }
+ if (loads && !LI.liveAt(Idx.getRegSlot(true))) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ errs() << "Live stack: " << LI << '\n';
+ }
+ if (stores && !LI.liveAt(Idx.getRegSlot())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ errs() << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ case MachineOperand::MO_CFIIndex:
+ if (MO->getCFIIndex() >= MF->getFrameInstructions().size())
+ report("CFI instruction has invalid index", MO, MONum);
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
+ unsigned MONum, SlotIndex UseIdx,
+ const LiveRange &LR,
+ Register VRegOrUnit,
+ LaneBitmask LaneMask) {
+ LiveQueryResult LRQ = LR.Query(UseIdx);
+ // Check if we have a segment at the use. Note, however, that we only need
+ // one live subregister range; the others may be dead.
+ if (!LRQ.valueIn() && LaneMask.none()) {
+ report("No live segment at use", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ report_context(UseIdx);
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask.any())
+ report_context_lanemask(LaneMask);
+ report_context(UseIdx);
+ }
+}
+
+void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
+ unsigned MONum, SlotIndex DefIdx,
+ const LiveRange &LR,
+ Register VRegOrUnit,
+ bool SubRangeCheck,
+ LaneBitmask LaneMask) {
+ if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
+ // The LR can correspond to the whole reg, in which case its def slot is
+ // not obliged to be the same as the MO's def slot. E.g. we may be checking
+ // a "normal" subreg MO here while another early-clobber subreg MO in the
+ // same instruction gives the whole reg an early-clobber def slot that
+ // differs from the currently checked MO's def slot. For example:
+ // %0 [16e,32r:0) 0@16e L..3 [16e,32r:0) 0@16e L..C [16r,32r:0) 0@16r
+ // The check that there is an early-clobber def of the same superregister
+ // somewhere is performed in visitMachineFunctionAfter().
+ if (((SubRangeCheck || MO->getSubReg() == 0) && VNI->def != DefIdx) ||
+ !SlotIndex::isSameInstr(VNI->def, DefIdx) ||
+ (VNI->def != DefIdx &&
+ (!VNI->def.isEarlyClobber() || !DefIdx.isRegister()))) {
+ report("Inconsistent valno->def", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask.any())
+ report_context_lanemask(LaneMask);
+ report_context(*VNI);
+ report_context(DefIdx);
+ }
+ } else {
+ report("No live segment at def", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask.any())
+ report_context_lanemask(LaneMask);
+ report_context(DefIdx);
+ }
+ // Check that, if the dead def flag is present, LiveInts agree.
+ if (MO->isDead()) {
+ LiveQueryResult LRQ = LR.Query(DefIdx);
+ if (!LRQ.isDeadDef()) {
+ assert(VRegOrUnit.isVirtual() && "Expecting a virtual register.");
+ // A dead subreg def only tells us that the specific subreg is dead. There
+ // could be other non-dead defs of other subregs, or we could have other
+ // parts of the register being live through the instruction. So unless we
+ // are checking liveness for a subrange it is ok for the live range to
+ // continue, given that we have a dead def of a subregister.
+ if (SubRangeCheck || MO->getSubReg() == 0) {
+ report("Live range continues after dead def flag", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask.any())
+ report_context_lanemask(LaneMask);
+ }
+ }
+ }
+}
+
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const Register Reg = MO->getReg();
+ const unsigned SubRegIdx = MO->getSubReg();
+
+ const LiveInterval *LI = nullptr;
+ if (LiveInts && Reg.isVirtual()) {
+ if (LiveInts->hasInterval(Reg)) {
+ LI = &LiveInts->getInterval(Reg);
+ if (SubRegIdx != 0 && (MO->isDef() || !MO->isUndef()) && !LI->empty() &&
+ !LI->hasSubRanges() && MRI->shouldTrackSubRegLiveness(Reg))
+ report("Live interval for subreg operand has no subranges", MO, MONum);
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
+
+ // Both use and def operands can read a register.
+ if (MO->readsReg()) {
+ if (MO->isKill())
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill (unless we are inside a bundle, in
+ // which case we have already checked that LiveVars knows any kills on the
+ // bundle header instead).
+ if (LiveVars && Reg.isVirtual() && MO->isKill() &&
+ !MI->isBundledWithPred()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (!is_contained(VI.Kills, MI))
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
+ // Check the cached regunit intervals.
+ if (Reg.isPhysical() && !isReserved(Reg)) {
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ if (MRI->isReservedRegUnit(Unit))
+ continue;
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(Unit))
+ checkLivenessAtUse(MO, MONum, UseIdx, *LR, Unit);
+ }
+ }
+
+ if (Reg.isVirtual()) {
+ // This is a virtual register interval.
+ checkLivenessAtUse(MO, MONum, UseIdx, *LI, Reg);
+
+ if (LI->hasSubRanges() && !MO->isDef()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask;
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((MOMask & SR.LaneMask).none())
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+ // At least part of the register has to be live at the use.
+ if ((LiveInMask & MOMask).none()) {
+ report("No live subrange at use", MO, MONum);
+ report_context(*LI);
+ report_context(UseIdx);
+ }
+ }
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (Reg.isPhysical()) {
+ // Reserved registers may be used even when 'dead'.
+ bool Bad = !isReserved(Reg);
+ // We are fine if just any subregister has a defined value.
+ if (Bad) {
+
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg)) {
+ if (regsLive.count(SubReg)) {
+ Bad = false;
+ break;
+ }
+ }
+ }
+ // If there is an additional implicit-use of a super register we stop
+ // here. By definition we are fine if the super register is not
+ // (completely) dead; if the complete super register is dead we will
+ // get a report for its operand.
+ if (Bad) {
+ for (const MachineOperand &MOP : MI->uses()) {
+ if (!MOP.isReg() || !MOP.isImplicit())
+ continue;
+
+ if (!MOP.getReg().isPhysical())
+ continue;
+
+ if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg))
+ Bad = false;
+ }
+ }
+ if (Bad)
+ report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && Reg.isVirtual() &&
+ std::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live segment, but only for virtual registers.
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
+ DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
+
+ if (Reg.isVirtual()) {
+ checkLivenessAtDef(MO, MONum, DefIdx, *LI, Reg);
+
+ if (LI->hasSubRanges()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((SR.LaneMask & MOMask).none())
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
+ }
+ }
+ }
+ }
+ }
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this
+// function is called for all of them.
+void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (Register Reg : regsLive)
+ if (Reg.isPhysical() &&
+ MachineOperand::clobbersPhysReg(Mask, Reg.asMCReg()))
+ regsDead.push_back(Reg);
+ }
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ errs() << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+namespace {
+// This implements a set of registers that serves as a filter: it can filter
+// other sets by passing through elements not in the filter and blocking those
+// that are. Any filter implicitly includes the full set of physical registers
+// upon creation, thus filtering them all out. The filter itself as a set only
+// grows, and needs to be as efficient as possible.
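+// Illustrative example: after add({%1, %5}), a later filterAndAdd({%1, %5,
+// %9}, Out) appends only %9 to Out (and adds %9 to the filter as well).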
+struct VRegFilter {
+ // Add elements to the filter itself. \pre Input set \p FromRegSet must have
+ // no duplicates. Both virtual and physical registers are fine.
+ template <typename RegSetT> void add(const RegSetT &FromRegSet) {
+ SmallVector<Register, 0> VRegsBuffer;
+ filterAndAdd(FromRegSet, VRegsBuffer);
+ }
+ // Filter \p FromRegSet through the filter and append passed elements into \p
+ // ToVRegs. All elements appended are then added to the filter itself.
+ // \returns true if anything changed.
+ template <typename RegSetT>
+ bool filterAndAdd(const RegSetT &FromRegSet,
+ SmallVectorImpl<Register> &ToVRegs) {
+ unsigned SparseUniverse = Sparse.size();
+ unsigned NewSparseUniverse = SparseUniverse;
+ unsigned NewDenseSize = Dense.size();
+ size_t Begin = ToVRegs.size();
+ for (Register Reg : FromRegSet) {
+ if (!Reg.isVirtual())
+ continue;
+ unsigned Index = Register::virtReg2Index(Reg);
+ if (Index < SparseUniverseMax) {
+ if (Index < SparseUniverse && Sparse.test(Index))
+ continue;
+ NewSparseUniverse = std::max(NewSparseUniverse, Index + 1);
+ } else {
+ if (Dense.count(Reg))
+ continue;
+ ++NewDenseSize;
+ }
+ ToVRegs.push_back(Reg);
+ }
+ size_t End = ToVRegs.size();
+ if (Begin == End)
+ return false;
+ // Reserving space in sets once performs better than doing so continuously
+ // and pays easily for double look-ups (even in Dense with SparseUniverseMax
+ // tuned all the way down) and double iteration (the second one is over a
+ // SmallVector, which is a lot cheaper compared to DenseSet or BitVector).
+ Sparse.resize(NewSparseUniverse);
+ Dense.reserve(NewDenseSize);
+ for (unsigned I = Begin; I < End; ++I) {
+ Register Reg = ToVRegs[I];
+ unsigned Index = Register::virtReg2Index(Reg);
+ if (Index < SparseUniverseMax)
+ Sparse.set(Index);
+ else
+ Dense.insert(Reg);
+ }
+ return true;
+ }
+
+private:
+ static constexpr unsigned SparseUniverseMax = 10 * 1024 * 8;
+ // VRegs indexed within SparseUniverseMax are tracked by Sparse; those beyond
+ // are tracked by Dense. The only purpose of the threshold and the Dense set
+ // is to have a reasonably growing memory usage in pathological cases (large
+ // number of very sparse VRegFilter instances live at the same time). In
+ // practice even in the worst-by-execution-time cases having all elements
+ // tracked by Sparse (very large SparseUniverseMax scenario) tends to be more
+ // space efficient than if tracked by Dense. The threshold is set to keep the
+ // worst-case memory usage within 2x of figures determined empirically for
+ // the "all Dense" scenario in such worst-by-execution-time cases.
+ BitVector Sparse;
+ DenseSet<unsigned> Dense;
+};
+
+// Implements both a transfer function and a (binary, in-place) join operator
+// for a dataflow over register sets with set union join and filtering transfer
+// (out_b = in_b \ filter_b). filter_b is expected to be set up ahead of time.
+// Maintains out_b as its state, allowing for O(n) iteration over it at any
+// time, where n is the size of the set (as opposed to O(U) where U is the
+// universe). filter_b implicitly contains all physical registers at all times.
+class FilteringVRegSet {
+ VRegFilter Filter;
+ SmallVector<Register, 0> VRegs;
+
+public:
+ // Set up the filter_b. \pre Input register set \p RS must have no duplicates.
+ // Both virtual and physical registers are fine.
+ template <typename RegSetT> void addToFilter(const RegSetT &RS) {
+ Filter.add(RS);
+ }
+ // Passes \p RS through the filter_b (transfer function) and adds what's left
+ // to itself (out_b).
+ template <typename RegSetT> bool add(const RegSetT &RS) {
+ // Double-duty the Filter: to maintain VRegs a set (and the join operation
+ // a set union) just add everything being added here to the Filter as well.
+ return Filter.filterAndAdd(RS, VRegs);
+ }
+ using const_iterator = decltype(VRegs)::const_iterator;
+ const_iterator begin() const { return VRegs.begin(); }
+ const_iterator end() const { return VRegs.end(); }
+ size_t size() const { return VRegs.size(); }
+};
+} // namespace
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
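+// Concretely, each block's vregsPassed collects its reachable predecessors'
+// regsLiveOut and vregsPassed (as computed so far in RPO), filtered to exclude
+// the block's own regsKilled and regsLiveOut as well as physical registers.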
+void MachineVerifier::calcRegsPassed() {
+ if (MF->empty())
+ // ReversePostOrderTraversal doesn't handle empty functions.
+ return;
+
+ for (const MachineBasicBlock *MB :
+ ReversePostOrderTraversal<const MachineFunction *>(MF)) {
+ FilteringVRegSet VRegs;
+ BBInfo &Info = MBBInfoMap[MB];
+ assert(Info.reachable);
+
+ VRegs.addToFilter(Info.regsKilled);
+ VRegs.addToFilter(Info.regsLiveOut);
+ for (const MachineBasicBlock *Pred : MB->predecessors()) {
+ const BBInfo &PredInfo = MBBInfoMap[Pred];
+ if (!PredInfo.reachable)
+ continue;
+
+ VRegs.add(PredInfo.regsLiveOut);
+ VRegs.add(PredInfo.vregsPassed);
+ }
+ Info.vregsPassed.reserve(VRegs.size());
+ Info.vregsPassed.insert(VRegs.begin(), VRegs.end());
+ }
+}
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
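+// The seed of the propagation is each block's vregsLiveIn plus the registers
+// read by PHI operands (attributed to the corresponding predecessor); every
+// requirement is then pushed to predecessors until a fixed point is reached.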
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(Pred);
+ }
+
+ // Handle the PHI node.
+ for (const MachineInstr &MI : MBB.phis()) {
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ // Skip those Operands which are undef regs or not regs.
+ if (!MI.getOperand(i).isReg() || !MI.getOperand(i).readsReg())
+ continue;
+
+ // Get register and predecessor for one PHI edge.
+ Register Reg = MI.getOperand(i).getReg();
+ const MachineBasicBlock *Pred = MI.getOperand(i + 1).getMBB();
+
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (PInfo.addRequired(Reg))
+ todo.insert(Pred);
+ }
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Pred == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[Pred];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(Pred);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
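+// A well-formed PHI here looks like, e.g.,
+//   %3 = PHI %1, %bb.0, %2, %bb.1
+// i.e. a virtual register def followed by (value, predecessor) pairs that
+// cover every CFG predecessor of the block.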
+void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
+ for (const MachineInstr &Phi : MBB) {
+ if (!Phi.isPHI())
+ break;
+ seen.clear();
+
+ const MachineOperand &MODef = Phi.getOperand(0);
+ if (!MODef.isReg() || !MODef.isDef()) {
+ report("Expected first PHI operand to be a register def", &MODef, 0);
+ continue;
+ }
+ if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() ||
+ MODef.isEarlyClobber() || MODef.isDebug())
+ report("Unexpected flag on PHI operand", &MODef, 0);
+ Register DefReg = MODef.getReg();
+ if (!DefReg.isVirtual())
+ report("Expected first PHI operand to be a virtual register", &MODef, 0);
+
+ for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
+ const MachineOperand &MO0 = Phi.getOperand(I);
+ if (!MO0.isReg()) {
+ report("Expected PHI operand to be a register", &MO0, I);
+ continue;
+ }
+ if (MO0.isImplicit() || MO0.isInternalRead() || MO0.isEarlyClobber() ||
+ MO0.isDebug() || MO0.isTied())
+ report("Unexpected flag on PHI operand", &MO0, I);
+
+ const MachineOperand &MO1 = Phi.getOperand(I + 1);
+ if (!MO1.isMBB()) {
+ report("Expected PHI operand to be a basic block", &MO1, I + 1);
+ continue;
+ }
+
+ const MachineBasicBlock &Pre = *MO1.getMBB();
+ if (!Pre.isSuccessor(&MBB)) {
+ report("PHI input is not a predecessor block", &MO1, I + 1);
+ continue;
+ }
+
+ if (MInfo.reachable) {
+ seen.insert(&Pre);
+ BBInfo &PrInfo = MBBInfoMap[&Pre];
+ if (!MO0.isUndef() && PrInfo.reachable &&
+ !PrInfo.isLiveOut(MO0.getReg()))
+ report("PHI operand is not live-out from predecessor", &MO0, I);
+ }
+ }
+
+ // Did we see all predecessors?
+ if (MInfo.reachable) {
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ if (!seen.count(Pred)) {
+ report("Missing PHI operand", &Phi);
+ errs() << printMBBReference(*Pred)
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (const MachineBasicBlock &MBB : *MF)
+ checkPHIOps(MBB);
+
+ // Now check liveness info if available
+ calcRegsRequired();
+
+ // Check for killed virtual registers that should be live out.
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (Register VReg : MInfo.vregsRequired)
+ if (MInfo.regsKilled.count(VReg)) {
+ report("Virtual register killed in block, but needed live out.", &MBB);
+ errs() << "Virtual register " << printReg(VReg)
+ << " is used after the block.\n";
+ }
+ }
+
+ if (!MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (Register VReg : MInfo.vregsRequired) {
+ report("Virtual register defs don't dominate all uses.", MF);
+ report_context_vreg(VReg);
+ }
+ }
+
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+
+ // Check live-in list of each MBB. If a register is live into MBB, check
+ // that the register is in regsLiveOut of each predecessor block. Since
+ // this must come from a definition in the predecessor or its live-in
+ // list, this will catch a live-through case where the predecessor does not
+ // have the register in its live-in list. This currently only checks
+ // registers that have no aliases, are not allocatable and are not
+ // reserved, which could mean a condition code register for instance.
+ if (MRI->tracksLiveness())
+ for (const auto &MBB : *MF)
+ for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+ MCPhysReg LiveInReg = P.PhysReg;
+ bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid();
+ if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg))
+ continue;
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (!PInfo.regsLiveOut.count(LiveInReg)) {
+ report("Live in register not found to be live out from predecessor.",
+ &MBB);
+ errs() << TRI->getName(LiveInReg)
+ << " not found to be live out from "
+ << printMBBReference(*Pred) << "\n";
+ }
+ }
+ }
+
+ for (auto CSInfo : MF->getCallSitesInfo())
+ if (!CSInfo.first->isCall())
+ report("Call site info referencing instruction that is not call", MF);
+
+ // If there's debug-info, check that we don't have any duplicate value
+ // tracking numbers.
+ if (MF->getFunction().getSubprogram()) {
+ DenseSet<unsigned> SeenNumbers;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (auto Num = MI.peekDebugInstrNum()) {
+ auto Result = SeenNumbers.insert((unsigned)Num);
+ if (!Result.second)
+ report("Instruction has a duplicated value tracking number", &MI);
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MBB.getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", &MBB);
+ errs() << "Virtual register " << printReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MBB.getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", &MBB);
+ errs() << "Virtual register " << printReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+
+ if (!LiveInts->hasInterval(Reg)) {
+ report("Missing live interval for virtual register", MF);
+ errs() << printReg(Reg, TRI) << " still has defs or uses\n";
+ continue;
+ }
+
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ assert(Reg == LI.reg() && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
+
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(i))
+ verifyLiveRange(*LR, i);
+}
+
+void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
+ const VNInfo *VNI, Register Reg,
+ LaneBitmask LaneMask) {
+ if (VNI->isUnused())
+ return;
+
+ const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ report("Value not live at VNInfo def and not marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ if (DefVNI != VNI) {
+ report("Live segment at def has different VNInfo", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid VNInfo definition index", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef VNInfo is not defined at MBB start", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+ return;
+ }
+
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at VNInfo def index", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ if (Reg != 0) {
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (Reg.isVirtual()) {
+ if (MOI->getReg() != Reg)
+ continue;
+ } else {
+ if (!MOI->getReg().isPhysical() || !TRI->hasRegUnit(MOI->getReg(), Reg))
+ continue;
+ }
+ if (LaneMask.any() &&
+ (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+ continue;
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
+ const LiveRange::const_iterator I,
+ Register Reg,
+ LaneBitmask LaneMask) {
+ const LiveRange::Segment &S = *I;
+ const VNInfo *VNI = S.valno;
+ assert(VNI && "Live segment has no valno");
+
+ if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live segment", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ report_context(*VNI);
+ }
+
+ if (VNI->isUnused()) {
+ report("Live segment valno is marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (S.start != MBBStartIdx && S.start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(S.end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ return;
+ }
+
+ // Checks for non-live-out segments.
+ if (S.end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // RegUnit intervals are allowed to have dead PHIs.
+ if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def &&
+ S.end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (S.end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+
+ if (S.end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(S.start, S.end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
+
+ // After tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if it is being redefined by an early-clobber def.
+ // TODO: Before tied operands are rewritten, a live segment can only end at
+ // an early-clobber slot if the last use is tied to an early-clobber def.
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ S.end.isEarlyClobber()) {
+ if (I + 1 == LR.end() || (I + 1)->start != S.end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction",
+ EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (Reg.isVirtual()) {
+ // A live segment can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasSubRegDef = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != Reg)
+ continue;
+ unsigned Sub = MOI->getSubReg();
+ LaneBitmask SLM =
+ Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) : LaneBitmask::getAll();
+ if (MOI->isDef()) {
+ if (Sub != 0) {
+ hasSubRegDef = true;
+ // An operand %0:sub0 reads %0:sub1..n. Invert the lane
+ // mask for subregister defs. Read-undef defs will be handled by
+ // readsReg below.
+ SLM = ~SLM;
+ }
+ if (MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (LaneMask.any() && (LaneMask & SLM).none())
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ }
+ if (S.end.isDead()) {
+ // Make sure that the corresponding machine operand for a "dead" live
+ // range has the dead flag. We cannot perform this check for subregister
+ // liveranges as partially dead values are allowed.
+ if (LaneMask.none() && !hasDeadDef) {
+ report(
+ "Instruction ending live segment on dead slot has no dead flag",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ } else {
+ if (!hasRead) {
+ // When tracking subregister liveness, the main range must start new
+ // values on partial register writes, even if there is no read.
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
+ !hasSubRegDef) {
+ report("Instruction ending live segment doesn't read the register",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB->getIterator();
+ // Is this live segment the beginning of a non-PHIDef VN?
+ if (S.start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LaneMask.any()) {
+ LiveInterval &OwnerLI = LiveInts->getInterval(Reg);
+ OwnerLI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes);
+ }
+
+ while (true) {
+ assert(LiveInts->isLiveInToMBB(LR, &*MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!Reg.isVirtual() && MFI->isEHPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(&*MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (const MachineBasicBlock *Pred : MFI->predecessors()) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred);
+ // Predecessor of landing pad live-out on last call.
+ if (MFI->isEHPad()) {
+ for (const MachineInstr &MI : llvm::reverse(*Pred)) {
+ if (MI.isCall()) {
+ PEnd = Indexes->getInstructionIndex(MI).getBoundaryIndex();
+ break;
+ }
+ }
+ }
+ const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value. However for a phi
+ // instruction with subregister intervals
+ // only one of the subregisters (not necessarily the current one) needs to
+ // be defined.
+ if (!PVNI && (LaneMask.none() || !IsPHI)) {
+ if (LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes))
+ continue;
+ report("Register not marked live out of predecessor", Pred);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ errs() << " live into " << printMBBReference(*MFI) << '@'
+ << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
+ }
+
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", Pred);
+ report_context(LR, Reg, LaneMask);
+ errs() << "Valno #" << PVNI->id << " live out of "
+ << printMBBReference(*Pred) << '@' << PEnd << "\nValno #"
+ << VNI->id << " live into " << printMBBReference(*MFI) << '@'
+ << LiveInts->getMBBStartIdx(&*MFI) << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
+
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg,
+ LaneBitmask LaneMask) {
+ for (const VNInfo *VNI : LR.valnos)
+ verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
+
+ for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I)
+ verifyLiveRangeSegment(LR, I, Reg, LaneMask);
+}
+
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ Register Reg = LI.reg();
+ assert(Reg.isVirtual());
+ verifyLiveRange(LI, Reg);
+
+ LaneBitmask Mask;
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask).any()) {
+ report("Lane masks of sub ranges overlap in live interval", MF);
+ report_context(LI);
+ }
+ if ((SR.LaneMask & ~MaxMask).any()) {
+ report("Subrange lanemask is invalid", MF);
+ report_context(LI);
+ }
+ if (SR.empty()) {
+ report("Subrange must not be empty", MF);
+ report_context(SR, LI.reg(), SR.LaneMask);
+ }
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg(), SR.LaneMask);
+ if (!LI.covers(SR)) {
+ report("A Subrange is not covered by the main range", MF);
+ report_context(LI);
+ }
+ }
+
+ // Check the LI only has one connected component.
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ report_context(LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ errs() << comp << ": valnos";
+ for (const VNInfo *I : LI.valnos)
+ if (comp == ConEQ.getEqClass(I))
+ errs() << ' ' << I->id;
+ errs() << '\n';
+ }
+ }
+}
+
+namespace {
+
+ // FrameSetup and FrameDestroy can have zero adjustment, so with a single
+ // integer we can't tell whether a zero value came from a FrameSetup or a
+ // FrameDestroy. We therefore use a bool plus an integer to capture the
+ // stack state.
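+ // For example, a FrameSetup of 16 bytes moves the exit state to (-16, true);
+ // the matching FrameDestroy of 16 brings it back to (0, false).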
+ struct StackStateOfBB {
+ StackStateOfBB() = default;
+ StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) :
+ EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup),
+ ExitIsSetup(ExitSetup) {}
+
+ // Can be negative, which means we are setting up a frame.
+ int EntryValue = 0;
+ int ExitValue = 0;
+ bool EntryIsSetup = false;
+ bool ExitIsSetup = false;
+ };
+
+} // end anonymous namespace
+
+/// Make sure on every path through the CFG, a FrameSetup <n> is always followed
+/// by a FrameDestroy <n>, stack adjustments are identical on all
+/// CFG edges to a merge point, and frame is destroyed at end of a return block.
+void MachineVerifier::verifyStackFrame() {
+ unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
+
+ SmallVector<StackStateOfBB, 8> SPState;
+ SPState.resize(MF->getNumBlockIDs());
+ df_iterator_default_set<const MachineBasicBlock*> Reachable;
+
+ // Visit the MBBs in DFS order.
+ for (df_ext_iterator<const MachineFunction *,
+ df_iterator_default_set<const MachineBasicBlock *>>
+ DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
+ DFI != DFE; ++DFI) {
+ const MachineBasicBlock *MBB = *DFI;
+
+ StackStateOfBB BBState;
+ // Check the exit state of the DFS stack predecessor.
+ if (DFI.getPathLength() >= 2) {
+ const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+ assert(Reachable.count(StackPred) &&
+ "DFS stack predecessor is already visited.\n");
+ BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue;
+ BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup;
+ BBState.ExitValue = BBState.EntryValue;
+ BBState.ExitIsSetup = BBState.EntryIsSetup;
+ }
+
+ // Update stack state by checking contents of MBB.
+ for (const auto &I : *MBB) {
+ if (I.getOpcode() == FrameSetupOpcode) {
+ if (BBState.ExitIsSetup)
+ report("FrameSetup is after another FrameSetup", &I);
+ BBState.ExitValue -= TII->getFrameTotalSize(I);
+ BBState.ExitIsSetup = true;
+ }
+
+ if (I.getOpcode() == FrameDestroyOpcode) {
+ int Size = TII->getFrameTotalSize(I);
+ if (!BBState.ExitIsSetup)
+ report("FrameDestroy is not after a FrameSetup", &I);
+ int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
+ BBState.ExitValue;
+ if (BBState.ExitIsSetup && AbsSPAdj != Size) {
+ report("FrameDestroy <n> is after FrameSetup <m>", &I);
+ errs() << "FrameDestroy <" << Size << "> is after FrameSetup <"
+ << AbsSPAdj << ">.\n";
+ }
+ BBState.ExitValue += Size;
+ BBState.ExitIsSetup = false;
+ }
+ }
+ SPState[MBB->getNumber()] = BBState;
+
+ // Make sure the exit state of any predecessor is consistent with the entry
+ // state.
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Reachable.count(Pred) &&
+ (SPState[Pred->getNumber()].ExitValue != BBState.EntryValue ||
+ SPState[Pred->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
+ report("The exit stack state of a predecessor is inconsistent.", MBB);
+ errs() << "Predecessor " << printMBBReference(*Pred)
+ << " has exit state (" << SPState[Pred->getNumber()].ExitValue
+ << ", " << SPState[Pred->getNumber()].ExitIsSetup << "), while "
+ << printMBBReference(*MBB) << " has entry state ("
+ << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
+ }
+ }
+
+ // Make sure the entry state of any successor is consistent with the exit
+ // state.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ if (Reachable.count(Succ) &&
+ (SPState[Succ->getNumber()].EntryValue != BBState.ExitValue ||
+ SPState[Succ->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
+ report("The entry stack state of a successor is inconsistent.", MBB);
+ errs() << "Successor " << printMBBReference(*Succ)
+ << " has entry state (" << SPState[Succ->getNumber()].EntryValue
+ << ", " << SPState[Succ->getNumber()].EntryIsSetup << "), while "
+ << printMBBReference(*MBB) << " has exit state ("
+ << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
+ }
+ }
+
+ // Make sure a basic block with return ends with zero stack adjustment.
+ if (!MBB->empty() && MBB->back().isReturn()) {
+ if (BBState.ExitIsSetup)
+ report("A return block ends with a FrameSetup.", MBB);
+ if (BBState.ExitValue)
+ report("A return block ends with a nonzero stack adjustment.", MBB);
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
new file mode 100644
index 000000000000..fa5df68b8abc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
@@ -0,0 +1,213 @@
+//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the implementation of the DAG scheduling mutation
+/// to pair instructions back to back.
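+/// A typical use is pairing a compare with the conditional branch that
+/// consumes its result on targets that can macro-fuse such pairs.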
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "machine-scheduler"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+static bool isHazard(const SDep &Dep) {
+ return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
+}
+
+static SUnit *getPredClusterSU(const SUnit &SU) {
+ for (const SDep &SI : SU.Preds)
+ if (SI.isCluster())
+ return SI.getSUnit();
+
+ return nullptr;
+}
+
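+// Returns true if the chain of cluster predecessors ending at \p SU (including
+// \p SU itself) contains fewer than \p FuseLimit instructions.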
+bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+ unsigned Num = 1;
+ const SUnit *CurrentSU = &SU;
+ while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num++;
+ return Num < FuseLimit;
+}
+
+bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
+ // Check that neither instr is already paired with another along the edge
+ // between them.
+ for (SDep &SI : FirstSU.Succs)
+ if (SI.isCluster())
+ return false;
+
+ for (SDep &SI : SecondSU.Preds)
+ if (SI.isCluster())
+ return false;
+ // Though the reachability checks above could be made more generic,
+ // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid,
+ // the extra computation cost makes it less interesting in general cases.
+
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
+ return false;
+
+ // TODO - If we want to chain more than two instructions, we need to create
+ // artificial edges to make dependencies from the FirstSU also dependent
+ // on other chained instructions, and other chained instructions also
+ // dependent on the dependencies of the SecondSU, to prevent them from being
+ // scheduled into these chained instructions.
+ assert(hasLessThanNumFused(FirstSU, 2) &&
+ "Currently we only support chaining together two instructions");
+
+ // Adjust the latency between both instrs.
+ for (SDep &SI : FirstSU.Succs)
+ if (SI.getSUnit() == &SecondSU)
+ SI.setLatency(0);
+
+ for (SDep &SI : SecondSU.Preds)
+ if (SI.getSUnit() == &FirstSU)
+ SI.setLatency(0);
+
+ LLVM_DEBUG(
+ dbgs() << "Macro fuse: "; DAG.dumpNodeName(FirstSU); dbgs() << " - ";
+ DAG.dumpNodeName(SecondSU); dbgs() << " / ";
+ dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - "
+ << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);
+
+ // Make data dependencies from the FirstSU also dependent on the SecondSU to
+ // prevent them from being scheduled between the FirstSU and the SecondSU.
+ if (&SecondSU != &DAG.ExitSU)
+ for (const SDep &SI : FirstSU.Succs) {
+ SUnit *SU = SI.getSUnit();
+ if (SI.isWeak() || isHazard(SI) ||
+ SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
+ continue;
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(SecondSU);
+ dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';);
+ DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
+ }
+
+ // Make the FirstSU also dependent on the dependencies of the SecondSU to
+ // prevent them from being scheduled between the FirstSU and the SecondSU.
+ if (&FirstSU != &DAG.EntrySU) {
+ for (const SDep &SI : SecondSU.Preds) {
+ SUnit *SU = SI.getSUnit();
+ if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
+ continue;
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*SU); dbgs() << " - ";
+ DAG.dumpNodeName(FirstSU); dbgs() << '\n';);
+ DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
+ }
+ // ExitSU comes last by design, which acts like an implicit dependency
+ // between ExitSU and any bottom root in the graph. We should transfer
+ // this to FirstSU as well.
+ if (&SecondSU == &DAG.ExitSU) {
+ for (SUnit &SU : DAG.SUnits) {
+ if (SU.Succs.empty())
+ DAG.addEdge(&FirstSU, SDep(&SU, SDep::Artificial));
+ }
+ }
+ }
+
+ ++NumFused;
+ return true;
+}
+
+namespace {
+
+/// Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ ShouldSchedulePredTy shouldScheduleAdjacent;
+ bool FuseBlock;
+ bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
+
+public:
+ MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
+ : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+} // end anonymous namespace
+
+void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
+ if (FuseBlock)
+ // For each of the SUnits in the scheduling block, try to fuse the instr in
+ // it with one in its predecessors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(*DAG, ISU);
+
+ if (DAG->ExitSU.getInstr())
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(*DAG, DAG->ExitSU);
+}
+
+/// Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) {
+ const MachineInstr &AnchorMI = *AnchorSU.getInstr();
+ const TargetInstrInfo &TII = *DAG.TII;
+ const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
+ return false;
+
+ // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorSU.Preds) {
+ // Ignore dependencies other than data or strong ordering.
+ if (Dep.isWeak() || isHazard(Dep))
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ if (DepSU.isBoundaryNode())
+ continue;
+
+ // Only chain two instructions together at most.
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!hasLessThanNumFused(DepSU, 2) ||
+ !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ continue;
+
+ if (fuseInstructionPair(DAG, DepSU, AnchorSU))
+ return true;
+ }
+
+ return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createBranchMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return nullptr;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
new file mode 100644
index 000000000000..0bef513342ff
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -0,0 +1,2208 @@
+//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "pipeliner"
+using namespace llvm;
+
+void ModuloSchedule::print(raw_ostream &OS) {
+ for (MachineInstr *MI : ScheduledInstrs)
+ OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+
+/// Return the register values for the operands of a Phi instruction.
+/// This function assumes the instruction is a Phi.
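+/// For a loop-header Phi such as
+///   %v = PHI %init, %preheader, %loopval, %loop
+/// (illustrative names), this yields InitVal = %init and LoopVal = %loopval.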
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
+ unsigned &InitVal, unsigned &LoopVal) {
+ assert(Phi.isPHI() && "Expecting a Phi.");
+
+ InitVal = 0;
+ LoopVal = 0;
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != Loop)
+ InitVal = Phi.getOperand(i).getReg();
+ else
+ LoopVal = Phi.getOperand(i).getReg();
+
+ assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
+}
+
+/// Return the Phi register value that comes from the incoming block.
+static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return the Phi register value that comes from the loop block.
+static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() == LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+void ModuloScheduleExpander::expand() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = *BB->pred_begin();
+ if (Preheader == BB)
+ Preheader = *std::next(BB->pred_begin());
+
+ // Iterate over the definitions in each instruction, and compute the
+ // stage difference for each use. Keep the maximum value.
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ int DefStage = Schedule.getStage(MI);
+ for (const MachineOperand &Op : MI->all_defs()) {
+ Register Reg = Op.getReg();
+ unsigned MaxDiff = 0;
+ bool PhiIsSwapped = false;
+ for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
+ MachineInstr *UseMI = UseOp.getParent();
+ int UseStage = Schedule.getStage(UseMI);
+ unsigned Diff = 0;
+ if (UseStage != -1 && UseStage >= DefStage)
+ Diff = UseStage - DefStage;
+ if (MI->isPHI()) {
+ if (isLoopCarried(*MI))
+ ++Diff;
+ else
+ PhiIsSwapped = true;
+ }
+ MaxDiff = std::max(Diff, MaxDiff);
+ }
+ RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
+ }
+ }
+
+ generatePipelinedLoop();
+}
+
+void ModuloScheduleExpander::generatePipelinedLoop() {
+ LoopInfo = TII->analyzeLoopForPipelining(BB);
+ assert(LoopInfo && "Must be able to analyze loop!");
+
+ // Create a new basic block for the kernel and add it to the CFG.
+ MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+
+ unsigned MaxStageCount = Schedule.getNumStages() - 1;
+
+ // Remember the registers that are used in different stages. The index is
+ // the iteration, or stage, that the instruction is scheduled in. This is
+ // a map between register names in the original block and the names created
+ // in each stage of the pipelined loop.
+ ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+
+ // The renaming destination by Phis for the registers across stages.
+ // This map is updated during Phis generation to point to the most recent
+ // renaming destination.
+ ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
+
+ InstrMapTy InstrMap;
+
+ SmallVector<MachineBasicBlock *, 4> PrologBBs;
+
+ // Generate the prolog instructions that set up the pipeline.
+ generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
+ MF.insert(BB->getIterator(), KernelBB);
+
+ // Rearrange the instructions to generate the new, pipelined loop,
+ // and update register names as needed.
+ for (MachineInstr *CI : Schedule.getInstructions()) {
+ if (CI->isPHI())
+ continue;
+ unsigned StageNum = Schedule.getStage(CI);
+ MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
+ updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = CI;
+ }
+
+ // Copy any terminator instructions to the new kernel, and update
+ // names as needed.
+ for (MachineInstr &MI : BB->terminators()) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
+ updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = &MI;
+ }
+
+ NewKernel = KernelBB;
+ KernelBB->transferSuccessors(BB);
+ KernelBB->replaceSuccessor(BB, KernelBB);
+
+ generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
+ InstrMap, MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
+ InstrMap, MaxStageCount, MaxStageCount, false);
+
+ LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
+
+ SmallVector<MachineBasicBlock *, 4> EpilogBBs;
+ // Generate the epilog instructions to complete the pipeline.
+ generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
+ PrologBBs);
+
+ // We need this step because the register allocation doesn't handle some
+ // situations well, so we insert copies to help out.
+ splitLifetimes(KernelBB, EpilogBBs);
+
+ // Remove dead instructions due to loop induction variables.
+ removeDeadInstructions(KernelBB, EpilogBBs);
+
+ // Add branches between prolog and epilog blocks.
+ addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
+
+ delete[] VRMap;
+ delete[] VRMapPhi;
+}
+
+void ModuloScheduleExpander::cleanup() {
+ // Remove the original loop since it's no longer referenced.
+ for (auto &I : *BB)
+ LIS.RemoveMachineInstrFromMaps(I);
+ BB->clear();
+ BB->eraseFromParent();
+}
+
+/// Generate the pipeline prolog code.
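+/// Prolog block i (0-based) contains copies of the instructions scheduled in
+/// stages 0..i of the original loop body, ramping the pipeline up before
+/// entering the kernel.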
+void ModuloScheduleExpander::generateProlog(unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs) {
+ MachineBasicBlock *PredBB = Preheader;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which will be generated in the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ for (unsigned i = 0; i < LastStage; ++i) {
+ // Create and insert the prolog basic block prior to the original loop
+ // basic block. The original loop is removed later.
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ PrologBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+ NewBB->transferSuccessors(PredBB);
+ PredBB->addSuccessor(NewBB);
+ PredBB = NewBB;
+
+ // Generate instructions for each appropriate stage. Process instructions
+ // in original program order.
+ for (int StageNum = i; StageNum >= 0; --StageNum) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstTerminator();
+ BBI != BBE; ++BBI) {
+ if (Schedule.getStage(&*BBI) == StageNum) {
+ if (BBI->isPHI())
+ continue;
+ MachineInstr *NewMI =
+ cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
+ updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = &*BBI;
+ }
+ }
+ }
+ rewritePhiValues(NewBB, i, VRMap, InstrMap);
+ LLVM_DEBUG({
+ dbgs() << "prolog:\n";
+ NewBB->dump();
+ });
+ }
+
+ PredBB->replaceSuccessor(BB, KernelBB);
+
+ // Check if we need to remove the branch from the preheader to the original
+ // loop, and replace it with a branch to the new loop.
+ unsigned numBranches = TII->removeBranch(*Preheader);
+ if (numBranches) {
+ SmallVector<MachineOperand, 0> Cond;
+ TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
+ }
+}
+
+/// Generate the pipeline epilog code. The epilog code finishes the iterations
+/// that were started in either the prolog or the kernel. We create a basic
+/// block for each stage that needs to complete.
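+/// The epilog block generated for stage i contains copies of the instructions
+/// scheduled in stages i..LastStage, draining the iterations that are still in
+/// flight when the kernel exits.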
+void ModuloScheduleExpander::generateEpilog(
+ unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
+ ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
+ // We need to change the branch from the kernel to the first epilog block, so
+ // this call to analyze branch uses the kernel rather than the original BB.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
+ assert(!checkBranch && "generateEpilog must be able to analyze the branch");
+ if (checkBranch)
+ return;
+
+ MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
+ if (*LoopExitI == KernelBB)
+ ++LoopExitI;
+ assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
+ MachineBasicBlock *LoopExitBB = *LoopExitI;
+
+ MachineBasicBlock *PredBB = KernelBB;
+ MachineBasicBlock *EpilogStart = LoopExitBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which was generated for the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ int EpilogStage = LastStage + 1;
+ for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
+ EpilogBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+
+ PredBB->replaceSuccessor(LoopExitBB, NewBB);
+ NewBB->addSuccessor(LoopExitBB);
+
+ if (EpilogStart == LoopExitBB)
+ EpilogStart = NewBB;
+
+ // Add instructions to the epilog depending on the current block.
+ // Process instructions in original program order.
+ for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
+ for (auto &BBI : *BB) {
+ if (BBI.isPHI())
+ continue;
+ MachineInstr *In = &BBI;
+ if ((unsigned)Schedule.getStage(In) == StageNum) {
+ // Instructions with memoperands in the epilog are updated with
+ // conservative values.
+ MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
+ updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = In;
+ }
+ }
+ }
+ generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
+ InstrMap, LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
+ InstrMap, LastStage, EpilogStage, i == 1);
+ PredBB = NewBB;
+
+ LLVM_DEBUG({
+ dbgs() << "epilog:\n";
+ NewBB->dump();
+ });
+ }
+
+ // Fix any Phi nodes in the loop exit block.
+ LoopExitBB->replacePhiUsesWith(BB, PredBB);
+
+ // Create a branch to the new epilog from the kernel.
+ // Remove the original branch and add a new branch to the epilog.
+ TII->removeBranch(*KernelBB);
+ assert((OrigBB == TBB || OrigBB == FBB) &&
+ "Unable to determine looping branch direction");
+ if (OrigBB != TBB)
+ TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
+ else
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ // Add a branch to the loop exit.
+ if (EpilogBBs.size() > 0) {
+ MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
+ SmallVector<MachineOperand, 4> Cond1;
+ TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
+ }
+}
+
+/// Replace all uses of FromReg that appear outside the specified
+/// basic block with ToReg.
+static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock *MBB,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS) {
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI.use_operands(FromReg)))
+ if (O.getParent()->getParent() != MBB)
+ O.setReg(ToReg);
+ if (!LIS.hasInterval(ToReg))
+ LIS.createEmptyInterval(ToReg);
+}
+
+/// Return true if the register has a use that occurs outside the
+/// specified loop.
+static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
+ MachineRegisterInfo &MRI) {
+ for (const MachineOperand &MO : MRI.use_operands(Reg))
+ if (MO.getParent()->getParent() != BB)
+ return true;
+ return false;
+}
+
+/// Generate Phis for the specific block in the generated pipelined code.
+/// This function looks at the Phis from the original code to guide the
+/// creation of new Phis.
+void ModuloScheduleExpander::generateExistingPhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ // Compute the stage number for the initial value of the Phi, which
+ // comes from the prolog. The prolog to use depends on which kernel or
+ // epilog block the Phi is being added to.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ bool InKernel = (LastStageNum == CurStageNum);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - (CurStageNum - LastStageNum);
+ PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ Register Def = BBI->getOperand(0).getReg();
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+
+ unsigned PhiOp1 = 0;
+ // The Phi value from the loop body typically is defined in the loop, but
+ // not always. So, we need to check if the value is defined in the loop.
+ unsigned PhiOp2 = LoopVal;
+ if (VRMap[LastStageNum].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum][LoopVal];
+
+ int StageScheduled = Schedule.getStage(&*BBI);
+ int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
+ unsigned NumStages = getStagesForReg(Def, CurStageNum);
+ if (NumStages == 0) {
+ // We don't need to generate a Phi anymore, but we need to rename any uses
+ // of the Phi value.
+ unsigned NewReg = VRMap[PrevStage][LoopVal];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
+ InitVal, NewReg);
+ if (VRMap[CurStageNum].count(LoopVal))
+ VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
+ }
+ // Adjust the number of Phis needed depending on the number of prologs left,
+ // and the distance from where the Phi is first scheduled. The number of
+ // Phis cannot exceed the number of prolog stages. Each stage can
+ // potentially define two values.
+ unsigned MaxPhis = PrologStage + 2;
+ if (!InKernel && (int)PrologStage <= LoopValStage)
+ MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
+ unsigned NumPhis = std::min(NumStages, MaxPhis);
+
+ unsigned NewReg = 0;
+ unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
+ // In the epilog, we may need to look back one stage to get the correct
+ // Phi name, because the epilog and prolog blocks execute the same stage.
+ // The correct name is from the previous block only when the Phi has
+ // been completely scheduled prior to the epilog, and the Phi value is not
+ // needed in multiple stages.
+ int StageDiff = 0;
+ if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
+ NumPhis == 1)
+ StageDiff = 1;
+ // Adjust the computations below when the phi and the loop definition
+ // are scheduled in different stages.
+ if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiff = StageScheduled - LoopValStage;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ // If the Phi hasn't been scheduled, then use the initial Phi operand
+ // value. Otherwise, use the scheduled version of the instruction. This
+ // is a little complicated when a Phi references another Phi.
+ if (np > PrologStage || StageScheduled >= (int)LastStageNum)
+ PhiOp1 = InitVal;
+ // Check if the Phi has already been scheduled in a prolog stage.
+ else if (PrologStage >= AccessStage + StageDiff + np &&
+ VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
+ PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
+ // Check if the Phi has already been scheduled, but the loop instruction
+ // is either another Phi, or doesn't occur in the loop.
+ else if (PrologStage >= AccessStage + StageDiff + np) {
+ // If the Phi references another Phi, we need to examine the other
+ // Phi to get the correct value.
+ PhiOp1 = LoopVal;
+ MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
+ int Indirects = 1;
+ while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
+ int PhiStage = Schedule.getStage(InstOp1);
+ if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
+ PhiOp1 = getInitPhiReg(*InstOp1, BB);
+ else
+ PhiOp1 = getLoopPhiReg(*InstOp1, BB);
+ InstOp1 = MRI.getVRegDef(PhiOp1);
+ int PhiOpStage = Schedule.getStage(InstOp1);
+ int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
+ if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
+ VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
+ PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
+ break;
+ }
+ ++Indirects;
+ }
+ } else
+ PhiOp1 = InitVal;
+ // If this references a generated Phi in the kernel, get the Phi operand
+ // from the incoming block.
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+
+ MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
+ bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
+ // In the epilog, a map lookup is needed to get the value from the kernel,
+ // or previous epilog block. How this is done depends on whether the
+ // instruction is scheduled in the previous block.
+ if (!InKernel) {
+ int StageDiffAdj = 0;
+ if (LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiffAdj = StageScheduled - LoopValStage;
+ // Use the loop value defined in the kernel, unless the kernel
+ // contains the last definition of the Phi.
+ if (np == 0 && PrevStage == LastStageNum &&
+ (StageScheduled != 0 || LoopValStage != 0) &&
+ VRMap[PrevStage - StageDiffAdj].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
+ // Use the value defined by the Phi. We add one because we switch
+ // from looking at the loop value to the Phi definition.
+ else if (np > 0 && PrevStage == LastStageNum &&
+ VRMap[PrevStage - np + 1].count(Def))
+ PhiOp2 = VRMap[PrevStage - np + 1][Def];
+ // Use the loop value defined in the kernel.
+ else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
+ VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
+ // Use the value defined by the Phi, unless we're generating the first
+ // epilog and the Phi refers to a Phi in a different stage.
+ else if (VRMap[PrevStage - np].count(Def) &&
+ (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
+ (LoopValStage == StageScheduled)))
+ PhiOp2 = VRMap[PrevStage - np][Def];
+ }
+
+ // Check if we can reuse an existing Phi. This occurs when a Phi
+ // references another Phi, and the other Phi is scheduled in an
+ // earlier stage. We can try to reuse an existing Phi up until the last
+ // stage of the current Phi.
+ if (LoopDefIsPhi) {
+ if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+ int LVNumStages = getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (isLoopCarried(*PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
+
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
+ Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
+ }
+ }
+ if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
+ }
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // We define the Phis after creating the new pipelined code, so
+ // we need to rename the Phi values in scheduled instructions.
+
+ unsigned PrevReg = 0;
+ if (InKernel && VRMap[PrevStage - np].count(LoopVal))
+ PrevReg = VRMap[PrevStage - np][LoopVal];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+ NewReg, PrevReg);
+ // If the Phi has been scheduled, use the new name for rewriting.
+ if (VRMap[CurStageNum - np].count(Def)) {
+ unsigned R = VRMap[CurStageNum - np][Def];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
+ NewReg);
+ }
+
+ // Check if we need to rename any uses that occur after the loop. The
+ // register to replace depends on whether the Phi is scheduled in the
+ // epilog.
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+
+ // In the kernel, a dependent Phi uses the value from this Phi.
+ if (InKernel)
+ PhiOp2 = NewReg;
+
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ }
+
+ while (NumPhis++ < NumStages) {
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
+ NewReg, 0);
+ }
+
+ // Check if we need to rename a Phi that has been eliminated due to
+ // scheduling.
+ if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
+ replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
+ }
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// These are new Phis needed because the definition is scheduled after the
+/// use in the pipelined sequence.
+void ModuloScheduleExpander::generatePhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
+ // Compute the stage number that contains the initial Phi value, and
+ // the Phi from the previous stage.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ unsigned StageDiff = CurStageNum - LastStageNum;
+ bool InKernel = (StageDiff == 0);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - StageDiff;
+ PrevStage = LastStageNum + StageDiff - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
+ BBE = BB->instr_end();
+ BBI != BBE; ++BBI) {
+ for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BBI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ continue;
+
+ int StageScheduled = Schedule.getStage(&*BBI);
+ assert(StageScheduled != -1 && "Expecting scheduled instruction.");
+ Register Def = MO.getReg();
+ unsigned NumPhis = getStagesForReg(Def, CurStageNum);
+ // An instruction that is scheduled in stage 0 and used after the loop
+ // requires a Phi in the epilog for the last definition from either
+ // the kernel or the prolog.
+ if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
+ hasUseAfterLoop(Def, BB, MRI))
+ NumPhis = 1;
+ if (!InKernel && (unsigned)StageScheduled > PrologStage)
+ continue;
+
+ unsigned PhiOp2;
+ if (InKernel) {
+ PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ }
+ // The number of Phis can't exceed the number of prolog stages. The
+ // prolog stage number is zero based.
+ if (NumPhis > PrologStage + 1 - StageScheduled)
+ NumPhis = PrologStage + 1 - StageScheduled;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+        // Example for
+        //   Org:
+        //     %Org = ... (Scheduled at Stage#0, NumPhi = 2)
+        //
+        //   Prolog0 (Stage0):
+        //     %Clone0 = ...
+        //   Prolog1 (Stage1):
+        //     %Clone1 = ...
+        //   Kernel (Stage2):
+        //     %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
+        //     %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
+        //     %Clone2 = ...
+        //   Epilog0 (Stage3):
+        //     %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
+        //     %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
+        //   Epilog1 (Stage4):
+        //     %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
+        //
+        //  VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
+        //  VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
+        //  VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
+
+ unsigned PhiOp1 = VRMap[PrologStage][Def];
+ if (np <= PrologStage)
+ PhiOp1 = VRMap[PrologStage - np][Def];
+ if (!InKernel) {
+ if (PrevStage == LastStageNum && np == 0)
+ PhiOp2 = VRMap[LastStageNum][Def];
+ else
+ PhiOp2 = VRMapPhi[PrevStage - np][Def];
+ }
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ Register NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+        // Rewrite uses and update the map. The actions depend upon whether
+        // we are generating code for the kernel or epilog blocks.
+ if (InKernel) {
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
+ NewReg);
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
+ NewReg);
+
+ PhiOp2 = NewReg;
+ VRMapPhi[PrevStage - np - 1][Def] = NewReg;
+ } else {
+ VRMapPhi[CurStageNum - np][Def] = NewReg;
+ if (np == NumPhis - 1)
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+ NewReg);
+ }
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ }
+ }
+ }
+}
+
+/// Remove instructions that generate values with no uses.
+/// Typically, these are induction variable operations that generate values
+/// used in the loop itself. A dead instruction has a definition with
+/// no uses, or uses that occur in the original loop only.
+void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ // For each epilog block, check that the value defined by each instruction
+ // is used. If not, delete it.
+ for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
+ for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
+ ME = MBB->instr_rend();
+ MI != ME;) {
+      // From DeadMachineInstructionElim. Don't delete inline assembly.
+ if (MI->isInlineAsm()) {
+ ++MI;
+ continue;
+ }
+ bool SawStore = false;
+ // Check if it's safe to remove the instruction due to side effects.
+ // We can, and want to, remove Phis here.
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+ ++MI;
+ continue;
+ }
+ bool used = true;
+ for (const MachineOperand &MO : MI->all_defs()) {
+ Register reg = MO.getReg();
+ // Assume physical registers are used, unless they are marked dead.
+ if (reg.isPhysical()) {
+ used = !MO.isDead();
+ if (used)
+ break;
+ continue;
+ }
+ unsigned realUses = 0;
+ for (const MachineOperand &U : MRI.use_operands(reg)) {
+          // Uses that occur in the original loop block are not real uses;
+          // only count uses that occur outside the original loop.
+ if (U.getParent()->getParent() != BB) {
+ realUses++;
+ used = true;
+ break;
+ }
+ }
+ if (realUses > 0)
+ break;
+ used = false;
+ }
+ if (!used) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI++->eraseFromParent();
+ continue;
+ }
+ ++MI;
+ }
+ // In the kernel block, check if we can remove a Phi that generates a value
+ // used in an instruction removed in the epilog block.
+ for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
+ Register reg = MI.getOperand(0).getReg();
+ if (MRI.use_begin(reg) == MRI.use_end()) {
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ }
+ }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+///   v1 = phi(a, v2)     v1 = phi(a, v2)
+///   v2 = phi(b, v3)     v2 = phi(b, v3)
+///   v3 = ..             v4 = copy v1
+///   .. = v1             v3 = ..
+///                       .. = v4
+void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (auto &PHI : KernelBB->phis()) {
+ Register Def = PHI.getOperand(0).getReg();
+    // Check for any Phi definition that is used as an operand of another Phi
+    // in the same block.
+ for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+ E = MRI.use_instr_end();
+ I != E; ++I) {
+ if (I->isPHI() && I->getParent() == KernelBB) {
+ // Get the loop carried definition.
+ unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
+ if (!LCDef)
+ continue;
+ MachineInstr *MI = MRI.getVRegDef(LCDef);
+ if (!MI || MI->getParent() != KernelBB || MI->isPHI())
+ continue;
+ // Search through the rest of the block looking for uses of the Phi
+ // definition. If one occurs, then split the lifetime.
+ unsigned SplitReg = 0;
+ for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
+ KernelBB->instr_end()))
+ if (BBJ.readsRegister(Def)) {
+ // We split the lifetime when we find the first use.
+ if (SplitReg == 0) {
+ SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ }
+ BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
+ }
+ if (!SplitReg)
+ continue;
+ // Search through each of the epilog blocks for any uses to be renamed.
+ for (auto &Epilog : EpilogBBs)
+ for (auto &I : *Epilog)
+ if (I.readsRegister(Def))
+ I.substituteRegister(Def, SplitReg, 0, *TRI);
+ break;
+ }
+ }
+ }
+}
+
+/// Remove the incoming block from the Phis in a basic block.
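+/// For example (with illustrative register and block names), removing the
+/// incoming block Prolog from
+///   %r = Phi %a, Prolog, %b, Kernel
+/// leaves
+///   %r = Phi %b, Kernel
+/// Only the first incoming pair that matches Incoming is removed per Phi.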
+static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
+ for (MachineInstr &MI : *BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
+ if (MI.getOperand(i + 1).getMBB() == Incoming) {
+ MI.removeOperand(i + 1);
+ MI.removeOperand(i);
+ break;
+ }
+ }
+}
+
+/// Create branches from each prolog basic block to the appropriate epilog
+/// block. These edges are needed if the loop ends before reaching the
+/// kernel.
+void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
+ MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ ValueMapTy *VRMap) {
+ assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
+ MachineBasicBlock *LastPro = KernelBB;
+ MachineBasicBlock *LastEpi = KernelBB;
+
+ // Start from the blocks connected to the kernel and work "out"
+ // to the first prolog and the last epilog blocks.
+ SmallVector<MachineInstr *, 4> PrevInsts;
+ unsigned MaxIter = PrologBBs.size() - 1;
+ for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
+ // Add branches to the prolog that go to the corresponding
+ // epilog, and the fall-thru prolog/kernel block.
+ MachineBasicBlock *Prolog = PrologBBs[j];
+ MachineBasicBlock *Epilog = EpilogBBs[i];
+
+ SmallVector<MachineOperand, 4> Cond;
+ std::optional<bool> StaticallyGreater =
+ LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
+ unsigned numAdded = 0;
+ if (!StaticallyGreater) {
+ Prolog->addSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
+ } else if (*StaticallyGreater == false) {
+ Prolog->addSuccessor(Epilog);
+ Prolog->removeSuccessor(LastPro);
+ LastEpi->removeSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, LastEpi);
+ // Remove the blocks that are no longer referenced.
+ if (LastPro != LastEpi) {
+ LastEpi->clear();
+ LastEpi->eraseFromParent();
+ }
+ if (LastPro == KernelBB) {
+ LoopInfo->disposed();
+ NewKernel = nullptr;
+ }
+ LastPro->clear();
+ LastPro->eraseFromParent();
+ } else {
+ numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, Prolog);
+ }
+ LastPro = Prolog;
+ LastEpi = Epilog;
+ for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
+ E = Prolog->instr_rend();
+ I != E && numAdded > 0; ++I, --numAdded)
+ updateInstruction(&*I, false, j, 0, VRMap);
+ }
+
+ if (NewKernel) {
+ LoopInfo->setPreheader(PrologBBs[MaxIter]);
+ LoopInfo->adjustTripCount(-(MaxIter + 1));
+ }
+}
+
+/// Return true if we can compute the amount the instruction changes
+/// during each iteration. Set Delta to the amount of the change.
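+///
+/// For example (illustrative only; the exact increment instruction is
+/// target-specific), if the access's base register is updated once per
+/// iteration by something like
+///   %base.next = ADD %base.phi, 8
+/// then getIncrementValue reports 8 and Delta is set to 8, allowing copies of
+/// the access made for later stages to have their memory offsets adjusted.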
+bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineOperand *BaseOp;
+ int64_t Offset;
+ bool OffsetIsScalable;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+ return false;
+
+ // FIXME: This algorithm assumes instructions have fixed-size offsets.
+ if (OffsetIsScalable)
+ return false;
+
+ if (!BaseOp->isReg())
+ return false;
+
+ Register BaseReg = BaseOp->getReg();
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Check if there is a Phi. If so, get the definition in the loop.
+ MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+ if (BaseDef && BaseDef->isPHI()) {
+ BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+ BaseDef = MRI.getVRegDef(BaseReg);
+ }
+ if (!BaseDef)
+ return false;
+
+ int D = 0;
+ if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+ return false;
+
+ Delta = D;
+ return true;
+}
+
+/// Update the memory operand with a new offset when the pipeliner
+/// generates a new copy of the instruction that refers to a
+/// different memory location.
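+///
+/// For example, if computeDelta determines that the base register advances by
+/// 8 each iteration and this copy corresponds to an iteration Num == 2 stages
+/// later, each rewritable memory operand's offset is increased by 16. When the
+/// delta cannot be computed, the operand is conservatively widened to an
+/// unknown size instead.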
+void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
+ MachineInstr &OldMI,
+ unsigned Num) {
+ if (Num == 0)
+ return;
+ // If the instruction has memory operands, then adjust the offset
+ // when the instruction appears in different stages.
+ if (NewMI.memoperands_empty())
+ return;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
+ for (MachineMemOperand *MMO : NewMI.memoperands()) {
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic() ||
+ (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ (!MMO->getValue())) {
+ NewMMOs.push_back(MMO);
+ continue;
+ }
+ unsigned Delta;
+ if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
+ int64_t AdjOffset = Delta * Num;
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
+ } else {
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
+ }
+ }
+ NewMI.setMemRefs(MF, NewMMOs);
+}
+
+/// Clone the instruction for the new pipelined loop and update the
+/// memory operands, if needed.
+MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
+ MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ auto It = InstrChanges.find(OldMI);
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+ return nullptr;
+ int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+ MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+ if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
+ NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
+ bool LastDef,
+ unsigned CurStageNum,
+ unsigned InstrStageNum,
+ ValueMapTy *VRMap) {
+ for (MachineOperand &MO : NewMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ Register reg = MO.getReg();
+ if (MO.isDef()) {
+ // Create a new virtual register for the definition.
+ const TargetRegisterClass *RC = MRI.getRegClass(reg);
+ Register NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[CurStageNum][reg] = NewReg;
+ if (LastDef)
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ } else if (MO.isUse()) {
+ MachineInstr *Def = MRI.getVRegDef(reg);
+      // Compute the stage that contains the last definition of this register.
+ int DefStageNum = Schedule.getStage(Def);
+ unsigned StageNum = CurStageNum;
+ if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+        // Compute the difference in stages between the definition and the use.
+ unsigned StageDiff = (InstrStageNum - DefStageNum);
+ // Make an adjustment to get the last definition.
+ StageNum -= StageDiff;
+ }
+ if (VRMap[StageNum].count(reg))
+ MO.setReg(VRMap[StageNum][reg]);
+ }
+ }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
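+///
+/// For example (ADD stands in for whatever target instruction defines %v),
+/// given the loop block BB containing
+///   %p = Phi %init, Preheader, %v, BB
+///   %v = ADD %p, 1
+/// findDefInLoop(%p) follows the Phi's loop operand and returns the ADD.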
+MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+unsigned ModuloScheduleExpander::getPrevMapVal(
+ unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
+ ValueMapTy *VRMap, MachineBasicBlock *BB) {
+ unsigned PrevVal = 0;
+ if (StageNum > PhiStage) {
+ MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+ if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+ // The name is defined in the previous stage.
+ PrevVal = VRMap[StageNum - 1][LoopVal];
+ else if (VRMap[StageNum].count(LoopVal))
+ // The previous name is defined in the current stage when the instruction
+ // order is swapped.
+ PrevVal = VRMap[StageNum][LoopVal];
+ else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+ // The loop value hasn't yet been scheduled.
+ PrevVal = LoopVal;
+ else if (StageNum == PhiStage + 1)
+ // The loop value is another phi, which has not been scheduled.
+ PrevVal = getInitPhiReg(*LoopInst, BB);
+ else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
+ // The loop value is another phi, which has been scheduled.
+ PrevVal =
+ getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
+ LoopStage, VRMap, BB);
+ }
+ return PrevVal;
+}
+
+/// Rewrite the Phi values in the specified block to use the mappings
+/// from the initial operand. Once the Phi is scheduled, we switch
+/// to using the loop value instead of the Phi value, so those names
+/// do not need to be rewritten.
+void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
+ unsigned StageNum,
+ ValueMapTy *VRMap,
+ InstrMapTy &InstrMap) {
+ for (auto &PHI : BB->phis()) {
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(PHI, BB, InitVal, LoopVal);
+ Register PhiDef = PHI.getOperand(0).getReg();
+
+ unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
+ unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
+ unsigned NumPhis = getStagesForPhi(PhiDef);
+ if (NumPhis > StageNum)
+ NumPhis = StageNum;
+ for (unsigned np = 0; np <= NumPhis; ++np) {
+ unsigned NewVal =
+ getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
+ if (!NewVal)
+ NewVal = InitVal;
+ rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
+ NewVal);
+ }
+ }
+}
+
+/// Rewrite a previously scheduled instruction to use the register value
+/// from the new instruction. Make sure the instruction occurs in the
+/// basic block, and we don't change the uses in the new instruction.
+void ModuloScheduleExpander::rewriteScheduledInstr(
+ MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg) {
+ bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
+ int StagePhi = Schedule.getStage(Phi) + PhiNum;
+ // Rewrite uses that have been scheduled already to use the new
+ // Phi register.
+ for (MachineOperand &UseOp :
+ llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
+ MachineInstr *UseMI = UseOp.getParent();
+ if (UseMI->getParent() != BB)
+ continue;
+ if (UseMI->isPHI()) {
+ if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
+ continue;
+ if (getLoopPhiReg(*UseMI, BB) != OldReg)
+ continue;
+ }
+ InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
+ assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
+ MachineInstr *OrigMI = OrigInstr->second;
+ int StageSched = Schedule.getStage(OrigMI);
+ int CycleSched = Schedule.getCycle(OrigMI);
+ unsigned ReplaceReg = 0;
+ // This is the stage for the scheduled instruction.
+ if (StagePhi == StageSched && Phi->isPHI()) {
+ int CyclePhi = Schedule.getCycle(Phi);
+ if (PrevReg && InProlog)
+ ReplaceReg = PrevReg;
+ else if (PrevReg && !isLoopCarried(*Phi) &&
+ (CyclePhi <= CycleSched || OrigMI->isPHI()))
+ ReplaceReg = PrevReg;
+ else
+ ReplaceReg = NewReg;
+ }
+ // The scheduled instruction occurs before the scheduled Phi, and the
+ // Phi is not loop carried.
+ if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
+ ReplaceReg = NewReg;
+ if (StagePhi > StageSched && Phi->isPHI())
+ ReplaceReg = NewReg;
+ if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+ ReplaceReg = NewReg;
+ if (ReplaceReg) {
+ const TargetRegisterClass *NRC =
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ if (NRC)
+ UseOp.setReg(ReplaceReg);
+ else {
+ Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ SplitReg)
+ .addReg(ReplaceReg);
+ UseOp.setReg(SplitReg);
+ }
+ }
+ }
+}
+
+bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
+ if (!Phi.isPHI())
+ return false;
+ int DefCycle = Schedule.getCycle(&Phi);
+ int DefStage = Schedule.getStage(&Phi);
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+ MachineInstr *Use = MRI.getVRegDef(LoopVal);
+ if (!Use || Use->isPHI())
+ return true;
+ int LoopCycle = Schedule.getCycle(Use);
+ int LoopStage = Schedule.getStage(Use);
+ return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+//===----------------------------------------------------------------------===//
+// PeelingModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+// This is a reimplementation of ModuloScheduleExpander that works by creating
+// a fully correct steady-state kernel and peeling off the prolog and epilogs.
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Remove any dead phis in MBB. Dead phis either have only one block as input
+// (in which case they are the identity) or have no uses.
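+// For example, a single-source phi such as %a = PHI %b, %bb.0 is the identity:
+// uses of %a are rewritten to %b and the phi is erased (unless
+// KeepSingleSrcPhi is set). A phi whose result has no uses is erased outright.
+// This repeats until a fixed point, since erasing one phi may leave another
+// unused.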
+void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
+ LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
+ assert(MI.isPHI());
+ if (MRI.use_empty(MI.getOperand(0).getReg())) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ Changed = true;
+ } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(MI.getOperand(1).getReg(),
+ MRI.getRegClass(MI.getOperand(0).getReg()));
+ assert(ConstrainRegClass &&
+ "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
+ MRI.replaceRegWith(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg());
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+}
+
+/// Rewrites the kernel block in-place to adhere to the given schedule.
+/// KernelRewriter holds all of the state required to perform the rewriting.
+class KernelRewriter {
+ ModuloSchedule &S;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *PreheaderBB, *ExitBB;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ // Map from register class to canonical undef register for that class.
+ DenseMap<const TargetRegisterClass *, Register> Undefs;
+ // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
+ // this map is only used when InitReg is non-undef.
+ DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
+ // Map from LoopReg to phi register where the InitReg is undef.
+ DenseMap<Register, Register> UndefPhis;
+
+ // Reg is used by MI. Return the new register MI should use to adhere to the
+ // schedule. Insert phis as necessary.
+ Register remapUse(Register Reg, MachineInstr &MI);
+ // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
+ // If InitReg is not given it is chosen arbitrarily. It will either be undef
+ // or will be chosen so as to share another phi.
+ Register phi(Register LoopReg, std::optional<Register> InitReg = {},
+ const TargetRegisterClass *RC = nullptr);
+ // Create an undef register of the given register class.
+ Register undef(const TargetRegisterClass *RC);
+
+public:
+ KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
+ LiveIntervals *LIS = nullptr);
+ void rewrite();
+};
+} // namespace
+
+KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+ MachineBasicBlock *LoopBB, LiveIntervals *LIS)
+ : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
+ ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
+ TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
+ PreheaderBB = *BB->pred_begin();
+ if (PreheaderBB == BB)
+ PreheaderBB = *std::next(BB->pred_begin());
+}
+
+void KernelRewriter::rewrite() {
+ // Rearrange the loop to be in schedule order. Note that the schedule may
+ // contain instructions that are not owned by the loop block (InstrChanges and
+ // friends), so we gracefully handle unowned instructions and delete any
+ // instructions that weren't in the schedule.
+ auto InsertPt = BB->getFirstTerminator();
+ MachineInstr *FirstMI = nullptr;
+ for (MachineInstr *MI : S.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ if (MI->getParent())
+ MI->removeFromParent();
+ BB->insert(InsertPt, MI);
+ if (!FirstMI)
+ FirstMI = MI;
+ }
+ assert(FirstMI && "Failed to find first MI in schedule");
+
+ // At this point all of the scheduled instructions are between FirstMI
+ // and the end of the block. Kill from the first non-phi to FirstMI.
+ for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*I);
+ (I++)->eraseFromParent();
+ }
+
+ // Now remap every instruction in the loop.
+ for (MachineInstr &MI : *BB) {
+ if (MI.isPHI() || MI.isTerminator())
+ continue;
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
+ continue;
+ Register Reg = remapUse(MO.getReg(), MI);
+ MO.setReg(Reg);
+ }
+ }
+ EliminateDeadPhis(BB, MRI, LIS);
+
+ // Ensure a phi exists for all instructions that are either referenced by
+ // an illegal phi or by an instruction outside the loop. This allows us to
+ // treat remaps of these values the same as "normal" values that come from
+ // loop-carried phis.
+ for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
+ if (MI->isPHI()) {
+ Register R = MI->getOperand(0).getReg();
+ phi(R);
+ continue;
+ }
+
+ for (MachineOperand &Def : MI->defs()) {
+ for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
+ if (MI.getParent() != BB) {
+ phi(Def.getReg());
+ break;
+ }
+ }
+ }
+ }
+}
+
+Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
+ MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
+ if (!Producer)
+ return Reg;
+
+ int ConsumerStage = S.getStage(&MI);
+ if (!Producer->isPHI()) {
+ // Non-phi producers are simple to remap. Insert as many phis as the
+ // difference between the consumer and producer stages.
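+    // For example (illustrative values), a producer in stage 0 feeding a
+    // consumer in stage 2 gets two chained phis:
+    //   %v1 = PHI <undef or reused init>, Preheader, %v,  BB
+    //   %v2 = PHI <undef or reused init>, Preheader, %v1, BB
+    // and the consumer is rewritten to read %v2, the value produced two
+    // iterations earlier.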
+ if (Producer->getParent() != BB)
+ // Producer was not inside the loop. Use the register as-is.
+ return Reg;
+ int ProducerStage = S.getStage(Producer);
+ assert(ConsumerStage != -1 &&
+ "In-loop consumer should always be scheduled!");
+ assert(ConsumerStage >= ProducerStage);
+ unsigned StageDiff = ConsumerStage - ProducerStage;
+
+ for (unsigned I = 0; I < StageDiff; ++I)
+ Reg = phi(Reg);
+ return Reg;
+ }
+
+ // First, dive through the phi chain to find the defaults for the generated
+ // phis.
+ SmallVector<std::optional<Register>, 4> Defaults;
+ Register LoopReg = Reg;
+ auto LoopProducer = Producer;
+ while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
+ LoopReg = getLoopPhiReg(*LoopProducer, BB);
+ Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
+ LoopProducer = MRI.getUniqueVRegDef(LoopReg);
+ assert(LoopProducer);
+ }
+ int LoopProducerStage = S.getStage(LoopProducer);
+
+ std::optional<Register> IllegalPhiDefault;
+
+ if (LoopProducerStage == -1) {
+ // Do nothing.
+ } else if (LoopProducerStage > ConsumerStage) {
+ // This schedule is only representable if ProducerStage == ConsumerStage+1.
+ // In addition, Consumer's cycle must be scheduled after Producer in the
+ // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
+ // functions.
+#ifndef NDEBUG // Silence unused variables in non-asserts mode.
+ int LoopProducerCycle = S.getCycle(LoopProducer);
+ int ConsumerCycle = S.getCycle(&MI);
+#endif
+ assert(LoopProducerCycle <= ConsumerCycle);
+ assert(LoopProducerStage == ConsumerStage + 1);
+ // Peel off the first phi from Defaults and insert a phi between producer
+ // and consumer. This phi will not be at the front of the block so we
+ // consider it illegal. It will only exist during the rewrite process; it
+ // needs to exist while we peel off prologs because these could take the
+ // default value. After that we can replace all uses with the loop producer
+ // value.
+ IllegalPhiDefault = Defaults.front();
+ Defaults.erase(Defaults.begin());
+ } else {
+ assert(ConsumerStage >= LoopProducerStage);
+ int StageDiff = ConsumerStage - LoopProducerStage;
+ if (StageDiff > 0) {
+ LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
+ << " to " << (Defaults.size() + StageDiff) << "\n");
+ // If we need more phis than we have defaults for, pad out with undefs for
+ // the earliest phis, which are at the end of the defaults chain (the
+ // chain is in reverse order).
+ Defaults.resize(Defaults.size() + StageDiff,
+ Defaults.empty() ? std::optional<Register>()
+ : Defaults.back());
+ }
+ }
+
+ // Now we know the number of stages to jump back, insert the phi chain.
+ auto DefaultI = Defaults.rbegin();
+ while (DefaultI != Defaults.rend())
+ LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
+
+ if (IllegalPhiDefault) {
+ // The consumer optionally consumes LoopProducer in the same iteration
+ // (because the producer is scheduled at an earlier cycle than the consumer)
+ // or the initial value. To facilitate this we create an illegal block here
+ // by embedding a phi in the middle of the block. We will fix this up
+ // immediately prior to pruning.
+ auto RC = MRI.getRegClass(Reg);
+ Register R = MRI.createVirtualRegister(RC);
+ MachineInstr *IllegalPhi =
+ BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(*IllegalPhiDefault)
+ .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
+ .addReg(LoopReg)
+ .addMBB(BB); // Block choice is arbitrary and has no effect.
+ // Illegal phi should belong to the producer stage so that it can be
+ // filtered correctly during peeling.
+ S.setStage(IllegalPhi, LoopProducerStage);
+ return R;
+ }
+
+ return LoopReg;
+}
+
+Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
+ const TargetRegisterClass *RC) {
+ // If the init register is not undef, try and find an existing phi.
+ if (InitReg) {
+ auto I = Phis.find({LoopReg, *InitReg});
+ if (I != Phis.end())
+ return I->second;
+ } else {
+ for (auto &KV : Phis) {
+ if (KV.first.first == LoopReg)
+ return KV.second;
+ }
+ }
+
+ // InitReg is either undef or no existing phi takes InitReg as input. Try and
+ // find a phi that takes undef as input.
+ auto I = UndefPhis.find(LoopReg);
+ if (I != UndefPhis.end()) {
+ Register R = I->second;
+ if (!InitReg)
+ // Found a phi taking undef as input, and this input is undef so return
+ // without any more changes.
+ return R;
+ // Found a phi taking undef as input, so rewrite it to take InitReg.
+ MachineInstr *MI = MRI.getVRegDef(R);
+ MI->getOperand(1).setReg(*InitReg);
+ Phis.insert({{LoopReg, *InitReg}, R});
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ assert(ConstrainRegClass && "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
+ UndefPhis.erase(I);
+ return R;
+ }
+
+ // Failed to find any existing phi to reuse, so create a new one.
+ if (!RC)
+ RC = MRI.getRegClass(LoopReg);
+ Register R = MRI.createVirtualRegister(RC);
+ if (InitReg) {
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ assert(ConstrainRegClass && "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
+ }
+ BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(InitReg ? *InitReg : undef(RC))
+ .addMBB(PreheaderBB)
+ .addReg(LoopReg)
+ .addMBB(BB);
+ if (!InitReg)
+ UndefPhis[LoopReg] = R;
+ else
+ Phis[{LoopReg, *InitReg}] = R;
+ return R;
+}
+
+Register KernelRewriter::undef(const TargetRegisterClass *RC) {
+ Register &R = Undefs[RC];
+ if (R == 0) {
+ // Create an IMPLICIT_DEF that defines this register if we need it.
+ // All uses of this should be removed by the time we have finished unrolling
+ // prologs and epilogs.
+ R = MRI.createVirtualRegister(RC);
+ auto *InsertBB = &PreheaderBB->getParent()->front();
+ BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), R);
+ }
+ return R;
+}
+
+namespace {
+/// Describes an operand in the kernel of a pipelined loop. Characteristics of
+/// the operand are discovered, such as how many in-loop PHIs it has to jump
+/// through and defaults for these phis.
+class KernelOperandInfo {
+ MachineBasicBlock *BB;
+ MachineRegisterInfo &MRI;
+ SmallVector<Register, 4> PhiDefaults;
+ MachineOperand *Source;
+ MachineOperand *Target;
+
+public:
+ KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
+ const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
+ : MRI(MRI) {
+ Source = MO;
+ BB = MO->getParent()->getParent();
+ while (isRegInLoop(MO)) {
+ MachineInstr *MI = MRI.getVRegDef(MO->getReg());
+ if (MI->isFullCopy()) {
+ MO = &MI->getOperand(1);
+ continue;
+ }
+ if (!MI->isPHI())
+ break;
+ // If this is an illegal phi, don't count it in distance.
+ if (IllegalPhis.count(MI)) {
+ MO = &MI->getOperand(3);
+ continue;
+ }
+
+ Register Default = getInitPhiReg(*MI, BB);
+ MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
+ : &MI->getOperand(3);
+ PhiDefaults.push_back(Default);
+ }
+ Target = MO;
+ }
+
+ bool operator==(const KernelOperandInfo &Other) const {
+ return PhiDefaults.size() == Other.PhiDefaults.size();
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
+ << *Source->getParent();
+ }
+
+private:
+ bool isRegInLoop(MachineOperand *MO) {
+ return MO->isReg() && MO->getReg().isVirtual() &&
+ MRI.getVRegDef(MO->getReg())->getParent() == BB;
+ }
+};
+} // namespace
+
+MachineBasicBlock *
+PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
+ MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
+ if (LPD == LPD_Front)
+ PeeledFront.push_back(NewBB);
+ else
+ PeeledBack.push_front(NewBB);
+ for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
+ ++I, ++NI) {
+ CanonicalMIs[&*I] = &*I;
+ CanonicalMIs[&*NI] = &*I;
+ BlockMIs[{NewBB, &*I}] = &*NI;
+ BlockMIs[{BB, &*I}] = &*I;
+ }
+ return NewBB;
+}
+
+void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
+ int MinStage) {
+ for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
+ I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
+ MachineInstr *MI = &*I++;
+ int Stage = getStage(MI);
+ if (Stage == -1 || Stage >= MinStage)
+ continue;
+
+ for (MachineOperand &DefMO : MI->defs()) {
+ SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+ for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+ // Only PHIs can use values from this block by construction.
+ // Match with the equivalent PHI in B.
+ assert(UseMI.isPHI());
+ Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+ MI->getParent());
+ Subs.emplace_back(&UseMI, Reg);
+ }
+ for (auto &Sub : Subs)
+ Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ }
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+}
+
+void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
+ MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
+ auto InsertPt = DestBB->getFirstNonPHI();
+ DenseMap<Register, Register> Remaps;
+ for (MachineInstr &MI : llvm::make_early_inc_range(
+ llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
+ if (MI.isPHI()) {
+ // This is an illegal PHI. If we move any instructions using an illegal
+ // PHI, we need to create a legal Phi.
+ if (getStage(&MI) != Stage) {
+ // The legal Phi is not necessary if the illegal phi's stage
+ // is being moved.
+ Register PhiR = MI.getOperand(0).getReg();
+ auto RC = MRI.getRegClass(PhiR);
+ Register NR = MRI.createVirtualRegister(RC);
+ MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
+ DebugLoc(), TII->get(TargetOpcode::PHI), NR)
+ .addReg(PhiR)
+ .addMBB(SourceBB);
+ BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[&MI];
+ Remaps[PhiR] = NR;
+ }
+ }
+ if (getStage(&MI) != Stage)
+ continue;
+ MI.removeFromParent();
+ DestBB->insert(InsertPt, &MI);
+ auto *KernelMI = CanonicalMIs[&MI];
+ BlockMIs[{DestBB, KernelMI}] = &MI;
+ BlockMIs.erase({SourceBB, KernelMI});
+ }
+ SmallVector<MachineInstr *, 4> PhiToDelete;
+ for (MachineInstr &MI : DestBB->phis()) {
+ assert(MI.getNumOperands() == 3);
+ MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
+ // If the instruction referenced by the phi is moved inside the block
+ // we don't need the phi anymore.
+ if (getStage(Def) == Stage) {
+ Register PhiReg = MI.getOperand(0).getReg();
+ assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
+ MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.getOperand(0).setReg(PhiReg);
+ PhiToDelete.push_back(&MI);
+ }
+ }
+ for (auto *P : PhiToDelete)
+ P->eraseFromParent();
+ InsertPt = DestBB->getFirstNonPHI();
+  // Helper to clone Phi instructions into the destination block. We clone
+  // Phis greedily to avoid combinatorial explosion of Phi instructions.
+ auto clonePhi = [&](MachineInstr *Phi) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
+ DestBB->insert(InsertPt, NewMI);
+ Register OrigR = Phi->getOperand(0).getReg();
+ Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+ NewMI->getOperand(0).setReg(R);
+ NewMI->getOperand(1).setReg(OrigR);
+ NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
+ Remaps[OrigR] = R;
+ CanonicalMIs[NewMI] = CanonicalMIs[Phi];
+ BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
+ PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
+ return R;
+ };
+ for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
+ for (MachineOperand &MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ if (Remaps.count(MO.getReg()))
+ MO.setReg(Remaps[MO.getReg()]);
+ else {
+ // If we are using a phi from the source block we need to add a new phi
+ // pointing to the old one.
+ MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
+ if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
+ Register R = clonePhi(Use);
+ MO.setReg(R);
+ }
+ }
+ }
+ }
+}
+
+Register
+PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
+ MachineInstr *Phi) {
+ unsigned distance = PhiNodeLoopIteration[Phi];
+ MachineInstr *CanonicalUse = CanonicalPhi;
+ Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
+ for (unsigned I = 0; I < distance; ++I) {
+ assert(CanonicalUse->isPHI());
+ assert(CanonicalUse->getNumOperands() == 5);
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
+ std::swap(LoopRegIdx, InitRegIdx);
+ CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
+ CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
+ }
+ return CanonicalUseReg;
+}
+
+void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
+ BitVector LS(Schedule.getNumStages(), true);
+ BitVector AS(Schedule.getNumStages(), true);
+ LiveStages[BB] = LS;
+ AvailableStages[BB] = AS;
+
+ // Peel out the prologs.
+ LS.reset();
+ for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
+ LS[I] = true;
+ Prologs.push_back(peelKernel(LPD_Front));
+ LiveStages[Prologs.back()] = LS;
+ AvailableStages[Prologs.back()] = LS;
+ }
+
+ // Create a block that will end up as the new loop exiting block (dominated by
+ // all prologs and epilogs). It will only contain PHIs, in the same order as
+ // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
+ // that the exiting block is a (sub) clone of BB. This in turn gives us the
+ // property that any value deffed in BB but used outside of BB is used by a
+  // property that any value defined in BB but used outside of BB is used by a
+ MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
+ EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
+ // Push out the epilogs, again in reverse order.
+  // We can't assume anything about the minimum loop trip count at this point,
+ // so emit a fairly complex epilog.
+
+  // We first peel off (number of stages - 1) epilogues. Then we remove dead
+ // stages and reorder instructions based on their stage. If we have 3 stages
+ // we generate first:
+ // E0[3, 2, 1]
+ // E1[3', 2']
+ // E2[3'']
+ // And then we move instructions based on their stages to have:
+ // E0[3]
+ // E1[2, 3']
+ // E2[1, 2', 3'']
+ // The transformation is legal because we only move instructions past
+ // instructions of a previous loop iteration.
+ for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
+ Epilogs.push_back(peelKernel(LPD_Back));
+ MachineBasicBlock *B = Epilogs.back();
+ filterInstructions(B, Schedule.getNumStages() - I);
+    // Keep track of which iteration each phi belongs to. We need it to know
+ // what version of the variable to use during prologue/epilogue stitching.
+ EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
+ for (MachineInstr &Phi : B->phis())
+ PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
+ }
+ for (size_t I = 0; I < Epilogs.size(); I++) {
+ LS.reset();
+ for (size_t J = I; J < Epilogs.size(); J++) {
+ int Iteration = J;
+ unsigned Stage = Schedule.getNumStages() - 1 + I - J;
+ // Move stage one block at a time so that Phi nodes are updated correctly.
+ for (size_t K = Iteration; K > I; K--)
+ moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
+ LS[Stage] = true;
+ }
+ LiveStages[Epilogs[I]] = LS;
+ AvailableStages[Epilogs[I]] = AS;
+ }
+
+ // Now we've defined all the prolog and epilog blocks as a fallthrough
+ // sequence, add the edges that will be followed if the loop trip count is
+ // lower than the number of stages (connecting prologs directly with epilogs).
+ auto PI = Prologs.begin();
+ auto EI = Epilogs.begin();
+ assert(Prologs.size() == Epilogs.size());
+ for (; PI != Prologs.end(); ++PI, ++EI) {
+ MachineBasicBlock *Pred = *(*EI)->pred_begin();
+ (*PI)->addSuccessor(*EI);
+ for (MachineInstr &MI : (*EI)->phis()) {
+ Register Reg = MI.getOperand(1).getReg();
+ MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
+ if (Use && Use->getParent() == Pred) {
+ MachineInstr *CanonicalUse = CanonicalMIs[Use];
+ if (CanonicalUse->isPHI()) {
+ // If the use comes from a phi we need to skip as many phi as the
+          // If the use comes from a phi we need to skip as many phis as the
+ // chain to find the right value.
+ Reg = getPhiCanonicalReg(CanonicalUse, Use);
+ }
+ Reg = getEquivalentRegisterIn(Reg, *PI);
+ }
+ MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ MI.addOperand(MachineOperand::CreateMBB(*PI));
+ }
+ }
+
+ // Create a list of all blocks in order.
+ SmallVector<MachineBasicBlock *, 8> Blocks;
+ llvm::copy(PeeledFront, std::back_inserter(Blocks));
+ Blocks.push_back(BB);
+ llvm::copy(PeeledBack, std::back_inserter(Blocks));
+
+ // Iterate in reverse order over all instructions, remapping as we go.
+ for (MachineBasicBlock *B : reverse(Blocks)) {
+ for (auto I = B->instr_rbegin();
+ I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
+ MachineBasicBlock::reverse_instr_iterator MI = I++;
+ rewriteUsesOf(&*MI);
+ }
+ }
+ for (auto *MI : IllegalPhisToDelete) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ IllegalPhisToDelete.clear();
+
+ // Now all remapping has been done, we're free to optimize the generated code.
+ for (MachineBasicBlock *B : reverse(Blocks))
+ EliminateDeadPhis(B, MRI, LIS);
+ EliminateDeadPhis(ExitingBB, MRI, LIS);
+}
+
+MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
+ MachineFunction &MF = *BB->getParent();
+ MachineBasicBlock *Exit = *BB->succ_begin();
+ if (Exit == BB)
+ Exit = *std::next(BB->succ_begin());
+
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ MF.insert(std::next(BB->getIterator()), NewBB);
+
+ // Clone all phis in BB into NewBB and rewrite.
+ for (MachineInstr &MI : BB->phis()) {
+ auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
+ Register OldR = MI.getOperand(3).getReg();
+ Register R = MRI.createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 4> Uses;
+ for (MachineInstr &Use : MRI.use_instructions(OldR))
+ if (Use.getParent() != BB)
+ Uses.push_back(&Use);
+ for (MachineInstr *Use : Uses)
+ Use->substituteRegister(OldR, R, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(OldR)
+ .addMBB(BB);
+ BlockMIs[{NewBB, &MI}] = NI;
+ CanonicalMIs[NI] = &MI;
+ }
+ BB->replaceSuccessor(Exit, NewBB);
+ Exit->replacePhiUsesWith(BB, NewBB);
+ NewBB->addSuccessor(Exit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
+ (void)CanAnalyzeBr;
+ assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+ TII->removeBranch(*BB);
+ TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
+ Cond, DebugLoc());
+ TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
+ return NewBB;
+}
+
+Register
+PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
+ MachineBasicBlock *BB) {
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
+ return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
+}
+
+void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
+ if (MI->isPHI()) {
+ // This is an illegal PHI. The loop-carried (desired) value is operand 3,
+ // and it is produced by this block.
+ Register PhiR = MI->getOperand(0).getReg();
+ Register R = MI->getOperand(3).getReg();
+ int RMIStage = getStage(MRI.getUniqueVRegDef(R));
+ if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
+ R = MI->getOperand(1).getReg();
+ MRI.setRegClass(R, MRI.getRegClass(PhiR));
+ MRI.replaceRegWith(PhiR, R);
+ // Postpone deleting the Phi as it may be referenced by BlockMIs and used
+ // later to figure out how to remap registers.
+ MI->getOperand(0).setReg(PhiR);
+ IllegalPhisToDelete.push_back(MI);
+ return;
+ }
+
+ int Stage = getStage(MI);
+ if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
+ LiveStages[MI->getParent()].test(Stage))
+ // Instruction is live, no rewriting to do.
+ return;
+
+ for (MachineOperand &DefMO : MI->defs()) {
+ SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+ for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+ // Only PHIs can use values from this block by construction.
+ // Match with the equivalent PHI in B.
+ assert(UseMI.isPHI());
+ Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+ MI->getParent());
+ Subs.emplace_back(&UseMI, Reg);
+ }
+ for (auto &Sub : Subs)
+ Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ }
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+}
+
+void PeelingModuloScheduleExpander::fixupBranches() {
+ // Work outwards from the kernel.
+ bool KernelDisposed = false;
+ int TC = Schedule.getNumStages() - 1;
+ for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
+ ++PI, ++EI, --TC) {
+ MachineBasicBlock *Prolog = *PI;
+ MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
+ MachineBasicBlock *Epilog = *EI;
+ SmallVector<MachineOperand, 4> Cond;
+ TII->removeBranch(*Prolog);
+ std::optional<bool> StaticallyGreater =
+ LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
+ if (!StaticallyGreater) {
+ LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
+ // Dynamically branch based on Cond.
+ TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
+ } else if (*StaticallyGreater == false) {
+ LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
+ // Prolog never falls through; branch to epilog and orphan interior
+ // blocks. Leave it to unreachable-block-elim to clean up.
+ Prolog->removeSuccessor(Fallthrough);
+ for (MachineInstr &P : Fallthrough->phis()) {
+ P.removeOperand(2);
+ P.removeOperand(1);
+ }
+ TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
+ KernelDisposed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
+ // Prolog always falls through; remove incoming values in epilog.
+ Prolog->removeSuccessor(Epilog);
+ for (MachineInstr &P : Epilog->phis()) {
+ P.removeOperand(4);
+ P.removeOperand(3);
+ }
+ }
+ }
+
+ if (!KernelDisposed) {
+ LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
+ LoopInfo->setPreheader(Prologs.back());
+ } else {
+ LoopInfo->disposed();
+ }
+}
+
+void PeelingModuloScheduleExpander::rewriteKernel() {
+ KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
+ KR.rewrite();
+}
+
+void PeelingModuloScheduleExpander::expand() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = Schedule.getLoop()->getLoopPreheader();
+ LLVM_DEBUG(Schedule.dump());
+ LoopInfo = TII->analyzeLoopForPipelining(BB);
+ assert(LoopInfo);
+
+ rewriteKernel();
+ peelPrologAndEpilogs();
+ fixupBranches();
+}
+
+void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = Schedule.getLoop()->getLoopPreheader();
+
+ // Dump the schedule before we invalidate and remap all its instructions.
+ // Stash it in a string so we can print it if we found an error.
+ std::string ScheduleDump;
+ raw_string_ostream OS(ScheduleDump);
+ Schedule.print(OS);
+ OS.flush();
+
+  // First, run the normal ModuloScheduleExpander. We don't support any
+ // InstrChanges.
+ assert(LIS && "Requires LiveIntervals!");
+ ModuloScheduleExpander MSE(MF, Schedule, *LIS,
+ ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
+ if (!ExpandedKernel) {
+ // The expander optimized away the kernel. We can't do any useful checking.
+ MSE.cleanup();
+ return;
+ }
+ // Before running the KernelRewriter, re-add BB into the CFG.
+ Preheader->addSuccessor(BB);
+
+ // Now run the new expansion algorithm.
+ KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
+ KR.rewrite();
+ peelPrologAndEpilogs();
+
+ // Collect all illegal phis that the new algorithm created. We'll give these
+ // to KernelOperandInfo.
+ SmallPtrSet<MachineInstr *, 4> IllegalPhis;
+ for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
+ if (NI->isPHI())
+ IllegalPhis.insert(&*NI);
+ }
+
+ // Co-iterate across both kernels. We expect them to be identical apart from
+ // phis and full COPYs (we look through both).
+ SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
+ auto OI = ExpandedKernel->begin();
+ auto NI = BB->begin();
+ for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
+ while (OI->isPHI() || OI->isFullCopy())
+ ++OI;
+ while (NI->isPHI() || NI->isFullCopy())
+ ++NI;
+ assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
+ // Analyze every operand separately.
+ for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
+ OOpI != OI->operands_end(); ++OOpI, ++NOpI)
+ KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
+ KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
+ }
+
+ bool Failed = false;
+ for (auto &OldAndNew : KOIs) {
+ if (OldAndNew.first == OldAndNew.second)
+ continue;
+ Failed = true;
+ errs() << "Modulo kernel validation error: [\n";
+ errs() << " [golden] ";
+ OldAndNew.first.print(errs());
+ errs() << " ";
+ OldAndNew.second.print(errs());
+ errs() << "]\n";
+ }
+
+ if (Failed) {
+ errs() << "Golden reference kernel:\n";
+ ExpandedKernel->print(errs());
+ errs() << "New kernel:\n";
+ BB->print(errs());
+ errs() << ScheduleDump;
+ report_fatal_error(
+ "Modulo kernel validation (-pipeliner-experimental-cg) failed");
+ }
+
+ // Cleanup by removing BB from the CFG again as the original
+ // ModuloScheduleExpander intended.
+ Preheader->removeSuccessor(BB);
+ MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestPass implementation
+//===----------------------------------------------------------------------===//
+// This pass constructs a ModuloSchedule from its module and runs
+// ModuloScheduleExpander.
+//
+// The module is expected to contain a single-block analyzable loop.
+// The total order of instructions is taken from the loop as-is.
+// Instructions are expected to be annotated with a PostInstrSymbol.
+// This PostInstrSymbol must have the following format:
+// "Stage=%d Cycle=%d".
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ModuloScheduleTest : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ModuloScheduleTest() : MachineFunctionPass(ID) {
+ initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void runOnLoop(MachineFunction &MF, MachineLoop &L);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // namespace
+
+char ModuloScheduleTest::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
+ "Modulo Schedule test pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
+ "Modulo Schedule test pass", false, false)
+
+bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ for (auto *L : MLI) {
+ if (L->getTopBlock() != L->getBottomBlock())
+ continue;
+ runOnLoop(MF, *L);
+ return false;
+ }
+ return false;
+}
+
+static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
+ std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
+ std::pair<StringRef, StringRef> StageTokenAndValue =
+ getToken(StageAndCycle.first, "-");
+ std::pair<StringRef, StringRef> CycleTokenAndValue =
+ getToken(StageAndCycle.second, "-");
+ if (StageTokenAndValue.first != "Stage" ||
+ CycleTokenAndValue.first != "_Cycle") {
+ llvm_unreachable(
+ "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
+ return;
+ }
+
+ StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
+ CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
+
+ dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
+}
+
+void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBasicBlock *BB = L.getTopBlock();
+ dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
+
+ DenseMap<MachineInstr *, int> Cycle, Stage;
+ std::vector<MachineInstr *> Instrs;
+ for (MachineInstr &MI : *BB) {
+ if (MI.isTerminator())
+ continue;
+ Instrs.push_back(&MI);
+ if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
+ dbgs() << "Parsing post-instr symbol for " << MI;
+ parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
+ }
+ }
+
+ ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
+ std::move(Stage));
+ ModuloScheduleExpander MSE(
+ MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestAnnotater implementation
+//===----------------------------------------------------------------------===//
+
+void ModuloScheduleTestAnnotater::annotate() {
+ for (MachineInstr *MI : S.getInstructions()) {
+ SmallVector<char, 16> SV;
+ raw_svector_ostream OS(SV);
+ OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
+ MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
+ MI->setPostInstrSymbol(MF, Sym);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp
new file mode 100644
index 000000000000..e4cd92ac4868
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp
@@ -0,0 +1,92 @@
+//===- MultiHazardRecognizer.cpp - Scheduler Support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MultiHazardRecognizer class, which is a wrapper
+// for a set of ScheduleHazardRecognizer instances.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MultiHazardRecognizer.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <functional>
+#include <numeric>
+
+using namespace llvm;
+
+void MultiHazardRecognizer::AddHazardRecognizer(
+ std::unique_ptr<ScheduleHazardRecognizer> &&R) {
+ MaxLookAhead = std::max(MaxLookAhead, R->getMaxLookAhead());
+ Recognizers.push_back(std::move(R));
+}
+
+bool MultiHazardRecognizer::atIssueLimit() const {
+ return llvm::any_of(Recognizers,
+ std::mem_fn(&ScheduleHazardRecognizer::atIssueLimit));
+}
+
+ScheduleHazardRecognizer::HazardType
+MultiHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ for (auto &R : Recognizers) {
+ auto res = R->getHazardType(SU, Stalls);
+ if (res != NoHazard)
+ return res;
+ }
+ return NoHazard;
+}
+
+void MultiHazardRecognizer::Reset() {
+ for (auto &R : Recognizers)
+ R->Reset();
+}
+
+void MultiHazardRecognizer::EmitInstruction(SUnit *SU) {
+ for (auto &R : Recognizers)
+ R->EmitInstruction(SU);
+}
+
+void MultiHazardRecognizer::EmitInstruction(MachineInstr *MI) {
+ for (auto &R : Recognizers)
+ R->EmitInstruction(MI);
+}
+
+unsigned MultiHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return std::max(a, R->PreEmitNoops(SU));
+ };
+ return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN);
+}
+
+unsigned MultiHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return std::max(a, R->PreEmitNoops(MI));
+ };
+ return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN);
+}
+
+bool MultiHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ auto SPA = [=](std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return R->ShouldPreferAnother(SU);
+ };
+ return llvm::any_of(Recognizers, SPA);
+}
+
+void MultiHazardRecognizer::AdvanceCycle() {
+ for (auto &R : Recognizers)
+ R->AdvanceCycle();
+}
+
+void MultiHazardRecognizer::RecedeCycle() {
+ for (auto &R : Recognizers)
+ R->RecedeCycle();
+}
+
+void MultiHazardRecognizer::EmitNoop() {
+ for (auto &R : Recognizers)
+ R->EmitNoop();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
new file mode 100644
index 000000000000..7304bfef55cb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -0,0 +1,55 @@
+//===-- NonRelocatableStringpool.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/NonRelocatableStringpool.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+
+DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
+ if (S.empty() && !Strings.empty())
+ return EmptyString;
+
+ if (Translator)
+ S = Translator(S);
+ auto I = Strings.insert({S, DwarfStringPoolEntry()});
+ auto &Entry = I.first->second;
+ if (I.second || !Entry.isIndexed()) {
+ Entry.Index = NumEntries++;
+ Entry.Offset = CurrentEndOffset;
+ Entry.Symbol = nullptr;
+ CurrentEndOffset += S.size() + 1;
+ }
+ return DwarfStringPoolEntryRef(*I.first);
+}
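+
+// Worked example (illustrative, assuming a pool created without the initial
+// empty-string entry): getEntry("foo") assigns Index 0 / Offset 0 and bumps
+// CurrentEndOffset to 4 (three characters plus the NUL terminator);
+// getEntry("bar") then gets Index 1 / Offset 4, leaving CurrentEndOffset at 8.
+// A second getEntry("foo") returns the existing entry unchanged.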
+
+StringRef NonRelocatableStringpool::internString(StringRef S) {
+ DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed};
+
+ if (Translator)
+ S = Translator(S);
+
+ auto InsertResult = Strings.insert({S, Entry});
+ return InsertResult.first->getKey();
+}
+
+std::vector<DwarfStringPoolEntryRef>
+NonRelocatableStringpool::getEntriesForEmission() const {
+ std::vector<DwarfStringPoolEntryRef> Result;
+ Result.reserve(Strings.size());
+ for (const auto &E : Strings)
+ if (E.getValue().isIndexed())
+ Result.emplace_back(E);
+ llvm::sort(Result, [](const DwarfStringPoolEntryRef A,
+ const DwarfStringPoolEntryRef B) {
+ return A.getIndex() < B.getIndex();
+ });
+ return Result;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 000000000000..d997fbbed5a6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,206 @@
+//===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "opt-phis"
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ public:
+ static char ID; // Pass identification
+
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ using InstrSet = SmallPtrSet<MachineInstr *, 16>;
+ using InstrSetIterator = SmallPtrSetIterator<MachineInstr *>;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+
+} // end anonymous namespace
+
+char OptimizePHIs::ID = 0;
+
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
+
+INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE,
+ "Optimize machine instruction PHIs", false, false)
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getSubtarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : Fn)
+ Changed |= OptimizeBB(MBB);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+ /// have been scanned. The PHIs may form a single cycle, several cycles, or chains.
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ Register DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI).second)
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ Register SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ SrcMI->getOperand(1).getReg().isVirtual()) {
+ SrcReg = SrcMI->getOperand(1).getReg();
+ SrcMI = MRI->getVRegDef(SrcReg);
+ }
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0 && SingleValReg != SrcReg)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
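+
+// Illustrative example for the routine above (simplified MIR):
+//
+//   %a = PHI %x, %bb.0, %b, %bb.1
+//   %b = PHI %a, %bb.2, %x, %bb.3
+//
+// Every non-PHI, non-copy source reachable from %a is %x, so
+// IsSingleValuePHICycle(%a) succeeds with SingleValReg = %x, and the caller
+// rewrites all uses of %a to %x and erases that PHI.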
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ Register DstReg = MI->getOperand(0).getReg();
+ assert(DstReg.isVirtual() && "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI).second)
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DstReg)) {
+ if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ Register OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
+ MI->eraseFromParent();
+
+ // The kill flags on OldReg and SingleValReg may no longer be correct.
+ MRI->clearKillFlags(SingleValReg);
+
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (MachineInstr *PhiMI : PHIsInCycle) {
+ if (MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 000000000000..dbb9a9ffdf60
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,759 @@
+//===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "phi-node-elimination"
+
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+static cl::opt<bool>
+SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden, cl::desc("Split all critical edges during "
+ "PHI elimination"));
+
+static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
+ "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden,
+ cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
+
+namespace {
+
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator LastPHIIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in the
+ /// function. In particular, for each virtual register used by a PHI node,
+ /// count the number of such uses coming from each predecessor BB. This is
+ /// used later to determine when the vreg is killed in that BB.
+ void analyzePHINodes(const MachineFunction& MF);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI,
+ std::vector<SparseBitVector<>> *LiveInSets);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(Register Reg, const MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB);
+
+ using BBVRegPair = std::pair<unsigned, Register>;
+ using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
+
+ // Count the number of non-undef PHI uses of each register in each BB.
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ using LoweredPHIMap =
+ DenseMap<MachineInstr*, unsigned, MachineInstrExpressionTrait>;
+ LoweredPHIMap LoweredPHIs;
+ };
+
+} // end anonymous namespace
+
+STATISTIC(NumLowered, "Number of phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE,
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE,
+ "Eliminate PHI nodes for register allocation", false, false)
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addUsedIfAvailable<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+
+ bool Changed = false;
+
+ // Split critical edges to help the coalescer.
+ if (!DisableEdgeSplitting && (LV || LIS)) {
+ // A set of live-in regs for each MBB which is used to update LV
+ // efficiently also with large functions.
+ std::vector<SparseBitVector<>> LiveInSets;
+ if (LV) {
+ LiveInSets.resize(MF.size());
+ for (unsigned Index = 0, e = MRI->getNumVirtRegs(); Index != e; ++Index) {
+ // Set the bit for this register for each MBB where it is
+ // live-through or live-in (killed).
+ Register VirtReg = Register::index2VirtReg(Index);
+ MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+ if (!DefMI)
+ continue;
+ LiveVariables::VarInfo &VI = LV->getVarInfo(VirtReg);
+ SparseBitVector<>::iterator AliveBlockItr = VI.AliveBlocks.begin();
+ SparseBitVector<>::iterator EndItr = VI.AliveBlocks.end();
+ while (AliveBlockItr != EndItr) {
+ unsigned BlockNum = *(AliveBlockItr++);
+ LiveInSets[BlockNum].set(Index);
+ }
+ // The register is live into an MBB in which it is killed but not
+ // defined. See comment for VarInfo in LiveVariables.h.
+ MachineBasicBlock *DefMBB = DefMI->getParent();
+ if (VI.Kills.size() > 1 ||
+ (!VI.Kills.empty() && VI.Kills.front()->getParent() != DefMBB))
+ for (auto *MI : VI.Kills)
+ LiveInSets[MI->getParent()->getNumber()].set(Index);
+ }
+ }
+
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (auto &MBB : MF)
+ Changed |= SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr));
+ }
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(MF);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (auto &MBB : MF)
+ Changed |= EliminatePHINodes(MF, MBB);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (MachineInstr *DefMI : ImpDefs) {
+ Register DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(DefReg)) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*DefMI);
+ DefMI->eraseFromParent();
+ }
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (auto &I : LoweredPHIs) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*I.first);
+ MF.deleteMachineInstr(I.first);
+ }
+
+ // TODO: we should use the incremental DomTree updater here.
+ if (Changed)
+ if (auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->getBase().recalculate(MF);
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+
+ MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the last PHI node.
+ MachineBasicBlock::iterator LastPHIIt =
+ std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+
+ while (MBB.front().isPHI())
+ LowerPHINode(MBB, LastPHIIt);
+
+ return true;
+}
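+
+// Sketch of the lowering performed by LowerPHINode (simplified MIR):
+//
+//   bb.2:
+//     %dst = PHI %a, %bb.0, %b, %bb.1
+// becomes
+//   bb.0:                          bb.1:
+//     %join = COPY %a                %join = COPY %b
+//   bb.2:
+//     %dst = COPY %join
+//
+// The copy in bb.2 is placed after any remaining PHIs, and each predecessor
+// copy is placed at the point chosen by findPHICopyInsertPoint (normally just
+// before the first terminator of that block).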
+
+/// Return true if all defs of VirtReg are implicit-defs.
+/// This includes registers with no defs.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo &MRI) {
+ for (MachineInstr &DI : MRI.def_instructions(VirtReg))
+ if (!DI.isImplicitDef())
+ return false;
+ return true;
+}
+
+/// Return true if all sources of the phi node are implicit_def's, or undef's.
+static bool allPhiOperandsUndefined(const MachineInstr &MPhi,
+ const MachineRegisterInfo &MRI) {
+ for (unsigned I = 1, E = MPhi.getNumOperands(); I != E; I += 2) {
+ const MachineOperand &MO = MPhi.getOperand(I);
+ if (!isImplicitlyDefined(MO.getReg(), MRI) && !MO.isUndef())
+ return false;
+ }
+ return true;
+}
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator LastPHIIt) {
+ ++NumLowered;
+
+ MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);
+
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(&*MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ Register DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ MachineInstr *PHICopy = nullptr;
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ if (allPhiOperandsUndefined(*MPhi, *MRI))
+ // If all sources of a PHI node are implicit_def or undef uses, just emit an
+ // implicit_def instead of a copy.
+ PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ LLVM_DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for "
+ << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ // Give the target the possibility to handle special cases; fall through
+ // otherwise.
+ PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ IncomingReg, DestReg);
+ }
+
+ if (MPhi->peekDebugInstrNum()) {
+ // If referred to by debug-info, store where this PHI was.
+ MachineFunction *MF = MBB.getParent();
+ unsigned ID = MPhi->peekDebugInstrNum();
+ auto P = MachineFunction::DebugPHIRegallocPos(&MBB, IncomingReg, 0);
+ auto Res = MF->DebugPHIPositions.insert({ID, P});
+ assert(Res.second);
+ (void)Res;
+ }
+
+ // Update live variable information if there is any.
+ if (LV) {
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ LV->setPHIJoin(IncomingReg);
+
+ MachineInstr *OldKill = nullptr;
+ bool IsPHICopyAfterOldKill = false;
+
+ if (reusedIncoming && (OldKill = VI.findKill(&MBB))) {
+ // Calculate whether the PHICopy is after the OldKill.
+ // In general, the PHICopy is inserted as the first non-phi instruction
+ // by default, so it's before the OldKill. But some Target hooks for
+ // createPHIDestinationCopy() may modify the default insert position of
+ // PHICopy.
+ for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end();
+ I != E; ++I) {
+ if (I == PHICopy)
+ break;
+
+ if (I == OldKill) {
+ IsPHICopyAfterOldKill = true;
+ break;
+ }
+ }
+ }
+
+ // When we are reusing the incoming register and it has been marked killed
+ // by OldKill, if the PHICopy is after the OldKill, we should remove the
+ // killed flag from OldKill.
+ if (IsPHICopyAfterOldKill) {
+ LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
+ LLVM_DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the first used incoming
+ // value or the reused incoming value whose PHICopy is after the OldKill
+ // is killed. Note that because the value is defined in several places
+ // (once for each incoming block), the "def" block and instruction
+ // fields of the VarInfo are not filled in.
+ if (!OldKill || IsPHICopyAfterOldKill)
+ LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(*MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, *PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, *MPhi);
+ }
+ }
+
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy);
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(!DestLI.empty() && "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ if (!MPhi->getOperand(i).isUndef()) {
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+ }
+ }
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, *MRI);
+ assert(SrcReg.isVirtual() &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the source
+ // path into the PHI.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock).second)
+ continue; // If the copy has already been emitted, we're done.
+
+ MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+ if (SrcRegDef && TII->isUnspillableTerminator(SrcRegDef)) {
+ assert(SrcRegDef->getOperand(0).isReg() &&
+ SrcRegDef->getOperand(0).isDef() &&
+ "Expected operand 0 to be a reg def!");
+ // Now that the PHI's use has been removed (as the instruction was
+ // removed) there should be no other uses of the SrcReg.
+ assert(MRI->use_empty(SrcReg) &&
+ "Expected a single use from UnspillableTerminator");
+ SrcRegDef->getOperand(0).setReg(IncomingReg);
+
+ // Update LiveVariables.
+ if (LV) {
+ LiveVariables::VarInfo &SrcVI = LV->getVarInfo(SrcReg);
+ LiveVariables::VarInfo &IncomingVI = LV->getVarInfo(IncomingReg);
+ IncomingVI.AliveBlocks = std::move(SrcVI.AliveBlocks);
+ SrcVI.AliveBlocks.clear();
+ }
+
+ continue;
+ }
+
+ // Find a safe location to insert the copy, this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ MachineInstr *NewSrcInstr = nullptr;
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ // Delete the debug location, since the copy is inserted into a
+ // different basic block.
+ NewSrcInstr = TII->createPHISourceCopy(opBlock, InsertPos, nullptr,
+ SrcReg, SrcSubReg, IncomingReg);
+ }
+ }
+
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
+
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as being the killing
+ // block, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end();
+ ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = InsertPos;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugInstr())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = NewSrcInstr;
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, *KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+
+ if (LIS) {
+ if (NewSrcInstr) {
+ LIS->InsertMachineInstrInMaps(*NewSrcInstr);
+ LIS->addSegmentToEndOfBlock(IncomingReg, *NewSrcInstr);
+ }
+
+ if (!SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
+ LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock *Succ : opBlock.successors()) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(Succ);
+ VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
+
+ // Definitions by other PHIs are not truly live-in for our purposes.
+ if (VNI && VNI->def != startIdx) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (!isLiveOut) {
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ for (MachineBasicBlock::iterator Term = InsertPos;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't just insert a copy.
+ KillInst = InsertPos;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugInstr())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = std::prev(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) &&
+ "Cannot find kill instruction");
+
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst);
+ SrcLI.removeSegment(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
+ }
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MPhi);
+ MF.deleteMachineInstr(MPhi);
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, for each virtual register used by a PHI node, count the
+/// number of such uses coming from each predecessor BB. This is used later to
+/// determine when the vreg is killed in that BB.
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+ for (const auto &MBB : MF) {
+ for (const auto &BBI : MBB) {
+ if (!BBI.isPHI())
+ break;
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
+ if (!BBI.getOperand(i).isUndef()) {
+ ++VRegPHIUseCount[BBVRegPair(
+ BBI.getOperand(i + 1).getMBB()->getNumber(),
+ BBI.getOperand(i).getReg())];
+ }
+ }
+ }
+ }
+}
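+
+// For example, if %v appears as an incoming value from bb.3 in two different
+// PHIs, VRegPHIUseCount[{3, %v}] is 2 after this analysis. LowerPHINode
+// decrements the count as each PHI is lowered and only considers marking %v
+// killed in bb.3 once the count for that pair drops to zero.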
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI,
+ std::vector<SparseBitVector<>> *LiveInSets) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ Register Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : nullptr;
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB);
+ if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
+ continue;
+ if (ShouldSplit) {
+ LLVM_DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge "
+ << printMBBReference(*PreMBB) << " -> "
+ << printMBBReference(MBB) << ": " << *BBI);
+ }
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB);
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ LLVM_DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop)
+ dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop)
+ dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit && !SplitAllCriticalEdges)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, *this, LiveInSets)) {
+ LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n");
+ continue;
+ }
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ return Changed;
+}
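+
+// Example of the splitting decision above (illustrative): suppose bb.0
+// branches to both bb.1 and bb.2, and bb.2 begins with
+// "%p = PHI %v, %bb.0, ...", making the edge bb.0 -> bb.2 critical. If %v is
+// live out of bb.0 for some reason other than the PHI use (say it is also
+// used in bb.1), the copy inserted in bb.0 during PHI elimination would not
+// kill %v and may fail to coalesce, so the edge is split and the copy placed
+// in the new block. If %v is additionally live into bb.2, the interference is
+// unavoidable and the edge is only split when it exits a loop.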
+
+bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(Register Reg,
+ const MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (const MachineBasicBlock *SI : MBB->successors())
+ if (LI.liveAt(LIS->getMBBStartIdx(SI)))
+ return true;
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..016335f420d3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,64 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction. Similarly for an INLINEASM_BR
+ // going to an indirect target. This is similar to SplitKit.cpp's
+ // computeLastInsertPoint, and similarly assumes that there cannot be multiple
+ // instructions that are Calls with EHPad successors or INLINEASM_BR in a
+ // block.
+ bool EHPadSuccessor = SuccMBB->isEHPad();
+ if (!EHPadSuccessor && !SuccMBB->isInlineAsmBrIndirectTarget())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs in this basic block.
+ SmallPtrSet<MachineInstr *, 8> DefsInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineInstr &RI : MRI.def_instructions(SrcReg))
+ if (RI.getParent() == MBB)
+ DefsInMBB.insert(&RI);
+
+ MachineBasicBlock::iterator InsertPoint = MBB->begin();
+ // Insert the copy at the _latest_ point of:
+ // 1. Immediately AFTER the last def
+ // 2. Immediately BEFORE a call/inlineasm_br.
+ for (auto I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) {
+ if (DefsInMBB.contains(&*I)) {
+ InsertPoint = std::next(I.getReverse());
+ break;
+ }
+ if ((EHPadSuccessor && I->isCall()) ||
+ I->getOpcode() == TargetOpcode::INLINEASM_BR) {
+ InsertPoint = I.getReverse();
+ break;
+ }
+ }
+
+ // Make sure the copy goes after any phi nodes but before
+ // any debug nodes.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
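+
+// Illustrative example: in a predecessor block such as
+//
+//     %src = SOME_DEF ...
+//     CALL ...            // successor on this edge is an EH pad
+//     JMP ...
+//
+// the copy of %src is inserted just before the call: it must follow the def
+// of %src, but control may already leave the block at the call, so the usual
+// "before the first terminator" position would be too late.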
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..0ff3a41f47d3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,24 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
new file mode 100644
index 000000000000..43b23368ead2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
@@ -0,0 +1,97 @@
+//===-- ParallelCG.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ParallelCG.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+
+using namespace llvm;
+
+static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
+ function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
+ CodeGenFileType FileType) {
+ std::unique_ptr<TargetMachine> TM = TMFactory();
+ assert(TM && "Failed to create target machine!");
+
+ legacy::PassManager CodeGenPasses;
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
+ report_fatal_error("Failed to setup codegen");
+ CodeGenPasses.run(*M);
+}
+
+void llvm::splitCodeGen(
+ Module &M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
+ ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
+ const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
+ CodeGenFileType FileType, bool PreserveLocals) {
+ assert(BCOSs.empty() || BCOSs.size() == OSs.size());
+
+ if (OSs.size() == 1) {
+ if (!BCOSs.empty())
+ WriteBitcodeToFile(M, *BCOSs[0]);
+ codegen(&M, *OSs[0], TMFactory, FileType);
+ return;
+ }
+
+ // Create ThreadPool in nested scope so that threads will be joined
+ // on destruction.
+ {
+ ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size()));
+ int ThreadCount = 0;
+
+ SplitModule(
+ M, OSs.size(),
+ [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the
+ // codegen. We do it by serializing partition modules to bitcode
+ // (while still on the main thread, in order to avoid data races) and
+ // spinning up new threads which deserialize the partitions into
+ // separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallString<0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(*MPart, BCOS);
+
+ if (!BCOSs.empty()) {
+ BCOSs[ThreadCount]->write(BC.begin(), BC.size());
+ BCOSs[ThreadCount]->flush();
+ }
+
+ llvm::raw_pwrite_stream *ThreadOS = OSs[ThreadCount++];
+ // Enqueue the task
+ CodegenThreadPool.async(
+ [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
+ LLVMContext Ctx;
+ Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ MemoryBufferRef(StringRef(BC.data(), BC.size()),
+ "<split-module>"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+
+ codegen(MPartInCtx.get(), *ThreadOS, TMFactory, FileType);
+ },
+ // Pass BC using std::move to ensure that it get moved rather than
+ // copied into the thread's context.
+ std::move(BC));
+ },
+ PreserveLocals);
+ }
+}
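+
+// Usage sketch (hypothetical caller, error handling omitted): with N output
+// streams the module is split into N partitions, each compiled on its own
+// thread.
+//
+//   SmallVector<raw_pwrite_stream *, 4> OSPtrs;
+//   for (std::unique_ptr<raw_pwrite_stream> &S : ObjectStreams)
+//     OSPtrs.push_back(S.get());
+//   splitCodeGen(M, OSPtrs, /*BCOSs=*/{},
+//                [&] { return createMyTargetMachine(); }, CGFT_ObjectFile);
+//
+// ObjectStreams and createMyTargetMachine are stand-ins for whatever the
+// caller uses to open its outputs and configure the target; the factory is
+// invoked once per worker, so each thread gets its own TargetMachine.
+// Passing a non-empty BCOSs array additionally writes each partition's
+// bitcode to the corresponding stream.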
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
new file mode 100644
index 000000000000..9449f143366f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -0,0 +1,98 @@
+//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that edits function bodies in place to support
+// the "patchable-function" attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+namespace {
+struct PatchableFunction : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PatchableFunction() : MachineFunctionPass(ID) {
+ initializePatchableFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+}
+
+bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
+ MachineBasicBlock &FirstMBB = *MF.begin();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ // The initial .loc covers PATCHABLE_FUNCTION_ENTER.
+ BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+ return true;
+ }
+
+ if (!MF.getFunction().hasFnAttribute("patchable-function"))
+ return false;
+
+#ifndef NDEBUG
+ Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
+ StringRef PatchType = PatchAttr.getValueAsString();
+ assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
+#endif
+
+ auto &FirstMBB = *MF.begin();
+ auto *TII = MF.getSubtarget().getInstrInfo();
+
+ MachineBasicBlock::iterator FirstActualI = llvm::find_if(
+ FirstMBB, [](const MachineInstr &MI) { return !MI.isMetaInstruction(); });
+
+ if (FirstActualI == FirstMBB.end()) {
+ // Per the Microsoft documentation on the /hotpatch feature, we must ensure
+ // that "the first instruction of each function is at least two bytes, and no
+ // jump within the function goes to the first instruction".
+
+ // When the first MBB is empty, insert a patchable no-op. This ensures the
+ // first instruction is patchable in two special cases:
+ // - the function is empty (e.g. unreachable)
+ // - the function jumps back to the first instruction, which is in a
+ // successor MBB.
+ BuildMI(&FirstMBB, DebugLoc(), TII->get(TargetOpcode::PATCHABLE_OP))
+ .addImm(2)
+ .addImm(TargetOpcode::PATCHABLE_OP);
+ MF.ensureAlignment(Align(16));
+ return true;
+ }
+
+ auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_OP))
+ .addImm(2)
+ .addImm(FirstActualI->getOpcode());
+
+ for (auto &MO : FirstActualI->operands())
+ MIB.add(MO);
+
+ FirstActualI->eraseFromParent();
+ MF.ensureAlignment(Align(16));
+ return true;
+}
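+
+// Rough before/after for "patchable-function"="prologue-short-redirect"
+// (schematic MIR, operands elided):
+//
+//   Before:  FIRST_REAL_INSTR <operands...>
+//   After:   PATCHABLE_OP 2, <opcode of FIRST_REAL_INSTR>, <operands...>
+//
+// The target-specific lowering of PATCHABLE_OP is then expected to guarantee
+// that at least two bytes are emitted at this position (using a longer
+// encoding or a no-op when the wrapped instruction would be shorter), which
+// is what allows a hot-patching tool to overwrite the start of the function
+// with a short jump.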
+
+char PatchableFunction::ID = 0;
+char &llvm::PatchableFunctionID = PatchableFunction::ID;
+INITIALIZE_PASS(PatchableFunction, "patchable-function",
+ "Implement the 'patchable-function' attribute", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 000000000000..a08cc78f11b1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,2128 @@
+//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the results.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction already sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+// Another instance, in this code:
+//
+// sub r1, r3 | sub r1, imm
+// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+// bge L1
+//
+// If the branch instruction can use flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
+// - Optimize Loads:
+//
+// Loads that can be folded into a later instruction. A load is foldable
+// if it loads to virtual registers and the virtual register defined has
+// a single use.
+//
+// - Optimize Copies and Bitcast (more generally, target specific copies):
+//
+// Rewrite copies and bitcasts to avoid cross register bank copies
+// when possible.
+// E.g., Consider the following example, where capital and lower
+// letters denote different register files:
+// b = copy A <-- cross-bank copy
+// C = copy b <-- cross-bank copy
+// =>
+// b = copy A <-- cross-bank copy
+// C = copy A <-- same-bank copy
+//
+// E.g., for bitcast:
+// b = bitcast A <-- cross-bank copy
+// C = bitcast b <-- cross-bank copy
+// =>
+// b = bitcast A <-- cross-bank copy
+// C = copy A <-- same-bank copy
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;
+
+#define DEBUG_TYPE "peephole-opt"
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
+/// Specify whether or not the value tracking looks through
+/// complex instructions. When this is true, the value tracker
+/// bails on everything that is not a copy or a bitcast.
+static cl::opt<bool>
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable advanced copy optimization"));
+
+static cl::opt<bool> DisableNAPhysCopyOpt(
+ "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable non-allocatable physical register copy optimization"));
+
+// Limit the number of PHI instructions to process
+// in PeepholeOptimizer::getNextSource.
+static cl::opt<unsigned> RewritePHILimit(
+ "rewrite-phi-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the length of PHI chains to lookup"));
+
+// Limit the length of recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+ "recurrence-chain-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+ "of commuting operands"));
+
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
+STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
+STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
+STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
+
+namespace {
+
+ class ValueTrackerResult;
+ class RecurrenceInstr;
+
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree
+ MachineLoopInfo *MLI = nullptr;
+
+ public:
+ static char ID; // Pass identification
+
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
+ /// Track Def -> Use info used for rewriting copies.
+ using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
+
+ /// Sequence of instructions that formulate a recurrence cycle.
+ using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
+
+ private:
+ bool optimizeCmpInstr(MachineInstr &MI);
+ bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs);
+ bool optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeCondBranch(MachineInstr &MI);
+ bool optimizeCoalescableCopy(MachineInstr &MI);
+ bool optimizeUncoalescableCopy(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeRecurrence(MachineInstr &PHI);
+ bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
+ bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs);
+ bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs);
+
+ /// Finds recurrence cycles, but only ones that are formulated around
+ /// a def operand and a use operand that are tied. If there is a use
+ /// operand commutable with the tied use operand, find the recurrence
+ /// cycle along that operand as well.
+ bool findTargetRecurrence(Register Reg,
+ const SmallSet<Register, 2> &TargetReg,
+ RecurrenceCycle &RC);
+
+ /// If copy instruction \p MI is a virtual register copy or a copy of a
+ /// constant physical register to a virtual register, track it in the
+ /// \p CopyMIs map. If the source of this copy was previously seen as the
+ /// source of another copy, replace the uses of this copy's destination
+ /// with the previously seen copy's destination register.
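+ ///
+ /// For illustration only (hypothetical virtual registers):
+ /// %1 = COPY %0
+ /// ...
+ /// %2 = COPY %0
+ /// Here the second copy is redundant; uses of %2 can be replaced with %1
+ /// when the destination register classes match.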
+ bool foldRedundantCopy(MachineInstr &MI,
+ DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs);
+
+ /// Is the register \p Reg a non-allocatable physical register?
+ bool isNAPhysCopy(Register Reg);
+
+ /// If copy instruction \p MI is a non-allocatable virtual<->physical
+ /// register copy, track it in the \p NAPhysToVirtMIs map. If this
+ /// non-allocatable physical register was previously copied to a virtual
+ /// register and hasn't been clobbered, the virt->phys copy can be
+ /// deleted.
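+ ///
+ /// For illustration only ($nap denotes a hypothetical non-allocatable
+ /// physical register):
+ /// %0 = COPY $nap
+ /// ... ; no intervening redefinition of $nap
+ /// $nap = COPY %0
+ /// The second (virt->phys) copy can then be erased.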
+ bool foldRedundantNAPhysCopy(
+ MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
+
+ bool isLoadFoldable(MachineInstr &MI,
+ SmallSet<Register, 16> &FoldAsLoadDefCandidates);
+
+ /// Check whether \p MI is understood by the register coalescer
+ /// but may require some rewriting.
+ bool isCoalescableCopy(const MachineInstr &MI) {
+ // SubregToRegs are not interesting, because they are already register
+ // coalescer friendly.
+ return MI.isCopy() || (!DisableAdvCopyOpt &&
+ (MI.isRegSequence() || MI.isInsertSubreg() ||
+ MI.isExtractSubreg()));
+ }
+
+ /// Check whether \p MI is a copy-like instruction that is
+ /// not recognized by the register coalescer.
+ bool isUncoalescableCopy(const MachineInstr &MI) {
+ return MI.isBitcast() ||
+ (!DisableAdvCopyOpt &&
+ (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()));
+ }
+
+ MachineInstr &rewriteSource(MachineInstr &CopyLike,
+ RegSubRegPair Def, RewriteMapTy &RewriteMap);
+ };
+
+ /// Helper class to hold instructions that are inside recurrence cycles.
+ /// The recurrence cycle is formulated around 1) a def operand and its
+ /// tied use operand, or 2) a def operand and a use operand that is commutable
+ /// with another use operand which is tied to the def operand. In the latter
+ /// case, the indices of the tied use operand and the commutable use operand
+ /// are maintained in CommutePair.
+ class RecurrenceInstr {
+ public:
+ using IndexPair = std::pair<unsigned, unsigned>;
+
+ RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+ RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+ : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+ MachineInstr *getMI() const { return MI; }
+ std::optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+ private:
+ MachineInstr *MI;
+ std::optional<IndexPair> CommutePair;
+ };
+
+ /// Helper class to hold a reply for ValueTracker queries.
+ /// Contains the returned sources for a given search and the instruction
+ /// from which the sources were tracked.
+ class ValueTrackerResult {
+ private:
+ /// Track all sources found by one ValueTracker query.
+ SmallVector<RegSubRegPair, 2> RegSrcs;
+
+ /// Instruction using the sources in 'RegSrcs'.
+ const MachineInstr *Inst = nullptr;
+
+ public:
+ ValueTrackerResult() = default;
+
+ ValueTrackerResult(Register Reg, unsigned SubReg) {
+ addSource(Reg, SubReg);
+ }
+
+ bool isValid() const { return getNumSources() > 0; }
+
+ void setInst(const MachineInstr *I) { Inst = I; }
+ const MachineInstr *getInst() const { return Inst; }
+
+ void clear() {
+ RegSrcs.clear();
+ Inst = nullptr;
+ }
+
+ void addSource(Register SrcReg, unsigned SrcSubReg) {
+ RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
+ }
+
+ void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
+ assert(Idx < getNumSources() && "Reg pair source out of index");
+ RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
+ }
+
+ int getNumSources() const { return RegSrcs.size(); }
+
+ RegSubRegPair getSrc(int Idx) const {
+ return RegSrcs[Idx];
+ }
+
+ Register getSrcReg(int Idx) const {
+ assert(Idx < getNumSources() && "Reg source out of index");
+ return RegSrcs[Idx].Reg;
+ }
+
+ unsigned getSrcSubReg(int Idx) const {
+ assert(Idx < getNumSources() && "SubReg source out of index");
+ return RegSrcs[Idx].SubReg;
+ }
+
+ bool operator==(const ValueTrackerResult &Other) const {
+ if (Other.getInst() != getInst())
+ return false;
+
+ if (Other.getNumSources() != getNumSources())
+ return false;
+
+ for (int i = 0, e = Other.getNumSources(); i != e; ++i)
+ if (Other.getSrcReg(i) != getSrcReg(i) ||
+ Other.getSrcSubReg(i) != getSrcSubReg(i))
+ return false;
+ return true;
+ }
+ };
+
+ /// Helper class to track the possible sources of a value defined by
+ /// a (chain of) copy-related instructions.
+ /// Given a definition (instruction and definition index), this class
+ /// follows the use-def chain to find successive suitable sources.
+ /// The given source can be used to rewrite the definition into
+ /// def = COPY src.
+ ///
+ /// For instance, let us consider the following snippet:
+ /// v0 =
+ /// v2 = INSERT_SUBREG v1, v0, sub0
+ /// def = COPY v2.sub0
+ ///
+ /// Using a ValueTracker for def = COPY v2.sub0 will give the following
+ /// suitable sources:
+ /// v2.sub0 and v0.
+ /// Then, def can be rewritten into def = COPY v0.
+ class ValueTracker {
+ private:
+ /// The current point into the use-def chain.
+ const MachineInstr *Def = nullptr;
+
+ /// The index of the definition in Def.
+ unsigned DefIdx = 0;
+
+ /// The sub register index of the definition.
+ unsigned DefSubReg;
+
+ /// The register where the value can be found.
+ Register Reg;
+
+ /// MachineRegisterInfo used to perform tracking.
+ const MachineRegisterInfo &MRI;
+
+ /// Optional TargetInstrInfo used to perform some complex tracking.
+ const TargetInstrInfo *TII;
+
+ /// Dispatcher to the right underlying implementation of getNextSource.
+ ValueTrackerResult getNextSourceImpl();
+
+ /// Specialized version of getNextSource for Copy instructions.
+ ValueTrackerResult getNextSourceFromCopy();
+
+ /// Specialized version of getNextSource for Bitcast instructions.
+ ValueTrackerResult getNextSourceFromBitcast();
+
+ /// Specialized version of getNextSource for RegSequence instructions.
+ ValueTrackerResult getNextSourceFromRegSequence();
+
+ /// Specialized version of getNextSource for InsertSubreg instructions.
+ ValueTrackerResult getNextSourceFromInsertSubreg();
+
+ /// Specialized version of getNextSource for ExtractSubreg instructions.
+ ValueTrackerResult getNextSourceFromExtractSubreg();
+
+ /// Specialized version of getNextSource for SubregToReg instructions.
+ ValueTrackerResult getNextSourceFromSubregToReg();
+
+ /// Specialized version of getNextSource for PHI instructions.
+ ValueTrackerResult getNextSourceFromPHI();
+
+ public:
+ /// Create a ValueTracker instance for the value defined by \p Reg.
+ /// \p DefSubReg represents the sub register index the value tracker will
+ /// track. It does not need to match the sub register index used in the
+ /// definition of \p Reg.
+ /// If \p Reg is a physical register, a value tracker constructed with
+ /// this constructor will not find any alternative source.
+ /// Indeed, when \p Reg is a physical register, this constructor does not
+ /// know which definition of \p Reg it should track.
+ ValueTracker(Register Reg, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII = nullptr)
+ : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
+ if (!Reg.isPhysical()) {
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
+ }
+ }
+
+ /// Following the use-def chain, get the next available source
+ /// for the tracked value.
+ /// \return A ValueTrackerResult containing a set of registers
+ /// and sub registers with tracked values. A ValueTrackerResult with
+ /// an empty set of registers means no source was found.
+ ValueTrackerResult getNextSource();
+ };
+
+} // end anonymous namespace
+
+char PeepholeOptimizer::ID = 0;
+
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
+ "Peephole Optimizations", false, false)
+
+/// If the instruction is a copy-like instruction, i.e. it reads a single register
+/// and writes a single register and it does not modify the source, and if the
+/// source value is preserved as a sub-register of the result, then replace all
+/// reachable uses of the source with the subreg of the result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
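+///
+/// For illustration only (hypothetical registers and opcodes):
+/// %1 = SEXT_OP %0 ; %0 is preserved as %1.sub_lo
+/// ... = USE %0
+/// becomes
+/// %1 = SEXT_OP %0
+/// %2 = COPY %1.sub_lo
+/// ... = USE %2
+/// so later passes can reuse the extension result instead of %0.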
+bool PeepholeOptimizer::
+optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
+ Register SrcReg, DstReg;
+ unsigned SubIdx;
+ if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (DstReg.isPhysical() || SrcReg.isPhysical())
+ return false;
+
+ if (MRI->hasOneNonDBGUse(SrcReg))
+ // No other uses.
+ return false;
+
+ // Ensure DstReg can get a register class that actually supports
+ // sub-registers. Don't change the class until we commit.
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
+ if (!DstRC)
+ return false;
+
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx =
+ TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
+
+ // The source has other uses. See if we can replace the other uses with uses
+ // of the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
+ ReachedBBs.insert(UI.getParent());
+
+ // Uses that are in the same BB as uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI == &MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == &MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend the live range
+ // of the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ Uses.append(ExtendedUses.begin(), ExtendedUses.end());
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result; we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions downstream.
+ // A PHI use is expected to be the kill of its source values.
+ for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
+ if (UI.isPHI())
+ PHIBBs.insert(UI.getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed) {
+ MRI->clearKillFlags(DstReg);
+ MRI->constrainRegClass(DstReg, DstRC);
+ }
+
+ // SubReg defs are illegal in the machine SSA phase,
+ // so we should not generate SubReg defs.
+ //
+ // For example, for the instructions:
+ //
+ // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
+ // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
+ //
+ // We should generate:
+ //
+ // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
+ // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
+ // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
+ //
+ if (UseSrcSubIdx)
+ RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
+
+ Register NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+ if (UseSrcSubIdx)
+ UseMO->setSubReg(0);
+
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// If the instruction is a compare and the instruction defining the value it
+/// compares already sets (or could be modified to set) the same flag as the
+/// compare, then we can remove the comparison and use the flag from that
+/// earlier instruction.
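+///
+/// For illustration only (pseudo machine IR; opcodes and the flags register
+/// are hypothetical): on a target with a subtract-and-set-flags form,
+/// %2 = SUBSrr %0, %1, implicit-def $flags
+/// CMPri %2, 0, implicit-def $flags
+/// the CMP can be removed and users of $flags can rely on the flags already
+/// produced by SUBSrr, provided nothing clobbers $flags in between.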
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ Register SrcReg, SrcReg2;
+ int64_t CmpMask, CmpValue;
+ if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ SrcReg.isPhysical() || SrcReg2.isPhysical())
+ return false;
+
+ // Attempt to optimize the comparison instruction.
+ LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
+ if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
+ ++NumCmps;
+ return true;
+ }
+
+ return false;
+}
+
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI, LocalMIs))
+ return false;
+ LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
+ MI.eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// Check if a simpler conditional branch can be generated.
+bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
+ return TII->optimizeCondBranch(MI);
+}
+
+/// Try to find the next source that shares the same register file
+/// for the value defined by \p Reg and \p SubReg.
+/// When true is returned, the \p RewriteMap can be used by the client to
+/// retrieve all Def -> Use along the way up to the next source. Any found
+/// Use that is not itself a key for another entry is the next source to
+/// use. During the search for the next source, multiple sources can be found
+/// given multiple incoming sources of a PHI instruction. In this case, we
+/// look in each PHI source for the next source; all found next sources must
+/// share the same register file as \p Reg and \p SubReg. The client should
+/// then be capable of rewriting all intermediate PHIs to get the next source.
+/// \return False if no alternative sources are available. True otherwise.
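+///
+/// For illustration only (hypothetical virtual registers): given
+/// %1 = COPY %0
+/// %2 = COPY %1
+/// a query on %2 fills \p RewriteMap with %2 -> %1 and %1 -> %0; %0 is not
+/// a key itself, so it is the next source to use.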
+bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
+ RewriteMapTy &RewriteMap) {
+ // Do not try to find a new source for a physical register.
+ // So far we do not have any motivating example for doing that.
+ // Thus, instead of maintaining untested code, we will revisit that if
+ // that changes at some point.
+ Register Reg = RegSubReg.Reg;
+ if (Reg.isPhysical())
+ return false;
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+
+ SmallVector<RegSubRegPair, 4> SrcToLook;
+ RegSubRegPair CurSrcPair = RegSubReg;
+ SrcToLook.push_back(CurSrcPair);
+
+ unsigned PHICount = 0;
+ do {
+ CurSrcPair = SrcToLook.pop_back_val();
+ // As explained above, do not handle physical registers.
+ if (CurSrcPair.Reg.isPhysical())
+ return false;
+
+ ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
+
+ // Follow the chain of copies until we find a more suitable source, a phi
+ // or have to abort.
+ while (true) {
+ ValueTrackerResult Res = ValTracker.getNextSource();
+ // Abort at the end of a chain (without finding a suitable source).
+ if (!Res.isValid())
+ return false;
+
+ // Insert the Def -> Use entry for the recently found source.
+ ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
+ if (CurSrcRes.isValid()) {
+ assert(CurSrcRes == Res && "ValueTrackerResult found must match");
+ // An existing entry with multiple sources is a PHI cycle we must avoid.
+ // Otherwise it's an entry with a valid next source we already found.
+ if (CurSrcRes.getNumSources() > 1) {
+ LLVM_DEBUG(dbgs()
+ << "findNextSource: found PHI cycle, aborting...\n");
+ return false;
+ }
+ break;
+ }
+ RewriteMap.insert(std::make_pair(CurSrcPair, Res));
+
+ // A ValueTrackerResult usually has one source unless it's the result from
+ // a PHI instruction. Add the found PHI edges to be looked up further.
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs > 1) {
+ PHICount++;
+ if (PHICount >= RewritePHILimit) {
+ LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < NumSrcs; ++i)
+ SrcToLook.push_back(Res.getSrc(i));
+ break;
+ }
+
+ CurSrcPair = Res.getSrc(0);
+ // Do not extend the live-ranges of physical registers as they add
+ // constraints to the register allocator. Moreover, if we want to extend
+ // the live-range of a physical register, unlike an SSA virtual register,
+ // we would have to check that it isn't redefined before the related use.
+ if (CurSrcPair.Reg.isPhysical())
+ return false;
+
+ // Keep following the chain if the value isn't any better yet.
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
+ if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
+ CurSrcPair.SubReg))
+ continue;
+
+ // We currently cannot deal with subreg operands on PHI instructions
+ // (see insertPHI()).
+ if (PHICount > 0 && CurSrcPair.SubReg != 0)
+ continue;
+
+ // We found a suitable source, and are done with this chain.
+ break;
+ }
+ } while (!SrcToLook.empty());
+
+ // If we did not find a more suitable source, there is nothing to optimize.
+ return CurSrcPair.Reg != Reg;
+}
+
+/// Insert a PHI instruction with incoming edges \p SrcRegs that are
+/// guaranteed to have the same register class. This is necessary whenever we
+/// successfully traverse a PHI instruction and find suitable sources coming
+/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
+/// suitable to be used in a new COPY instruction.
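+///
+/// For illustration only (hypothetical registers): if the original PHI is
+/// %3 = PHI %1, %bb.1, %2, %bb.2
+/// and the suitable sources found for %1 and %2 are %10 and %20, this helper
+/// emits
+/// %4 = PHI %10, %bb.1, %20, %bb.2
+/// so the caller can rewrite the copy-like user to read %4.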
+static MachineInstr &
+insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const SmallVectorImpl<RegSubRegPair> &SrcRegs,
+ MachineInstr &OrigPHI) {
+ assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
+
+ const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
+ // NewRC is only correct if no subregisters are involved. findNextSource()
+ // should have rejected those cases already.
+ assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
+ Register NewVR = MRI.createVirtualRegister(NewRC);
+ MachineBasicBlock *MBB = OrigPHI.getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
+ TII.get(TargetOpcode::PHI), NewVR);
+
+ unsigned MBBOpIdx = 2;
+ for (const RegSubRegPair &RegPair : SrcRegs) {
+ MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
+ MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
+ // Since we're extending the lifetime of RegPair.Reg, clear the
+ // kill flags to account for that and make sure RegPair.Reg reaches
+ // the new PHI.
+ MRI.clearKillFlags(RegPair.Reg);
+ MBBOpIdx += 2;
+ }
+
+ return *MIB;
+}
+
+namespace {
+
+/// Interface to query instructions amenable to copy rewriting.
+class Rewriter {
+protected:
+ MachineInstr &CopyLike;
+ unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
+public:
+ Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
+ virtual ~Rewriter() = default;
+
+ /// Get the next rewritable source (SrcReg, SrcSubReg) and
+ /// the related value that it affects (DstReg, DstSubReg).
+ /// A source is considered rewritable if its register class and the
+ /// register class of the related DstReg may not be register
+ /// coalescer friendly. In other words, given a copy-like instruction
+ /// not all the arguments may be returned as rewritable sources, since
+ /// some arguments are known to be register coalescer friendly.
+ ///
+ /// Each call of this method moves the current source to the next
+ /// rewritable source.
+ /// For instance, let CopyLike be the instruction to rewrite.
+ /// CopyLike has one definition and one source:
+ /// dst.dstSubIdx = CopyLike src.srcSubIdx.
+ ///
+ /// The first call will give the first rewritable source, i.e.,
+ /// the only source this instruction has:
+ /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
+ /// This source defines the whole definition, i.e.,
+ /// (DstReg, DstSubReg) = (dst, dstSubIdx).
+ ///
+ /// The second and subsequent calls will return false, as there is only one
+ /// rewritable source.
+ ///
+ /// \return True if a rewritable source has been found, false otherwise.
+ /// The output arguments are valid if and only if true is returned.
+ virtual bool getNextRewritableSource(RegSubRegPair &Src,
+ RegSubRegPair &Dst) = 0;
+
+ /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
+ /// \return True if the rewriting was possible, false otherwise.
+ virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
+};
+
+/// Rewriter for COPY instructions.
+class CopyRewriter : public Rewriter {
+public:
+ CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
+ assert(MI.isCopy() && "Expected copy instruction");
+ }
+ virtual ~CopyRewriter() = default;
+
+ bool getNextRewritableSource(RegSubRegPair &Src,
+ RegSubRegPair &Dst) override {
+ // CurrentSrcIdx > 0 means this function has already been called.
+ if (CurrentSrcIdx > 0)
+ return false;
+ // This is the first call to getNextRewritableSource.
+ // Move the CurrentSrcIdx to remember that we made that call.
+ CurrentSrcIdx = 1;
+ // The rewritable source is the argument.
+ const MachineOperand &MOSrc = CopyLike.getOperand(1);
+ Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+ return true;
+ }
+
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+ if (CurrentSrcIdx != 1)
+ return false;
+ MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
+ MOSrc.setReg(NewReg);
+ MOSrc.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+/// Helper class to rewrite uncoalescable copy-like instructions
+/// into new COPY (coalescer friendly) instructions.
+class UncoalescableRewriter : public Rewriter {
+ unsigned NumDefs; ///< Number of defs in the bitcast.
+
+public:
+ UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
+ NumDefs = MI.getDesc().getNumDefs();
+ }
+
+ /// \see Rewriter::getNextRewritableSource()
+ /// All such sources need to be considered rewritable in order to
+ /// rewrite an uncoalescable copy-like instruction. This method returns
+ /// each definition that must be checked if rewritable.
+ bool getNextRewritableSource(RegSubRegPair &Src,
+ RegSubRegPair &Dst) override {
+ // Find the next non-dead definition and continue from there.
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+
+ while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
+ ++CurrentSrcIdx;
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+ }
+
+ // What we track are the alternative sources of the definition.
+ Src = RegSubRegPair(0, 0);
+ const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
+ Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+
+ CurrentSrcIdx++;
+ return true;
+ }
+
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+ return false;
+ }
+};
+
+/// Specialized rewriter for INSERT_SUBREG instruction.
+class InsertSubregRewriter : public Rewriter {
+public:
+ InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
+ assert(MI.isInsertSubreg() && "Invalid instruction");
+ }
+
+ /// \see Rewriter::getNextRewritableSource()
+ /// Here CopyLike has the following form:
+ /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
+ /// Src1 has the same register class as dst, hence, there is
+ /// nothing to rewrite.
+ /// Src2.src2SubIdx may not be register coalescer friendly.
+ /// Therefore, the first call to this method returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (DstReg, DstSubReg) = (dst, subIdx).
+ ///
+ /// Subsequent calls will return false.
+ bool getNextRewritableSource(RegSubRegPair &Src,
+ RegSubRegPair &Dst) override {
+ // If we already got the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 2)
+ return false;
+ // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
+ CurrentSrcIdx = 2;
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
+ Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+
+ // We want to track something that is compatible with the
+ // partial definition.
+ if (MODef.getSubReg())
+ // Bail if we have to compose sub-register indices.
+ return false;
+ Dst = RegSubRegPair(MODef.getReg(),
+ (unsigned)CopyLike.getOperand(3).getImm());
+ return true;
+ }
+
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+ if (CurrentSrcIdx != 2)
+ return false;
+ // We are rewriting the inserted reg.
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+/// Specialized rewriter for EXTRACT_SUBREG instruction.
+class ExtractSubregRewriter : public Rewriter {
+ const TargetInstrInfo &TII;
+
+public:
+ ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
+ : Rewriter(MI), TII(TII) {
+ assert(MI.isExtractSubreg() && "Invalid instruction");
+ }
+
+ /// \see Rewriter::getNextRewritableSource()
+ /// Here CopyLike has the following form:
+ /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
+ /// There is only one rewritable source: Src.subIdx,
+ /// which defines dst.dstSubIdx.
+ bool getNextRewritableSource(RegSubRegPair &Src,
+ RegSubRegPair &Dst) override {
+ // If we already got the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 1)
+ return false;
+ // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
+ CurrentSrcIdx = 1;
+ const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
+ // If we have to compose sub-register indices, bail out.
+ if (MOExtractedReg.getSubReg())
+ return false;
+
+ Src = RegSubRegPair(MOExtractedReg.getReg(),
+ CopyLike.getOperand(2).getImm());
+
+ // We want to track something that is compatible with the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+ return true;
+ }
+
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+ // The only source we can rewrite is the input register.
+ if (CurrentSrcIdx != 1)
+ return false;
+
+ CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
+
+ // If we find a source that does not require extracting something,
+ // rewrite the operation with a copy.
+ if (!NewSubReg) {
+ // Move the current index to an invalid position.
+ // We do not want another call to this method to be able
+ // to do any change.
+ CurrentSrcIdx = -1;
+ // Rewrite the operation as a COPY.
+ // Get rid of the sub-register index.
+ CopyLike.removeOperand(2);
+ // Morph the operation into a COPY.
+ CopyLike.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ }
+ CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
+ return true;
+ }
+};
+
+/// Specialized rewriter for REG_SEQUENCE instruction.
+class RegSequenceRewriter : public Rewriter {
+public:
+ RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
+ assert(MI.isRegSequence() && "Invalid instruction");
+ }
+
+ /// \see Rewriter::getNextRewritableSource()
+ /// Here CopyLike has the following form:
+ /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
+ /// Each call will return a different source, walking all the available
+ /// sources.
+ ///
+ /// The first call returns:
+ /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
+ /// (DstReg, DstSubReg) = (dst, subIdx1).
+ ///
+ /// The second call returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (DstReg, DstSubReg) = (dst, subIdx2).
+ ///
+ /// And so on, until all the sources have been traversed, then
+ /// it returns false.
+ bool getNextRewritableSource(RegSubRegPair &Src,
+ RegSubRegPair &Dst) override {
+ // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+
+ // If this is the first call, move to the first argument.
+ if (CurrentSrcIdx == 0) {
+ CurrentSrcIdx = 1;
+ } else {
+ // Otherwise, move to the next argument and check that it is valid.
+ CurrentSrcIdx += 2;
+ if (CurrentSrcIdx >= CopyLike.getNumOperands())
+ return false;
+ }
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
+ Src.Reg = MOInsertedReg.getReg();
+ // If we have to compose sub-register indices, bail out.
+ if ((Src.SubReg = MOInsertedReg.getSubReg()))
+ return false;
+
+ // We want to track something that is compatible with the related
+ // partial definition.
+ Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
+
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ Dst.Reg = MODef.getReg();
+ // If we have to compose sub-registers, bail.
+ return MODef.getSubReg() == 0;
+ }
+
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+ // We cannot rewrite out of bound operands.
+ // Moreover, rewritable sources are at odd positions.
+ if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
+ return false;
+
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+/// Get the appropriate Rewriter for \p MI.
+/// \return A pointer to a dynamically allocated Rewriter or nullptr if no
+/// rewriter works for \p MI.
+static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {
+ // Handle uncoalescable copy-like instructions.
+ if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike())
+ return new UncoalescableRewriter(MI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return nullptr;
+ case TargetOpcode::COPY:
+ return new CopyRewriter(MI);
+ case TargetOpcode::INSERT_SUBREG:
+ return new InsertSubregRewriter(MI);
+ case TargetOpcode::EXTRACT_SUBREG:
+ return new ExtractSubregRewriter(MI, TII);
+ case TargetOpcode::REG_SEQUENCE:
+ return new RegSequenceRewriter(MI);
+ }
+}
+
+/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
+/// the new source to use for the rewrite. If \p HandleMultipleSources is true
+/// and multiple sources for a given \p Def are found along the way, we found a
+/// PHI instruction that needs to be rewritten.
+/// TODO: HandleMultipleSources should be removed once we test PHI handling
+/// with coalescable copies.
+static RegSubRegPair
+getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ RegSubRegPair Def,
+ const PeepholeOptimizer::RewriteMapTy &RewriteMap,
+ bool HandleMultipleSources = true) {
+ RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
+ while (true) {
+ ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
+ // If there are no entries in the map, LookupSrc is the new source.
+ if (!Res.isValid())
+ return LookupSrc;
+
+ // There's only one source for this definition, keep searching...
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs == 1) {
+ LookupSrc.Reg = Res.getSrcReg(0);
+ LookupSrc.SubReg = Res.getSrcSubReg(0);
+ continue;
+ }
+
+ // TODO: Remove once multiple srcs w/ coalescable copies are supported.
+ if (!HandleMultipleSources)
+ break;
+
+ // Multiple sources: recurse into each source to find a new source
+ // for it. Then, rewrite the PHI according to its new edges.
+ SmallVector<RegSubRegPair, 4> NewPHISrcs;
+ for (unsigned i = 0; i < NumSrcs; ++i) {
+ RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
+ NewPHISrcs.push_back(
+ getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
+ }
+
+ // Build the new PHI node and return its def register as the new source.
+ MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
+ MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
+ LLVM_DEBUG(dbgs() << "-- getNewSource\n");
+ LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
+ LLVM_DEBUG(dbgs() << " With: " << NewPHI);
+ const MachineOperand &MODef = NewPHI.getOperand(0);
+ return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+ }
+
+ return RegSubRegPair(0, 0);
+}
+
+/// Optimize generic copy instructions to avoid cross register bank copy.
+/// The optimization looks through a chain of copies and tries to find a source
+/// that has a compatible register class.
+/// Two register classes are considered to be compatible if they share the same
+/// register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some operands
+/// when possible.
+/// \pre isCoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been rewritten. False otherwise.
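+///
+/// For illustration only (hypothetical registers and register banks): given
+/// %1:gpr = COPY %0:fpr
+/// %2:fpr = COPY %1:gpr
+/// the source operand of the second copy can be rewritten to %0, so the value
+/// stays in the FPR bank and the copy becomes easy to coalesce.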
+bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
+ assert(isCoalescableCopy(MI) && "Invalid argument");
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "Coalescer can understand multiple defs?!");
+ const MachineOperand &MODef = MI.getOperand(0);
+ // Do not rewrite physical definitions.
+ if (MODef.getReg().isPhysical())
+ return false;
+
+ bool Changed = false;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
+ // If none exists, bail out.
+ if (!CpyRewriter)
+ return false;
+ // Rewrite each rewritable source.
+ RegSubRegPair Src;
+ RegSubRegPair TrackPair;
+ while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
+ // Keep track of PHI nodes and their incoming edges when looking for sources.
+ RewriteMapTy RewriteMap;
+ // Try to find a more suitable source. If we fail to do so, or only get
+ // back the original source, move to the next source.
+ if (!findNextSource(TrackPair, RewriteMap))
+ continue;
+
+ // Get the new source to rewrite. TODO: Only enable handling of multiple
+ // sources (PHIs) once we have a motivating example and testcases for it.
+ RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
+ /*HandleMultipleSources=*/false);
+ if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
+ continue;
+
+ // Rewrite source.
+ if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
+ // We may have extended the live-range of NewSrc, account for that.
+ MRI->clearKillFlags(NewSrc.Reg);
+ Changed = true;
+ }
+ }
+ // TODO: We could have a clean-up method to tidy the instruction.
+ // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
+ // => v0 = COPY v1
+ // Currently we haven't seen a motivating example for that and we
+ // want to avoid untested code.
+ NumRewrittenCopies += Changed;
+ return Changed;
+}
+
+/// Rewrite the source found through \p Def, by using the \p RewriteMap
+/// and create a new COPY instruction. More info about RewriteMap in
+/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+/// uncoalescable copies, since they are copy-like instructions that aren't
+/// recognized by the register coalescer.
+MachineInstr &
+PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
+ RegSubRegPair Def, RewriteMapTy &RewriteMap) {
+ assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
+
+ // Find the new source to use in the COPY rewrite.
+ RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
+
+ // Insert the COPY.
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
+ Register NewVReg = MRI->createVirtualRegister(DefRC);
+
+ MachineInstr *NewCopy =
+ BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
+
+ if (Def.SubReg) {
+ NewCopy->getOperand(0).setSubReg(Def.SubReg);
+ NewCopy->getOperand(0).setIsUndef();
+ }
+
+ LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
+ LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike);
+ LLVM_DEBUG(dbgs() << " With: " << *NewCopy);
+ MRI->replaceRegWith(Def.Reg, NewVReg);
+ MRI->clearKillFlags(NewVReg);
+
+ // We extended the lifetime of NewSrc.Reg, clear the kill flags to
+ // account for that.
+ MRI->clearKillFlags(NewSrc.Reg);
+
+ return *NewCopy;
+}
+
+/// Optimize copy-like instructions to create
+/// register coalescer friendly instructions.
+/// The optimization tries to kill-off the \p MI by looking
+/// through a chain of copies to find a source that has a compatible
+/// register class.
+/// If such a source is found, it replaces \p MI with a generic COPY
+/// operation.
+/// \pre isUncoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+/// All COPY instructions created are inserted in \p LocalMIs.
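+///
+/// For illustration only (hypothetical registers and opcode): a bitcast-like
+/// %1 = SOME_BITCAST_OP %0
+/// whose tracked source turns out to be plain %0 is replaced with
+/// %1 = COPY %0
+/// which the register coalescer can then handle or remove.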
+bool PeepholeOptimizer::optimizeUncoalescableCopy(
+ MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+ assert(isUncoalescableCopy(MI) && "Invalid argument");
+ UncoalescableRewriter CpyRewriter(MI);
+
+ // Rewrite each rewritable source by generating new COPYs. This works
+ // differently from optimizeCoalescableCopy since it first makes sure that all
+ // definitions can be rewritten.
+ RewriteMapTy RewriteMap;
+ RegSubRegPair Src;
+ RegSubRegPair Def;
+ SmallVector<RegSubRegPair, 4> RewritePairs;
+ while (CpyRewriter.getNextRewritableSource(Src, Def)) {
+ // If a physical register is here, this is probably for a good reason.
+ // Do not rewrite that.
+ if (Def.Reg.isPhysical())
+ return false;
+
+ // If we do not know how to rewrite this definition, there is no point
+ // in trying to kill this instruction.
+ if (!findNextSource(Def, RewriteMap))
+ return false;
+
+ RewritePairs.push_back(Def);
+ }
+
+ // The change is possible for all defs, do it.
+ for (const RegSubRegPair &Def : RewritePairs) {
+ // Rewrite the "copy" in a way the register coalescer understands.
+ MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
+ LocalMIs.insert(&NewCopy);
+ }
+
+ // MI is now dead.
+ LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
+ MI.eraseFromParent();
+ ++NumUncoalescableCopies;
+ return true;
+}
+
+/// Check whether MI is a candidate for folding into a later instruction.
+/// We only fold loads that define a virtual register, and only when that
+/// virtual register has a single non-debug user.
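+///
+/// For illustration only (hypothetical opcodes/registers):
+/// %0 = LOADmem %stack.0
+/// %1 = ADDrr %2, %0 ; sole non-debug user of %0
+/// makes %0 a candidate: the target may later fold the load into a
+/// memory-operand form of the add.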
+bool PeepholeOptimizer::isLoadFoldable(
+ MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
+ if (!MI.canFoldAsLoad() || !MI.mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ Register Reg = MI.getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
+ MRI->hasOneNonDBGUser(Reg)) {
+ FoldAsLoadDefCandidates.insert(Reg);
+ return true;
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(
+ MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MI.isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ Register Reg = MI.getOperand(0).getReg();
+ if (Reg.isVirtual()) {
+ ImmDefMIs.insert(std::make_pair(Reg, &MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// Try folding register operands that are defined by move immediate
+/// instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
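+///
+/// For illustration only (hypothetical opcodes/registers):
+/// %0 = MOVimm 42
+/// %1 = ADDrr %2, %0
+/// may become, when the target's FoldImmediate hook supports it,
+/// %1 = ADDri %2, 42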
+bool PeepholeOptimizer::foldImmediate(
+ MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
+ if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
+// FIXME: This is very simple and misses some cases which should be handled when
+// motivating examples are found.
+//
+// The copy rewriting logic should look at uses as well as defs and be able to
+// eliminate copies across blocks.
+//
+// Later copies that are subregister extracts will also not be eliminated since
+// only the first copy is considered.
+//
+// e.g.
+// %1 = COPY %0
+// %2 = COPY %0:sub1
+//
+// Should replace %2 uses with %1:sub1
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs) {
+ assert(MI.isCopy() && "expected a COPY machine instruction");
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned SrcSubReg = MI.getOperand(1).getSubReg();
+ if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
+ return false;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!DstReg.isVirtual())
+ return false;
+
+ RegSubRegPair SrcPair(SrcReg, SrcSubReg);
+
+ if (CopyMIs.insert(std::make_pair(SrcPair, &MI)).second) {
+ // First copy of this reg seen.
+ return false;
+ }
+
+ MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second;
+
+ assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() &&
+ "Unexpected mismatching subreg!");
+
+ Register PrevDstReg = PrevCopy->getOperand(0).getReg();
+
+ // Only replace if the copy register class is the same.
+ //
+ // TODO: If we have multiple copies to different register classes, we may want
+ // to track multiple copies of the same source register.
+ if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
+ return false;
+
+ MRI->replaceRegWith(DstReg, PrevDstReg);
+
+ // Lifetime of the previous copy has been extended.
+ MRI->clearKillFlags(PrevDstReg);
+ return true;
+}
+
+bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
+ return Reg.isPhysical() && !MRI->isAllocatable(Reg);
+}
+
+bool PeepholeOptimizer::foldRedundantNAPhysCopy(
+ MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
+ assert(MI.isCopy() && "expected a COPY machine instruction");
+
+ if (DisableNAPhysCopyOpt)
+ return false;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
+ // %vreg = COPY $physreg
+ // Avoid using a data structure which can track multiple live non-allocatable
+ // phys->virt copies since LLVM doesn't seem to do this.
+ NAPhysToVirtMIs.insert({SrcReg, &MI});
+ return false;
+ }
+
+ if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
+ return false;
+
+ // $physreg = COPY %vreg
+ auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
+ if (PrevCopy == NAPhysToVirtMIs.end()) {
+ // We can't remove the copy: there was an intervening clobber of the
+ // non-allocatable physical register after the copy to virtual.
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
+ << MI);
+ return false;
+ }
+
+ Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+ if (PrevDstReg == SrcReg) {
+ // Remove the virt->phys copy: we saw the virtual register definition, and
+ // the non-allocatable physical register's state hasn't changed since then.
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
+ ++NumNAPhysCopies;
+ return true;
+ }
+
+ // Potential missed optimization opportunity: we saw a different virtual
+ // register get a copy of the non-allocatable physical register, and we only
+ // track one such copy. Avoid getting confused by this new non-allocatable
+ // physical register definition, and remove it from the tracked copies.
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
+ NAPhysToVirtMIs.erase(PrevCopy);
+ return false;
+}
+
+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isVirtual();
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+ Register Reg, const SmallSet<Register, 2> &TargetRegs,
+ RecurrenceCycle &RC) {
+ // Recurrence found if Reg is in TargetRegs.
+ if (TargetRegs.count(Reg))
+ return true;
+
+ // TODO: Currently, we only allow the last instruction of the recurrence
+ // cycle (the instruction that feeds the PHI instruction) to have more than
+ // one use, to guarantee that commuting operands does not tie registers
+ // with overlapping live ranges. Once we have actual live range info of
+ // each register, this constraint can be relaxed.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ // Give up if the recurrence chain length is longer than the limit.
+ if (RC.size() >= MaxRecurrenceChain)
+ return false;
+
+ MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+ // Only interested in recurrences whose instructions have only one def, which
+ // is a virtual register.
+ if (MI.getDesc().getNumDefs() != 1)
+ return false;
+
+ MachineOperand &DefOp = MI.getOperand(0);
+ if (!isVirtualRegisterOperand(DefOp))
+ return false;
+
+ // Check if the def operand of MI is tied to any use operand. We are only
+ // interested in the case that all the instructions in the recurrence chain
+ // have their def operand tied with one of the use operands.
+ unsigned TiedUseIdx;
+ if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+ return false;
+
+ if (Idx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ } else {
+ // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+ unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ }
+ }
+
+ return false;
+}
+
+/// Phi instructions will eventually be lowered to copy instructions.
+/// If the phi is in a loop header, a recurrence may be formulated around the
+/// source and destination of the phi. In such a case, commuting operands of
+/// the instructions in the recurrence may enable coalescing of the copy
+/// instruction generated from the phi. For example, if there is a recurrence
+/// of
+///
+/// LoopHeader:
+/// %1 = phi(%0, %100)
+/// LoopLatch:
+/// %0<def, tied1> = ADD %2<def, tied0>, %1
+///
+/// , the fact that %0 and %2 are in the same tied operands set makes
+/// the coalescing of the copy instruction generated from the phi in
+/// LoopHeader (i.e. %1 = COPY %0) impossible, because %1 and
+/// %2 have overlapping live ranges. This introduces an additional move
+/// instruction into the final assembly. However, if we commute %2 and
+/// %1 of the ADD instruction, the redundant move instruction can be
+/// avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+ SmallSet<Register, 2> TargetRegs;
+ for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+ MachineOperand &MO = PHI.getOperand(Idx);
+ assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+ TargetRegs.insert(MO.getReg());
+ }
+
+ bool Changed = false;
+ RecurrenceCycle RC;
+ if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+ // Commute operands of instructions in RC if necessary so that the copy to
+ // be generated from the PHI can be coalesced.
+ LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ for (auto &RI : RC) {
+ LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ auto CP = RI.getCommutePair();
+ if (CP) {
+ Changed = true;
+ TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+ (*CP).second);
+ LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+
+ if (DisablePeephole)
+ return false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+ MLI = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ bool SeenMoveImm = false;
+
+ // During this forward scan, at some point it needs to answer the question
+ // "given a pointer to an MI in the current BB, is it located before or
+ // after the current instruction".
+ // To perform this, the following set keeps track of the MIs already seen
+ // during the scan; if an MI is not in the set, it is assumed to be located
+ // after. Newly created MIs have to be inserted in the set as well.
+ SmallPtrSet<MachineInstr*, 16> LocalMIs;
+ SmallSet<Register, 4> ImmDefRegs;
+ DenseMap<Register, MachineInstr *> ImmDefMIs;
+ SmallSet<Register, 16> FoldAsLoadDefCandidates;
+
+ // Track when a non-allocatable physical register is copied to a virtual
+ // register so that useless moves can be removed.
+ //
+ // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
+ // without any intervening re-definition of $physreg.
+ DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
+
+ // Set of copies to virtual registers keyed by source register. Never
+ // holds any physreg which requires def tracking.
+ DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
+
+ bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
+ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
+ MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+ // We may be erasing MI below, increment MII now.
+ ++MII;
+ LocalMIs.insert(MI);
+
+ // Skip debug instructions. They should not affect this peephole
+ // optimization.
+ if (MI->isDebugInstr())
+ continue;
+
+ if (MI->isPosition())
+ continue;
+
+ if (IsLoopHeader && MI->isPHI()) {
+ if (optimizeRecurrence(*MI)) {
+ Changed = true;
+ continue;
+ }
+ }
+
+ if (!MI->isCopy()) {
+ for (const MachineOperand &MO : MI->operands()) {
+ // Visit all operands: definitions can be implicit or explicit.
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (MO.isDef() && isNAPhysCopy(Reg)) {
+ const auto &Def = NAPhysToVirtMIs.find(Reg);
+ if (Def != NAPhysToVirtMIs.end()) {
+ // A new definition of the non-allocatable physical register
+ // invalidates previous copies.
+ LLVM_DEBUG(dbgs()
+ << "NAPhysCopy: invalidating because of " << *MI);
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ } else if (MO.isRegMask()) {
+ const uint32_t *RegMask = MO.getRegMask();
+ for (auto &RegMI : NAPhysToVirtMIs) {
+ Register Def = RegMI.first;
+ if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
+ LLVM_DEBUG(dbgs()
+ << "NAPhysCopy: invalidating because of " << *MI);
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ }
+ }
+ }
+
+ if (MI->isImplicitDef() || MI->isKill())
+ continue;
+
+ if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
+ // Blow away all non-allocatable physical register knowledge since we
+ // don't know what's correct anymore.
+ //
+ // FIXME: handle explicit asm clobbers.
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
+ << *MI);
+ NAPhysToVirtMIs.clear();
+ }
+
+ if ((isUncoalescableCopy(*MI) &&
+ optimizeUncoalescableCopy(*MI, LocalMIs)) ||
+ (MI->isCompare() && optimizeCmpInstr(*MI)) ||
+ (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
+ }
+
+ if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
+ Changed = true;
+ continue;
+ }
+
+ if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
+ // MI is just rewritten.
+ Changed = true;
+ continue;
+ }
+
+ if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) ||
+ foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
+ LocalMIs.erase(MI);
+ LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
+ } else {
+ Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
+ if (SeenMoveImm)
+ Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs);
+ }
+
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
+ !FoldAsLoadDefCandidates.empty()) {
+
+ // We visit each operand even after successfully folding a previous
+ // one. This allows us to fold multiple loads into a single
+ // instruction. We do assume that optimizeLoadInstr doesn't insert
+ // foldable uses earlier in the argument list. Since we don't restart
+ // iteration, we'd miss such cases.
+ const MCInstrDesc &MIDesc = MI->getDesc();
+ for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
+ ++i) {
+ const MachineOperand &MOp = MI->getOperand(i);
+ if (!MOp.isReg())
+ continue;
+ Register FoldAsLoadDefReg = MOp.getReg();
+ if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
+ // We need to fold load after optimizeCmpInstr, since
+ // optimizeCmpInstr can enable folding by converting SUB to CMP.
+ // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
+ // we need it for markUsesInDebugValueAsUndef().
+ Register FoldedReg = FoldAsLoadDefReg;
+ MachineInstr *DefMI = nullptr;
+ if (MachineInstr *FoldMI =
+ TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted
+ // DefMI.
+ LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
+ LLVM_DEBUG(dbgs() << " With: " << *FoldMI);
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ // Update the call site info.
+ if (MI->shouldUpdateCallSiteInfo())
+ MI->getMF()->moveCallSiteInfo(MI, FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ MRI->markUsesInDebugValueAsUndef(FoldedReg);
+ FoldAsLoadDefCandidates.erase(FoldedReg);
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI so we can continue trying to fold
+ Changed = true;
+ MI = FoldMI;
+ }
+ }
+ }
+ }
+
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates. Note: We might be able to fold *into* this
+ // instruction, so this needs to be after the folding logic.
+ if (MI->isLoadFoldBarrier()) {
+ LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
+ FoldAsLoadDefCandidates.clear();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
+ assert(Def->isCopy() && "Invalid definition");
+ // Copy instructions are supposed to be: Def = Src.
+ // If someone breaks this assumption, bad things will happen everywhere.
+ // There may be implicit uses preventing the copy from being moved across
+ // some target-specific register definitions.
+ assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
+ "Invalid number of operands");
+ assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
+
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of the src.
+ // Bail as we do not support composing subregs yet.
+ return ValueTrackerResult();
+ // Otherwise, we want the whole source.
+ const MachineOperand &Src = Def->getOperand(1);
+ if (Src.isUndef())
+ return ValueTrackerResult();
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
+ assert(Def->isBitcast() && "Invalid definition");
+
+ // Bail if there are effects that a plain copy will not expose.
+ if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
+ return ValueTrackerResult();
+
+ // Bitcasts with more than one def are not supported.
+ if (Def->getDesc().getNumDefs() != 1)
+ return ValueTrackerResult();
+ const MachineOperand DefOp = Def->getOperand(DefIdx);
+ if (DefOp.getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of the src.
+ // Bail as we do not support composing subregs yet.
+ return ValueTrackerResult();
+
+ unsigned SrcIdx = Def->getNumOperands();
+ for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = Def->getOperand(OpIdx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
+ assert(!MO.isDef() && "We should have skipped all the definitions by now");
+ if (SrcIdx != EndOpIdx)
+ // Multiple sources?
+ return ValueTrackerResult();
+ SrcIdx = OpIdx;
+ }
+
+ // In some rare cases, Def has no input, so SrcIdx is out of bounds and the
+ // getOperand(SrcIdx) call below would fail.
+ if (SrcIdx >= Def->getNumOperands())
+ return ValueTrackerResult();
+
+ // Stop if any user of the bitcast is a SUBREG_TO_REG; replacing the bitcast
+ // with a COPY would break the guarantees assumed for the upper bits.
+ for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
+ if (UseMI.isSubregToReg())
+ return ValueTrackerResult();
+ }
+
+ const MachineOperand &Src = Def->getOperand(SrcIdx);
+ if (Src.isUndef())
+ return ValueTrackerResult();
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
+ assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
+ "Invalid definition");
+
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subregs, bail out.
+ // The case we are checking is Def.<subreg> = REG_SEQUENCE.
+ // This should almost never happen as the SSA property is tracked at
+ // the register level (as opposed to the subreg level).
+ // I.e.,
+ // Def.sub0 =
+ // Def.sub1 =
+ // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
+ // Def. Thus, it must not be generated.
+ // However, some code could theoretically generate a single
+ // Def.sub0 (i.e., not defining the other subregs) and we would
+ // have this case.
+ // If we can ascertain (or force) that this never happens, we could
+ // turn that into an assertion.
+ return ValueTrackerResult();
+
+ if (!TII)
+ // We could handle the REG_SEQUENCE here, but we do not want to
+ // duplicate the code from the generic TII.
+ return ValueTrackerResult();
+
+ SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
+ if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
+ return ValueTrackerResult();
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ // Check if one of the operand defines the subreg we are interested in.
+ for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
+ if (RegSeqInput.SubIdx == DefSubReg)
+ return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
+ }
+
+ // If the subreg we are tracking is super-defined by another subreg,
+ // we could follow this value. However, this would require composing
+ // the subregs, and we do not do that for now.
+ return ValueTrackerResult();
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
+ assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
+ "Invalid definition");
+
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subregs, bail out.
+ // Same remark as getNextSourceFromRegSequence.
+ // I.e., this may be turned into an assert.
+ return ValueTrackerResult();
+
+ if (!TII)
+ // We could handle the INSERT_SUBREG here, but we do not want to
+ // duplicate the code from the generic TII.
+ return ValueTrackerResult();
+
+ RegSubRegPair BaseReg;
+ RegSubRegPairAndIdx InsertedReg;
+ if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
+ return ValueTrackerResult();
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub1
+ // There are two cases:
+ // 1. DefSubReg == sub1, get v1.
+ // 2. DefSubReg != sub1, the value may be available through v0.
+
+ // #1 Check if the inserted register matches the required sub index.
+ if (InsertedReg.SubIdx == DefSubReg) {
+ return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
+ }
+ // #2 Otherwise, if the sub register we are looking for is not partially
+ // defined by the inserted element, we can look through the main
+ // register (v0).
+ const MachineOperand &MODef = Def->getOperand(DefIdx);
+ // If the result register (Def) and the base register (v0) do not
+ // have the same register class or if we have to compose
+ // subregisters, bail out.
+ if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
+ BaseReg.SubReg)
+ return ValueTrackerResult();
+
+ // Get the TRI and check if the inserted sub-register overlaps with the
+ // sub-register we are tracking.
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ if (!TRI ||
+ !(TRI->getSubRegIndexLaneMask(DefSubReg) &
+ TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
+ return ValueTrackerResult();
+ // At this point, the value is available in v0 via the same subreg
+ // we used for Def.
+ return ValueTrackerResult(BaseReg.Reg, DefSubReg);
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
+ assert((Def->isExtractSubreg() ||
+ Def->isExtractSubregLike()) && "Invalid definition");
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0, sub0
+
+ // Bail if we have to compose sub registers.
+ // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
+ if (DefSubReg)
+ return ValueTrackerResult();
+
+ if (!TII)
+ // We could handle the EXTRACT_SUBREG here, but we do not want to
+ // duplicate the code from the generic TII.
+ return ValueTrackerResult();
+
+ RegSubRegPairAndIdx ExtractSubregInputReg;
+ if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
+ return ValueTrackerResult();
+
+ // Bail if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
+ if (ExtractSubregInputReg.SubReg)
+ return ValueTrackerResult();
+ // Otherwise, the value is available in v0.sub0.
+ return ValueTrackerResult(ExtractSubregInputReg.Reg,
+ ExtractSubregInputReg.SubIdx);
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
+ assert(Def->isSubregToReg() && "Invalid definition");
+ // We are looking at:
+ // Def = SUBREG_TO_REG Imm, v0, sub0
+
+ // Bail if we have to compose sub registers.
+ // If DefSubReg != sub0, we would have to check that all the bits
+ // we track are included in sub0 and if yes, we would have to
+ // determine the right subreg in v0.
+ if (DefSubReg != Def->getOperand(3).getImm())
+ return ValueTrackerResult();
+ // Bail if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
+ if (Def->getOperand(2).getSubReg())
+ return ValueTrackerResult();
+
+ return ValueTrackerResult(Def->getOperand(2).getReg(),
+ Def->getOperand(3).getImm());
+}
+
+/// Explore each PHI incoming operand and return its sources.
+ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
+ assert(Def->isPHI() && "Invalid definition");
+ ValueTrackerResult Res;
+
+ // If we look for a different subreg, bail as we do not support composing
+ // subregs yet.
+ if (Def->getOperand(0).getSubReg() != DefSubReg)
+ return ValueTrackerResult();
+
+ // Return all register sources for PHI instructions.
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
+ const MachineOperand &MO = Def->getOperand(i);
+ assert(MO.isReg() && "Invalid PHI instruction");
+ // We have no code to deal with undef operands. They shouldn't happen in
+ // normal programs anyway.
+ if (MO.isUndef())
+ return ValueTrackerResult();
+ Res.addSource(MO.getReg(), MO.getSubReg());
+ }
+
+ return Res;
+}
+
+ValueTrackerResult ValueTracker::getNextSourceImpl() {
+ assert(Def && "This method needs a valid definition");
+
+ assert(((Def->getOperand(DefIdx).isDef() &&
+ (DefIdx < Def->getDesc().getNumDefs() ||
+ Def->getDesc().isVariadic())) ||
+ Def->getOperand(DefIdx).isImplicit()) &&
+ "Invalid DefIdx");
+ if (Def->isCopy())
+ return getNextSourceFromCopy();
+ if (Def->isBitcast())
+ return getNextSourceFromBitcast();
+ // All the remaining cases involve "complex" instructions.
+ // Bail if we did not ask for the advanced tracking.
+ if (DisableAdvCopyOpt)
+ return ValueTrackerResult();
+ if (Def->isRegSequence() || Def->isRegSequenceLike())
+ return getNextSourceFromRegSequence();
+ if (Def->isInsertSubreg() || Def->isInsertSubregLike())
+ return getNextSourceFromInsertSubreg();
+ if (Def->isExtractSubreg() || Def->isExtractSubregLike())
+ return getNextSourceFromExtractSubreg();
+ if (Def->isSubregToReg())
+ return getNextSourceFromSubregToReg();
+ if (Def->isPHI())
+ return getNextSourceFromPHI();
+ return ValueTrackerResult();
+}
+
+ValueTrackerResult ValueTracker::getNextSource() {
+ // If we reach a point where we cannot move up in the use-def chain,
+ // there is nothing we can get.
+ if (!Def)
+ return ValueTrackerResult();
+
+ ValueTrackerResult Res = getNextSourceImpl();
+ if (Res.isValid()) {
+ // Update definition, definition index, and subregister for the
+ // next call of getNextSource.
+ // Update the current register.
+ bool OneRegSrc = Res.getNumSources() == 1;
+ if (OneRegSrc)
+ Reg = Res.getSrcReg(0);
+ // Update the result before moving up in the use-def chain
+ // with the instruction containing the last found sources.
+ Res.setInst(Def);
+
+ // If we can still move up in the use-def chain, move to the next
+ // definition.
+ if (!Reg.isPhysical() && OneRegSrc) {
+ MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
+ if (DI != MRI.def_end()) {
+ Def = DI->getParent();
+ DefIdx = DI.getOperandNo();
+ DefSubReg = Res.getSrcSubReg(0);
+ } else {
+ Def = nullptr;
+ }
+ return Res;
+ }
+ }
+ // If we end up here, this means we will not be able to find another source
+ // for the next iteration. Make sure any new call to getNextSource bails out
+ // early by cutting the use-def chain.
+ Def = nullptr;
+ return Res;
+}
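A minimal usage sketch of the value tracker above: walk a use-def chain until it ends or fans out. Only getNextSource(), isValid(), getNumSources(), getSrcReg() and getSrcSubReg() come from the code shown here; the helper name and the ValueTracker constructor arguments are assumptions for illustration.

// Hypothetical helper: follow (Reg, SubReg) as far back as a single source
// can be found and return the last pair reached.
RegSubRegPair findUltimateSource(Register Reg, unsigned SubReg,
                                 const MachineRegisterInfo &MRI,
                                 const TargetInstrInfo *TII) {
  ValueTracker Tracker(Reg, SubReg, MRI, TII); // constructor shape assumed
  RegSubRegPair Cur(Reg, SubReg);
  while (true) {
    ValueTrackerResult Res = Tracker.getNextSource();
    // Stop when tracking fails or fans out (e.g. a PHI with several incoming
    // values); rewriting across a fan-out needs separate handling.
    if (!Res.isValid() || Res.getNumSources() != 1)
      break;
    Cur = RegSubRegPair(Res.getSrcReg(0), Res.getSrcSubReg(0));
  }
  return Cur;
}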
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
new file mode 100644
index 000000000000..97b1532300b1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -0,0 +1,96 @@
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This runs the hazard recognizer and emits noops when necessary. This
+/// gives targets a way to run the hazard recognizer without running one of
+/// the schedulers. Example use cases for this pass would be:
+///
+/// - Targets that need the hazard recognizer to be run at -O0.
+/// - Targets that want to guarantee that hazards at the beginning of
+/// scheduling regions are handled correctly. The post-RA scheduler is
+/// a top-down scheduler, but when there are multiple scheduling regions
+/// in a basic block, it visits the regions in bottom-up order. This
+/// makes it impossible for the scheduler to guarantee it can correctly
+/// handle hazards at the beginning of scheduling regions.
+///
+/// This pass traverses all the instructions in a program in top-down order.
+/// In contrast to the instruction scheduling passes, this pass never resets
+/// the hazard recognizer, so that it can correctly handle noop hazards at
+/// the beginning of blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-hazard-rec"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+
+namespace {
+ class PostRAHazardRecognizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ PostRAHazardRecognizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ };
+ char PostRAHazardRecognizer::ID = 0;
+
+}
+
+char &llvm::PostRAHazardRecognizerID = PostRAHazardRecognizer::ID;
+
+INITIALIZE_PASS(PostRAHazardRecognizer, DEBUG_TYPE,
+ "Post RA hazard recognizer", false, false)
+
+bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ std::unique_ptr<ScheduleHazardRecognizer> HazardRec(
+ TII->CreateTargetPostRAHazardRecognizer(Fn));
+
+ // Return if the target has not implemented a hazard recognizer.
+ if (!HazardRec)
+ return false;
+
+ // Loop over all of the basic blocks
+ bool Changed = false;
+ for (auto &MBB : Fn) {
+ // We deliberately do not call HazardRec->Reset() here, so that noop hazards
+ // at the start of basic blocks are still handled.
+ for (MachineInstr &MI : MBB) {
+ // If we need to emit noops prior to this instruction, then do so.
+ unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
+ HazardRec->EmitNoops(NumPreNoops);
+ TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
+ NumNoops += NumPreNoops;
+ if (NumPreNoops)
+ Changed = true;
+
+ HazardRec->EmitInstruction(&MI);
+ if (HazardRec->atIssueLimit()) {
+ HazardRec->AdvanceCycle();
+ }
+ }
+ }
+ return Changed;
+}
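The pass above exercises only a handful of hazard-recognizer hooks (PreEmitNoops, EmitNoops, EmitInstruction, atIssueLimit, AdvanceCycle) and does nothing unless CreateTargetPostRAHazardRecognizer returns an object. A purely illustrative recognizer that requests one noop between back-to-back loads might look like the following; the class is hypothetical, and only the hook names the pass calls are relied upon.

// Hypothetical target hazard recognizer, for illustration only.
class ToyLoadLoadHazardRecognizer : public ScheduleHazardRecognizer {
  bool LastWasLoad = false;

public:
  unsigned PreEmitNoops(MachineInstr *MI) override {
    // Separate two consecutive loads with a single noop.
    return (LastWasLoad && MI->mayLoad()) ? 1 : 0;
  }
  void EmitInstruction(MachineInstr *MI) override {
    LastWasLoad = MI->mayLoad();
  }
  void EmitNoop() override { LastWasLoad = false; }
  void Reset() override { LastWasLoad = false; }
};

A target would return such an object from its TargetInstrInfo::CreateTargetPostRAHazardRecognizer override to have this pass drive it, even at -O0.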
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 000000000000..170008ab67cb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,696 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/AntiDepBreaker.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo::enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() = default;
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+ public:
+ static char ID;
+ PostRAScheduler() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ private:
+ bool enablePostRAScheduler(
+ const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+ /// The index in BB of RegionEnd.
+ ///
+ /// This is the instruction number from the top of the current block, not
+ /// the SlotIndex. It is only used by the AntiDepBreaker.
+ unsigned EndIndex = 0;
+
+ public:
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
+
+ ~SchedulePostRATDList() override;
+
+ /// startBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void startBlock(MachineBasicBlock *BB) override;
+
+ // Set the index of RegionEnd within the current BB.
+ void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
+
+ /// Initialize the scheduler state for the next scheduling region.
+ void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs) override;
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ void exitRegion() override;
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void schedule() override;
+
+ void EmitSchedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr &MI, unsigned Count);
+
+ /// finishBlock - Clean up register live-range state.
+ ///
+ void finishBlock() override;
+
+ private:
+ /// Apply each ScheduleDAGMutation step in order.
+ void postProcessDAG();
+
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+
+ void dumpSchedule() const;
+ void emitNoop(unsigned CurCycle);
+ };
+}
+
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
+ "Post RA top-down list latency scheduler", false, false)
+
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
+
+ const InstrItineraryData *InstrItins =
+ MF.getSubtarget().getInstrItineraryData();
+ HazardRec =
+ MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
+ InstrItins, this);
+ MF.getSubtarget().getPostRAMutations(Mutations);
+
+ assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
+ MRI.tracksLiveness()) &&
+ "Live-ins must be accurate for anti-dependency breaking");
+ AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL)
+ ? createAggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs)
+ : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL)
+ ? createCriticalAntiDepBreaker(MF, RCI)
+ : nullptr));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dumpSchedule - dump the scheduled Sequence.
+LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
+ for (const SUnit *SU : Sequence) {
+ if (SU)
+ dumpNode(*SU);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+bool PostRAScheduler::enablePostRAScheduler(
+ const TargetSubtargetInfo &ST,
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
+ Mode = ST.getAntiDepBreakMode();
+ ST.getCriticalPathRCs(CriticalPathRCs);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ if (EnablePostRAScheduler.getPosition() > 0)
+ return EnablePostRAScheduler;
+
+ return ST.enablePostRAScheduler() &&
+ OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+
+ TII = Fn.getSubtarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
+ RegClassInfo.runOnMachineFunction(Fn);
+
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
+
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
+ AntiDepMode, CriticalPathRCs))
+ return false;
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
+ }
+
+ LLVM_DEBUG(dbgs() << "PostRAScheduler\n");
+
+ SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
+
+ // Loop over all of the basic blocks
+ for (auto &MBB : Fn) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
+ << printMBBReference(MBB) << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.startBlock(&MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB.end();
+ unsigned Count = MBB.size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
+ MachineInstr &MI = *std::prev(I);
+ --Count;
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
+ Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
+ Scheduler.setEndIndex(CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+ Current = &MI;
+ CurrentCount = Count;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ if (MI.isBundle())
+ Count -= MI.getBundleSize();
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB.begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
+ Scheduler.setEndIndex(CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.finishBlock();
+
+ // Update register kills
+ Scheduler.fixupKills(MBB);
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::startBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(AA);
+
+ if (AntiDepBreak) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ postProcessDAG();
+
+ LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
+ LLVM_DEBUG(dump());
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {
+ if (AntiDepBreak)
+ AntiDepBreak->Observe(MI, Count, EndIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::finishBlock() {
+ if (AntiDepBreak)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void SchedulePostRATDList::postProcessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*SuccSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node, which causes
+ // all descendants to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.scheduledNode(SU);
+}
+
+/// emitNoop - Add a noop to the current instruction sequence.
+void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
+ LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(nullptr); // NULL here means noop
+ ++NumNoops;
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (SUnit &SUnit : SUnits) {
+ // It is available if it has no predecessors.
+ if (!SUnit.NumPredsLeft && !SUnit.isAvailable) {
+ AvailableQueue.push(&SUnit);
+ SUnit.isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ LLVM_DEBUG(dbgs() << "\n*** Examining Available\n";
+ AvailableQueue.dump(this));
+
+ SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ if (HazardRec->ShouldPreferAnother(CurSUnit)) {
+ if (!NotPreferredSUnit) {
+ // If this is the first non-preferred node for this cycle, then
+ // record it and continue searching for a preferred node. If this
+ // is not the first non-preferred node, then treat it as though
+ // there had been a hazard.
+ NotPreferredSUnit = CurSUnit;
+ continue;
+ }
+ } else {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // If we have a non-preferred node, push it back onto the available list.
+ // If we did not find a preferred node, then schedule this first
+ // non-preferred node.
+ if (NotPreferredSUnit) {
+ if (!FoundSUnit) {
+ LLVM_DEBUG(
+ dbgs() << "*** Will schedule a non-preferred instruction...\n");
+ FoundSUnit = NotPreferredSUnit;
+ } else {
+ AvailableQueue.push(NotPreferredSUnit);
+ }
+
+ NotPreferredSUnit = nullptr;
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // If we need to emit noops prior to this instruction, then do so.
+ unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
+ for (unsigned i = 0; i != NumPreNoops; ++i)
+ emitNoop(CurCycle);
+
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle
+ << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ } else {
+ if (CycleHasInsts) {
+ LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ emitNoop(CurCycle);
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = llvm::count(Sequence, nullptr);
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = std::prev(RegionEnd);
+ }
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = nullptr;
+}
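Whether this scheduler runs at all is decided by the subtarget hooks queried in enablePostRAScheduler() above. A hedged sketch of a target opting in; the subtarget class and the register class are hypothetical, while the hook names mirror the queries made by the pass:

// Hypothetical overrides in a target's <Target>Subtarget implementation.
bool MyTargetSubtarget::enablePostRAScheduler() const { return true; }

TargetSubtargetInfo::AntiDepBreakMode
MyTargetSubtarget::getAntiDepBreakMode() const {
  // Request critical-path anti-dependency breaking.
  return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}

void MyTargetSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
  CriticalPathRCs.clear();
  CriticalPathRCs.push_back(&MyTarget::GPR32RegClass); // hypothetical class
}

For experiments, the hidden flags declared near the top of this file force the same behaviour from the command line, e.g. llc -post-RA-scheduler -break-anti-dependencies=critical.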
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
new file mode 100644
index 000000000000..5b822b5d7b95
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -0,0 +1,415 @@
+//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR lowering for the llvm.memcpy, llvm.memmove,
+// llvm.memset, llvm.load.relative and llvm.objc.* intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+using namespace llvm;
+
+/// Threshold to leave statically sized memory intrinsic calls. Calls of known
+/// size larger than this will be expanded by the pass. Calls of unknown or
+/// lower size will be left for expansion in codegen.
+static cl::opt<int64_t> MemIntrinsicExpandSizeThresholdOpt(
+ "mem-intrinsic-expand-size",
+ cl::desc("Set minimum mem intrinsic size to expand in IR"), cl::init(-1),
+ cl::Hidden);
+
+namespace {
+
+struct PreISelIntrinsicLowering {
+ const TargetMachine &TM;
+ const function_ref<TargetTransformInfo &(Function &)> LookupTTI;
+
+ /// If this is true, assume it is preferable to leave memory intrinsic calls
+ /// for replacement with a library call later, when TargetLowering reports the
+ /// corresponding library function as available. Otherwise they are expanded
+ /// regardless of library call availability.
+ const bool UseMemIntrinsicLibFunc;
+
+ explicit PreISelIntrinsicLowering(
+ const TargetMachine &TM_,
+ function_ref<TargetTransformInfo &(Function &)> LookupTTI_,
+ bool UseMemIntrinsicLibFunc_ = true)
+ : TM(TM_), LookupTTI(LookupTTI_),
+ UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {}
+
+ static bool shouldExpandMemIntrinsicWithSize(Value *Size,
+ const TargetTransformInfo &TTI);
+ bool expandMemIntrinsicUses(Function &F) const;
+ bool lowerIntrinsics(Module &M) const;
+};
+
+} // namespace
+
+static bool lowerLoadRelative(Function &F) {
+ if (F.use_empty())
+ return false;
+
+ bool Changed = false;
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Type *Int32PtrTy = Int32Ty->getPointerTo();
+ Type *Int8Ty = Type::getInt8Ty(F.getContext());
+
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto CI = dyn_cast<CallInst>(U.getUser());
+ if (!CI || CI->getCalledOperand() != &F)
+ continue;
+
+ IRBuilder<> B(CI);
+ Value *OffsetPtr =
+ B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
+ Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
+ Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, Align(4));
+
+ Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
+
+ CI->replaceAllUsesWith(ResultPtr);
+ CI->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// ObjCARC has knowledge about whether an obj-c runtime function needs to be
+// always tail-called or never tail-called.
+static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
+ objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(&F);
+ if (objcarc::IsAlwaysTail(Kind))
+ return CallInst::TCK_Tail;
+ else if (objcarc::IsNeverTail(Kind))
+ return CallInst::TCK_NoTail;
+ return CallInst::TCK_None;
+}
+
+static bool lowerObjCCall(Function &F, const char *NewFn,
+ bool setNonLazyBind = false) {
+ assert(IntrinsicInst::mayLowerToFunctionCall(F.getIntrinsicID()) &&
+ "Pre-ISel intrinsics do lower into regular function calls");
+ if (F.use_empty())
+ return false;
+
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = F.getParent();
+ FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+
+ if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) {
+ Fn->setLinkage(F.getLinkage());
+ if (setNonLazyBind && !Fn->isWeakForLinker()) {
+ // If we have Native ARC, set nonlazybind attribute for these APIs for
+ // performance.
+ Fn->addFnAttr(Attribute::NonLazyBind);
+ }
+ }
+
+ CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
+
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto *CB = cast<CallBase>(U.getUser());
+
+ if (CB->getCalledFunction() != &F) {
+ objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
+ (void)Kind;
+ assert((Kind == objcarc::ARCInstKind::RetainRV ||
+ Kind == objcarc::ARCInstKind::UnsafeClaimRV) &&
+ "use expected to be the argument of operand bundle "
+ "\"clang.arc.attachedcall\"");
+ U.set(FCache.getCallee());
+ continue;
+ }
+
+ auto *CI = cast<CallInst>(CB);
+ assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ SmallVector<Value *, 8> Args(CI->args());
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
+ CI->getOperandBundlesAsDefs(BundleList);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args, BundleList);
+ NewCI->setName(CI->getName());
+
+ // Try to set the most appropriate TailCallKind based on both the current
+ // attributes and the ones that we could get from ObjCARC's special
+ // knowledge of the runtime functions.
+ //
+ // std::max respects both requirements of notail and tail here:
+ // * notail on either the call or from ObjCARC becomes notail
+ // * tail on either side is stronger than none, but not notail
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ NewCI->setTailCallKind(std::max(TCK, OverridingTCK));
+
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ }
+
+ return true;
+}
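The std::max merge above works because of the declaration order of CallInst::TailCallKind in llvm/IR/Instructions.h; a small, purely illustrative restatement of that assumption:

// TCK_None < TCK_Tail < TCK_NoTail, so taking the maximum lets a "notail"
// requirement from either side win, while "tail" from either side beats
// "none".
static_assert(CallInst::TCK_None < CallInst::TCK_Tail &&
                  CallInst::TCK_Tail < CallInst::TCK_NoTail,
              "the std::max merge relies on this ordering");

// Returns the merged kind for a pair of requirements, as lowerObjCCall does.
inline CallInst::TailCallKind mergeTailCallKinds(CallInst::TailCallKind A,
                                                 CallInst::TailCallKind B) {
  return std::max(A, B); // e.g. merge(TCK_Tail, TCK_NoTail) == TCK_NoTail
}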
+
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize(
+ Value *Size, const TargetTransformInfo &TTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Size);
+ if (!CI)
+ return true;
+ uint64_t Threshold = MemIntrinsicExpandSizeThresholdOpt.getNumOccurrences()
+ ? MemIntrinsicExpandSizeThresholdOpt
+ : TTI.getMaxMemIntrinsicInlineSizeThreshold();
+ uint64_t SizeVal = CI->getZExtValue();
+
+ // Treat a threshold of 0 as a special case to force expansion of all
+ // intrinsics, including size 0.
+ return SizeVal > Threshold || Threshold == 0;
+}
+
+static bool canEmitLibcall(const TargetMachine &TM, Function *F,
+ RTLIB::Libcall LC) {
+ // TODO: Should this consider the address space of the memcpy?
+ const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ return TLI->getLibcallName(LC) != nullptr;
+}
+
+// TODO: Handle atomic memcpy and memcpy.inline
+// TODO: Pass ScalarEvolution
+bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
+ Intrinsic::ID ID = F.getIntrinsicID();
+ bool Changed = false;
+
+ for (User *U : llvm::make_early_inc_range(F.users())) {
+ Instruction *Inst = cast<Instruction>(U);
+
+ switch (ID) {
+ case Intrinsic::memcpy: {
+ auto *Memcpy = cast<MemCpyInst>(Inst);
+ Function *ParentFunc = Memcpy->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ canEmitLibcall(TM, ParentFunc, RTLIB::MEMCPY))
+ break;
+
+ // TODO: For optsize, emit the loop into a separate function
+ expandMemCpyAsLoop(Memcpy, TTI);
+ Changed = true;
+ Memcpy->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memmove: {
+ auto *Memmove = cast<MemMoveInst>(Inst);
+ Function *ParentFunc = Memmove->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ canEmitLibcall(TM, ParentFunc, RTLIB::MEMMOVE))
+ break;
+
+ if (expandMemMoveAsLoop(Memmove, TTI)) {
+ Changed = true;
+ Memmove->eraseFromParent();
+ }
+ }
+
+ break;
+ }
+ case Intrinsic::memset: {
+ auto *Memset = cast<MemSetInst>(Inst);
+ Function *ParentFunc = Memset->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ canEmitLibcall(TM, ParentFunc, RTLIB::MEMSET))
+ break;
+
+ expandMemSetAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ }
+
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled intrinsic");
+ }
+ }
+
+ return Changed;
+}
+
+bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
+ bool Changed = false;
+ for (Function &F : M) {
+ switch (F.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Changed |= expandMemIntrinsicUses(F);
+ break;
+ case Intrinsic::load_relative:
+ Changed |= lowerLoadRelative(F);
+ break;
+ case Intrinsic::objc_autorelease:
+ Changed |= lowerObjCCall(F, "objc_autorelease");
+ break;
+ case Intrinsic::objc_autoreleasePoolPop:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop");
+ break;
+ case Intrinsic::objc_autoreleasePoolPush:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush");
+ break;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_copyWeak:
+ Changed |= lowerObjCCall(F, "objc_copyWeak");
+ break;
+ case Intrinsic::objc_destroyWeak:
+ Changed |= lowerObjCCall(F, "objc_destroyWeak");
+ break;
+ case Intrinsic::objc_initWeak:
+ Changed |= lowerObjCCall(F, "objc_initWeak");
+ break;
+ case Intrinsic::objc_loadWeak:
+ Changed |= lowerObjCCall(F, "objc_loadWeak");
+ break;
+ case Intrinsic::objc_loadWeakRetained:
+ Changed |= lowerObjCCall(F, "objc_loadWeakRetained");
+ break;
+ case Intrinsic::objc_moveWeak:
+ Changed |= lowerObjCCall(F, "objc_moveWeak");
+ break;
+ case Intrinsic::objc_release:
+ Changed |= lowerObjCCall(F, "objc_release", true);
+ break;
+ case Intrinsic::objc_retain:
+ Changed |= lowerObjCCall(F, "objc_retain", true);
+ break;
+ case Intrinsic::objc_retainAutorelease:
+ Changed |= lowerObjCCall(F, "objc_retainAutorelease");
+ break;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainBlock:
+ Changed |= lowerObjCCall(F, "objc_retainBlock");
+ break;
+ case Intrinsic::objc_storeStrong:
+ Changed |= lowerObjCCall(F, "objc_storeStrong");
+ break;
+ case Intrinsic::objc_storeWeak:
+ Changed |= lowerObjCCall(F, "objc_storeWeak");
+ break;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_unsafeClaimAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainedObject:
+ Changed |= lowerObjCCall(F, "objc_retainedObject");
+ break;
+ case Intrinsic::objc_unretainedObject:
+ Changed |= lowerObjCCall(F, "objc_unretainedObject");
+ break;
+ case Intrinsic::objc_unretainedPointer:
+ Changed |= lowerObjCCall(F, "objc_unretainedPointer");
+ break;
+ case Intrinsic::objc_retain_autorelease:
+ Changed |= lowerObjCCall(F, "objc_retain_autorelease");
+ break;
+ case Intrinsic::objc_sync_enter:
+ Changed |= lowerObjCCall(F, "objc_sync_enter");
+ break;
+ case Intrinsic::objc_sync_exit:
+ Changed |= lowerObjCCall(F, "objc_sync_exit");
+ break;
+ }
+ }
+ return Changed;
+}
+
+namespace {
+
+class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto LookupTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ const auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ PreISelIntrinsicLowering Lowering(TM, LookupTTI);
+ return Lowering.lowerIntrinsics(M);
+ }
+};
+
+} // end anonymous namespace
+
+char PreISelIntrinsicLoweringLegacyPass::ID;
+
+INITIALIZE_PASS_BEGIN(PreISelIntrinsicLoweringLegacyPass,
+ "pre-isel-intrinsic-lowering",
+ "Pre-ISel Intrinsic Lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(PreISelIntrinsicLoweringLegacyPass,
+ "pre-isel-intrinsic-lowering",
+ "Pre-ISel Intrinsic Lowering", false, false)
+
+ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
+ return new PreISelIntrinsicLoweringLegacyPass();
+}
+
+PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ PreISelIntrinsicLowering Lowering(TM, LookupTTI);
+ if (!Lowering.lowerIntrinsics(M))
+ return PreservedAnalyses::all();
+ else
+ return PreservedAnalyses::none();
+}
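A usage sketch for the new-PM entry point defined above. The constructor taking a TargetMachine is assumed from the TM member used in run(); M, MAM and TM stand for objects the caller already owns:

// Hedged sketch: schedule the pass in a new-PM module pipeline.
ModulePassManager MPM;
MPM.addPass(PreISelIntrinsicLoweringPass(*TM)); // TM: TargetMachine *
MPM.run(M, MAM);                                // MAM: ModuleAnalysisManager

The expansion threshold can be tuned per run with the -mem-intrinsic-expand-size flag declared at the top of the file; otherwise the target's TargetTransformInfo supplies the default via getMaxMemIntrinsicInlineSizeThreshold().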
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 000000000000..be81ecab9c89
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,168 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "processimpdefs"
+
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
+class ProcessImplicitDefs : public MachineFunctionPass {
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+
+ SmallSetVector<MachineInstr*, 16> WorkList;
+
+ void processImplicitDef(MachineInstr *MI);
+ bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {
+ initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &au) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+};
+} // end anonymous namespace
+
+char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
+INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE,
+ "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
+ return false;
+ for (const MachineOperand &MO : MI->all_uses())
+ if (MO.readsReg())
+ return false;
+ return true;
+}
+
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "Processing " << *MI);
+ Register Reg = MI->getOperand(0).getReg();
+
+ if (Reg.isVirtual()) {
+ // For virtual registers, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
+ continue;
+ LLVM_DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ WorkList.insert(UserMI);
+ }
+ MI->eraseFromParent();
+ return;
+ }
+
+ // This is a physreg implicit-def.
+ // Look for the first instruction to use or define an alias.
+ MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
+ MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ bool Found = false;
+ for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (MachineOperand &MO : UserMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register UserReg = MO.getReg();
+ if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
+ continue;
+ // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+ Found = true;
+ if (MO.isUse())
+ MO.setIsUndef();
+ }
+ if (Found)
+ break;
+ }
+
+ // If we found the using MI, we can erase the IMPLICIT_DEF.
+ if (Found) {
+ LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ MI->eraseFromParent();
+ return;
+ }
+
+  // The using instruction wasn't found; it could be in another block.
+  // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->removeOperand(i);
+ LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
+/// <undef> operands.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
+
+ LLVM_DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ bool Changed = false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ assert(WorkList.empty() && "Inconsistent worklist state");
+
+ for (MachineBasicBlock &MBB : MF) {
+ // Scan the basic block for implicit defs.
+ for (MachineInstr &MI : MBB)
+ if (MI.isImplicitDef())
+ WorkList.insert(&MI);
+
+ if (WorkList.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << printMBBReference(MBB) << " has " << WorkList.size()
+ << " implicit defs.\n");
+ Changed = true;
+
+ // Drain the WorkList to recursively process any new implicit defs.
+ do processImplicitDef(WorkList.pop_back_val());
+ while (!WorkList.empty());
+ }
+ return Changed;
+}
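+
+// A minimal, standard-library-only sketch of the worklist pattern used by
+// runOnMachineFunction above: the list is drained while the handler for one
+// item may discover and enqueue further items. Item and the Process callback
+// are hypothetical stand-ins, not LLVM types; the Seen set is a simplification
+// of SmallSetVector, which only deduplicates items currently in the list.
+#include <functional>
+#include <unordered_set>
+#include <vector>
+namespace worklist_sketch {
+template <typename Item>
+void drain(std::vector<Item> &WorkList,
+           const std::function<void(Item, std::vector<Item> &)> &Process) {
+  std::unordered_set<Item> Seen(WorkList.begin(), WorkList.end());
+  while (!WorkList.empty()) {
+    Item I = WorkList.back();
+    WorkList.pop_back();
+    // Processing one item may report newly discovered work.
+    std::vector<Item> New;
+    Process(I, New);
+    for (const Item &N : New)
+      if (Seen.insert(N).second)
+        WorkList.push_back(N);
+  }
+}
+} // namespace worklist_sketch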
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 000000000000..e323aaaeefaf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,1580 @@
+//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// the callee-saved registers, and emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "prologepilog"
+
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+
+STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
+STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+
+
+namespace {
+
+class PEI : public MachineFunctionPass {
+public:
+ static char ID;
+
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ RegScavenger *RS = nullptr;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max();
+ unsigned MaxCSFrameIndex = 0;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ MBBVector SaveBlocks;
+ MBBVector RestoreBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging = false;
+
+ // Flag to control whether the scavenger should be passed even though
+ // FrameIndexVirtualScavenging is used.
+ bool FrameIndexEliminationScavenging = false;
+
+ // Emit remarks.
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+
+ void calculateCallFrameInfo(MachineFunction &MF);
+ void calculateSaveRestoreBlocks(MachineFunction &MF);
+ void spillCalleeSavedRegs(MachineFunction &MF);
+
+ void calculateFrameObjectOffsets(MachineFunction &MF);
+ void replaceFrameIndices(MachineFunction &MF);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
+ int &SPAdj);
+ // Frame indices in debug values are encoded in a target independent
+ // way with simply the frame index and offset rather than any
+ // target-specific addressing mode.
+ bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+ unsigned OpIdx, int SPAdj = 0);
+  // Does the same as replaceFrameIndices but using a backward MIR walk and
+  // a backward register scavenger walk. Does not yet support call sequence
+  // processing.
+ void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
+ int &SPAdj);
+
+ void insertPrologEpilogCode(MachineFunction &MF);
+ void insertZeroCallUsedRegs(MachineFunction &MF);
+};
+
+} // end anonymous namespace
+
+char PEI::ID = 0;
+
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
+
+INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
+ "Prologue/Epilogue Insertion & Frame Finalization", false,
+ false)
+
+MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
+ return new PEI();
+}
+
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// StackObjSet - A set of stack object indexes
+using StackObjSet = SmallSetVector<int, 8>;
+
+using SavedDbgValuesMap =
+ SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>;
+
+/// Stash DBG_VALUEs that describe parameters and which are placed at the start
+/// of the block. Later on, after the prologue code has been emitted, the
+/// stashed DBG_VALUEs will be reinserted at the start of the block.
+static void stashEntryDbgValues(MachineBasicBlock &MBB,
+ SavedDbgValuesMap &EntryDbgValues) {
+ SmallVector<const MachineInstr *, 4> FrameIndexValues;
+
+ for (auto &MI : MBB) {
+ if (!MI.isDebugInstr())
+ break;
+ if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
+ continue;
+ if (any_of(MI.debug_operands(),
+ [](const MachineOperand &MO) { return MO.isFI(); })) {
+      // We can only emit valid locations for frame indices after the frame
+      // setup, so do not stash them away.
+ FrameIndexValues.push_back(&MI);
+ continue;
+ }
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ auto Overlaps = [Var, Expr](const MachineInstr *DV) {
+ return Var == DV->getDebugVariable() &&
+ Expr->fragmentsOverlap(DV->getDebugExpression());
+ };
+ // See if the debug value overlaps with any preceding debug value that will
+ // not be stashed. If that is the case, then we can't stash this value, as
+ // we would then reorder the values at reinsertion.
+ if (llvm::none_of(FrameIndexValues, Overlaps))
+ EntryDbgValues[&MBB].push_back(&MI);
+ }
+
+ // Remove stashed debug values from the block.
+ if (EntryDbgValues.count(&MBB))
+ for (auto *MI : EntryDbgValues[&MBB])
+ MI->removeFromParent();
+}
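+
+// A standard-library-only sketch of the stashing rule above: walk the entry
+// records in order, keep frame-index values in place, and stash a value only
+// if no earlier kept frame-index value overlaps it, so reinsertion after the
+// prologue cannot reorder overlapping locations. Record and Overlaps are
+// hypothetical stand-ins, not LLVM types.
+#include <functional>
+#include <vector>
+namespace stash_sketch {
+struct Record { bool UsesFrameIndex; int VariableId; };
+inline void splitEntryRecords(
+    const std::vector<Record> &Entry, std::vector<Record> &Stashed,
+    std::vector<Record> &Kept,
+    const std::function<bool(const Record &, const Record &)> &Overlaps) {
+  for (const Record &R : Entry) {
+    if (R.UsesFrameIndex) {
+      Kept.push_back(R); // valid only after frame setup; never stashed
+      continue;
+    }
+    bool Blocked = false;
+    for (const Record &K : Kept)
+      if (K.UsesFrameIndex && Overlaps(K, R)) {
+        Blocked = true;
+        break;
+      }
+    // A blocked value must stay put; stashing it would move it past the
+    // overlapping kept value when it is reinserted later.
+    (Blocked ? Kept : Stashed).push_back(R);
+  }
+}
+} // namespace stash_sketch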
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+bool PEI::runOnMachineFunction(MachineFunction &MF) {
+ NumFuncSeen++;
+ const Function &F = MF.getFunction();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+ RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallFrameInfo(MF);
+
+ // Determine placement of CSR spill/restore code and prolog/epilog code:
+ // place all spills in the entry block, all restores in return blocks.
+ calculateSaveRestoreBlocks(MF);
+
+ // Stash away DBG_VALUEs that should not be moved by insertion of prolog code.
+ SavedDbgValuesMap EntryDbgValues;
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ stashEntryDbgValues(*SaveBlock, EntryDbgValues);
+
+ // Handle CSR spilling and restoring, for targets that need it.
+ if (MF.getTarget().usesPhysRegsForValues())
+ spillCalleeSavedRegs(MF);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(MF, RS);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(MF);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F.hasFnAttribute(Attribute::Naked))
+ insertPrologEpilogCode(MF);
+
+ // Reinsert stashed debug values at the start of the entry blocks.
+ for (auto &I : EntryDbgValues)
+ I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(MF);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(MF, *RS);
+
+  // Warn on the stack size when it exceeds the given limit.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint64_t StackSize = MFI.getStackSize();
+
+ unsigned Threshold = UINT_MAX;
+ if (MF.getFunction().hasFnAttribute("warn-stack-size")) {
+ bool Failed = MF.getFunction()
+ .getFnAttribute("warn-stack-size")
+ .getValueAsString()
+ .getAsInteger(10, Threshold);
+ // Verifier should have caught this.
+ assert(!Failed && "Invalid warn-stack-size fn attr value");
+ (void)Failed;
+ }
+ uint64_t UnsafeStackSize = MFI.getUnsafeStackSize();
+ if (MF.getFunction().hasFnAttribute(Attribute::SafeStack))
+ StackSize += UnsafeStackSize;
+
+ if (StackSize > Threshold) {
+ DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
+ F.getContext().diagnose(DiagStackSize);
+ int64_t SpillSize = 0;
+ for (int Idx = MFI.getObjectIndexBegin(), End = MFI.getObjectIndexEnd();
+ Idx != End; ++Idx) {
+ if (MFI.isSpillSlotObjectIndex(Idx))
+ SpillSize += MFI.getObjectSize(Idx);
+ }
+
+ [[maybe_unused]] float SpillPct =
+ static_cast<float>(SpillSize) / static_cast<float>(StackSize);
+ LLVM_DEBUG(
+ dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
+ SpillSize, StackSize, StackSize - SpillSize, SpillPct,
+ 1.0f - SpillPct));
+ if (UnsafeStackSize != 0) {
+ LLVM_DEBUG(dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack",
+ UnsafeStackSize,
+ static_cast<float>(UnsafeStackSize) /
+ static_cast<float>(StackSize),
+ StackSize));
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
+ MF.getFunction().getSubprogram(),
+ &MF.front())
+ << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
+ });
+
+ delete RS;
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ MFI.setSavePoint(nullptr);
+ MFI.setRestorePoint(nullptr);
+ return true;
+}
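+
+// A standard-library-only sketch of the warning policy above: the threshold
+// defaults to "no limit", is overridden by a decimal "warn-stack-size" string
+// when present, and for safestack functions the unsafe-stack bytes count
+// against it. The names here are illustrative, not LLVM APIs.
+#include <charconv>
+#include <climits>
+#include <cstdint>
+#include <string_view>
+namespace stack_warn_sketch {
+inline bool exceedsWarnThreshold(uint64_t StackSize, uint64_t UnsafeStackSize,
+                                 bool HasSafeStack,
+                                 std::string_view WarnAttr /* "" if unset */) {
+  unsigned Threshold = UINT_MAX;
+  if (!WarnAttr.empty()) {
+    auto [Ptr, Ec] = std::from_chars(
+        WarnAttr.data(), WarnAttr.data() + WarnAttr.size(), Threshold, 10);
+    (void)Ptr;
+    (void)Ec; // malformed values are assumed to be rejected by the verifier
+  }
+  if (HasSafeStack)
+    StackSize += UnsafeStackSize;
+  return StackSize > Threshold;
+}
+} // namespace stack_warn_sketch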
+
+/// Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallFrameInfo(MachineFunction &MF) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI.adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineBasicBlock &BB : MF)
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I)
+ if (TII.isFrameInstr(*I)) {
+ unsigned Size = TII.getFrameSize(*I);
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ assert(!MFI.isMaxCallFrameSizeComputed() ||
+ (MFI.getMaxCallFrameSize() >= MaxCallFrameSize &&
+ !(AdjustsStack && !MFI.adjustsStack())));
+ MFI.setAdjustsStack(AdjustsStack);
+ MFI.setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (MachineBasicBlock::iterator I : FrameSDOps) {
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(MF))
+ TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);
+ }
+}
+
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI.getSavePoint()) {
+ SaveBlocks.push_back(MFI.getSavePoint());
+ assert(MFI.getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&MF.front());
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+static void assignCalleeSavedSpillSlots(MachineFunction &F,
+ const BitVector &SavedRegs,
+ unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex) {
+ if (SavedRegs.empty())
+ return;
+
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
+ BitVector CSMask(SavedRegs.size());
+
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CSMask.set(CSRegs[i]);
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg)) {
+ bool SavedSuper = false;
+ for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) {
+ // Some backends set all aliases for some registers as saved, such as
+ // Mips's $fp, so they appear in SavedRegs but not CSRegs.
+ if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) {
+ SavedSuper = true;
+ break;
+ }
+ }
+
+ if (!SavedSuper)
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
+ }
+
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+ MachineFrameInfo &MFI = F.getFrameInfo();
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI, MinCSFrameIndex,
+ MaxCSFrameIndex)) {
+ // If target doesn't implement this, use generic code.
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (auto &CS : CSI) {
+ // If the target has spilled this register to another register, we don't
+ // need to allocate a stack slot.
+ if (CS.isSpilledToReg())
+ continue;
+
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ CS.setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ unsigned Size = RegInfo->getSpillSize(*RC);
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ Align Alignment = RegInfo->getSpillAlign(*RC);
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Alignment = std::min(Alignment, TFI->getStackAlign());
+ FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
+ }
+
+ CS.setFrameIdx(FrameIdx);
+ }
+ }
+
+ MFI.setCalleeSavedInfo(CSI);
+}
+
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ // Visited will contain all the basic blocks that are in the region
+ // where the callee saved registers are alive:
+ // - Anything that is not Save or Restore -> LiveThrough.
+ // - Save -> LiveIn.
+ // - Restore -> LiveOut.
+ // The live-out is not attached to the block, so no need to keep
+ // Restore in this set.
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ SmallVector<MachineBasicBlock *, 8> WorkList;
+ MachineBasicBlock *Entry = &MF.front();
+ MachineBasicBlock *Save = MFI.getSavePoint();
+
+ if (!Save)
+ Save = Entry;
+
+ if (Entry != Save) {
+ WorkList.push_back(Entry);
+ Visited.insert(Entry);
+ }
+ Visited.insert(Save);
+
+ MachineBasicBlock *Restore = MFI.getRestorePoint();
+ if (Restore)
+    // By construction, Restore cannot have been visited already; otherwise
+    // there would exist a path to Restore that does not go through Save.
+ WorkList.push_back(Restore);
+
+ while (!WorkList.empty()) {
+ const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+ // By construction, the region that is after the save point is
+ // dominated by the Save and post-dominated by the Restore.
+ if (CurBB == Save && Save != Restore)
+ continue;
+ // Enqueue all the successors not already visited.
+ // Those are by construction either before Save or after Restore.
+ for (MachineBasicBlock *SuccBB : CurBB->successors())
+ if (Visited.insert(SuccBB).second)
+ WorkList.push_back(SuccBB);
+ }
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const CalleeSavedInfo &I : CSI) {
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = I.getReg();
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
+    // If a callee-saved register is spilled to another register rather than
+    // to the stack, the destination register has to be marked as live for
+ // each MBB between the prologue and epilogue so that it is not clobbered
+ // before it is reloaded in the epilogue. The Visited set contains all
+ // blocks outside of the region delimited by prologue/epilogue.
+ if (I.isSpilledToReg()) {
+ for (MachineBasicBlock &MBB : MF) {
+ if (Visited.count(&MBB))
+ continue;
+ MCPhysReg DstReg = I.getDstReg();
+ if (!MBB.isLiveIn(DstReg))
+ MBB.addLiveIn(DstReg);
+ }
+ }
+ }
+}
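+
+// A standard-library-only sketch of the flood above: blocks reachable from
+// the entry without walking through Save, plus blocks reached from Restore,
+// are the blocks where the callee-saved registers still hold the caller's
+// values and are therefore given live-ins. Block indices and the successor
+// lists are hypothetical stand-ins for the MachineBasicBlock graph.
+#include <vector>
+namespace csr_region_sketch {
+inline std::vector<bool>
+blocksOutsideRegion(const std::vector<std::vector<int>> &Succs, int Entry,
+                    int Save, int Restore /* -1 if none */) {
+  std::vector<bool> Visited(Succs.size(), false);
+  std::vector<int> WorkList;
+  if (Entry != Save)
+    WorkList.push_back(Entry);
+  Visited[Entry] = true;
+  Visited[Save] = true;
+  if (Restore >= 0)
+    WorkList.push_back(Restore);
+  while (!WorkList.empty()) {
+    int BB = WorkList.back();
+    WorkList.pop_back();
+    // Do not walk through Save: everything between Save and Restore is the
+    // protected region and is deliberately left unvisited.
+    if (BB == Save && Save != Restore)
+      continue;
+    for (int Succ : Succs[BB])
+      if (!Visited[Succ]) {
+        Visited[Succ] = true;
+        WorkList.push_back(Succ);
+      }
+  }
+  return Visited;
+}
+} // namespace csr_region_sketch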
+
+/// Insert spill code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+ ArrayRef<CalleeSavedInfo> CSI) {
+ MachineFunction &MF = *SaveBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineBasicBlock::iterator I = SaveBlock.begin();
+ if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CS : CSI) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CS.getReg();
+
+ if (CS.isSpilledToReg()) {
+ BuildMI(SaveBlock, I, DebugLoc(),
+ TII.get(TargetOpcode::COPY), CS.getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI, Register());
+ }
+ }
+ }
+}
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+ std::vector<CalleeSavedInfo> &CSI) {
+ MachineFunction &MF = *RestoreBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+ if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CI : reverse(CSI)) {
+ unsigned Reg = CI.getReg();
+ if (CI.isSpilledToReg()) {
+ BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
+ .addReg(CI.getDstReg(), getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
+ TRI, Register());
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ }
+ }
+ }
+}
+
+void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
+ // We can't list this requirement in getRequiredProperties because some
+ // targets (WebAssembly) use virtual registers past this point, and the pass
+ // pipeline is set up without giving the passes a chance to look at the
+ // TargetMachine.
+ // FIXME: Find a way to express this in getRequiredProperties.
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
+ const Function &F = MF.getFunction();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MinCSFrameIndex = std::numeric_limits<unsigned>::max();
+ MaxCSFrameIndex = 0;
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSaves(MF, SavedRegs, RS);
+
+ // Assign stack slots for any callee-saved registers that must be spilled.
+ assignCalleeSavedSpillSlots(MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F.hasFnAttribute(Attribute::Naked)) {
+ MFI.setCalleeSavedInfoValid(true);
+
+ std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (!CSI.empty()) {
+ if (!MFI.hasCalls())
+ NumLeafFuncWithSpills++;
+
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ insertCSRSaves(*SaveBlock, CSI);
+
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(MF);
+
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ insertCSRRestores(*RestoreBlock, CSI);
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ Align &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI.getObjectSize(FrameIdx);
+
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Alignment);
+
+ // Adjust to alignment boundary.
+ Offset = alignTo(Offset, Alignment);
+
+ if (StackGrowsDown) {
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
+ << "]\n");
+ MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset
+ << "]\n");
+ MFI.setObjectOffset(FrameIdx, Offset);
+ Offset += MFI.getObjectSize(FrameIdx);
+ }
+}
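+
+// A standard-library-only worked example of the arithmetic above for a
+// down-growing stack: grow the running offset by the object size, round up
+// to the object's alignment, and record the negated offset. Object is a
+// hypothetical stand-in for a frame object.
+#include <cstdint>
+#include <vector>
+namespace offset_sketch {
+struct Object { int64_t Size; int64_t Align; int64_t Offset; };
+inline int64_t alignUp(int64_t V, int64_t A) { return (V + A - 1) / A * A; }
+inline int64_t assignDownGrowing(std::vector<Object> &Objects,
+                                 int64_t Offset /* local area offset */) {
+  for (Object &O : Objects) {
+    Offset += O.Size;                 // distance to the object's low address
+    Offset = alignUp(Offset, O.Align);
+    O.Offset = -Offset;               // offsets below the SP are negative
+  }
+  return Offset;                      // running frame size so far
+}
+// For example, objects of (size 4, align 4) and (size 8, align 16) starting
+// at offset 0 land at SP[-4] and SP[-16], using 16 bytes so far.
+} // namespace offset_sketch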
+
+/// Compute which bytes of fixed and callee-save stack area are unused and keep
+/// track of them in StackBytesFree.
+static inline void
+computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
+ unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
+ int64_t FixedCSEnd, BitVector &StackBytesFree) {
+ // Avoid undefined int64_t -> int conversion below in extreme case.
+ if (FixedCSEnd > std::numeric_limits<int>::max())
+ return;
+
+ StackBytesFree.resize(FixedCSEnd, true);
+
+ SmallVector<int, 16> AllocatedFrameSlots;
+ // Add fixed objects.
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
+ // StackSlot scavenging is only implemented for the default stack.
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
+ // Add callee-save objects if there are any.
+ if (MinCSFrameIndex <= MaxCSFrameIndex) {
+ for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
+ }
+
+ for (int i : AllocatedFrameSlots) {
+ // These are converted from int64_t, but they should always fit in int
+ // because of the FixedCSEnd check above.
+ int ObjOffset = MFI.getObjectOffset(i);
+ int ObjSize = MFI.getObjectSize(i);
+ int ObjStart, ObjEnd;
+ if (StackGrowsDown) {
+ // ObjOffset is negative when StackGrowsDown is true.
+ ObjStart = -ObjOffset - ObjSize;
+ ObjEnd = -ObjOffset;
+ } else {
+ ObjStart = ObjOffset;
+ ObjEnd = ObjOffset + ObjSize;
+ }
+ // Ignore fixed holes that are in the previous stack frame.
+ if (ObjEnd > 0)
+ StackBytesFree.reset(ObjStart, ObjEnd);
+ }
+}
+
+/// Assign frame object to an unused portion of the stack in the fixed stack
+/// object range. Return true if the allocation was successful.
+static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
+ bool StackGrowsDown, Align MaxAlign,
+ BitVector &StackBytesFree) {
+ if (MFI.isVariableSizedObjectIndex(FrameIdx))
+ return false;
+
+ if (StackBytesFree.none()) {
+    // Clear it to speed up later scavengeStackSlot calls to
+    // StackBytesFree.none().
+ StackBytesFree.clear();
+ return false;
+ }
+
+ Align ObjAlign = MFI.getObjectAlign(FrameIdx);
+ if (ObjAlign > MaxAlign)
+ return false;
+
+ int64_t ObjSize = MFI.getObjectSize(FrameIdx);
+ int FreeStart;
+ for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
+ FreeStart = StackBytesFree.find_next(FreeStart)) {
+
+ // Check that free space has suitable alignment.
+ unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart;
+ if (alignTo(ObjStart, ObjAlign) != ObjStart)
+ continue;
+
+ if (FreeStart + ObjSize > StackBytesFree.size())
+ return false;
+
+ bool AllBytesFree = true;
+ for (unsigned Byte = 0; Byte < ObjSize; ++Byte)
+ if (!StackBytesFree.test(FreeStart + Byte)) {
+ AllBytesFree = false;
+ break;
+ }
+ if (AllBytesFree)
+ break;
+ }
+
+ if (FreeStart == -1)
+ return false;
+
+ if (StackGrowsDown) {
+ int ObjStart = -(FreeStart + ObjSize);
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP["
+ << ObjStart << "]\n");
+ MFI.setObjectOffset(FrameIdx, ObjStart);
+ } else {
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP["
+ << FreeStart << "]\n");
+ MFI.setObjectOffset(FrameIdx, FreeStart);
+ }
+
+ StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
+ return true;
+}
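+
+// A standard-library-only sketch of the scavenging above: scan a bitmap of
+// free bytes in the fixed/callee-save area for a suitably aligned run that
+// fits the object, claim it, and return its start (-1 on failure). A
+// down-growing stack is assumed, so alignment is checked on FreeStart + Size.
+#include <cstdint>
+#include <vector>
+namespace scavenge_sketch {
+inline int64_t claimFreeRun(std::vector<bool> &BytesFree, int64_t Size,
+                            int64_t Align) {
+  for (int64_t Start = 0; Start + Size <= (int64_t)BytesFree.size(); ++Start) {
+    if ((Start + Size) % Align != 0)
+      continue;                       // object address would be misaligned
+    bool AllFree = true;
+    for (int64_t B = 0; B < Size && AllFree; ++B)
+      AllFree = BytesFree[Start + B];
+    if (!AllFree)
+      continue;
+    for (int64_t B = 0; B < Size; ++B)
+      BytesFree[Start + B] = false;   // claim the run
+    return Start;
+  }
+  return -1;                          // caller falls back to a fresh slot
+}
+} // namespace scavenge_sketch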
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
+ int64_t &Offset, Align &MaxAlign) {
+
+ for (int i : UnassignedObjs) {
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ ProtectedObjs.insert(i);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+#ifdef EXPENSIVE_CHECKS
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
+ if (!MFI.isDeadObjectIndex(i) &&
+ MFI.getStackID(i) == TargetStackID::Default)
+ assert(MFI.getObjectAlign(i) <= MFI.getMaxAlign() &&
+ "MaxAlignment is invalid");
+#endif
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // Adjust 'Offset' to point to the end of last fixed sized preallocated
+ // object.
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
+ continue;
+
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at the lower address
+      // of the object -- which is given by its offset. For a down-growing
+      // stack the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI.getObjectOffset(i);
+ } else {
+ // The maximum distance from the start pointer is at the upper
+ // address of the object.
+ FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ Align MaxAlign = MFI.getMaxAlign();
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (MaxCSFrameIndex >= MinCSFrameIndex) {
+ for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) {
+ unsigned FrameIndex =
+ StackGrowsDown ? MinCSFrameIndex + i : MaxCSFrameIndex - i;
+
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(FrameIndex) != TargetStackID::Default)
+ continue;
+
+ // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex))
+ if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex))
+ continue;
+
+ AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign);
+ }
+ }
+
+ assert(MaxAlign == MFI.getMaxAlign() &&
+ "MFI.getMaxAlign should already account for all callee-saved "
+ "registers without a fixed stack slot");
+
+ // FixedCSEnd is the stack offset to the end of the fixed and callee-save
+ // stack area.
+ int64_t FixedCSEnd = Offset;
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // incoming stack pointer if a frame pointer is required and is closer
+ // to the incoming rather than the final stack pointer.
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF);
+ if (RS && EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (int SFI : SFIs)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+  // FIXME: Once this is working, the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI.getUseLocalStackAllocationBlock()) {
+ Align Alignment = MFI.getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = alignTo(Offset, Alignment);
+
+ LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset
+ << "]\n");
+ MFI.setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI.getLocalFrameSize();
+
+ MaxAlign = std::max(Alignment, MaxAlign);
+ }
+
+ // Retrieve the Exception Handler registration node.
+ int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
+ if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo())
+ EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> ProtectedObjs;
+ if (MFI.hasStackProtectorIndex()) {
+ int StackProtectorFI = MFI.getStackProtectorIndex();
+ StackObjSet LargeArrayObjs;
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+ // If we need a stack protector, we need to make sure that
+ // LocalStackSlotPass didn't already allocate a slot for it.
+ // If we are told to use the LocalStackAllocationBlock, the stack protector
+ // is expected to be already pre-allocated.
+ if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) {
+ // If the stack protector isn't on the default stack then it's up to the
+ // target to set the stack offset.
+ assert(MFI.getObjectOffset(StackProtectorFI) != 0 &&
+ "Offset of stack protector on non-default stack expected to be "
+ "already set.");
+ assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) &&
+ "Stack protector on non-default stack expected to not be "
+ "pre-allocated by LocalStackSlotPass.");
+ } else if (!MFI.getUseLocalStackAllocationBlock()) {
+ AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset,
+ MaxAlign);
+ } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
+ llvm_unreachable(
+ "Stack protector not pre-allocated by LocalStackSlotPass.");
+ }
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI.isDeadObjectIndex(i))
+ continue;
+ if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
+ continue;
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
+ continue;
+
+ switch (MFI.getObjectSSPLayout(i)) {
+ case MachineFrameInfo::SSPLK_None:
+ continue;
+ case MachineFrameInfo::SSPLK_SmallArray:
+ SmallArrayObjs.insert(i);
+ continue;
+ case MachineFrameInfo::SSPLK_AddrOf:
+ AddrOfObjs.insert(i);
+ continue;
+ case MachineFrameInfo::SSPLK_LargeArray:
+ LargeArrayObjs.insert(i);
+ continue;
+ }
+ llvm_unreachable("Unexpected SSPLayoutKind.");
+ }
+
+ // We expect **all** the protected stack objects to be pre-allocated by
+ // LocalStackSlotPass. If it turns out that PEI still has to allocate some
+ // of them, we may end up messing up the expected order of the objects.
+ if (MFI.getUseLocalStackAllocationBlock() &&
+ !(LargeArrayObjs.empty() && SmallArrayObjs.empty() &&
+ AddrOfObjs.empty()))
+ llvm_unreachable("Found protected stack objects not pre-allocated by "
+ "LocalStackSlotPass.");
+
+ AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ }
+
+ SmallVector<int, 8> ObjectsToAllocate;
+
+ // Then prepare to assign frame offsets to stack objects that are not used to
+ // spill callee saved registers.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI.isDeadObjectIndex(i))
+ continue;
+ if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i)
+ continue;
+ if (ProtectedObjs.count(i))
+ continue;
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
+ continue;
+
+ // Add the objects that we need to allocate to our working set.
+ ObjectsToAllocate.push_back(i);
+ }
+
+ // Allocate the EH registration node first if one is present.
+ if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
+ AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
+ MaxAlign);
+
+ // Give the targets a chance to order the objects the way they like it.
+ if (MF.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MF.getTarget().Options.StackSymbolOrdering)
+ TFI.orderFrameObjects(MF, ObjectsToAllocate);
+
+  // Keep track of which bytes in the fixed and callee-save range are used so
+  // we can use the holes when allocating later stack objects. Only do this if
+  // the stack protector isn't being used, the target requests it, and we're
+  // optimizing.
+ BitVector StackBytesFree;
+ if (!ObjectsToAllocate.empty() &&
+ MF.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF))
+ computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
+ FixedCSEnd, StackBytesFree);
+
+ // Now walk the objects and actually assign base offsets to them.
+ for (auto &Object : ObjectsToAllocate)
+ if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
+ StackBytesFree))
+ AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign);
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (int SFI : SFIs)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF))
+ Offset += MFI.getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ Align StackAlign;
+ if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
+ (RegInfo->hasStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlign();
+ else
+ StackAlign = TFI.getTransientStackAlign();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ int64_t OffsetBeforeAlignment = Offset;
+ Offset = alignTo(Offset, StackAlign);
+
+    // If we have increased the offset to fulfill the alignment constraints,
+    // then the scavenging spill slots may become harder to reach from the
+    // stack pointer, so float them to keep them close.
+ if (StackGrowsDown && OffsetBeforeAlignment != Offset && RS &&
+ !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs()
+ << "Adjusting emergency spill slots!\n";);
+ int64_t Delta = Offset - OffsetBeforeAlignment;
+ for (int SFI : SFIs) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "Adjusting offset of emergency spill slot #" << SFI
+ << " from " << MFI.getObjectOffset(SFI););
+ MFI.setObjectOffset(SFI, MFI.getObjectOffset(SFI) - Delta);
+ LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(SFI) << "\n";);
+ }
+ }
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI.setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
+}
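+
+// A standard-library-only sketch of the final rounding above: reserve the
+// outgoing call-frame area when the target keeps it in the frame, then round
+// the frame to the larger of the stack alignment and the largest object
+// alignment so SP-relative offsets remain valid when the FP is eliminated.
+#include <algorithm>
+#include <cstdint>
+namespace frame_round_sketch {
+inline int64_t finalFrameSize(int64_t Offset, int64_t MaxCallFrameSize,
+                              bool ReservedCallFrame, int64_t StackAlign,
+                              int64_t MaxObjAlign, int64_t LocalAreaOffset) {
+  if (ReservedCallFrame)
+    Offset += MaxCallFrameSize;
+  int64_t Align = std::max(StackAlign, MaxObjAlign);
+  Offset = (Offset + Align - 1) / Align * Align;
+  return Offset - LocalAreaOffset;    // the value recorded as the stack size
+}
+} // namespace frame_round_sketch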
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+void PEI::insertPrologEpilogCode(MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+
+ // Add prologue to the function...
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.emitPrologue(MF, *SaveBlock);
+
+ // Add epilogue to restore the callee-save registers in each exiting block.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ TFI.emitEpilogue(MF, *RestoreBlock);
+
+ // Zero call used registers before restoring callee-saved registers.
+ insertZeroCallUsedRegs(MF);
+
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.inlineStackProbe(MF, *SaveBlock);
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (MF.shouldSplitStack()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForSegmentedStacks(MF, *SaveBlock);
+ }
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (MF.getFunction().getCallingConv() == CallingConv::HiPE)
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForHiPEPrologue(MF, *SaveBlock);
+}
+
+/// insertZeroCallUsedRegs - Zero out call used registers.
+void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+
+ if (!F.hasFnAttribute("zero-call-used-regs"))
+ return;
+
+ using namespace ZeroCallUsedRegs;
+
+ ZeroCallUsedRegsKind ZeroRegsKind =
+ StringSwitch<ZeroCallUsedRegsKind>(
+ F.getFnAttribute("zero-call-used-regs").getValueAsString())
+ .Case("skip", ZeroCallUsedRegsKind::Skip)
+ .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg)
+ .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR)
+ .Case("used-arg", ZeroCallUsedRegsKind::UsedArg)
+ .Case("used", ZeroCallUsedRegsKind::Used)
+ .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg)
+ .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR)
+ .Case("all-arg", ZeroCallUsedRegsKind::AllArg)
+ .Case("all", ZeroCallUsedRegsKind::All);
+
+ if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip)
+ return;
+
+ const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR;
+ const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED;
+ const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG;
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const BitVector AllocatableSet(TRI.getAllocatableSet(MF));
+
+ // Mark all used registers.
+ BitVector UsedRegs(TRI.getNumRegs());
+ if (OnlyUsed)
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB) {
+ // skip debug instructions
+ if (MI.isDebugInstr())
+ continue;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ MCRegister Reg = MO.getReg();
+ if (AllocatableSet[Reg] && !MO.isImplicit() &&
+ (MO.isDef() || MO.isUse()))
+ UsedRegs.set(Reg);
+ }
+ }
+
+ // Get a list of registers that are used.
+ BitVector LiveIns(TRI.getNumRegs());
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MF.front().liveins())
+ LiveIns.set(LI.PhysReg);
+
+ BitVector RegsToZero(TRI.getNumRegs());
+ for (MCRegister Reg : AllocatableSet.set_bits()) {
+ // Skip over fixed registers.
+ if (TRI.isFixedRegister(MF, Reg))
+ continue;
+
+ // Want only general purpose registers.
+ if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg))
+ continue;
+
+ // Want only used registers.
+ if (OnlyUsed && !UsedRegs[Reg])
+ continue;
+
+ // Want only registers used for arguments.
+ if (OnlyArg) {
+ if (OnlyUsed) {
+ if (!LiveIns[Reg])
+ continue;
+ } else if (!TRI.isArgumentRegister(MF, Reg)) {
+ continue;
+ }
+ }
+
+ RegsToZero.set(Reg);
+ }
+
+ // Don't clear registers that are live when leaving the function.
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB.terminators()) {
+ if (!MI.isReturn())
+ continue;
+
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ MCRegister Reg = MO.getReg();
+
+        // This picks up sibling registers (e.g. %al -> %ah).
+ for (MCRegUnit Unit : TRI.regunits(Reg))
+ RegsToZero.reset(Unit);
+
+ for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
+ RegsToZero.reset(SReg);
+ }
+ }
+
+ // Don't need to clear registers that are used/clobbered by terminating
+ // instructions.
+ for (const MachineBasicBlock &MBB : MF) {
+ if (!MBB.isReturnBlock())
+ continue;
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+ for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E;
+ ++I) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ for (const MCPhysReg &Reg :
+ TRI.sub_and_superregs_inclusive(MO.getReg()))
+ RegsToZero.reset(Reg);
+ }
+ }
+ }
+
+ // Don't clear registers that must be preserved.
+ for (const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ MCPhysReg CSReg = *CSRegs; ++CSRegs)
+ for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSReg))
+ RegsToZero.reset(Reg);
+
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isReturnBlock())
+ TFI.emitZeroCallUsedRegs(RegsToZero, MBB);
+}
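+
+// A standard-library-only sketch of how the "zero-call-used-regs" modes above
+// decompose into the three predicates used in the register selection loop:
+// each mode is a bitmask over "only GPRs", "only used", and "only argument
+// registers". The enum values here are illustrative, not the LLVM definitions.
+#include <cstdint>
+#include <string_view>
+namespace zero_regs_sketch {
+enum : uint8_t { OnlyGPR = 1, OnlyUsed = 2, OnlyArg = 4, Skip = 8 };
+inline uint8_t parseZeroRegsKind(std::string_view S) {
+  if (S == "skip")         return Skip;
+  if (S == "used-gpr-arg") return OnlyUsed | OnlyGPR | OnlyArg;
+  if (S == "used-gpr")     return OnlyUsed | OnlyGPR;
+  if (S == "used-arg")     return OnlyUsed | OnlyArg;
+  if (S == "used")         return OnlyUsed;
+  if (S == "all-gpr-arg")  return OnlyGPR | OnlyArg;
+  if (S == "all-gpr")      return OnlyGPR;
+  if (S == "all-arg")      return OnlyArg;
+  return 0;                // "all": no restriction on the candidate set
+}
+} // namespace zero_regs_sketch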
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+void PEI::replaceFrameIndices(MachineFunction &MF) {
+ const auto &ST = MF.getSubtarget();
+ const TargetFrameLowering &TFI = *ST.getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(MF))
+ return;
+
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+ // Allow the target to determine this after knowing the frame size.
+ FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
+ TRI->requiresFrameIndexReplacementScavenging(MF);
+
+ // Store SPAdj at exit of a basic block.
+ SmallVector<int, 8> SPState;
+ SPState.resize(MF.getNumBlockIDs());
+ df_iterator_default_set<MachineBasicBlock*> Reachable;
+
+ // Iterate over the reachable blocks in DFS order.
+ for (auto DFI = df_ext_begin(&MF, Reachable), DFE = df_ext_end(&MF, Reachable);
+ DFI != DFE; ++DFI) {
+ int SPAdj = 0;
+ // Check the exit state of the DFS stack predecessor.
+ if (DFI.getPathLength() >= 2) {
+ MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+ assert(Reachable.count(StackPred) &&
+ "DFS stack predecessor is already visited.\n");
+ SPAdj = SPState[StackPred->getNumber()];
+ }
+ MachineBasicBlock *BB = *DFI;
+ replaceFrameIndices(BB, MF, SPAdj);
+ SPState[BB->getNumber()] = SPAdj;
+ }
+
+ // Handle the unreachable blocks.
+ for (auto &BB : MF) {
+ if (Reachable.count(&BB))
+ // Already handled in DFS traversal.
+ continue;
+ int SPAdj = 0;
+ replaceFrameIndices(&BB, MF, SPAdj);
+ }
+}
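+
+// A standard-library-only sketch of the SP-adjustment propagation above: a
+// depth-first walk seeds each block's entry SPAdj from the exit SPAdj of its
+// DFS predecessor, and unreachable blocks fall back to zero. The successor
+// lists and the ProcessBlock callback are hypothetical stand-ins.
+#include <functional>
+#include <utility>
+#include <vector>
+namespace spadj_sketch {
+inline void propagateSPAdj(
+    const std::vector<std::vector<int>> &Succs, int Entry,
+    const std::function<void(int /*BB*/, int & /*SPAdj*/)> &ProcessBlock) {
+  std::vector<int> ExitSPAdj(Succs.size(), 0);
+  std::vector<bool> Reached(Succs.size(), false);
+  // Iterative DFS; each stack entry remembers the block it was reached from.
+  std::vector<std::pair<int, int>> Stack{{Entry, -1}};
+  while (!Stack.empty()) {
+    auto [BB, Pred] = Stack.back();
+    Stack.pop_back();
+    if (Reached[BB])
+      continue;
+    Reached[BB] = true;
+    int SPAdj = Pred < 0 ? 0 : ExitSPAdj[Pred];
+    ProcessBlock(BB, SPAdj);          // may change SPAdj across the block
+    ExitSPAdj[BB] = SPAdj;
+    for (int Succ : Succs[BB])
+      Stack.emplace_back(Succ, BB);
+  }
+  for (int BB = 0, E = (int)Succs.size(); BB != E; ++BB)
+    if (!Reached[BB]) {
+      int SPAdj = 0;                  // unreachable blocks start from zero
+      ProcessBlock(BB, SPAdj);
+    }
+}
+} // namespace spadj_sketch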
+
+bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+ unsigned OpIdx, int SPAdj) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ if (MI.isDebugValue()) {
+
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ assert(MI.isDebugOperand(&Op) &&
+ "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+ " machine instruction");
+ Register Reg;
+ unsigned FrameIdx = Op.getIndex();
+ unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
+
+ StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
+ Op.ChangeToRegister(Reg, false /*isDef*/);
+
+ const DIExpression *DIExpr = MI.getDebugExpression();
+
+ // If we have a direct DBG_VALUE, and its location expression isn't
+ // currently complex, then adding an offset will morph it into a
+ // complex location that is interpreted as being a memory address.
+ // This changes a pointer-valued variable to dereference that pointer,
+ // which is incorrect. Fix by adding DW_OP_stack_value.
+
+ if (MI.isNonListDebugValue()) {
+ unsigned PrependFlags = DIExpression::ApplyOffset;
+ if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+ PrependFlags |= DIExpression::StackValue;
+
+      // If we have a DBG_VALUE that is indirect and has an implicit location
+      // expression, we need to insert a deref before prepending a memory
+      // location expression. After doing this we also change the DBG_VALUE
+      // to be direct.
+ if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ bool WithStackValue = true;
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ // Make the DBG_VALUE direct.
+ MI.getDebugOffset().ChangeToRegister(0, false);
+ }
+ DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
+ } else {
+      // The debug operand at DebugOpIndex was a frame index at offset
+      // `Offset`; now that the operand has been replaced with the frame
+      // register, we must express the location as `register x, plus Offset`.
+ unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+ SmallVector<uint64_t, 3> Ops;
+ TRI.getOffsetOpcodes(Offset, Ops);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+ }
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
+ return true;
+ }
+
+ if (MI.isDebugPHI()) {
+ // Allow stack ref to continue onwards.
+ return true;
+ }
+
+ // TODO: This code should be commoned with the code for
+ // PATCHPOINT. There's no good reason for the difference in
+ // implementation other than historical accident. The only
+ // remaining difference is the unconditional use of the stack
+ // pointer as the base register.
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
+ assert((!MI.isDebugValue() || OpIdx == 0) &&
+ "Frame indicies can only appear as the first operand of a "
+ "DBG_VALUE machine instruction");
+ Register Reg;
+ MachineOperand &Offset = MI.getOperand(OpIdx + 1);
+ StackOffset refOffset = TFI->getFrameIndexReferencePreferSP(
+ MF, MI.getOperand(OpIdx).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
+ assert(!refOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj);
+ MI.getOperand(OpIdx).ChangeToRegister(Reg, false /*isDef*/);
+ return true;
+ }
+ return false;
+}
+
+void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
+ MachineFunction &MF, int &SPAdj) {
+ assert(MF.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+
+ RegScavenger *LocalRS = FrameIndexEliminationScavenging ? RS : nullptr;
+ if (LocalRS)
+ LocalRS->enterBasicBlockEnd(*BB);
+
+ for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+ if (TII.isFrameInstr(MI)) {
+ TFI.eliminateCallFramePseudoInstr(MF, *BB, &MI);
+ continue;
+ }
+
+    // Step backwards to get the liveness state at (immediately after) MI.
+ if (LocalRS)
+ LocalRS->backward(MI);
+
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+ if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
+ continue;
+
+ // Eliminate this FrameIndex operand.
+ //
+ // Save and restore the scavenger's position around the call to
+ // eliminateFrameIndex in case it erases MI and invalidates the iterator.
+ MachineBasicBlock::iterator Save;
+ if (LocalRS)
+ Save = std::next(LocalRS->getCurrentPosition());
+ bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
+ if (LocalRS)
+ LocalRS->skipTo(std::prev(Save));
+
+ if (Removed)
+ break;
+ }
+ }
+}
+
+void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
+ int &SPAdj) {
+ assert(MF.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+ if (TRI.supportsBackwardScavenger())
+ return replaceFrameIndicesBackward(BB, MF, SPAdj);
+
+ if (RS && FrameIndexEliminationScavenging)
+ RS->enterBasicBlock(*BB);
+
+ bool InsideCallSequence = false;
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ if (TII.isFrameInstr(*I)) {
+ InsideCallSequence = TII.isFrameSetup(*I);
+ SPAdj += TII.getSPAdjust(*I);
+ I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I);
+ continue;
+ }
+
+ MachineInstr &MI = *I;
+ bool DoIncr = true;
+ bool DidFinishLoop = true;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+ if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
+ continue;
+
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexEliminationScavenging ? RS : nullptr);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ DidFinishLoop = false;
+ break;
+ }
+
+ // If we are looking at a call sequence, we need to keep track of
+ // the SP adjustment made by each instruction in the sequence.
+ // This includes both the frame setup/destroy pseudos (handled above),
+ // as well as other instructions that have side effects w.r.t the SP.
+ // Note that this must come after eliminateFrameIndex, because
+ // if I itself referred to a frame index, we shouldn't count its own
+ // adjustment.
+ if (DidFinishLoop && InsideCallSequence)
+ SPAdj += TII.getSPAdjust(MI);
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && FrameIndexEliminationScavenging && DidFinishLoop)
+ RS->forward(MI);
+ }
+}
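+
+// Editorial note (not part of the patch): this forward walk and
+// replaceFrameIndicesBackward above differ mainly in the scavenger direction
+// (RS->forward here versus LocalRS->backward there) and in the iterator
+// bookkeeping required because eliminateFrameIndex may insert or erase
+// instructions around the one currently being visited.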
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
new file mode 100644
index 000000000000..913e0035b046
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -0,0 +1,150 @@
+//===- PseudoProbeInserter.cpp - Insert annotation for callsite profiling -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoProbeInserter pass, which inserts pseudo
+// probe annotations for call instructions that carry a pseudo-probe-specific
+// DWARF discriminator. Such a discriminator indicates that the call
+// instruction comes with a pseudo probe, and the discriminator value holds
+// the information needed to identify the corresponding counter.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "pseudo-probe-inserter"
+
+using namespace llvm;
+
+namespace {
+class PseudoProbeInserter : public MachineFunctionPass {
+public:
+ static char ID;
+
+ PseudoProbeInserter() : MachineFunctionPass(ID) {
+ initializePseudoProbeInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Pseudo Probe Inserter"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool doInitialization(Module &M) override {
+ ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName);
+ return false;
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!ShouldRun)
+ return false;
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ MachineInstr *FirstInstr = nullptr;
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isPseudo())
+ FirstInstr = &MI;
+ if (MI.isCall()) {
+ if (DILocation *DL = MI.getDebugLoc()) {
+ auto Value = DL->getDiscriminator();
+ if (DILocation::isPseudoProbeDiscriminator(Value)) {
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::PSEUDO_PROBE))
+ .addImm(getFuncGUID(MF.getFunction().getParent(), DL))
+ .addImm(
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Value))
+ .addImm(
+ PseudoProbeDwarfDiscriminator::extractProbeType(Value))
+ .addImm(PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+ Value));
+ Changed = true;
+ }
+ }
+ }
+ }
+
+      // Walk the block backwards and move PSEUDO_PROBE before the first real
+      // instruction to fix out-of-order probes. Probes that end up as the
+      // terminator of a block are a problem: during offline counts processing,
+      // the samples collected on the first physical instruction following a
+      // probe are counted towards the probe. This is logically equivalent to
+      // treating the instruction next to a probe as if it came from the same
+      // block as the probe. That is accurate most of the time, unless the
+      // instruction can be reached from multiple flows, which means it
+      // actually starts a new block. Samples collected on such probes may
+      // cause imprecision in the counts inference algorithm. Fortunately, if
+      // there are still other native instructions preceding the probe, we can
+      // use them as a placeholder to collect samples for the probe.
+ if (FirstInstr) {
+ auto MII = MBB.rbegin();
+ while (MII != MBB.rend()) {
+ // Skip all pseudo probes followed by a real instruction since they
+ // are not dangling.
+ if (!MII->isPseudo())
+ break;
+ auto Cur = MII++;
+ if (Cur->getOpcode() != TargetOpcode::PSEUDO_PROBE)
+ continue;
+ // Move the dangling probe before FirstInstr.
+ auto *ProbeInstr = &*Cur;
+ MBB.remove(ProbeInstr);
+ MBB.insert(FirstInstr, ProbeInstr);
+ Changed = true;
+ }
+ } else {
+        // Probes not surrounded by any real instructions in the same block are
+        // called dangling probes. Since there is no good way to pick a sample
+        // collection point for dangling probes at compile time, they are
+        // removed so that the profile correlation tool will not report any
+        // samples collected for them, and it is up to the counts inference
+        // tool to assign them a reasonable count.
+ SmallVector<MachineInstr *, 4> ToBeRemoved;
+ for (MachineInstr &MI : MBB) {
+ if (MI.isPseudoProbe())
+ ToBeRemoved.push_back(&MI);
+ }
+
+ for (auto *MI : ToBeRemoved)
+ MI->eraseFromParent();
+
+ Changed |= !ToBeRemoved.empty();
+ }
+ }
+
+ return Changed;
+ }
+
+private:
+ uint64_t getFuncGUID(Module *M, DILocation *DL) {
+ auto Name = DL->getSubprogramLinkageName();
+ return Function::getGUID(Name);
+ }
+
+ bool ShouldRun = false;
+};
+} // namespace
+
+char PseudoProbeInserter::ID = 0;
+INITIALIZE_PASS_BEGIN(PseudoProbeInserter, DEBUG_TYPE,
+ "Insert pseudo probe annotations for value profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PseudoProbeInserter, DEBUG_TYPE,
+ "Insert pseudo probe annotations for value profiling",
+ false, false)
+
+FunctionPass *llvm::createPseudoProbeInserter() {
+ return new PseudoProbeInserter();
+}
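+
+// Editorial sketch (not part of the patch): for a call whose DILocation
+// carries a pseudo-probe discriminator, the pass materializes the probe as a
+// separate pseudo instruction immediately before the call, e.g. (all operand
+// values below are hypothetical):
+//
+//   PSEUDO_PROBE 6699318081062747564, 3, 0, 0
+//   CALL64pcrel32 @callee, ...
+//
+// where the operands are, in order, the enclosing function's GUID, the probe
+// index, the probe type, and the probe attributes decoded from the
+// discriminator.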
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 000000000000..40c52b9d9707
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,146 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static const char *const PSVNames[] = {
+ "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+ "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
+
+PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetMachine &TM)
+ : Kind(Kind) {
+ AddressSpace = TM.getAddressSpaceForPseudoSourceKind(Kind);
+}
+
+PseudoSourceValue::~PseudoSourceValue() = default;
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ if (Kind < TargetCustom)
+ O << PSVNames[Kind];
+ else
+ O << "TargetCustom" << Kind;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (isStack())
+ return false;
+ if (isGOT() || isConstantPool() || isJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ if (isStack() || isGOT() || isConstantPool() || isJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return !(isGOT() || isConstantPool() || isJumpTable());
+}
+
+bool FixedStackPseudoSourceValue::isConstant(
+ const MachineFrameInfo *MFI) const {
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ return MFI->isAliasedObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
+
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(unsigned Kind,
+ const TargetMachine &TM)
+ : PseudoSourceValue(Kind, TM) {}
+
+bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return false;
+}
+
+GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
+ const GlobalValue *GV, const TargetMachine &TM)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry, TM), GV(GV) {}
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(
+ const char *ES, const TargetMachine &TM)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TM), ES(ES) {}
+
+PseudoSourceValueManager::PseudoSourceValueManager(const TargetMachine &TMInfo)
+ : TM(TMInfo), StackPSV(PseudoSourceValue::Stack, TM),
+ GOTPSV(PseudoSourceValue::GOT, TM),
+ JumpTablePSV(PseudoSourceValue::JumpTable, TM),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool, TM) {}
+
+const PseudoSourceValue *PseudoSourceValueManager::getStack() {
+ return &StackPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; }
+
+const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() {
+ return &ConstantPoolPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
+ return &JumpTablePSV;
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getFixedStack(int FI) {
+ std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
+ if (!V)
+ V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM);
+ return V.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
+ std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
+ GlobalCallEntries[GV];
+ if (!E)
+ E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TM);
+ return E.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
+ std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
+ ExternalCallEntries[ES];
+ if (!E)
+ E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TM);
+ return E.get();
+}
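+
+// Editorial usage sketch (assumption, not part of the patch): consumers
+// normally reach these objects through the MachineFunction's manager, e.g.
+//
+//   MachineMemOperand *MMO = MF.getMachineMemOperand(
+//       MachinePointerInfo(MF.getPSVManager().getFixedStack(FI)),
+//       MachineMemOperand::MOLoad, Size, Alignment);
+//
+// The manager caches one FixedStackPseudoSourceValue per frame index, so
+// repeated calls with the same FI return the same pointer.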
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
new file mode 100644
index 000000000000..abf3b1e6fbb9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -0,0 +1,1799 @@
+//===- RDFGraph.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Target-independent, SSA-based data flow graph for register data flow (RDF).
+//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <set>
+#include <utility>
+#include <vector>
+
+// Printing functions. Have them here first, so that the rest of the code
+// can use them.
+namespace llvm::rdf {
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P) {
+ P.G.getPRI().print(OS, P.Obj);
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P) {
+ if (P.Obj == 0)
+ return OS << "null";
+ auto NA = P.G.addr<NodeBase *>(P.Obj);
+ uint16_t Attrs = NA.Addr->getAttrs();
+ uint16_t Kind = NodeAttrs::kind(Attrs);
+ uint16_t Flags = NodeAttrs::flags(Attrs);
+ switch (NodeAttrs::type(Attrs)) {
+ case NodeAttrs::Code:
+ switch (Kind) {
+ case NodeAttrs::Func:
+ OS << 'f';
+ break;
+ case NodeAttrs::Block:
+ OS << 'b';
+ break;
+ case NodeAttrs::Stmt:
+ OS << 's';
+ break;
+ case NodeAttrs::Phi:
+ OS << 'p';
+ break;
+ default:
+ OS << "c?";
+ break;
+ }
+ break;
+ case NodeAttrs::Ref:
+ if (Flags & NodeAttrs::Undef)
+ OS << '/';
+ if (Flags & NodeAttrs::Dead)
+ OS << '\\';
+ if (Flags & NodeAttrs::Preserving)
+ OS << '+';
+ if (Flags & NodeAttrs::Clobbering)
+ OS << '~';
+ switch (Kind) {
+ case NodeAttrs::Use:
+ OS << 'u';
+ break;
+ case NodeAttrs::Def:
+ OS << 'd';
+ break;
+ case NodeAttrs::Block:
+ OS << 'b';
+ break;
+ default:
+ OS << "r?";
+ break;
+ }
+ break;
+ default:
+ OS << '?';
+ break;
+ }
+ OS << P.Obj;
+ if (Flags & NodeAttrs::Shadow)
+ OS << '"';
+ return OS;
+}
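+
+// Editorial note (illustration, not part of the patch): the tags printed
+// above read as <flags><kind><id>, e.g. "d42" is def node 42, "u13" a use,
+// "p7" a phi, "s9" a statement; a leading '/' marks an undef ref, '\' a dead
+// ref, '+' preserving, '~' clobbering, and a trailing '"' marks a shadow.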
+
+static void printRefHeader(raw_ostream &OS, const Ref RA,
+ const DataFlowGraph &G) {
+ OS << Print(RA.Id, G) << '<' << Print(RA.Addr->getRegRef(G), G) << '>';
+ if (RA.Addr->getFlags() & NodeAttrs::Fixed)
+ OS << '!';
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Def> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getReachedDef())
+ OS << Print(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getReachedUse())
+ OS << Print(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print(N, P.G);
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Use> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print(N, P.G);
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<PhiUse> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getPredecessor())
+ OS << Print(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print(N, P.G);
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Ref> &P) {
+ switch (P.Obj.Addr->getKind()) {
+ case NodeAttrs::Def:
+ OS << PrintNode<DefNode *>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Use:
+ if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
+ OS << PrintNode<PhiUseNode *>(P.Obj, P.G);
+ else
+ OS << PrintNode<UseNode *>(P.Obj, P.G);
+ break;
+ }
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P) {
+ unsigned N = P.Obj.size();
+ for (auto I : P.Obj) {
+ OS << Print(I.Id, P.G);
+ if (--N)
+ OS << ' ';
+ }
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P) {
+ unsigned N = P.Obj.size();
+ for (auto I : P.Obj) {
+ OS << Print(I, P.G);
+ if (--N)
+ OS << ' ';
+ }
+ return OS;
+}
+
+namespace {
+
+template <typename T> struct PrintListV {
+ PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+
+ using Type = T;
+ const NodeList &List;
+ const DataFlowGraph &G;
+};
+
+template <typename T>
+raw_ostream &operator<<(raw_ostream &OS, const PrintListV<T> &P) {
+ unsigned N = P.List.size();
+ for (NodeAddr<T> A : P.List) {
+ OS << PrintNode<T>(A, P.G);
+ if (--N)
+ OS << ", ";
+ }
+ return OS;
+}
+
+} // end anonymous namespace
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Phi> &P) {
+ OS << Print(P.Obj.Id, P.G) << ": phi ["
+ << PrintListV<RefNode *>(P.Obj.Addr->members(P.G), P.G) << ']';
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Stmt> &P) {
+ const MachineInstr &MI = *P.Obj.Addr->getCode();
+ unsigned Opc = MI.getOpcode();
+ OS << Print(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
+ // Print the target for calls and branches (for readability).
+ if (MI.isCall() || MI.isBranch()) {
+ MachineInstr::const_mop_iterator T =
+ llvm::find_if(MI.operands(), [](const MachineOperand &Op) -> bool {
+ return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+ });
+ if (T != MI.operands_end()) {
+ OS << ' ';
+ if (T->isMBB())
+ OS << printMBBReference(*T->getMBB());
+ else if (T->isGlobal())
+ OS << T->getGlobal()->getName();
+ else if (T->isSymbol())
+ OS << T->getSymbolName();
+ }
+ }
+ OS << " [" << PrintListV<RefNode *>(P.Obj.Addr->members(P.G), P.G) << ']';
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Instr> &P) {
+ switch (P.Obj.Addr->getKind()) {
+ case NodeAttrs::Phi:
+ OS << PrintNode<PhiNode *>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Stmt:
+ OS << PrintNode<StmtNode *>(P.Obj, P.G);
+ break;
+ default:
+ OS << "instr? " << Print(P.Obj.Id, P.G);
+ break;
+ }
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Block> &P) {
+ MachineBasicBlock *BB = P.Obj.Addr->getCode();
+ unsigned NP = BB->pred_size();
+ std::vector<int> Ns;
+ auto PrintBBs = [&OS](std::vector<int> Ns) -> void {
+ unsigned N = Ns.size();
+ for (int I : Ns) {
+ OS << "%bb." << I;
+ if (--N)
+ OS << ", ";
+ }
+ };
+
+ OS << Print(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB)
+ << " --- preds(" << NP << "): ";
+ for (MachineBasicBlock *B : BB->predecessors())
+ Ns.push_back(B->getNumber());
+ PrintBBs(Ns);
+
+ unsigned NS = BB->succ_size();
+ OS << " succs(" << NS << "): ";
+ Ns.clear();
+ for (MachineBasicBlock *B : BB->successors())
+ Ns.push_back(B->getNumber());
+ PrintBBs(Ns);
+ OS << '\n';
+
+ for (auto I : P.Obj.Addr->members(P.G))
+ OS << PrintNode<InstrNode *>(I, P.G) << '\n';
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Func> &P) {
+ OS << "DFG dump:[\n"
+ << Print(P.Obj.Id, P.G)
+ << ": Function: " << P.Obj.Addr->getCode()->getName() << '\n';
+ for (auto I : P.Obj.Addr->members(P.G))
+ OS << PrintNode<BlockNode *>(I, P.G) << '\n';
+ OS << "]\n";
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P) {
+ OS << '{';
+ for (auto I : P.Obj)
+ OS << ' ' << Print(I, P.G);
+ OS << " }";
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P) {
+ OS << P.Obj;
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P) {
+ for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E;) {
+ OS << Print(I->Id, P.G) << '<' << Print(I->Addr->getRegRef(P.G), P.G)
+ << '>';
+ I.down();
+ if (I != E)
+ OS << ' ';
+ }
+ return OS;
+}
+
+// Node allocation functions.
+//
+// Node allocator is like a slab memory allocator: it allocates blocks of
+// memory in sizes that are multiples of the size of a node. Each block has
+// the same size. Nodes are allocated from the currently active block, and
+// when it becomes full, a new one is created.
+// The mapping scheme between a node id and its block, and its location
+// within that block, is described in the header file.
+//
+void NodeAllocator::startNewBlock() {
+ void *T = MemPool.Allocate(NodesPerBlock * NodeMemSize, NodeMemSize);
+ char *P = static_cast<char *>(T);
+ Blocks.push_back(P);
+ // Check if the block index is still within the allowed range, i.e. less
+ // than 2^N, where N is the number of bits in NodeId for the block index.
+ // BitsPerIndex is the number of bits per node index.
+ assert((Blocks.size() < ((size_t)1 << (8 * sizeof(NodeId) - BitsPerIndex))) &&
+ "Out of bits for block index");
+ ActiveEnd = P;
+}
+
+bool NodeAllocator::needNewBlock() {
+ if (Blocks.empty())
+ return true;
+
+ char *ActiveBegin = Blocks.back();
+ uint32_t Index = (ActiveEnd - ActiveBegin) / NodeMemSize;
+ return Index >= NodesPerBlock;
+}
+
+Node NodeAllocator::New() {
+ if (needNewBlock())
+ startNewBlock();
+
+ uint32_t ActiveB = Blocks.size() - 1;
+ uint32_t Index = (ActiveEnd - Blocks[ActiveB]) / NodeMemSize;
+ Node NA = {reinterpret_cast<NodeBase *>(ActiveEnd), makeId(ActiveB, Index)};
+ ActiveEnd += NodeMemSize;
+ return NA;
+}
+
+NodeId NodeAllocator::id(const NodeBase *P) const {
+ uintptr_t A = reinterpret_cast<uintptr_t>(P);
+ for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
+ uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
+ if (A < B || A >= B + NodesPerBlock * NodeMemSize)
+ continue;
+ uint32_t Idx = (A - B) / NodeMemSize;
+ return makeId(i, Idx);
+ }
+ llvm_unreachable("Invalid node address");
+}
+
+void NodeAllocator::clear() {
+ MemPool.Reset();
+ Blocks.clear();
+ ActiveEnd = nullptr;
+}
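+
+// Editorial note (assumption, not part of the patch): per the scheme
+// referenced above, makeId(Block, Index) packs the owning block's index into
+// the high bits of the NodeId and the node's slot within that block into the
+// low BitsPerIndex bits, with the value 0 reserved to mean "no node"; ptr()
+// and id() simply invert that packing against Blocks[].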
+
+// Insert node NA after "this" in the circular chain.
+void NodeBase::append(Node NA) {
+ NodeId Nx = Next;
+ // If NA is already "next", do nothing.
+ if (Next != NA.Id) {
+ Next = NA.Id;
+ NA.Addr->Next = Nx;
+ }
+}
+
+// Fundamental node manipulator functions.
+
+// Obtain the register reference from a reference node.
+RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
+ return G.unpack(RefData.PR);
+ assert(RefData.Op != nullptr);
+ return G.makeRegRef(*RefData.Op);
+}
+
+// Set the register reference in the reference node directly (for references
+// in phi nodes).
+void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
+ RefData.PR = G.pack(RR);
+}
+
+// Set the register reference in the reference node based on a machine
+// operand (for references in statement nodes).
+void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
+ (void)G;
+ RefData.Op = Op;
+}
+
+// Get the owner of a given reference node.
+Node RefNode::getOwner(const DataFlowGraph &G) {
+ Node NA = G.addr<NodeBase *>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Code)
+ return NA;
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
+ }
+ llvm_unreachable("No owner in circular list");
+}
+
+// Connect the def node to the reaching def node.
+void DefNode::linkToDef(NodeId Self, Def DA) {
+ RefData.RD = DA.Id;
+ RefData.Sib = DA.Addr->getReachedDef();
+ DA.Addr->setReachedDef(Self);
+}
+
+// Connect the use node to the reaching def node.
+void UseNode::linkToDef(NodeId Self, Def DA) {
+ RefData.RD = DA.Id;
+ RefData.Sib = DA.Addr->getReachedUse();
+ DA.Addr->setReachedUse(Self);
+}
+
+// Get the first member of the code node.
+Node CodeNode::getFirstMember(const DataFlowGraph &G) const {
+ if (CodeData.FirstM == 0)
+ return Node();
+ return G.addr<NodeBase *>(CodeData.FirstM);
+}
+
+// Get the last member of the code node.
+Node CodeNode::getLastMember(const DataFlowGraph &G) const {
+ if (CodeData.LastM == 0)
+ return Node();
+ return G.addr<NodeBase *>(CodeData.LastM);
+}
+
+// Add node NA at the end of the member list of the given code node.
+void CodeNode::addMember(Node NA, const DataFlowGraph &G) {
+ Node ML = getLastMember(G);
+ if (ML.Id != 0) {
+ ML.Addr->append(NA);
+ } else {
+ CodeData.FirstM = NA.Id;
+ NodeId Self = G.id(this);
+ NA.Addr->setNext(Self);
+ }
+ CodeData.LastM = NA.Id;
+}
+
+// Add node NA after member node MA in the given code node.
+void CodeNode::addMemberAfter(Node MA, Node NA, const DataFlowGraph &G) {
+ MA.Addr->append(NA);
+ if (CodeData.LastM == MA.Id)
+ CodeData.LastM = NA.Id;
+}
+
+// Remove member node NA from the given code node.
+void CodeNode::removeMember(Node NA, const DataFlowGraph &G) {
+ Node MA = getFirstMember(G);
+ assert(MA.Id != 0);
+
+ // Special handling if the member to remove is the first member.
+ if (MA.Id == NA.Id) {
+ if (CodeData.LastM == MA.Id) {
+ // If it is the only member, set both first and last to 0.
+ CodeData.FirstM = CodeData.LastM = 0;
+ } else {
+ // Otherwise, advance the first member.
+ CodeData.FirstM = MA.Addr->getNext();
+ }
+ return;
+ }
+
+ while (MA.Addr != this) {
+ NodeId MX = MA.Addr->getNext();
+ if (MX == NA.Id) {
+ MA.Addr->setNext(NA.Addr->getNext());
+ // If the member to remove happens to be the last one, update the
+ // LastM indicator.
+ if (CodeData.LastM == NA.Id)
+ CodeData.LastM = MA.Id;
+ return;
+ }
+ MA = G.addr<NodeBase *>(MX);
+ }
+ llvm_unreachable("No such member");
+}
+
+// Return the list of all members of the code node.
+NodeList CodeNode::members(const DataFlowGraph &G) const {
+ static auto True = [](Node) -> bool { return true; };
+ return members_if(True, G);
+}
+
+// Return the owner of the given instr node.
+Node InstrNode::getOwner(const DataFlowGraph &G) {
+ Node NA = G.addr<NodeBase *>(getNext());
+
+ while (NA.Addr != this) {
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ if (NA.Addr->getKind() == NodeAttrs::Block)
+ return NA;
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
+ }
+ llvm_unreachable("No owner in circular list");
+}
+
+// Add the phi node PA to the given block node.
+void BlockNode::addPhi(Phi PA, const DataFlowGraph &G) {
+ Node M = getFirstMember(G);
+ if (M.Id == 0) {
+ addMember(PA, G);
+ return;
+ }
+
+ assert(M.Addr->getType() == NodeAttrs::Code);
+ if (M.Addr->getKind() == NodeAttrs::Stmt) {
+ // If the first member of the block is a statement, insert the phi as
+ // the first member.
+ CodeData.FirstM = PA.Id;
+ PA.Addr->setNext(M.Id);
+ } else {
+ // If the first member is a phi, find the last phi, and append PA to it.
+ assert(M.Addr->getKind() == NodeAttrs::Phi);
+ Node MN = M;
+ do {
+ M = MN;
+ MN = G.addr<NodeBase *>(M.Addr->getNext());
+ assert(MN.Addr->getType() == NodeAttrs::Code);
+ } while (MN.Addr->getKind() == NodeAttrs::Phi);
+
+ // M is the last phi.
+ addMemberAfter(M, PA, G);
+ }
+}
+
+// Find the block node corresponding to the machine basic block BB in the
+// given func node.
+Block FuncNode::findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const {
+ auto EqBB = [BB](Node NA) -> bool { return Block(NA).Addr->getCode() == BB; };
+ NodeList Ms = members_if(EqBB, G);
+ if (!Ms.empty())
+ return Ms[0];
+ return Block();
+}
+
+// Get the block node for the entry block in the given function.
+Block FuncNode::getEntryBlock(const DataFlowGraph &G) {
+ MachineBasicBlock *EntryB = &getCode()->front();
+ return findBlock(EntryB, G);
+}
+
+// Target operand information.
+//
+
+// For a given instruction, check if there are any bits of the defined
+// register that can remain unchanged across this def.
+bool TargetOperandInfo::isPreserving(const MachineInstr &In,
+ unsigned OpNum) const {
+ return TII.isPredicated(In);
+}
+
+// Check if the definition in operand OpNum produces an unspecified value.
+bool TargetOperandInfo::isClobbering(const MachineInstr &In,
+ unsigned OpNum) const {
+ const MachineOperand &Op = In.getOperand(OpNum);
+ if (Op.isRegMask())
+ return true;
+ assert(Op.isReg());
+ if (In.isCall())
+ if (Op.isDef() && Op.isDead())
+ return true;
+ return false;
+}
+
+// Check if the given instruction specifically requires the operand to use
+// a fixed register.
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In,
+ unsigned OpNum) const {
+ if (In.isCall() || In.isReturn() || In.isInlineAsm())
+ return true;
+ // Check for a tail call.
+ if (In.isBranch())
+ for (const MachineOperand &O : In.operands())
+ if (O.isGlobal() || O.isSymbol())
+ return true;
+
+ const MCInstrDesc &D = In.getDesc();
+ if (D.implicit_defs().empty() && D.implicit_uses().empty())
+ return false;
+ const MachineOperand &Op = In.getOperand(OpNum);
+ // If there is a sub-register, treat the operand as non-fixed. Currently,
+ // fixed registers are those that are listed in the descriptor as implicit
+ // uses or defs, and those lists do not allow sub-registers.
+ if (Op.getSubReg() != 0)
+ return false;
+ Register Reg = Op.getReg();
+ ArrayRef<MCPhysReg> ImpOps =
+ Op.isDef() ? D.implicit_defs() : D.implicit_uses();
+ return is_contained(ImpOps, Reg);
+}
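+
+// Editorial note (not part of the patch): with the logic above, every
+// register operand of a call, return, inline-asm, or tail-call-like branch is
+// treated as fixed; for other instructions an operand is fixed only if it has
+// no sub-register index and its register appears in the descriptor's implicit
+// defs (for defs) or implicit uses (for uses).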
+
+//
+// The data flow graph construction.
+//
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri,
+ const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf)
+ : DefaultTOI(std::make_unique<TargetOperandInfo>(tii)), MF(mf), TII(tii),
+ TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(*DefaultTOI),
+ LiveIns(PRI) {}
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri,
+ const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf,
+ const TargetOperandInfo &toi)
+ : MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
+ LiveIns(PRI) {}
+
+// The implementation of the definition stack.
+// Each register reference has its own definition stack. In particular,
+// the register references "Reg" and "Reg:subreg" will each have their
+// own definition stacks.
+
+// Construct a stack iterator.
+DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
+ bool Top)
+ : DS(S) {
+ if (!Top) {
+ // Initialize to bottom.
+ Pos = 0;
+ return;
+ }
+ // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
+ Pos = DS.Stack.size();
+ while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos - 1]))
+ Pos--;
+}
+
+// Return the size of the stack, including block delimiters.
+unsigned DataFlowGraph::DefStack::size() const {
+ unsigned S = 0;
+ for (auto I = top(), E = bottom(); I != E; I.down())
+ S++;
+ return S;
+}
+
+// Remove the top entry from the stack. Remove all intervening delimiters
+// so that after this, the stack is either empty, or the top of the stack
+// is a non-delimiter.
+void DataFlowGraph::DefStack::pop() {
+ assert(!empty());
+ unsigned P = nextDown(Stack.size());
+ Stack.resize(P);
+}
+
+// Push a delimiter for block node N on the stack.
+void DataFlowGraph::DefStack::start_block(NodeId N) {
+ assert(N != 0);
+ Stack.push_back(Def(nullptr, N));
+}
+
+// Remove all nodes from the top of the stack, until the delimiter for
+// block node N is encountered. Remove the delimiter as well. In effect,
+// this will remove from the stack all definitions from block N.
+void DataFlowGraph::DefStack::clear_block(NodeId N) {
+ assert(N != 0);
+ unsigned P = Stack.size();
+ while (P > 0) {
+ bool Found = isDelimiter(Stack[P - 1], N);
+ P--;
+ if (Found)
+ break;
+ }
+ // This will also remove the delimiter, if found.
+ Stack.resize(P);
+}
+
+// Move the stack iterator up by one.
+unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
+ // Get the next valid position after P (skipping all delimiters).
+ // The input position P does not have to point to a non-delimiter.
+ unsigned SS = Stack.size();
+ bool IsDelim;
+ assert(P < SS);
+ do {
+ P++;
+ IsDelim = isDelimiter(Stack[P - 1]);
+ } while (P < SS && IsDelim);
+ assert(!IsDelim);
+ return P;
+}
+
+// Move the stack iterator down by one.
+unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
+ // Get the preceding valid position before P (skipping all delimiters).
+ // The input position P does not have to point to a non-delimiter.
+ assert(P > 0 && P <= Stack.size());
+ bool IsDelim = isDelimiter(Stack[P - 1]);
+ do {
+ if (--P == 0)
+ break;
+ IsDelim = isDelimiter(Stack[P - 1]);
+ } while (P > 0 && IsDelim);
+ assert(!IsDelim);
+ return P;
+}
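+
+// Editorial sketch (illustration, not part of the patch): the delimiter
+// mechanism can be pictured as follows, where #Bn is the delimiter pushed by
+// start_block(Bn) and Dn are definition nodes:
+//
+//   start_block(B1); push(D1); push(D2);   // stack: #B1 D1 D2
+//   start_block(B2); push(D3);             // stack: #B1 D1 D2 #B2 D3
+//   clear_block(B2);                       // stack: #B1 D1 D2
+//
+// The iterators returned by top() and bottom() skip delimiters via nextUp()
+// and nextDown(), so traversal only ever visits real definitions.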
+
+// Register information.
+
+RegisterAggr DataFlowGraph::getLandingPadLiveIns() const {
+ RegisterAggr LR(getPRI());
+ const Function &F = MF.getFunction();
+ const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr;
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ if (RegisterId R = TLI.getExceptionPointerRegister(PF))
+ LR.insert(RegisterRef(R));
+ if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
+ if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
+ LR.insert(RegisterRef(R));
+ }
+ return LR;
+}
+
+// Node management functions.
+
+// Get the pointer to the node with the id N.
+NodeBase *DataFlowGraph::ptr(NodeId N) const {
+ if (N == 0)
+ return nullptr;
+ return Memory.ptr(N);
+}
+
+// Get the id of the node at the address P.
+NodeId DataFlowGraph::id(const NodeBase *P) const {
+ if (P == nullptr)
+ return 0;
+ return Memory.id(P);
+}
+
+// Allocate a new node and set the attributes to Attrs.
+Node DataFlowGraph::newNode(uint16_t Attrs) {
+ Node P = Memory.New();
+ P.Addr->init();
+ P.Addr->setAttrs(Attrs);
+ return P;
+}
+
+// Make a copy of the given node B, except for the data-flow links, which
+// are set to 0.
+Node DataFlowGraph::cloneNode(const Node B) {
+ Node NA = newNode(0);
+ memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
+ // Ref nodes need to have the data-flow links reset.
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ Ref RA = NA;
+ RA.Addr->setReachingDef(0);
+ RA.Addr->setSibling(0);
+ if (NA.Addr->getKind() == NodeAttrs::Def) {
+ Def DA = NA;
+ DA.Addr->setReachedDef(0);
+ DA.Addr->setReachedUse(0);
+ }
+ }
+ return NA;
+}
+
+// Allocation routines for specific node types/kinds.
+
+Use DataFlowGraph::newUse(Instr Owner, MachineOperand &Op, uint16_t Flags) {
+ Use UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+ UA.Addr->setRegRef(&Op, *this);
+ return UA;
+}
+
+PhiUse DataFlowGraph::newPhiUse(Phi Owner, RegisterRef RR, Block PredB,
+ uint16_t Flags) {
+ PhiUse PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+ assert(Flags & NodeAttrs::PhiRef);
+ PUA.Addr->setRegRef(RR, *this);
+ PUA.Addr->setPredecessor(PredB.Id);
+ return PUA;
+}
+
+Def DataFlowGraph::newDef(Instr Owner, MachineOperand &Op, uint16_t Flags) {
+ Def DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+ DA.Addr->setRegRef(&Op, *this);
+ return DA;
+}
+
+Def DataFlowGraph::newDef(Instr Owner, RegisterRef RR, uint16_t Flags) {
+ Def DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+ assert(Flags & NodeAttrs::PhiRef);
+ DA.Addr->setRegRef(RR, *this);
+ return DA;
+}
+
+Phi DataFlowGraph::newPhi(Block Owner) {
+ Phi PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
+ Owner.Addr->addPhi(PA, *this);
+ return PA;
+}
+
+Stmt DataFlowGraph::newStmt(Block Owner, MachineInstr *MI) {
+ Stmt SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
+ SA.Addr->setCode(MI);
+ Owner.Addr->addMember(SA, *this);
+ return SA;
+}
+
+Block DataFlowGraph::newBlock(Func Owner, MachineBasicBlock *BB) {
+ Block BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
+ BA.Addr->setCode(BB);
+ Owner.Addr->addMember(BA, *this);
+ return BA;
+}
+
+Func DataFlowGraph::newFunc(MachineFunction *MF) {
+ Func FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
+ FA.Addr->setCode(MF);
+ return FA;
+}
+
+// Build the data flow graph.
+void DataFlowGraph::build(const Config &config) {
+ reset();
+ BuildCfg = config;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ ReservedRegs = MRI.getReservedRegs();
+ bool SkipReserved = BuildCfg.Options & BuildOptions::OmitReserved;
+
+ auto Insert = [](auto &Set, auto &&Range) {
+ Set.insert(Range.begin(), Range.end());
+ };
+
+ if (BuildCfg.TrackRegs.empty()) {
+ std::set<RegisterId> BaseSet;
+ if (BuildCfg.Classes.empty()) {
+ // Insert every register.
+ for (unsigned R = 0, E = getPRI().getTRI().getNumRegs(); R != E; ++R)
+ BaseSet.insert(R);
+ } else {
+ for (const TargetRegisterClass *RC : BuildCfg.Classes) {
+ for (MCPhysReg R : *RC)
+ BaseSet.insert(R);
+ }
+ }
+ for (RegisterId R : BaseSet) {
+ if (SkipReserved && ReservedRegs[R])
+ continue;
+ Insert(TrackedUnits, getPRI().getUnits(RegisterRef(R)));
+ }
+ } else {
+ // Track set in Config overrides everything.
+ for (unsigned R : BuildCfg.TrackRegs) {
+ if (SkipReserved && ReservedRegs[R])
+ continue;
+ Insert(TrackedUnits, getPRI().getUnits(RegisterRef(R)));
+ }
+ }
+
+ TheFunc = newFunc(&MF);
+
+ if (MF.empty())
+ return;
+
+ for (MachineBasicBlock &B : MF) {
+ Block BA = newBlock(TheFunc, &B);
+ BlockNodes.insert(std::make_pair(&B, BA));
+ for (MachineInstr &I : B) {
+ if (I.isDebugInstr())
+ continue;
+ buildStmt(BA, I);
+ }
+ }
+
+ Block EA = TheFunc.Addr->getEntryBlock(*this);
+ NodeList Blocks = TheFunc.Addr->members(*this);
+
+ // Collect function live-ins and entry block live-ins.
+ MachineBasicBlock &EntryB = *EA.Addr->getCode();
+ assert(EntryB.pred_empty() && "Function entry block has predecessors");
+ for (std::pair<unsigned, unsigned> P : MRI.liveins())
+ LiveIns.insert(RegisterRef(P.first));
+ if (MRI.tracksLiveness()) {
+ for (auto I : EntryB.liveins())
+ LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask));
+ }
+
+ // Add function-entry phi nodes for the live-in registers.
+ for (RegisterRef RR : LiveIns.refs()) {
+ if (RR.isReg() && !isTracked(RR)) // isReg is likely guaranteed
+ continue;
+ Phi PA = newPhi(EA);
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ Def DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ }
+
+ // Add phis for landing pads.
+  // Landing pads, unlike usual basic blocks, are not entered through
+  // branches in the program, or fall-throughs from other blocks. They
+  // are entered from the exception handling runtime, and the target's ABI
+  // may define certain registers as defined on entry to such a block.
+ RegisterAggr EHRegs = getLandingPadLiveIns();
+ if (!EHRegs.empty()) {
+ for (Block BA : Blocks) {
+ const MachineBasicBlock &B = *BA.Addr->getCode();
+ if (!B.isEHPad())
+ continue;
+
+ // Prepare a list of NodeIds of the block's predecessors.
+ NodeList Preds;
+ for (MachineBasicBlock *PB : B.predecessors())
+ Preds.push_back(findBlock(PB));
+
+ // Build phi nodes for each live-in.
+ for (RegisterRef RR : EHRegs.refs()) {
+ if (RR.isReg() && !isTracked(RR))
+ continue;
+ Phi PA = newPhi(BA);
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ // Add def:
+ Def DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ // Add uses (no reaching defs for phi uses):
+ for (Block PBA : Preds) {
+ PhiUse PUA = newPhiUse(PA, RR, PBA);
+ PA.Addr->addMember(PUA, *this);
+ }
+ }
+ }
+ }
+
+ // Build a map "PhiM" which will contain, for each block, the set
+ // of references that will require phi definitions in that block.
+ BlockRefsMap PhiM(getPRI());
+ for (Block BA : Blocks)
+ recordDefsForDF(PhiM, BA);
+ for (Block BA : Blocks)
+ buildPhis(PhiM, BA);
+
+ // Link all the refs. This will recursively traverse the dominator tree.
+ DefStackMap DM;
+ linkBlockRefs(DM, EA);
+
+ // Finally, remove all unused phi nodes.
+ if (!(BuildCfg.Options & BuildOptions::KeepDeadPhis))
+ removeUnusedPhis();
+}
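+
+// Editorial usage sketch (assumption, not part of the patch; the exact Config
+// spelling is declared in RDFGraph.h): a client constructs the graph from the
+// function's analyses and then builds it. Per the logic above, an empty track
+// set means every register is tracked.
+//
+//   rdf::DataFlowGraph G(MF, TII, TRI, MDT, MDF);
+//   G.build({});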
+
+RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
+ assert(RegisterRef::isRegId(Reg) || RegisterRef::isMaskId(Reg));
+ assert(Reg != 0);
+ if (Sub != 0)
+ Reg = TRI.getSubReg(Reg, Sub);
+ return RegisterRef(Reg);
+}
+
+RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
+ assert(Op.isReg() || Op.isRegMask());
+ if (Op.isReg())
+ return makeRegRef(Op.getReg(), Op.getSubReg());
+ return RegisterRef(getPRI().getRegMaskId(Op.getRegMask()),
+ LaneBitmask::getAll());
+}
+
+// For each stack in the map DefM, push the delimiter for block B on it.
+void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
+ // Push block delimiters.
+ for (auto &P : DefM)
+ P.second.start_block(B);
+}
+
+// Remove all definitions coming from block B from each stack in DefM.
+void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
+ // Pop all defs from this block from the definition stack. Defs that were
+ // added to the map during the traversal of instructions will not have a
+ // delimiter, but for those, the whole stack will be emptied.
+ for (auto &P : DefM)
+ P.second.clear_block(B);
+
+ // Finally, remove empty stacks from the map.
+ for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) {
+ NextI = std::next(I);
+ // This preserves the validity of iterators other than I.
+ if (I->second.empty())
+ DefM.erase(I);
+ }
+}
+
+// Push all definitions from the instruction node IA to an appropriate
+// stack in DefM.
+void DataFlowGraph::pushAllDefs(Instr IA, DefStackMap &DefM) {
+ pushClobbers(IA, DefM);
+ pushDefs(IA, DefM);
+}
+
+// Push all definitions from the instruction node IA to an appropriate
+// stack in DefM.
+void DataFlowGraph::pushClobbers(Instr IA, DefStackMap &DefM) {
+ NodeSet Visited;
+ std::set<RegisterId> Defined;
+
+ // The important objectives of this function are:
+ // - to be able to handle instructions both while the graph is being
+ // constructed, and after the graph has been constructed, and
+ // - maintain proper ordering of definitions on the stack for each
+ // register reference:
+ // - if there are two or more related defs in IA (i.e. coming from
+ // the same machine operand), then only push one def on the stack,
+ // - if there are multiple unrelated defs of non-overlapping
+ // subregisters of S, then the stack for S will have both (in an
+  //     unspecified order), but the order does not matter from the
+  //     data-flow perspective.
+
+ for (Def DA : IA.Addr->members_if(IsDef, *this)) {
+ if (Visited.count(DA.Id))
+ continue;
+ if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering))
+ continue;
+
+ NodeList Rel = getRelatedRefs(IA, DA);
+ Def PDA = Rel.front();
+ RegisterRef RR = PDA.Addr->getRegRef(*this);
+
+ // Push the definition on the stack for the register and all aliases.
+ // The def stack traversal in linkNodeUp will check the exact aliasing.
+ DefM[RR.Reg].push(DA);
+ Defined.insert(RR.Reg);
+ for (RegisterId A : getPRI().getAliasSet(RR.Reg)) {
+ if (RegisterRef::isRegId(A) && !isTracked(RegisterRef(A)))
+ continue;
+ // Check that we don't push the same def twice.
+ assert(A != RR.Reg);
+ if (!Defined.count(A))
+ DefM[A].push(DA);
+ }
+ // Mark all the related defs as visited.
+ for (Node T : Rel)
+ Visited.insert(T.Id);
+ }
+}
+
+// Push all definitions from the instruction node IA to an appropriate
+// stack in DefM.
+void DataFlowGraph::pushDefs(Instr IA, DefStackMap &DefM) {
+ NodeSet Visited;
+#ifndef NDEBUG
+ std::set<RegisterId> Defined;
+#endif
+
+ // The important objectives of this function are:
+ // - to be able to handle instructions both while the graph is being
+ // constructed, and after the graph has been constructed, and
+ // - maintain proper ordering of definitions on the stack for each
+ // register reference:
+ // - if there are two or more related defs in IA (i.e. coming from
+ // the same machine operand), then only push one def on the stack,
+ // - if there are multiple unrelated defs of non-overlapping
+ // subregisters of S, then the stack for S will have both (in an
+  //     unspecified order), but the order does not matter from the
+  //     data-flow perspective.
+
+ for (Def DA : IA.Addr->members_if(IsDef, *this)) {
+ if (Visited.count(DA.Id))
+ continue;
+ if (DA.Addr->getFlags() & NodeAttrs::Clobbering)
+ continue;
+
+ NodeList Rel = getRelatedRefs(IA, DA);
+ Def PDA = Rel.front();
+ RegisterRef RR = PDA.Addr->getRegRef(*this);
+#ifndef NDEBUG
+ // Assert if the register is defined in two or more unrelated defs.
+ // This could happen if there are two or more def operands defining it.
+ if (!Defined.insert(RR.Reg).second) {
+ MachineInstr *MI = Stmt(IA).Addr->getCode();
+ dbgs() << "Multiple definitions of register: " << Print(RR, *this)
+ << " in\n " << *MI << "in " << printMBBReference(*MI->getParent())
+ << '\n';
+ llvm_unreachable(nullptr);
+ }
+#endif
+ // Push the definition on the stack for the register and all aliases.
+ // The def stack traversal in linkNodeUp will check the exact aliasing.
+ DefM[RR.Reg].push(DA);
+ for (RegisterId A : getPRI().getAliasSet(RR.Reg)) {
+ if (RegisterRef::isRegId(A) && !isTracked(RegisterRef(A)))
+ continue;
+ // Check that we don't push the same def twice.
+ assert(A != RR.Reg);
+ DefM[A].push(DA);
+ }
+ // Mark all the related defs as visited.
+ for (Node T : Rel)
+ Visited.insert(T.Id);
+ }
+}
+
+// Return the list of all reference nodes related to RA, including RA itself.
+// See "getNextRelated" for the meaning of a "related reference".
+NodeList DataFlowGraph::getRelatedRefs(Instr IA, Ref RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ NodeList Refs;
+ NodeId Start = RA.Id;
+ do {
+ Refs.push_back(RA);
+ RA = getNextRelated(IA, RA);
+ } while (RA.Id != 0 && RA.Id != Start);
+ return Refs;
+}
+
+// Clear all information in the graph.
+void DataFlowGraph::reset() {
+ Memory.clear();
+ BlockNodes.clear();
+ TrackedUnits.clear();
+ ReservedRegs.clear();
+ TheFunc = Func();
+}
+
+// Return the next reference node in the instruction node IA that is related
+// to RA. Conceptually, two reference nodes are related if they refer to the
+// same instance of a register access, but differ in flags or other minor
+// characteristics. Specific examples of related nodes are shadow reference
+// nodes.
+// Return the equivalent of nullptr if there are no more related references.
+Ref DataFlowGraph::getNextRelated(Instr IA, Ref RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ auto IsRelated = [this, RA](Ref TA) -> bool {
+ if (TA.Addr->getKind() != RA.Addr->getKind())
+ return false;
+ if (!getPRI().equal_to(TA.Addr->getRegRef(*this),
+ RA.Addr->getRegRef(*this))) {
+ return false;
+ }
+ return true;
+ };
+
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) {
+ auto Cond = [&IsRelated, RA](Ref TA) -> bool {
+ return IsRelated(TA) && &RA.Addr->getOp() == &TA.Addr->getOp();
+ };
+ return RA.Addr->getNextRef(RR, Cond, true, *this);
+ }
+
+ assert(IA.Addr->getKind() == NodeAttrs::Phi);
+ auto Cond = [&IsRelated, RA](Ref TA) -> bool {
+ if (!IsRelated(TA))
+ return false;
+ if (TA.Addr->getKind() != NodeAttrs::Use)
+ return true;
+ // For phi uses, compare predecessor blocks.
+ return PhiUse(TA).Addr->getPredecessor() ==
+ PhiUse(RA).Addr->getPredecessor();
+ };
+ return RA.Addr->getNextRef(RR, Cond, true, *this);
+}
+
+// Find the next node related to RA in IA that satisfies condition P.
+// If such a node was found, return a pair where the second element is the
+// located node. If such a node does not exist, return a pair where the
+// first element is the element after which such a node should be inserted,
+// and the second element is a null-address.
+template <typename Predicate>
+std::pair<Ref, Ref> DataFlowGraph::locateNextRef(Instr IA, Ref RA,
+ Predicate P) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ Ref NA;
+ NodeId Start = RA.Id;
+ while (true) {
+ NA = getNextRelated(IA, RA);
+ if (NA.Id == 0 || NA.Id == Start)
+ break;
+ if (P(NA))
+ break;
+ RA = NA;
+ }
+
+ if (NA.Id != 0 && NA.Id != Start)
+ return std::make_pair(RA, NA);
+ return std::make_pair(RA, Ref());
+}
+
+// Get the next shadow node in IA corresponding to RA, and optionally create
+// such a node if it does not exist.
+Ref DataFlowGraph::getNextShadow(Instr IA, Ref RA, bool Create) {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+ auto IsShadow = [Flags](Ref TA) -> bool {
+ return TA.Addr->getFlags() == Flags;
+ };
+ auto Loc = locateNextRef(IA, RA, IsShadow);
+ if (Loc.second.Id != 0 || !Create)
+ return Loc.second;
+
+  // Create a copy of RA and mark it as a shadow.
+ Ref NA = cloneNode(RA);
+ NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
+ IA.Addr->addMemberAfter(Loc.first, NA, *this);
+ return NA;
+}
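+
+// Editorial note (not part of the patch): a "shadow" reference is an extra
+// copy of a def or use added so that a single machine operand can be linked
+// to more than one reaching def when different parts of the register are
+// covered by different defs; getNextShadow finds such a copy and, if
+// requested, creates it.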
+
+// Create a new statement node in the block node BA that corresponds to
+// the machine instruction MI.
+void DataFlowGraph::buildStmt(Block BA, MachineInstr &In) {
+ Stmt SA = newStmt(BA, &In);
+
+ auto isCall = [](const MachineInstr &In) -> bool {
+ if (In.isCall())
+ return true;
+ // Is tail call?
+ if (In.isBranch()) {
+ for (const MachineOperand &Op : In.operands())
+ if (Op.isGlobal() || Op.isSymbol())
+ return true;
+ // Assume indirect branches are calls. This is for the purpose of
+ // keeping implicit operands, and so it won't hurt on intra-function
+ // indirect branches.
+ if (In.isIndirectBranch())
+ return true;
+ }
+ return false;
+ };
+
+ auto isDefUndef = [this](const MachineInstr &In, RegisterRef DR) -> bool {
+ // This instruction defines DR. Check if there is a use operand that
+ // would make DR live on entry to the instruction.
+ for (const MachineOperand &Op : In.all_uses()) {
+ if (Op.getReg() == 0 || Op.isUndef())
+ continue;
+ RegisterRef UR = makeRegRef(Op);
+ if (getPRI().alias(DR, UR))
+ return false;
+ }
+ return true;
+ };
+
+ bool IsCall = isCall(In);
+ unsigned NumOps = In.getNumOperands();
+
+ // Avoid duplicate implicit defs. This will not detect cases of implicit
+ // defs that define registers that overlap, but it is not clear how to
+ // interpret that in the absence of explicit defs. Overlapping explicit
+ // defs are likely illegal already.
+ BitVector DoneDefs(TRI.getNumRegs());
+ // Process explicit defs first.
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ continue;
+ Register R = Op.getReg();
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)))
+ continue;
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isPreserving(In, OpN)) {
+ Flags |= NodeAttrs::Preserving;
+ // If the def is preserving, check if it is also undefined.
+ if (isDefUndef(In, makeRegRef(Op)))
+ Flags |= NodeAttrs::Undef;
+ }
+ if (TOI.isClobbering(In, OpN))
+ Flags |= NodeAttrs::Clobbering;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ if (IsCall && Op.isDead())
+ Flags |= NodeAttrs::Dead;
+ Def DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+ assert(!DoneDefs.test(R));
+ DoneDefs.set(R);
+ }
+
+ // Process reg-masks (as clobbers).
+ BitVector DoneClobbers(TRI.getNumRegs());
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isRegMask())
+ continue;
+ uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed | NodeAttrs::Dead;
+ Def DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+    // Record all clobbered registers in DoneClobbers.
+ const uint32_t *RM = Op.getRegMask();
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (!isTracked(RegisterRef(i)))
+ continue;
+ if (!(RM[i / 32] & (1u << (i % 32))))
+ DoneClobbers.set(i);
+ }
+ }
+
+ // Process implicit defs, skipping those that have already been added
+ // as explicit.
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
+ continue;
+ Register R = Op.getReg();
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)) || DoneDefs.test(R))
+ continue;
+ RegisterRef RR = makeRegRef(Op);
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isPreserving(In, OpN)) {
+ Flags |= NodeAttrs::Preserving;
+ // If the def is preserving, check if it is also undefined.
+ if (isDefUndef(In, RR))
+ Flags |= NodeAttrs::Undef;
+ }
+ if (TOI.isClobbering(In, OpN))
+ Flags |= NodeAttrs::Clobbering;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ if (IsCall && Op.isDead()) {
+ if (DoneClobbers.test(R))
+ continue;
+ Flags |= NodeAttrs::Dead;
+ }
+ Def DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+ DoneDefs.set(R);
+ }
+
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ Register R = Op.getReg();
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)))
+ continue;
+ uint16_t Flags = NodeAttrs::None;
+ if (Op.isUndef())
+ Flags |= NodeAttrs::Undef;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ Use UA = newUse(SA, Op, Flags);
+ SA.Addr->addMember(UA, *this);
+ }
+}
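+
+// Editorial note (not part of the patch): buildStmt processes operands in a
+// fixed order -- explicit defs, reg-mask clobbers, implicit defs not already
+// covered, then uses -- so that DoneDefs can suppress duplicate defs of the
+// same physical register and DoneClobbers can skip dead implicit defs of
+// registers already clobbered by a register mask.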
+
+// Scan all defs in the block node BA and record in PhiM the locations of
+// phi nodes corresponding to these defs.
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) {
+ // Check all defs from block BA and record them in each block in BA's
+ // iterated dominance frontier. This information will later be used to
+ // create phi nodes.
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ assert(BB);
+ auto DFLoc = MDF.find(BB);
+ if (DFLoc == MDF.end() || DFLoc->second.empty())
+ return;
+
+ // Traverse all instructions in the block and collect the set of all
+ // defined references. For each reference there will be a phi created
+ // in the block's iterated dominance frontier.
+ // This is done to make sure that each defined reference gets only one
+ // phi node, even if it is defined multiple times.
+ RegisterAggr Defs(getPRI());
+ for (Instr IA : BA.Addr->members(*this)) {
+ for (Ref RA : IA.Addr->members_if(IsDef, *this)) {
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+ if (RR.isReg() && isTracked(RR))
+ Defs.insert(RR);
+ }
+ }
+
+ // Calculate the iterated dominance frontier of BB.
+ const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
+ SetVector<MachineBasicBlock *> IDF(DF.begin(), DF.end());
+ for (unsigned i = 0; i < IDF.size(); ++i) {
+ auto F = MDF.find(IDF[i]);
+ if (F != MDF.end())
+ IDF.insert(F->second.begin(), F->second.end());
+ }
+
+ // Finally, add the set of defs to each block in the iterated dominance
+ // frontier.
+ for (auto *DB : IDF) {
+ Block DBA = findBlock(DB);
+ PhiM[DBA.Id].insert(Defs);
+ }
+}
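+
+// Note on the loop above: it computes IDF(BB) as the least fixed point of
+// X = DF(BB) u DF(X). Indexing the SetVector by position (rather than with
+// iterators) is what makes this work: blocks inserted during the traversal
+// are appended at the end and are themselves processed in a later iteration.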
+
+// Given the locations of phi nodes in the map PhiM, create the phi nodes
+// that are located in the block node BA.
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) {
+ // Check if this block has any DF defs, i.e. if there are any defs
+ // that this block is in the iterated dominance frontier of.
+ auto HasDF = PhiM.find(BA.Id);
+ if (HasDF == PhiM.end() || HasDF->second.empty())
+ return;
+
+ // Prepare a list of NodeIds of the block's predecessors.
+ NodeList Preds;
+ const MachineBasicBlock *MBB = BA.Addr->getCode();
+ for (MachineBasicBlock *PB : MBB->predecessors())
+ Preds.push_back(findBlock(PB));
+
+ const RegisterAggr &Defs = PhiM[BA.Id];
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+
+ for (RegisterRef RR : Defs.refs()) {
+ Phi PA = newPhi(BA);
+ PA.Addr->addMember(newDef(PA, RR, PhiFlags), *this);
+
+ // Add phi uses.
+ for (Block PBA : Preds) {
+ PA.Addr->addMember(newPhiUse(PA, RR, PBA), *this);
+ }
+ }
+}
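+
+// Schematically, a phi created above for a register R in a block with two
+// predecessors P1 and P2 has the form
+//
+//   phi: def(R), use(R, pred=P1), use(R, pred=P2)
+//
+// i.e. a single def of R plus one phi use per predecessor; the phi uses are
+// connected to their reaching defs later, in linkBlockRefs.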
+
+// Remove any unneeded phi nodes that were created during the build process.
+void DataFlowGraph::removeUnusedPhis() {
+ // This will remove unused phis, i.e. phis where each def does not reach
+ // any uses or other defs. This will not detect or remove circular phi
+ // chains that are otherwise dead. Unused/dead phis are created during
+ // the build process, and this function is intended to remove the cases
+ // that are easy to determine to be unnecessary.
+
+ SetVector<NodeId> PhiQ;
+ for (Block BA : TheFunc.Addr->members(*this)) {
+ for (auto P : BA.Addr->members_if(IsPhi, *this))
+ PhiQ.insert(P.Id);
+ }
+
+ static auto HasUsedDef = [](NodeList &Ms) -> bool {
+ for (Node M : Ms) {
+ if (M.Addr->getKind() != NodeAttrs::Def)
+ continue;
+ Def DA = M;
+ if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
+ return true;
+ }
+ return false;
+ };
+
+ // Any phi, if it is removed, may affect other phis (make them dead).
+ // For each removed phi, collect the potentially affected phis and add
+ // them back to the queue.
+ while (!PhiQ.empty()) {
+ auto PA = addr<PhiNode *>(PhiQ[0]);
+ PhiQ.remove(PA.Id);
+ NodeList Refs = PA.Addr->members(*this);
+ if (HasUsedDef(Refs))
+ continue;
+ for (Ref RA : Refs) {
+ if (NodeId RD = RA.Addr->getReachingDef()) {
+ auto RDA = addr<DefNode *>(RD);
+ Instr OA = RDA.Addr->getOwner(*this);
+ if (IsPhi(OA))
+ PhiQ.insert(OA.Id);
+ }
+ if (RA.Addr->isDef())
+ unlinkDef(RA, true);
+ else
+ unlinkUse(RA, true);
+ }
+ Block BA = PA.Addr->getOwner(*this);
+ BA.Addr->removeMember(PA, *this);
+ }
+}
+
+// For a given reference node TA in an instruction node IA, connect the
+// reaching def of TA to the appropriate def node. Create any shadow nodes
+// as appropriate.
+template <typename T>
+void DataFlowGraph::linkRefUp(Instr IA, NodeAddr<T> TA, DefStack &DS) {
+ if (DS.empty())
+ return;
+ RegisterRef RR = TA.Addr->getRegRef(*this);
+ NodeAddr<T> TAP;
+
+ // References from the def stack that have been examined so far.
+ RegisterAggr Defs(getPRI());
+
+ for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
+ RegisterRef QR = I->Addr->getRegRef(*this);
+
+ // Skip all defs that are aliased to any of the defs that we have already
+ // seen. If this completes a cover of RR, stop the stack traversal.
+ bool Alias = Defs.hasAliasOf(QR);
+ bool Cover = Defs.insert(QR).hasCoverOf(RR);
+ if (Alias) {
+ if (Cover)
+ break;
+ continue;
+ }
+
+ // The reaching def.
+ Def RDA = *I;
+
+ // Pick the reached node.
+ if (TAP.Id == 0) {
+ TAP = TA;
+ } else {
+ // Mark the existing ref as "shadow" and create a new shadow.
+ TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow);
+ TAP = getNextShadow(IA, TAP, true);
+ }
+
+ // Create the link.
+ TAP.Addr->linkToDef(TAP.Id, RDA);
+
+ if (Cover)
+ break;
+ }
+}
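+
+// Illustrative example of the shadow handling above: assume a use of a
+// register pair R1:0 whose halves are defined by separate instructions,
+//
+//   d1: def(R0)
+//   d2: def(R1)
+//   u3: use(R1:0)
+//
+// Neither d1 nor d2 alone covers R1:0, so the original use is marked Shadow
+// and an additional shadow use is created; one copy is linked to d1 and the
+// other to d2, and the traversal stops once the collected defs cover the
+// original reference.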
+
+// Create data-flow links for all reference nodes in the statement node SA.
+template <typename Predicate>
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P) {
+#ifndef NDEBUG
+ RegisterSet Defs(getPRI());
+#endif
+
+ // Link all nodes (upwards in the data-flow) with their reaching defs.
+ for (Ref RA : SA.Addr->members_if(P, *this)) {
+ uint16_t Kind = RA.Addr->getKind();
+ assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+#ifndef NDEBUG
+ // Do not expect multiple defs of the same reference.
+ assert(Kind != NodeAttrs::Def || !Defs.count(RR));
+ Defs.insert(RR);
+#endif
+
+ auto F = DefM.find(RR.Reg);
+ if (F == DefM.end())
+ continue;
+ DefStack &DS = F->second;
+ if (Kind == NodeAttrs::Use)
+ linkRefUp<UseNode *>(SA, RA, DS);
+ else if (Kind == NodeAttrs::Def)
+ linkRefUp<DefNode *>(SA, RA, DS);
+ else
+ llvm_unreachable("Unexpected node in instruction");
+ }
+}
+
+// Create data-flow links for all instructions in the block node BA. This
+// will include updating any phi nodes in BA.
+void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) {
+ // Push block delimiters.
+ markBlock(BA.Id, DefM);
+
+ auto IsClobber = [](Ref RA) -> bool {
+ return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering);
+ };
+ auto IsNoClobber = [](Ref RA) -> bool {
+ return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering);
+ };
+
+ assert(BA.Addr && "block node address is needed to create a data-flow link");
+ // For each non-phi instruction in the block, link all the defs and uses
+ // to their reaching defs. For any member of the block (including phis),
+ // push the defs on the corresponding stacks.
+ for (Instr IA : BA.Addr->members(*this)) {
+ // Ignore phi nodes here. They will be linked part by part from the
+ // predecessors.
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) {
+ linkStmtRefs(DefM, IA, IsUse);
+ linkStmtRefs(DefM, IA, IsClobber);
+ }
+
+ // Push the definitions on the stack.
+ pushClobbers(IA, DefM);
+
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ linkStmtRefs(DefM, IA, IsNoClobber);
+
+ pushDefs(IA, DefM);
+ }
+
+ // Recursively process all children in the dominator tree.
+ MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+ for (auto *I : *N) {
+ MachineBasicBlock *SB = I->getBlock();
+ Block SBA = findBlock(SB);
+ linkBlockRefs(DefM, SBA);
+ }
+
+ // Link the phi uses from the successor blocks.
+ auto IsUseForBA = [BA](Node NA) -> bool {
+ if (NA.Addr->getKind() != NodeAttrs::Use)
+ return false;
+ assert(NA.Addr->getFlags() & NodeAttrs::PhiRef);
+ return PhiUse(NA).Addr->getPredecessor() == BA.Id;
+ };
+
+ RegisterAggr EHLiveIns = getLandingPadLiveIns();
+ MachineBasicBlock *MBB = BA.Addr->getCode();
+
+ for (MachineBasicBlock *SB : MBB->successors()) {
+ bool IsEHPad = SB->isEHPad();
+ Block SBA = findBlock(SB);
+ for (Instr IA : SBA.Addr->members_if(IsPhi, *this)) {
+ // Do not link phi uses for landing pad live-ins.
+ if (IsEHPad) {
+ // Find what register this phi is for.
+ Ref RA = IA.Addr->getFirstMember(*this);
+ assert(RA.Id != 0);
+ if (EHLiveIns.hasCoverOf(RA.Addr->getRegRef(*this)))
+ continue;
+ }
+ // Go over each phi use associated with MBB, and link it.
+ for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
+ PhiUse PUA = U;
+ RegisterRef RR = PUA.Addr->getRegRef(*this);
+ linkRefUp<UseNode *>(IA, PUA, DefM[RR.Reg]);
+ }
+ }
+ }
+
+ // Pop all defs from this block from the definition stacks.
+ releaseBlock(BA.Id, DefM);
+}
+
+// Remove the use node UA from any data-flow and structural links.
+void DataFlowGraph::unlinkUseDF(Use UA) {
+ NodeId RD = UA.Addr->getReachingDef();
+ NodeId Sib = UA.Addr->getSibling();
+
+ if (RD == 0) {
+ assert(Sib == 0);
+ return;
+ }
+
+ auto RDA = addr<DefNode *>(RD);
+ auto TA = addr<UseNode *>(RDA.Addr->getReachedUse());
+ if (TA.Id == UA.Id) {
+ RDA.Addr->setReachedUse(Sib);
+ return;
+ }
+
+ while (TA.Id != 0) {
+ NodeId S = TA.Addr->getSibling();
+ if (S == UA.Id) {
+ TA.Addr->setSibling(UA.Addr->getSibling());
+ return;
+ }
+ TA = addr<UseNode *>(S);
+ }
+}
+
+// Remove the def node DA from any data-flow and structural links.
+void DataFlowGraph::unlinkDefDF(Def DA) {
+ //
+ // RD
+ // | reached
+ // | def
+ // :
+ // .
+ // +----+
+ // ... -- | DA | -- ... -- 0 : sibling chain of DA
+ // +----+
+ // | | reached
+ // | : def
+ // | .
+ // | ... : Siblings (defs)
+ // |
+ // : reached
+ // . use
+ // ... : sibling chain of reached uses
+
+ NodeId RD = DA.Addr->getReachingDef();
+
+ // Visit all siblings of the reached def and reset their reaching defs.
+ // Also, defs reached by DA are now "promoted" to being reached by RD,
+ // so all of them will need to be spliced into the sibling chain where
+ // DA belongs.
+ auto getAllNodes = [this](NodeId N) -> NodeList {
+ NodeList Res;
+ while (N) {
+ auto RA = addr<RefNode *>(N);
+ // Keep the nodes in the exact sibling order.
+ Res.push_back(RA);
+ N = RA.Addr->getSibling();
+ }
+ return Res;
+ };
+ NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef());
+ NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse());
+
+ if (RD == 0) {
+ for (Ref I : ReachedDefs)
+ I.Addr->setSibling(0);
+ for (Ref I : ReachedUses)
+ I.Addr->setSibling(0);
+ }
+ for (Def I : ReachedDefs)
+ I.Addr->setReachingDef(RD);
+ for (Use I : ReachedUses)
+ I.Addr->setReachingDef(RD);
+
+ NodeId Sib = DA.Addr->getSibling();
+ if (RD == 0) {
+ assert(Sib == 0);
+ return;
+ }
+
+ // Update the reaching def node and remove DA from the sibling list.
+ auto RDA = addr<DefNode *>(RD);
+ auto TA = addr<DefNode *>(RDA.Addr->getReachedDef());
+ if (TA.Id == DA.Id) {
+ // If DA is the first reached def, just update the RD's reached def
+ // to the DA's sibling.
+ RDA.Addr->setReachedDef(Sib);
+ } else {
+ // Otherwise, traverse the sibling list of the reached defs and remove
+ // DA from it.
+ while (TA.Id != 0) {
+ NodeId S = TA.Addr->getSibling();
+ if (S == DA.Id) {
+ TA.Addr->setSibling(Sib);
+ break;
+ }
+ TA = addr<DefNode *>(S);
+ }
+ }
+
+ // Splice the DA's reached defs into the RDA's reached def chain.
+ if (!ReachedDefs.empty()) {
+ auto Last = Def(ReachedDefs.back());
+ Last.Addr->setSibling(RDA.Addr->getReachedDef());
+ RDA.Addr->setReachedDef(ReachedDefs.front().Id);
+ }
+ // Splice the DA's reached uses into the RDA's reached use chain.
+ if (!ReachedUses.empty()) {
+ auto Last = Use(ReachedUses.back());
+ Last.Addr->setSibling(RDA.Addr->getReachedUse());
+ RDA.Addr->setReachedUse(ReachedUses.front().Id);
+ }
+}
+
+bool DataFlowGraph::isTracked(RegisterRef RR) const {
+ return !disjoint(getPRI().getUnits(RR), TrackedUnits);
+}
+
+bool DataFlowGraph::hasUntrackedRef(Stmt S, bool IgnoreReserved) const {
+ SmallVector<MachineOperand *> Ops;
+
+ for (Ref R : S.Addr->members(*this)) {
+ Ops.push_back(&R.Addr->getOp());
+ RegisterRef RR = R.Addr->getRegRef(*this);
+ if (IgnoreReserved && RR.isReg() && ReservedRegs[RR.idx()])
+ continue;
+ if (!isTracked(RR))
+ return true;
+ }
+ for (const MachineOperand &Op : S.Addr->getCode()->operands()) {
+ if (!Op.isReg() && !Op.isRegMask())
+ continue;
+ if (llvm::find(Ops, &Op) == Ops.end())
+ return true;
+ }
+ return false;
+}
+
+} // end namespace llvm::rdf
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
new file mode 100644
index 000000000000..11f3fedaa5f9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -0,0 +1,1177 @@
+//===- RDFLiveness.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Computation of the liveness information from the data-flow graph.
+//
+// The main functionality of this code is to compute block live-in
+// information. With the live-in information in place, the placement
+// of kill flags can also be recalculated.
+//
+// The block live-in calculation is based on the ideas from the following
+// publication:
+//
+// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin.
+// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs."
+// ACM Transactions on Architecture and Code Optimization, Association for
+// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance
+// and Embedded Architectures and Compilers", 8 (4),
+// <10.1145/2086696.2086706>. <hal-00647369>
+//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
+ cl::Hidden,
+ cl::desc("Maximum recursion level"));
+
+namespace llvm::rdf {
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P) {
+ OS << '{';
+ for (const auto &I : P.Obj) {
+ OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
+ for (auto J = I.second.begin(), E = I.second.end(); J != E;) {
+ OS << Print(J->first, P.G) << PrintLaneMaskShort(J->second);
+ if (++J != E)
+ OS << ',';
+ }
+ OS << '}';
+ }
+ OS << " }";
+ return OS;
+}
+
+// The order in the returned sequence is the order of reaching defs in the
+// upward traversal: the first def is the closest to the given reference RefA,
+// the next one is further up, and so on.
+// The list ends at a reaching phi def, or when the reference from RefA is
+// covered by the defs in the list (see FullChain).
+// This function provides two modes of operation:
+// (1) Returning the sequence of reaching defs for a particular reference
+// node. This sequence will terminate at the first phi node [1].
+// (2) Returning a partial sequence of reaching defs, where the final goal
+// is to traverse past phi nodes to the actual defs arising from the code
+// itself.
+// In mode (2), the register reference for which the search was started
+// may be different from the reference node RefA, for which this call was
+// made, hence the argument RefRR, which holds the original register.
+// Also, some definitions may have already been encountered in a previous
+// call that will influence register covering. The register references
+// already defined are passed in through DefRRs.
+// In mode (1), the "continuation" considerations do not apply, and the
+// RefRR is the same as the register in RefA, and the set DefRRs is empty.
+//
+// [1] It is possible for multiple phi nodes to be included in the returned
+// sequence:
+// SubA = phi ...
+// SubB = phi ...
+// ... = SuperAB(rdef:SubA), SuperAB"(rdef:SubB)
+// However, these phi nodes are independent from one another in terms of
+// the data-flow.
+
+NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
+ NodeAddr<RefNode *> RefA, bool TopShadows,
+ bool FullChain,
+ const RegisterAggr &DefRRs) {
+ NodeList RDefs; // Return value.
+ SetVector<NodeId> DefQ;
+ DenseMap<MachineInstr *, uint32_t> OrdMap;
+
+ // Dead defs will be treated as if they were live, since they are actually
+ // on the data-flow path. They cannot be ignored because even though they
+ // do not generate meaningful values, they still modify registers.
+
+ // If the reference is undefined, there is nothing to do.
+ if (RefA.Addr->getFlags() & NodeAttrs::Undef)
+ return RDefs;
+
+ // The initial queue should not have reaching defs for shadows. The
+ // whole point of a shadow is that it will have a reaching def that
+ // is not aliased to the reaching defs of the related shadows.
+ NodeId Start = RefA.Id;
+ auto SNA = DFG.addr<RefNode *>(Start);
+ if (NodeId RD = SNA.Addr->getReachingDef())
+ DefQ.insert(RD);
+ if (TopShadows) {
+ for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA))
+ if (NodeId RD = NodeAddr<RefNode *>(S).Addr->getReachingDef())
+ DefQ.insert(RD);
+ }
+
+ // Collect all the reaching defs, going up until a phi node is encountered,
+ // or there are no more reaching defs. From this set, the actual set of
+ // reaching defs will be selected.
+ // The traversal upwards must go on until a covering def is encountered.
+ // It is possible that a collection of non-covering (individually) defs
+ // will be sufficient, but keep going until a covering one is found.
+ for (unsigned i = 0; i < DefQ.size(); ++i) {
+ auto TA = DFG.addr<DefNode *>(DefQ[i]);
+ if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
+ continue;
+ // Stop at the covering/overwriting def of the initial register reference.
+ RegisterRef RR = TA.Addr->getRegRef(DFG);
+ if (!DFG.IsPreservingDef(TA))
+ if (RegisterAggr::isCoverOf(RR, RefRR, PRI))
+ continue;
+ // Get the next level of reaching defs. This will include multiple
+ // reaching defs for shadows.
+ for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
+ if (NodeId RD = NodeAddr<RefNode *>(S).Addr->getReachingDef())
+ DefQ.insert(RD);
+ // Don't visit sibling defs. They share the same reaching def (which
+ // will be visited anyway), but they define something not aliased to
+ // this ref.
+ }
+
+ // Return the MachineBasicBlock containing a given instruction.
+ auto Block = [this](NodeAddr<InstrNode *> IA) -> MachineBasicBlock * {
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ return NodeAddr<StmtNode *>(IA).Addr->getCode()->getParent();
+ assert(IA.Addr->getKind() == NodeAttrs::Phi);
+ NodeAddr<PhiNode *> PA = IA;
+ NodeAddr<BlockNode *> BA = PA.Addr->getOwner(DFG);
+ return BA.Addr->getCode();
+ };
+
+ SmallSet<NodeId, 32> Defs;
+
+ // Remove all non-phi defs that are not aliased to RefRR, and separate
+ // the remaining defs into buckets by their containing blocks.
+ std::map<NodeId, NodeAddr<InstrNode *>> Owners;
+ std::map<MachineBasicBlock *, SmallVector<NodeId, 32>> Blocks;
+ for (NodeId N : DefQ) {
+ auto TA = DFG.addr<DefNode *>(N);
+ bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+ if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
+ continue;
+ Defs.insert(TA.Id);
+ NodeAddr<InstrNode *> IA = TA.Addr->getOwner(DFG);
+ Owners[TA.Id] = IA;
+ Blocks[Block(IA)].push_back(IA.Id);
+ }
+
+ auto Precedes = [this, &OrdMap](NodeId A, NodeId B) {
+ if (A == B)
+ return false;
+ NodeAddr<InstrNode *> OA = DFG.addr<InstrNode *>(A);
+ NodeAddr<InstrNode *> OB = DFG.addr<InstrNode *>(B);
+ bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
+ bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
+ if (StmtA && StmtB) {
+ const MachineInstr *InA = NodeAddr<StmtNode *>(OA).Addr->getCode();
+ const MachineInstr *InB = NodeAddr<StmtNode *>(OB).Addr->getCode();
+ assert(InA->getParent() == InB->getParent());
+ auto FA = OrdMap.find(InA);
+ if (FA != OrdMap.end())
+ return FA->second < OrdMap.find(InB)->second;
+ const MachineBasicBlock *BB = InA->getParent();
+ for (auto It = BB->begin(), E = BB->end(); It != E; ++It) {
+ if (It == InA->getIterator())
+ return true;
+ if (It == InB->getIterator())
+ return false;
+ }
+ llvm_unreachable("InA and InB should be in the same block");
+ }
+ // One of them is a phi node.
+ if (!StmtA && !StmtB) {
+ // Both are phis, which are unordered. Break the tie by id numbers.
+ return A < B;
+ }
+ // Only one of them is a phi. Phis always precede statements.
+ return !StmtA;
+ };
+
+ auto GetOrder = [&OrdMap](MachineBasicBlock &B) {
+ uint32_t Pos = 0;
+ for (MachineInstr &In : B)
+ OrdMap.insert({&In, ++Pos});
+ };
+
+ // For each block, sort the nodes in it.
+ std::vector<MachineBasicBlock *> TmpBB;
+ for (auto &Bucket : Blocks) {
+ TmpBB.push_back(Bucket.first);
+ if (Bucket.second.size() > 2)
+ GetOrder(*Bucket.first);
+ llvm::sort(Bucket.second, Precedes);
+ }
+
+ // Sort the blocks with respect to dominance.
+ llvm::sort(TmpBB,
+ [this](auto A, auto B) { return MDT.properlyDominates(A, B); });
+
+ std::vector<NodeId> TmpInst;
+ for (MachineBasicBlock *MBB : llvm::reverse(TmpBB)) {
+ auto &Bucket = Blocks[MBB];
+ TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend());
+ }
+
+ // The vector is a list of instructions, so that defs coming from
+ // the same instruction don't need to be artificially ordered.
+ // Then, when computing the initial segment, and iterating over an
+ // instruction, pick the defs that contribute to the covering (i.e. are
+ // not covered by previously added defs). Check the defs individually,
+ // i.e. first check whether each def is covered or not (without adding
+ // them to the tracking set), and then add all the selected ones.
+
+ // The reason for this is this example:
+ // *d1<A>, *d2<B>, ... Assume A and B are aliased (can happen in phi nodes).
+ // *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be
+ // covered if we added A first, and A would be covered
+ // if we added B first.
+ // In this example we want both A and B, because we don't want to give
+ // either one priority over the other, since they belong to the same
+ // statement.
+
+ RegisterAggr RRs(DefRRs);
+
+ auto DefInSet = [&Defs](NodeAddr<RefNode *> TA) -> bool {
+ return TA.Addr->getKind() == NodeAttrs::Def && Defs.count(TA.Id);
+ };
+
+ for (NodeId T : TmpInst) {
+ if (!FullChain && RRs.hasCoverOf(RefRR))
+ break;
+ auto TA = DFG.addr<InstrNode *>(T);
+ bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
+ NodeList Ds;
+ for (NodeAddr<DefNode *> DA : TA.Addr->members_if(DefInSet, DFG)) {
+ RegisterRef QR = DA.Addr->getRegRef(DFG);
+ // Add phi defs even if they are covered by subsequent defs. This is
+ // for cases where the reached use is not covered by any of the defs
+ // encountered so far: the phi def is needed to expose the liveness
+ // of that use to the entry of the block.
+ // Example:
+ // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2.
+ // d2<R3>(d1,,u3), ...
+ // ..., u3<D1>(d2) This use needs to be live on entry.
+ if (FullChain || IsPhi || !RRs.hasCoverOf(QR))
+ Ds.push_back(DA);
+ }
+ llvm::append_range(RDefs, Ds);
+ for (NodeAddr<DefNode *> DA : Ds) {
+ // When collecting a full chain of definitions, do not consider phi
+ // defs to actually define a register.
+ uint16_t Flags = DA.Addr->getFlags();
+ if (!FullChain || !(Flags & NodeAttrs::PhiRef))
+ if (!(Flags & NodeAttrs::Preserving)) // Don't care about Undef here.
+ RRs.insert(DA.Addr->getRegRef(DFG));
+ }
+ }
+
+ auto DeadP = [](const NodeAddr<DefNode *> DA) -> bool {
+ return DA.Addr->getFlags() & NodeAttrs::Dead;
+ };
+ llvm::erase_if(RDefs, DeadP);
+
+ return RDefs;
+}
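+
+// For orientation, the two modes described above correspond to the calls
+// made later in this file:
+//
+//   // Mode (1): reaching defs of a single use node.
+//   NodeList Ds = getAllReachingDefs(UA.Addr->getRegRef(DFG), UA);
+//
+//   // Mode (2): a partial chain used when traversing past phi nodes,
+//   // scanning the related top shadows and carrying the original
+//   // register reference S.
+//   NodeList Ds = getAllReachingDefs(S, PUA, /*TopShadows=*/true,
+//                                    /*FullChain=*/false, NoRegs);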
+
+std::pair<NodeSet, bool>
+Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs) {
+ return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest);
+}
+
+std::pair<NodeSet, bool>
+Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest) {
+ if (Nest > MaxNest)
+ return {NodeSet(), false};
+ // Collect all defined registers. Do not consider phis to be defining
+ // anything, only collect "real" definitions.
+ RegisterAggr DefRRs(PRI);
+ for (NodeId D : Defs) {
+ const auto DA = DFG.addr<const DefNode *>(D);
+ if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
+ DefRRs.insert(DA.Addr->getRegRef(DFG));
+ }
+
+ NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs);
+ if (RDs.empty())
+ return {Defs, true};
+
+ // Make a copy of the preexisting definitions and add the newly found ones.
+ NodeSet TmpDefs = Defs;
+ for (NodeAddr<NodeBase *> R : RDs)
+ TmpDefs.insert(R.Id);
+
+ NodeSet Result = Defs;
+
+ for (NodeAddr<DefNode *> DA : RDs) {
+ Result.insert(DA.Id);
+ if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
+ continue;
+ NodeAddr<PhiNode *> PA = DA.Addr->getOwner(DFG);
+ if (!Visited.insert(PA.Id).second)
+ continue;
+ // Go over all phi uses and get the reaching defs for each use.
+ for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
+ Nest + 1, MaxNest);
+ if (!T.second)
+ return {T.first, false};
+ Result.insert(T.first.begin(), T.first.end());
+ }
+ }
+
+ return {Result, true};
+}
+
+/// Find the nearest ref node aliased to RefRR, going upwards in the data
+/// flow, starting from the instruction immediately preceding IA.
+NodeAddr<RefNode *> Liveness::getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode *> IA) {
+ NodeAddr<BlockNode *> BA = IA.Addr->getOwner(DFG);
+ NodeList Ins = BA.Addr->members(DFG);
+ NodeId FindId = IA.Id;
+ auto E = Ins.rend();
+ auto B =
+ std::find_if(Ins.rbegin(), E, [FindId](const NodeAddr<InstrNode *> T) {
+ return T.Id == FindId;
+ });
+ // Do not scan IA (which is what B would point to).
+ if (B != E)
+ ++B;
+
+ do {
+ // Process the range of instructions from B to E.
+ for (NodeAddr<InstrNode *> I : make_range(B, E)) {
+ NodeList Refs = I.Addr->members(DFG);
+ NodeAddr<RefNode *> Clob, Use;
+ // Scan all the refs in I aliased to RefRR, and return the one that
+ // is the closest to the output of I, i.e. def > clobber > use.
+ for (NodeAddr<RefNode *> R : Refs) {
+ if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR))
+ continue;
+ if (DFG.IsDef(R)) {
+ // If it's a non-clobbering def, just return it.
+ if (!(R.Addr->getFlags() & NodeAttrs::Clobbering))
+ return R;
+ Clob = R;
+ } else {
+ Use = R;
+ }
+ }
+ if (Clob.Id != 0)
+ return Clob;
+ if (Use.Id != 0)
+ return Use;
+ }
+
+ // Go up to the immediate dominator, if any.
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ BA = NodeAddr<BlockNode *>();
+ if (MachineDomTreeNode *N = MDT.getNode(BB)) {
+ if ((N = N->getIDom()))
+ BA = DFG.findBlock(N->getBlock());
+ }
+ if (!BA.Id)
+ break;
+
+ Ins = BA.Addr->members(DFG);
+ B = Ins.rbegin();
+ E = Ins.rend();
+ } while (true);
+
+ return NodeAddr<RefNode *>();
+}
+
+NodeSet Liveness::getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode *> DefA,
+ const RegisterAggr &DefRRs) {
+ NodeSet Uses;
+
+ // If the original register is already covered by all the intervening
+ // defs, no more uses can be reached.
+ if (DefRRs.hasCoverOf(RefRR))
+ return Uses;
+
+ // Add all directly reached uses.
+ // If the def is dead, it does not provide a value for any use.
+ bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead;
+ NodeId U = !IsDead ? DefA.Addr->getReachedUse() : 0;
+ while (U != 0) {
+ auto UA = DFG.addr<UseNode *>(U);
+ if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
+ RegisterRef UR = UA.Addr->getRegRef(DFG);
+ if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
+ Uses.insert(U);
+ }
+ U = UA.Addr->getSibling();
+ }
+
+ // Traverse all reached defs. This time dead defs cannot be ignored.
+ for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) {
+ auto DA = DFG.addr<DefNode *>(D);
+ NextD = DA.Addr->getSibling();
+ RegisterRef DR = DA.Addr->getRegRef(DFG);
+ // If this def is already covered, it cannot reach anything new.
+ // Similarly, skip it if it is not aliased to the interesting register.
+ if (DefRRs.hasCoverOf(DR) || !PRI.alias(RefRR, DR))
+ continue;
+ NodeSet T;
+ if (DFG.IsPreservingDef(DA)) {
+ // If it is a preserving def, do not update the set of intervening defs.
+ T = getAllReachedUses(RefRR, DA, DefRRs);
+ } else {
+ RegisterAggr NewDefRRs = DefRRs;
+ NewDefRRs.insert(DR);
+ T = getAllReachedUses(RefRR, DA, NewDefRRs);
+ }
+ Uses.insert(T.begin(), T.end());
+ }
+ return Uses;
+}
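+
+// Note: getAllReachedUses is the downward counterpart of getAllReachingDefs:
+// starting from a def, it walks the reached-use and reached-def chains,
+// accumulating intervening non-preserving defs in DefRRs so that uses whose
+// register is already fully covered by closer defs are not reported.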
+
+void Liveness::computePhiInfo() {
+ RealUseMap.clear();
+
+ NodeList Phis;
+ NodeAddr<FuncNode *> FA = DFG.getFunc();
+ NodeList Blocks = FA.Addr->members(DFG);
+ for (NodeAddr<BlockNode *> BA : Blocks) {
+ auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ llvm::append_range(Phis, Ps);
+ }
+
+ // phi use -> (map: reaching phi -> set of registers defined in between)
+ std::map<NodeId, std::map<NodeId, RegisterAggr>> PhiUp;
+ std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+ std::unordered_map<NodeId, RegisterAggr>
+ PhiDRs; // Phi -> registers defined by it.
+
+ // Go over all phis.
+ for (NodeAddr<PhiNode *> PhiA : Phis) {
+ // Go over all defs and collect the reached uses that are non-phi uses
+ // (i.e. the "real uses").
+ RefMap &RealUses = RealUseMap[PhiA.Id];
+ NodeList PhiRefs = PhiA.Addr->members(DFG);
+
+ // Have a work queue of defs whose reached uses need to be found.
+ // For each def, add to the queue all reached (non-phi) defs.
+ SetVector<NodeId> DefQ;
+ NodeSet PhiDefs;
+ RegisterAggr DRs(PRI);
+ for (NodeAddr<RefNode *> R : PhiRefs) {
+ if (!DFG.IsRef<NodeAttrs::Def>(R))
+ continue;
+ DRs.insert(R.Addr->getRegRef(DFG));
+ DefQ.insert(R.Id);
+ PhiDefs.insert(R.Id);
+ }
+ PhiDRs.insert(std::make_pair(PhiA.Id, DRs));
+
+ // Collect the super-set of all possible reached uses. This set will
+ // contain all uses reached from this phi, either directly from the
+ // phi defs, or (recursively) via non-phi defs reached by the phi defs.
+ // This set of uses will later be trimmed to only contain the uses that
+ // are actually reached by the phi defs.
+ for (unsigned i = 0; i < DefQ.size(); ++i) {
+ NodeAddr<DefNode *> DA = DFG.addr<DefNode *>(DefQ[i]);
+ // Visit all reached uses. Phi defs should not really have the "dead"
+ // flag set, but check it anyway for consistency.
+ bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead;
+ NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0;
+ while (UN != 0) {
+ NodeAddr<UseNode *> A = DFG.addr<UseNode *>(UN);
+ uint16_t F = A.Addr->getFlags();
+ if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
+ RegisterRef R = A.Addr->getRegRef(DFG);
+ RealUses[R.Reg].insert({A.Id, R.Mask});
+ }
+ UN = A.Addr->getSibling();
+ }
+ // Visit all reached defs, and add them to the queue. These defs may
+ // override some of the uses collected here, but that will be handled
+ // later.
+ NodeId DN = DA.Addr->getReachedDef();
+ while (DN != 0) {
+ NodeAddr<DefNode *> A = DFG.addr<DefNode *>(DN);
+ for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
+ uint16_t Flags = NodeAddr<DefNode *>(T).Addr->getFlags();
+ // Must traverse the reached-def chain. Consider:
+ // def(D0) -> def(R0) -> def(R0) -> use(D0)
+ // The reachable use of D0 passes through a def of R0.
+ if (!(Flags & NodeAttrs::PhiRef))
+ DefQ.insert(T.Id);
+ }
+ DN = A.Addr->getSibling();
+ }
+ }
+ // Filter out the uses that appear to be reachable, but really
+ // are not. For example:
+ //
+ // R1:0 = d1
+ // = R1:0 u2 Reached by d1.
+ // R0 = d3
+ // = R1:0 u4 Still reached by d1: indirectly through
+ // the def d3.
+ // R1 = d5
+ // = R1:0 u6 Not reached by d1 (covered collectively
+ // by d3 and d5), but following reached
+ // defs and uses from d1 will lead here.
+ for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE;) {
+ // For each reached register UI->first, there is a set UI->second, of
+ // uses of it. For each such use, check if it is reached by this phi,
+ // i.e. check if the set of its reaching uses intersects the set of
+ // this phi's defs.
+ NodeRefSet Uses = UI->second;
+ UI->second.clear();
+ for (std::pair<NodeId, LaneBitmask> I : Uses) {
+ auto UA = DFG.addr<UseNode *>(I.first);
+ // Undef flag is checked above.
+ assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
+ RegisterRef UseR(UI->first, I.second); // Ref from Uses
+ // R = intersection of the ref from the phi and the ref from Uses
+ RegisterRef R = PhiDRs.at(PhiA.Id).intersectWith(UseR);
+ if (!R)
+ continue;
+ // Calculate the exposed part of the reached use.
+ RegisterAggr Covered(PRI);
+ for (NodeAddr<DefNode *> DA : getAllReachingDefs(R, UA)) {
+ if (PhiDefs.count(DA.Id))
+ break;
+ Covered.insert(DA.Addr->getRegRef(DFG));
+ }
+ if (RegisterRef RC = Covered.clearIn(R)) {
+ // We are updating the map for register UI->first, so we need
+ // to map RC to be expressed in terms of that register.
+ RegisterRef S = PRI.mapTo(RC, UI->first);
+ UI->second.insert({I.first, S.Mask});
+ }
+ }
+ UI = UI->second.empty() ? RealUses.erase(UI) : std::next(UI);
+ }
+
+ // If this phi reaches some "real" uses, add it to the queue for upward
+ // propagation.
+ if (!RealUses.empty())
+ PhiUQ.push_back(PhiA.Id);
+
+ // Go over all phi uses and check if the reaching def is another phi.
+ // Collect the phis that are among the reaching defs of these uses.
+ // While traversing the list of reaching defs for each phi use, accumulate
+ // the set of registers defined between this phi (PhiA) and the owner phi
+ // of the reaching def.
+ NodeSet SeenUses;
+
+ for (auto I : PhiRefs) {
+ if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
+ continue;
+ NodeAddr<PhiUseNode *> PUA = I;
+ if (PUA.Addr->getReachingDef() == 0)
+ continue;
+
+ RegisterRef UR = PUA.Addr->getRegRef(DFG);
+ NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs);
+ RegisterAggr DefRRs(PRI);
+
+ for (NodeAddr<DefNode *> D : Ds) {
+ if (D.Addr->getFlags() & NodeAttrs::PhiRef) {
+ NodeId RP = D.Addr->getOwner(DFG).Id;
+ std::map<NodeId, RegisterAggr> &M = PhiUp[PUA.Id];
+ auto F = M.find(RP);
+ if (F == M.end())
+ M.insert(std::make_pair(RP, DefRRs));
+ else
+ F->second.insert(DefRRs);
+ }
+ DefRRs.insert(D.Addr->getRegRef(DFG));
+ }
+
+ for (NodeAddr<PhiUseNode *> T : DFG.getRelatedRefs(PhiA, PUA))
+ SeenUses.insert(T.Id);
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Phi-up-to-phi map with intervening defs:\n";
+ for (auto I : PhiUp) {
+ dbgs() << "phi " << Print(I.first, DFG) << " -> {";
+ for (auto R : I.second)
+ dbgs() << ' ' << Print(R.first, DFG) << Print(R.second, DFG);
+ dbgs() << " }\n";
+ }
+ }
+
+ // Propagate the reached registers up in the phi chain.
+ //
+ // The following type of situation needs careful handling:
+ //
+ // phi d1<R1:0> (1)
+ // |
+ // ... d2<R1>
+ // |
+ // phi u3<R1:0> (2)
+ // |
+ // ... u4<R1>
+ //
+ // The phi node (2) defines a register pair R1:0, and reaches a "real"
+ // use u4 of just R1. The same phi node is also known to reach (upwards)
+ // the phi node (1). However, the use u4 is not reached by phi (1),
+ // because of the intervening definition d2 of R1. The data flow between
+ // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
+ //
+ // When propagating uses up the phi chains, get all the reaching defs
+ // for a given phi use, and traverse the list until the propagated ref
+ // is covered, or until reaching the final phi. Only assume that the
+ // reference reaches the phi in the latter case.
+
+ // The operation "clearIn" can be expensive. For a given set of intervening
+ // defs, cache the result of subtracting these defs from a given register
+ // ref.
+ using RefHash = std::hash<RegisterRef>;
+ using RefEqual = std::equal_to<RegisterRef>;
+ using SubMap = std::unordered_map<RegisterRef, RegisterRef>;
+ std::unordered_map<RegisterAggr, SubMap> Subs;
+ auto ClearIn = [](RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
+ if (Mid.empty())
+ return RR;
+ auto F = SM.find(RR);
+ if (F != SM.end())
+ return F->second;
+ RegisterRef S = Mid.clearIn(RR);
+ SM.insert({RR, S});
+ return S;
+ };
+
+ // Go over all phis.
+ for (unsigned i = 0; i < PhiUQ.size(); ++i) {
+ auto PA = DFG.addr<PhiNode *>(PhiUQ[i]);
+ NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
+ RefMap &RUM = RealUseMap[PA.Id];
+
+ for (NodeAddr<UseNode *> UA : PUs) {
+ std::map<NodeId, RegisterAggr> &PUM = PhiUp[UA.Id];
+ RegisterRef UR = UA.Addr->getRegRef(DFG);
+ for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
+ bool Changed = false;
+ const RegisterAggr &MidDefs = P.second;
+ // Collect the set PropUp of uses that are reached by the current
+ // phi PA, and are not covered by any intervening def between the
+ // currently visited use UA and the upward phi P.
+
+ if (MidDefs.hasCoverOf(UR))
+ continue;
+ if (Subs.find(MidDefs) == Subs.end()) {
+ Subs.insert({MidDefs, SubMap(1, RefHash(), RefEqual(PRI))});
+ }
+ SubMap &SM = Subs.at(MidDefs);
+
+ // General algorithm:
+ // for each (R,U) : U is use node of R, U is reached by PA
+ // if MidDefs does not cover (R,U)
+ // then add (R-MidDefs,U) to RealUseMap[P]
+ //
+ for (const std::pair<const RegisterId, NodeRefSet> &T : RUM) {
+ RegisterRef R(T.first);
+ // The current phi (PA) could be a phi for a regmask. It could
+ // reach a whole variety of uses that are not related to the
+ // specific upward phi (P.first).
+ const RegisterAggr &DRs = PhiDRs.at(P.first);
+ if (!DRs.hasAliasOf(R))
+ continue;
+ R = PRI.mapTo(DRs.intersectWith(R), T.first);
+ for (std::pair<NodeId, LaneBitmask> V : T.second) {
+ LaneBitmask M = R.Mask & V.second;
+ if (M.none())
+ continue;
+ if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) {
+ NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
+ Changed |= RS.insert({V.first, SS.Mask}).second;
+ }
+ }
+ }
+
+ if (Changed)
+ PhiUQ.push_back(P.first);
+ }
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Real use map:\n";
+ for (auto I : RealUseMap) {
+ dbgs() << "phi " << Print(I.first, DFG);
+ NodeAddr<PhiNode *> PA = DFG.addr<PhiNode *>(I.first);
+ NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
+ if (!Ds.empty()) {
+ RegisterRef RR = NodeAddr<DefNode *>(Ds[0]).Addr->getRegRef(DFG);
+ dbgs() << '<' << Print(RR, DFG) << '>';
+ } else {
+ dbgs() << "<noreg>";
+ }
+ dbgs() << " -> " << Print(I.second, DFG) << '\n';
+ }
+ }
+}
+
+void Liveness::computeLiveIns() {
+ // Populate the node-to-block map. This speeds up the calculations
+ // significantly.
+ NBMap.clear();
+ for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ for (NodeAddr<InstrNode *> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode *> RA : IA.Addr->members(DFG))
+ NBMap.insert(std::make_pair(RA.Id, BB));
+ NBMap.insert(std::make_pair(IA.Id, BB));
+ }
+ }
+
+ MachineFunction &MF = DFG.getMF();
+
+ // Compute IDF first, then the inverse.
+ decltype(IIDF) IDF;
+ for (MachineBasicBlock &B : MF) {
+ auto F1 = MDF.find(&B);
+ if (F1 == MDF.end())
+ continue;
+ SetVector<MachineBasicBlock *> IDFB(F1->second.begin(), F1->second.end());
+ for (unsigned i = 0; i < IDFB.size(); ++i) {
+ auto F2 = MDF.find(IDFB[i]);
+ if (F2 != MDF.end())
+ IDFB.insert(F2->second.begin(), F2->second.end());
+ }
+ // Add B to the IDF(B). This will put B in the IIDF(B).
+ IDFB.insert(&B);
+ IDF[&B].insert(IDFB.begin(), IDFB.end());
+ }
+
+ for (auto I : IDF)
+ for (auto *S : I.second)
+ IIDF[S].insert(I.first);
+
+ computePhiInfo();
+
+ NodeAddr<FuncNode *> FA = DFG.getFunc();
+ NodeList Blocks = FA.Addr->members(DFG);
+
+ // Build the phi live-on-entry map.
+ for (NodeAddr<BlockNode *> BA : Blocks) {
+ MachineBasicBlock *MB = BA.Addr->getCode();
+ RefMap &LON = PhiLON[MB];
+ for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) {
+ for (const RefMap::value_type &S : RealUseMap[P.Id])
+ LON[S.first].insert(S.second.begin(), S.second.end());
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Phi live-on-entry map:\n";
+ for (auto &I : PhiLON)
+ dbgs() << "block #" << I.first->getNumber() << " -> "
+ << Print(I.second, DFG) << '\n';
+ }
+
+ // Build the phi live-on-exit map. Each phi node has some set of reached
+ // "real" uses. Propagate this set backwards into the block predecessors
+ // through the reaching defs of the corresponding phi uses.
+ for (NodeAddr<BlockNode *> BA : Blocks) {
+ NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ for (NodeAddr<PhiNode *> PA : Phis) {
+ RefMap &RUs = RealUseMap[PA.Id];
+ if (RUs.empty())
+ continue;
+
+ NodeSet SeenUses;
+ for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ if (!SeenUses.insert(U.Id).second)
+ continue;
+ NodeAddr<PhiUseNode *> PUA = U;
+ if (PUA.Addr->getReachingDef() == 0)
+ continue;
+
+ // Each phi has some set (possibly empty) of reached "real" uses,
+ // that is, uses that are part of the compiled program. Such a use
+ // may be located in some farther block, but following a chain of
+ // reaching defs will eventually lead to this phi.
+ // Any chain of reaching defs may fork at a phi node, but there
+ // will be a path upwards that will lead to this phi. Now, this
+ // chain will need to fork at this phi, since some of the reached
+ // uses may have definitions joining in from multiple predecessors.
+ // For each reached "real" use, identify the set of reaching defs
+ // coming from each predecessor P, and add them to PhiLOX[P].
+ //
+ auto PrA = DFG.addr<BlockNode *>(PUA.Addr->getPredecessor());
+ RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
+
+ for (const std::pair<const RegisterId, NodeRefSet> &RS : RUs) {
+ // We need to visit each individual use.
+ for (std::pair<NodeId, LaneBitmask> P : RS.second) {
+ // Create a register ref corresponding to the use, and find
+ // all reaching defs starting from the phi use, and treating
+ // all related shadows as a single use cluster.
+ RegisterRef S(RS.first, P.second);
+ NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
+ for (NodeAddr<DefNode *> D : Ds) {
+ // Calculate the mask corresponding to the visited def.
+ RegisterAggr TA(PRI);
+ TA.insert(D.Addr->getRegRef(DFG)).intersect(S);
+ LaneBitmask TM = TA.makeRegRef().Mask;
+ LOX[S.Reg].insert({D.Id, TM});
+ }
+ }
+ }
+
+ for (NodeAddr<PhiUseNode *> T : DFG.getRelatedRefs(PA, PUA))
+ SeenUses.insert(T.Id);
+ } // for U : phi uses
+ } // for P : Phis
+ } // for B : Blocks
+
+ if (Trace) {
+ dbgs() << "Phi live-on-exit map:\n";
+ for (auto &I : PhiLOX)
+ dbgs() << "block #" << I.first->getNumber() << " -> "
+ << Print(I.second, DFG) << '\n';
+ }
+
+ RefMap LiveIn;
+ traverse(&MF.front(), LiveIn);
+
+ // Add function live-ins to the live-in set of the function entry block.
+ LiveMap[&MF.front()].insert(DFG.getLiveIns());
+
+ if (Trace) {
+ // Dump the liveness map
+ for (MachineBasicBlock &B : MF) {
+ std::vector<RegisterRef> LV;
+ for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
+ LV.push_back(RegisterRef(LI.PhysReg, LI.LaneMask));
+ llvm::sort(LV, std::less<RegisterRef>(PRI));
+ dbgs() << printMBBReference(B) << "\t rec = {";
+ for (auto I : LV)
+ dbgs() << ' ' << Print(I, DFG);
+ dbgs() << " }\n";
+ // dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n';
+
+ LV.clear();
+ for (RegisterRef RR : LiveMap[&B].refs())
+ LV.push_back(RR);
+ llvm::sort(LV, std::less<RegisterRef>(PRI));
+ dbgs() << "\tcomp = {";
+ for (auto I : LV)
+ dbgs() << ' ' << Print(I, DFG);
+ dbgs() << " }\n";
+ }
+ }
+}
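+
+// A typical driver for the computation above (illustrative sketch only; the
+// exact constructor signatures live in RDFGraph.h and RDFLiveness.h):
+//
+//   DataFlowGraph G(MF, TII, TRI, MDT, MDF);   // plus any target-specific args
+//   G.build();
+//   Liveness LV(MF.getRegInfo(), G);
+//   LV.computeLiveIns();   // recompute the block live-in information
+//   LV.resetLiveIns();     // write it back into the MachineBasicBlocks
+//   LV.resetKills();       // then recompute kill flags from the new live-ins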
+
+void Liveness::resetLiveIns() {
+ for (auto &B : DFG.getMF()) {
+ // Remove all live-ins.
+ std::vector<unsigned> T;
+ for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
+ T.push_back(LI.PhysReg);
+ for (auto I : T)
+ B.removeLiveIn(I);
+ // Add the newly computed live-ins.
+ const RegisterAggr &LiveIns = LiveMap[&B];
+ for (RegisterRef R : LiveIns.refs())
+ B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
+ }
+}
+
+void Liveness::resetKills() {
+ for (auto &B : DFG.getMF())
+ resetKills(&B);
+}
+
+void Liveness::resetKills(MachineBasicBlock *B) {
+ auto CopyLiveIns = [this](MachineBasicBlock *B, BitVector &LV) -> void {
+ for (auto I : B->liveins()) {
+ MCSubRegIndexIterator S(I.PhysReg, &TRI);
+ if (!S.isValid()) {
+ LV.set(I.PhysReg);
+ continue;
+ }
+ do {
+ LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
+ if ((M & I.LaneMask).any())
+ LV.set(S.getSubReg());
+ ++S;
+ } while (S.isValid());
+ }
+ };
+
+ BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs());
+ CopyLiveIns(B, LiveIn);
+ for (auto *SI : B->successors())
+ CopyLiveIns(SI, Live);
+
+ for (MachineInstr &MI : llvm::reverse(*B)) {
+ if (MI.isDebugInstr())
+ continue;
+
+ MI.clearKillInfo();
+ for (auto &Op : MI.all_defs()) {
+ // An implicit def of a super-register may not necessarily start a
+ // live range of it, since an implicit use could be used to keep parts
+ // of it live. Instead of analyzing the implicit operands, ignore
+ // implicit defs.
+ if (Op.isImplicit())
+ continue;
+ Register R = Op.getReg();
+ if (!R.isPhysical())
+ continue;
+ for (MCPhysReg SR : TRI.subregs_inclusive(R))
+ Live.reset(SR);
+ }
+ for (auto &Op : MI.all_uses()) {
+ if (Op.isUndef())
+ continue;
+ Register R = Op.getReg();
+ if (!R.isPhysical())
+ continue;
+ bool IsLive = false;
+ for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) {
+ if (!Live[*AR])
+ continue;
+ IsLive = true;
+ break;
+ }
+ if (!IsLive)
+ Op.setIsKill(true);
+ for (MCPhysReg SR : TRI.subregs_inclusive(R))
+ Live.set(SR);
+ }
+ }
+}
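+
+// The kill recomputation above walks each block bottom-up: explicit defs
+// remove a register (and its sub-registers) from the live set, and a use is
+// marked as a kill exactly when no alias of its register is still live below
+// it, i.e. when it is the last use before the register becomes dead.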
+
+// Helper function to obtain the basic block containing the reaching def
+// of the given use.
+MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const {
+ auto F = NBMap.find(RN);
+ if (F != NBMap.end())
+ return F->second;
+ llvm_unreachable("Node id not in map");
+}
+
+void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
+ // The LiveIn map, for each (physical) register, contains the set of live
+ // reaching defs of that register that are live on entry to the associated
+ // block.
+
+ // The summary of the traversal algorithm:
+ //
+ // R is live-in in B, if there exists a U(R), such that rdef(R) dom B
+ // and (U \in IDF(B) or B dom U).
+ //
+ // for (C : children) {
+ // LU = {}
+ // traverse(C, LU)
+ // LiveUses += LU
+ // }
+ //
+ // LiveUses -= Defs(B);
+ // LiveUses += UpwardExposedUses(B);
+ // for (C : IIDF[B])
+ // for (U : LiveUses)
+ // if (Rdef(U) dom C)
+ // C.addLiveIn(U)
+ //
+
+ // Go up the dominator tree (depth-first).
+ MachineDomTreeNode *N = MDT.getNode(B);
+ for (auto *I : *N) {
+ RefMap L;
+ MachineBasicBlock *SB = I->getBlock();
+ traverse(SB, L);
+
+ for (auto S : L)
+ LiveIn[S.first].insert(S.second.begin(), S.second.end());
+ }
+
+ if (Trace) {
+ dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__
+ << " after recursion into: {";
+ for (auto *I : *N)
+ dbgs() << ' ' << I->getBlock()->getNumber();
+ dbgs() << " }\n";
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
+ }
+
+ // Add reaching defs of phi uses that are live on exit from this block.
+ RefMap &PUs = PhiLOX[B];
+ for (auto &S : PUs)
+ LiveIn[S.first].insert(S.second.begin(), S.second.end());
+
+ if (Trace) {
+ dbgs() << "after LOX\n";
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
+ }
+
+ // The LiveIn map at this point has all defs that are live-on-exit from B,
+ // as if they were live-on-entry to B. First, we need to filter out all
+ // defs that are present in this block. Then we will add reaching defs of
+ // all upward-exposed uses.
+
+ // To filter out the defs, first make a copy of LiveIn, and then re-populate
+ // LiveIn with the defs that should remain.
+ RefMap LiveInCopy = LiveIn;
+ LiveIn.clear();
+
+ for (const std::pair<const RegisterId, NodeRefSet> &LE : LiveInCopy) {
+ RegisterRef LRef(LE.first);
+ NodeRefSet &NewDefs = LiveIn[LRef.Reg]; // To be filled.
+ const NodeRefSet &OldDefs = LE.second;
+ for (NodeRef OR : OldDefs) {
+ // R is a def node that was live-on-exit
+ auto DA = DFG.addr<DefNode *>(OR.first);
+ NodeAddr<InstrNode *> IA = DA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode *> BA = IA.Addr->getOwner(DFG);
+ if (B != BA.Addr->getCode()) {
+ // Defs from a different block need to be preserved. Defs from this
+ // block will need to be processed further, except for phi defs, the
+ // liveness of which is handled through the PhiLON/PhiLOX maps.
+ NewDefs.insert(OR);
+ continue;
+ }
+
+ // Defs from this block need to stop the liveness from being
+ // propagated upwards. This only applies to non-preserving defs,
+ // and to the parts of the register actually covered by those defs.
+ // (Note that phi defs should always be preserving.)
+ RegisterAggr RRs(PRI);
+ LRef.Mask = OR.second;
+
+ if (!DFG.IsPreservingDef(DA)) {
+ assert(!(IA.Addr->getFlags() & NodeAttrs::Phi));
+ // DA is a non-phi def that is live-on-exit from this block, and
+ // that is also located in this block. LRef is a register ref
+ // whose use this def reaches. If DA covers LRef, then no part
+ // of LRef is exposed upwards.
+ if (RRs.insert(DA.Addr->getRegRef(DFG)).hasCoverOf(LRef))
+ continue;
+ }
+
+ // DA itself was not sufficient to cover LRef. In general, it is
+ // the last in a chain of aliased defs before the exit from this block.
+ // There could be other defs in this block that are a part of that
+ // chain. Check that now: accumulate the registers from these defs,
+ // and if they all together cover LRef, it is not live-on-entry.
+ for (NodeAddr<DefNode *> TA : getAllReachingDefs(DA)) {
+ // DefNode -> InstrNode -> BlockNode.
+ NodeAddr<InstrNode *> ITA = TA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode *> BTA = ITA.Addr->getOwner(DFG);
+ // Reaching defs are ordered in the upward direction.
+ if (BTA.Addr->getCode() != B) {
+ // We have reached past the beginning of B, and the accumulated
+ // registers are not covering LRef. The first def from the
+ // upward chain will be live.
+ // Subtract all accumulated defs (RRs) from LRef.
+ RegisterRef T = RRs.clearIn(LRef);
+ assert(T);
+ NewDefs.insert({TA.Id, T.Mask});
+ break;
+ }
+
+ // TA is in B. Only add this def to the accumulated cover if it is
+ // not preserving.
+ if (!(TA.Addr->getFlags() & NodeAttrs::Preserving))
+ RRs.insert(TA.Addr->getRegRef(DFG));
+ // If this is enough to cover LRef, then stop.
+ if (RRs.hasCoverOf(LRef))
+ break;
+ }
+ }
+ }
+
+ emptify(LiveIn);
+
+ if (Trace) {
+ dbgs() << "after defs in block\n";
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
+ }
+
+ // Scan the block for upward-exposed uses and add them to the tracking set.
+ for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
+ NodeAddr<InstrNode *> IA = I;
+ if (IA.Addr->getKind() != NodeAttrs::Stmt)
+ continue;
+ for (NodeAddr<UseNode *> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ if (UA.Addr->getFlags() & NodeAttrs::Undef)
+ continue;
+ RegisterRef RR = UA.Addr->getRegRef(DFG);
+ for (NodeAddr<DefNode *> D : getAllReachingDefs(UA))
+ if (getBlockWithRef(D.Id) != B)
+ LiveIn[RR.Reg].insert({D.Id, RR.Mask});
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "after uses in block\n";
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
+ }
+
+ // Phi uses should not be propagated up the dominator tree, since they
+ // are not dominated by their corresponding reaching defs.
+ RegisterAggr &Local = LiveMap[B];
+ RefMap &LON = PhiLON[B];
+ for (auto &R : LON) {
+ LaneBitmask M;
+ for (auto P : R.second)
+ M |= P.second;
+ Local.insert(RegisterRef(R.first, M));
+ }
+
+ if (Trace) {
+ dbgs() << "after phi uses in block\n";
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(Local, DFG) << '\n';
+ }
+
+ for (auto *C : IIDF[B]) {
+ RegisterAggr &LiveC = LiveMap[C];
+ for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn)
+ for (auto R : S.second)
+ if (MDT.properlyDominates(getBlockWithRef(R.first), C))
+ LiveC.insert(RegisterRef(S.first, R.second));
+ }
+}
+
+void Liveness::emptify(RefMap &M) {
+ for (auto I = M.begin(), E = M.end(); I != E;)
+ I = I->second.empty() ? M.erase(I) : std::next(I);
+}
+
+} // namespace llvm::rdf
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
new file mode 100644
index 000000000000..90520c4c3c71
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -0,0 +1,444 @@
+//===- RDFRegisters.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RDFRegisters.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <set>
+#include <utility>
+
+namespace llvm::rdf {
+
+PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf)
+ : TRI(tri) {
+ RegInfos.resize(TRI.getNumRegs());
+
+ BitVector BadRC(TRI.getNumRegs());
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ for (MCPhysReg R : *RC) {
+ RegInfo &RI = RegInfos[R];
+ if (RI.RegClass != nullptr && !BadRC[R]) {
+ if (RC->LaneMask != RI.RegClass->LaneMask) {
+ BadRC.set(R);
+ RI.RegClass = nullptr;
+ }
+ } else
+ RI.RegClass = RC;
+ }
+ }
+
+ UnitInfos.resize(TRI.getNumRegUnits());
+
+ for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
+ if (UnitInfos[U].Reg != 0)
+ continue;
+ MCRegUnitRootIterator R(U, &TRI);
+ assert(R.isValid());
+ RegisterId F = *R;
+ ++R;
+ if (R.isValid()) {
+ UnitInfos[U].Mask = LaneBitmask::getAll();
+ UnitInfos[U].Reg = F;
+ } else {
+ for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) {
+ std::pair<uint32_t, LaneBitmask> P = *I;
+ UnitInfo &UI = UnitInfos[P.first];
+ UI.Reg = F;
+ if (P.second.any()) {
+ UI.Mask = P.second;
+ } else {
+ if (const TargetRegisterClass *RC = RegInfos[F].RegClass)
+ UI.Mask = RC->LaneMask;
+ else
+ UI.Mask = LaneBitmask::getAll();
+ }
+ }
+ }
+ }
+
+ for (const uint32_t *RM : TRI.getRegMasks())
+ RegMasks.insert(RM);
+ for (const MachineBasicBlock &B : mf)
+ for (const MachineInstr &In : B)
+ for (const MachineOperand &Op : In.operands())
+ if (Op.isRegMask())
+ RegMasks.insert(Op.getRegMask());
+
+ MaskInfos.resize(RegMasks.size() + 1);
+ for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
+ BitVector PU(TRI.getNumRegUnits());
+ const uint32_t *MB = RegMasks.get(M);
+ for (unsigned I = 1, E = TRI.getNumRegs(); I != E; ++I) {
+ if (!(MB[I / 32] & (1u << (I % 32))))
+ continue;
+ for (MCRegUnit Unit : TRI.regunits(MCRegister::from(I)))
+ PU.set(Unit);
+ }
+ MaskInfos[M].Units = PU.flip();
+ }
+
+ AliasInfos.resize(TRI.getNumRegUnits());
+ for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
+ BitVector AS(TRI.getNumRegs());
+ for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R)
+ for (MCPhysReg S : TRI.superregs_inclusive(*R))
+ AS.set(S);
+ AliasInfos[U].Regs = AS;
+ }
+}
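+
+// Note on MaskInfos above: PU collects the units of every register whose bit
+// is set in the mask, i.e. of every register the mask preserves. The stored
+// value PU.flip() is therefore the complement: the register units not covered
+// by any preserved register, so a register-mask reference effectively stands
+// for the set of units it may clobber.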
+
+bool PhysicalRegisterInfo::alias(RegisterRef RA, RegisterRef RB) const {
+ return !disjoint(getUnits(RA), getUnits(RB));
+}
+
+std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
+ // Do not include Reg in the alias set.
+ std::set<RegisterId> AS;
+ assert(!RegisterRef::isUnitId(Reg) && "No units allowed");
+ if (RegisterRef::isMaskId(Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = getRegMaskBits(Reg);
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (MB[i / 32] & (1u << (i % 32)))
+ continue;
+ AS.insert(i);
+ }
+ return AS;
+ }
+
+ assert(RegisterRef::isRegId(Reg));
+ for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
+ AS.insert(*AI);
+
+ return AS;
+}
+
+std::set<RegisterId> PhysicalRegisterInfo::getUnits(RegisterRef RR) const {
+ std::set<RegisterId> Units;
+
+ if (RR.Reg == 0)
+ return Units; // Empty
+
+ if (RR.isReg()) {
+ if (RR.Mask.none())
+ return Units; // Empty
+ for (MCRegUnitMaskIterator UM(RR.idx(), &TRI); UM.isValid(); ++UM) {
+ auto [U, M] = *UM;
+ if (M.none() || (M & RR.Mask).any())
+ Units.insert(U);
+ }
+ return Units;
+ }
+
+ assert(RR.isMask());
+ unsigned NumRegs = TRI.getNumRegs();
+ const uint32_t *MB = getRegMaskBits(RR.idx());
+ for (unsigned I = 0, E = (NumRegs + 31) / 32; I != E; ++I) {
+ uint32_t C = ~MB[I]; // Clobbered regs
+ if (I == 0) // Reg 0 should be ignored
+ C &= maskLeadingOnes<unsigned>(31);
+ if (I + 1 == E && NumRegs % 32 != 0) // Last word may be partial
+ C &= maskTrailingOnes<unsigned>(NumRegs % 32);
+ if (C == 0)
+ continue;
+ while (C != 0) {
+ unsigned T = llvm::countr_zero(C);
+ unsigned CR = 32 * I + T; // Clobbered reg
+ for (MCRegUnit U : TRI.regunits(CR))
+ Units.insert(U);
+ C &= ~(1u << T);
+ }
+ }
+ return Units;
+}
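+
+// Worked example for the scan above, assuming (purely for illustration) a
+// target with NumRegs = 40: the mask occupies two 32-bit words. For word 0,
+// C is ANDed with maskLeadingOnes<unsigned>(31) = 0xFFFFFFFE, which drops the
+// invalid register 0; for the last word, C is ANDed with
+// maskTrailingOnes<unsigned>(40 % 32) = 0xFF, so bits beyond register 39 are
+// ignored. Each countr_zero(C) then yields a clobbered register 32 * I + T,
+// whose units are added to the result.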
+
+RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const {
+ if (RR.Reg == R)
+ return RR;
+ if (unsigned Idx = TRI.getSubRegIndex(R, RR.Reg))
+ return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask));
+ if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) {
+ const RegInfo &RI = RegInfos[R];
+ LaneBitmask RCM =
+ RI.RegClass ? RI.RegClass->LaneMask : LaneBitmask::getAll();
+ LaneBitmask M = TRI.reverseComposeSubRegIndexLaneMask(Idx, RR.Mask);
+ return RegisterRef(R, M & RCM);
+ }
+ llvm_unreachable("Invalid arguments: unrelated registers?");
+}
+
+bool PhysicalRegisterInfo::equal_to(RegisterRef A, RegisterRef B) const {
+ if (!A.isReg() || !B.isReg()) {
+ // For non-regs, or comparing reg and non-reg, use only the Reg member.
+ return A.Reg == B.Reg;
+ }
+
+ if (A.Reg == B.Reg)
+ return A.Mask == B.Mask;
+
+ // Compare reg units lexicographically.
+ MCRegUnitMaskIterator AI(A.Reg, &getTRI());
+ MCRegUnitMaskIterator BI(B.Reg, &getTRI());
+ while (AI.isValid() && BI.isValid()) {
+ auto [AReg, AMask] = *AI;
+ auto [BReg, BMask] = *BI;
+
+ // Lane masks are "none" for units that don't correspond to subregs
+    // e.g. a single unit in a leaf register, or an aliased unit.
+ if (AMask.none())
+ AMask = LaneBitmask::getAll();
+ if (BMask.none())
+ BMask = LaneBitmask::getAll();
+
+ // If both iterators point to a unit contained in both A and B, then
+ // compare the units.
+ if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
+ if (AReg != BReg)
+ return false;
+ // Units are equal, move on to the next ones.
+ ++AI;
+ ++BI;
+ continue;
+ }
+
+ if ((AMask & A.Mask).none())
+ ++AI;
+ if ((BMask & B.Mask).none())
+ ++BI;
+ }
+ // One or both have reached the end.
+ return static_cast<int>(AI.isValid()) == static_cast<int>(BI.isValid());
+}
+
+bool PhysicalRegisterInfo::less(RegisterRef A, RegisterRef B) const {
+ if (!A.isReg() || !B.isReg()) {
+ // For non-regs, or comparing reg and non-reg, use only the Reg member.
+ return A.Reg < B.Reg;
+ }
+
+ if (A.Reg == B.Reg)
+ return A.Mask < B.Mask;
+ if (A.Mask == B.Mask)
+ return A.Reg < B.Reg;
+
+ // Compare reg units lexicographically.
+ llvm::MCRegUnitMaskIterator AI(A.Reg, &getTRI());
+ llvm::MCRegUnitMaskIterator BI(B.Reg, &getTRI());
+ while (AI.isValid() && BI.isValid()) {
+ auto [AReg, AMask] = *AI;
+ auto [BReg, BMask] = *BI;
+
+ // Lane masks are "none" for units that don't correspond to subregs
+    // e.g. a single unit in a leaf register, or an aliased unit.
+ if (AMask.none())
+ AMask = LaneBitmask::getAll();
+ if (BMask.none())
+ BMask = LaneBitmask::getAll();
+
+ // If both iterators point to a unit contained in both A and B, then
+ // compare the units.
+ if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
+ if (AReg != BReg)
+ return AReg < BReg;
+ // Units are equal, move on to the next ones.
+ ++AI;
+ ++BI;
+ continue;
+ }
+
+ if ((AMask & A.Mask).none())
+ ++AI;
+ if ((BMask & B.Mask).none())
+ ++BI;
+ }
+ // One or both have reached the end: assume invalid < valid.
+ return static_cast<int>(AI.isValid()) < static_cast<int>(BI.isValid());
+}
+
+void PhysicalRegisterInfo::print(raw_ostream &OS, RegisterRef A) const {
+ if (A.Reg == 0 || A.isReg()) {
+ if (0 < A.idx() && A.idx() < TRI.getNumRegs())
+ OS << TRI.getName(A.idx());
+ else
+ OS << printReg(A.idx(), &TRI);
+ OS << PrintLaneMaskShort(A.Mask);
+ } else if (A.isUnit()) {
+ OS << printRegUnit(A.idx(), &TRI);
+ } else {
+ assert(A.isMask());
+ // RegMask SS flag is preserved by idx().
+ unsigned Idx = Register::stackSlot2Index(A.idx());
+ const char *Fmt = Idx < 0x10000 ? "%04x" : "%08x";
+ OS << "M#" << format(Fmt, Idx);
+ }
+}
+
+void PhysicalRegisterInfo::print(raw_ostream &OS, const RegisterAggr &A) const {
+ OS << '{';
+ for (unsigned U : A.units())
+ OS << ' ' << printRegUnit(U, &TRI);
+ OS << " }";
+}
+
+bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
+ if (RR.isMask())
+ return Units.anyCommon(PRI.getMaskUnits(RR.Reg));
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t, LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ if (Units.test(P.first))
+ return true;
+ }
+ return false;
+}
+
+bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
+ if (RR.isMask()) {
+ BitVector T(PRI.getMaskUnits(RR.Reg));
+ return T.reset(Units).none();
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t, LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ if (!Units.test(P.first))
+ return false;
+ }
+ return true;
+}
+
+RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
+ if (RR.isMask()) {
+ Units |= PRI.getMaskUnits(RR.Reg);
+ return *this;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t, LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ Units.set(P.first);
+ }
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
+ Units |= RG.Units;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::intersect(RegisterRef RR) {
+ return intersect(RegisterAggr(PRI).insert(RR));
+}
+
+RegisterAggr &RegisterAggr::intersect(const RegisterAggr &RG) {
+ Units &= RG.Units;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
+ return clear(RegisterAggr(PRI).insert(RR));
+}
+
+RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
+ Units.reset(RG.Units);
+ return *this;
+}
+
+RegisterRef RegisterAggr::intersectWith(RegisterRef RR) const {
+ RegisterAggr T(PRI);
+ T.insert(RR).intersect(*this);
+ if (T.empty())
+ return RegisterRef();
+ RegisterRef NR = T.makeRegRef();
+ assert(NR);
+ return NR;
+}
+
+RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
+ return RegisterAggr(PRI).insert(RR).clear(*this).makeRegRef();
+}
+
+RegisterRef RegisterAggr::makeRegRef() const {
+ int U = Units.find_first();
+ if (U < 0)
+ return RegisterRef();
+
+ // Find the set of all registers that are aliased to all the units
+ // in this aggregate.
+
+ // Get all the registers aliased to the first unit in the bit vector.
+ BitVector Regs = PRI.getUnitAliases(U);
+ U = Units.find_next(U);
+
+ // For each other unit, intersect it with the set of all registers
+  // aliased to that unit.
+ while (U >= 0) {
+ Regs &= PRI.getUnitAliases(U);
+ U = Units.find_next(U);
+ }
+
+ // If there is at least one register remaining, pick the first one,
+ // and consolidate the masks of all of its units contained in this
+ // aggregate.
+
+ int F = Regs.find_first();
+ if (F <= 0)
+ return RegisterRef();
+
+ LaneBitmask M;
+ for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
+ std::pair<uint32_t, LaneBitmask> P = *I;
+ if (Units.test(P.first))
+ M |= P.second.none() ? LaneBitmask::getAll() : P.second;
+ }
+ return RegisterRef(F, M);
+}
+
+RegisterAggr::ref_iterator::ref_iterator(const RegisterAggr &RG, bool End)
+ : Owner(&RG) {
+ for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) {
+ RegisterRef R = RG.PRI.getRefForUnit(U);
+ Masks[R.Reg] |= R.Mask;
+ }
+ Pos = End ? Masks.end() : Masks.begin();
+ Index = End ? Masks.size() : 0;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const RegisterAggr &A) {
+ A.getPRI().print(OS, A);
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMaskShort &P) {
+ if (P.Mask.all())
+ return OS;
+ if (P.Mask.none())
+ return OS << ":*none*";
+
+ LaneBitmask::Type Val = P.Mask.getAsInteger();
+ if ((Val & 0xffff) == Val)
+ return OS << ':' << format("%04llX", Val);
+ if ((Val & 0xffffffff) == Val)
+ return OS << ':' << format("%08llX", Val);
+ return OS << ':' << PrintLaneMask(P.Mask);
+}
+
+} // namespace llvm::rdf
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
new file mode 100644
index 000000000000..75fbc8ba35b1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -0,0 +1,712 @@
+//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "reaching-deps-analysis"
+
+char ReachingDefAnalysis::ID = 0;
+INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false,
+ true)
+
+static bool isValidReg(const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg();
+}
+
+static bool isValidRegUse(const MachineOperand &MO) {
+ return isValidReg(MO) && MO.isUse();
+}
+
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegUse(MO))
+ return false;
+ return TRI->regsOverlap(MO.getReg(), PhysReg);
+}
+
+static bool isValidRegDef(const MachineOperand &MO) {
+ return isValidReg(MO) && MO.isDef();
+}
+
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegDef(MO))
+ return false;
+ return TRI->regsOverlap(MO.getReg(), PhysReg);
+}
+
+void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ MBBReachingDefs[MBBNumber].resize(NumRegUnits);
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ // Default values are 'nothing happened a long time ago'.
+ if (LiveRegs.empty())
+ LiveRegs.assign(NumRegUnits, ReachingDefDefaultVal);
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (const auto &LI : MBB->liveins()) {
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ if (LiveRegs[Unit] != -1) {
+ LiveRegs[Unit] = -1;
+ MBBReachingDefs[MBBNumber][Unit].push_back(-1);
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock *pred : MBB->predecessors()) {
+ assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
+    // Incoming is empty if this is a backedge from a BB
+    // we haven't processed yet.
+ if (Incoming.empty())
+ continue;
+
+ // Find the most recent reaching definition from a predecessor.
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit)
+ LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]);
+ }
+
+ // Insert the most recent reaching definition we found.
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit)
+ if (LiveRegs[Unit] != ReachingDefDefaultVal)
+ MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]);
+}
+
+void ReachingDefAnalysis::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBOutRegsInfos.size() &&
+ "Unexpected basic block number.");
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ MBBOutRegsInfos[MBBNumber] = LiveRegs;
+
+ // While processing the basic block, we kept `Def` relative to the start
+ // of the basic block for convenience. However, future use of this information
+ // only cares about the clearance from the end of the block, so adjust
+ // everything to be relative to the end of the basic block.
+ for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber])
+ if (OutLiveReg != ReachingDefDefaultVal)
+ OutLiveReg -= CurInstr;
+ LiveRegs.clear();
+}
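The numbers stored here are block-relative: while a block is being processed, each non-debug instruction gets an index counted from 0 at the top (with -1 reserved for live-in definitions), and on exit the per-unit values are rebased so that successors see distances from the end of the predecessor. A small worked example under those assumptions (hypothetical block, not taken from any target):

    // A block with three non-debug instructions, so CurInstr == 3 on exit.
    //   -1 : a live-in defines the unit before the first instruction
    //    1 : the second instruction (index 1) redefines the unit
    // leaveBasicBlock() rebases the live-out value to the block end:
    //    out = 1 - CurInstr = 1 - 3 = -2
    // i.e. the reaching def sits two instructions above the bottom of the
    // block, which is exactly what enterBasicBlock() merges into successors.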
+
+void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
+ assert(!MI->isDebugInstr() && "Won't process debug instructions");
+
+ unsigned MBBNumber = MI->getParent()->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegDef(MO))
+ continue;
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg().asMCReg())) {
+ // This instruction explicitly defines the current reg unit.
+ LLVM_DEBUG(dbgs() << printRegUnit(Unit, TRI) << ":\t" << CurInstr << '\t'
+ << *MI);
+
+ // How many instructions since this reg unit was last written?
+ if (LiveRegs[Unit] != CurInstr) {
+ LiveRegs[Unit] = CurInstr;
+ MBBReachingDefs[MBBNumber][Unit].push_back(CurInstr);
+ }
+ }
+ }
+ InstIds[MI] = CurInstr;
+ ++CurInstr;
+}
+
+void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+
+ // Count number of non-debug instructions for end of block adjustment.
+ auto NonDbgInsts =
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end());
+ int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
+
+ // When reprocessing a block, the only thing we need to do is check whether
+ // there is now a more recent incoming reaching definition from a predecessor.
+ for (MachineBasicBlock *pred : MBB->predecessors()) {
+ assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
+ // Incoming may be empty for dead predecessors.
+ if (Incoming.empty())
+ continue;
+
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) {
+ int Def = Incoming[Unit];
+ if (Def == ReachingDefDefaultVal)
+ continue;
+
+ auto Start = MBBReachingDefs[MBBNumber][Unit].begin();
+ if (Start != MBBReachingDefs[MBBNumber][Unit].end() && *Start < 0) {
+ if (*Start >= Def)
+ continue;
+
+ // Update existing reaching def from predecessor to a more recent one.
+ *Start = Def;
+ } else {
+ // Insert new reaching def from predecessor.
+ MBBReachingDefs[MBBNumber][Unit].insert(Start, Def);
+ }
+
+      // Update reaching def at end of BB. Keep in mind that these are
+ // adjusted relative to the end of the basic block.
+ if (MBBOutRegsInfos[MBBNumber][Unit] < Def - NumInsts)
+ MBBOutRegsInfos[MBBNumber][Unit] = Def - NumInsts;
+ }
+ }
+}
+
+void ReachingDefAnalysis::processBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ MachineBasicBlock *MBB = TraversedMBB.MBB;
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
+ << (!TraversedMBB.IsDone ? ": incomplete\n"
+ : ": all preds known\n"));
+
+ if (!TraversedMBB.PrimaryPass) {
+ // Reprocess MBB that is part of a loop.
+ reprocessBasicBlock(MBB);
+ return;
+ }
+
+ enterBasicBlock(MBB);
+ for (MachineInstr &MI :
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end()))
+ processDefs(&MI);
+ leaveBasicBlock(MBB);
+}
+
+bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TRI = MF->getSubtarget().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n");
+ init();
+ traverse();
+ return false;
+}
+
+void ReachingDefAnalysis::releaseMemory() {
+ // Clear the internal vectors.
+ MBBOutRegsInfos.clear();
+ MBBReachingDefs.clear();
+ InstIds.clear();
+ LiveRegs.clear();
+}
+
+void ReachingDefAnalysis::reset() {
+ releaseMemory();
+ init();
+ traverse();
+}
+
+void ReachingDefAnalysis::init() {
+ NumRegUnits = TRI->getNumRegUnits();
+ MBBReachingDefs.resize(MF->getNumBlockIDs());
+ // Initialize the MBBOutRegsInfos
+ MBBOutRegsInfos.resize(MF->getNumBlockIDs());
+ LoopTraversal Traversal;
+ TraversedMBBOrder = Traversal.traverse(*MF);
+}
+
+void ReachingDefAnalysis::traverse() {
+ // Traverse the basic blocks.
+ for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder)
+ processBasicBlock(TraversedMBB);
+#ifndef NDEBUG
+ // Make sure reaching defs are sorted and unique.
+ for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
+ for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) {
+ int LastDef = ReachingDefDefaultVal;
+ for (int Def : RegUnitDefs) {
+ assert(Def > LastDef && "Defs must be sorted and unique");
+ LastDef = Def;
+ }
+ }
+ }
+#endif
+}
+
+int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
+  assert(InstIds.count(MI) && "Unexpected machine instruction.");
+ int InstId = InstIds.lookup(MI);
+ int DefRes = ReachingDefDefaultVal;
+ unsigned MBBNumber = MI->getParent()->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ int LatestDef = ReachingDefDefaultVal;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ for (int Def : MBBReachingDefs[MBBNumber][Unit]) {
+ if (Def >= InstId)
+ break;
+ DefRes = Def;
+ }
+ LatestDef = std::max(LatestDef, DefRes);
+ }
+ return LatestDef;
+}
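Typical queries against this table, assuming RDA is an already-populated ReachingDefAnalysis and MI / PhysReg are a hypothetical non-debug instruction and physical register from the analyzed function (a sketch, not part of this file):

    // Negative means the register is not defined by an instruction earlier in
    // MI's block (a live-in or the "long time ago" default); non-negative is
    // the block-relative index of the closest preceding def.
    int DefId = RDA.getReachingDef(MI, PhysReg);
    if (DefId >= 0) {
      // Distance in non-debug instructions from that def to MI.
      int Clearance = RDA.getClearance(MI, PhysReg);
      // The defining instruction itself, when it is in the same block.
      MachineInstr *Def = RDA.getReachingLocalMIDef(MI, PhysReg);
      (void)Clearance;
      (void)Def;
    }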
+
+MachineInstr *
+ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ return hasLocalDefBefore(MI, PhysReg)
+ ? getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg))
+ : nullptr;
+}
+
+bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
+ MCRegister PhysReg) const {
+ MachineBasicBlock *ParentA = A->getParent();
+ MachineBasicBlock *ParentB = B->getParent();
+ if (ParentA != ParentB)
+ return false;
+
+ return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg);
+}
+
+MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
+ int InstId) const {
+ assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ assert(InstId < static_cast<int>(MBB->size()) &&
+ "Unexpected instruction id.");
+
+ if (InstId < 0)
+ return nullptr;
+
+ for (auto &MI : *MBB) {
+ auto F = InstIds.find(&MI);
+ if (F != InstIds.end() && F->second == InstId)
+ return &MI;
+ }
+
+ return nullptr;
+}
+
+int ReachingDefAnalysis::getClearance(MachineInstr *MI,
+ MCRegister PhysReg) const {
+  assert(InstIds.count(MI) && "Unexpected machine instruction.");
+ return InstIds.lookup(MI) - getReachingDef(MI, PhysReg);
+}
+
+bool ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ return getReachingDef(MI, PhysReg) >= 0;
+}
+
+void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
+ MCRegister PhysReg,
+ InstSet &Uses) const {
+ MachineBasicBlock *MBB = Def->getParent();
+ MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
+ while (++MI != MBB->end()) {
+ if (MI->isDebugInstr())
+ continue;
+
+    // If/when we find a new reaching def, we know that there are no more uses
+ // of 'Def'.
+ if (getReachingLocalMIDef(&*MI, PhysReg) != Def)
+ return;
+
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
+ continue;
+
+ Uses.insert(&*MI);
+ if (MO.isKill())
+ return;
+ }
+ }
+}
+
+bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
+ MCRegister PhysReg,
+ InstSet &Uses) const {
+ for (MachineInstr &MI :
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
+ for (auto &MO : MI.operands()) {
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
+ continue;
+ if (getReachingDef(&MI, PhysReg) >= 0)
+ return false;
+ Uses.insert(&MI);
+ }
+ }
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last == MBB->end())
+ return true;
+ return isReachingDefLiveOut(&*Last, PhysReg);
+}
+
+void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Uses) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Collect the uses that each def touches within the block.
+ getReachingLocalUses(MI, PhysReg, Uses);
+
+ // Handle live-out values.
+ if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), PhysReg)) {
+ if (LiveOut != MI)
+ return;
+
+ SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());
+    SmallPtrSet<MachineBasicBlock*, 4> Visited;
+ while (!ToVisit.empty()) {
+ MachineBasicBlock *MBB = ToVisit.pop_back_val();
+ if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
+ continue;
+ if (getLiveInUses(MBB, PhysReg, Uses))
+ llvm::append_range(ToVisit, MBB->successors());
+ Visited.insert(MBB);
+ }
+ }
+}
+
+void ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI,
+ MCRegister PhysReg,
+ InstSet &Defs) const {
+ if (auto *Def = getUniqueReachingMIDef(MI, PhysReg)) {
+ Defs.insert(Def);
+ return;
+ }
+
+ for (auto *MBB : MI->getParent()->predecessors())
+ getLiveOuts(MBB, PhysReg, Defs);
+}
+
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
+ MCRegister PhysReg, InstSet &Defs) const {
+ SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs;
+ getLiveOuts(MBB, PhysReg, Defs, VisitedBBs);
+}
+
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
+ MCRegister PhysReg, InstSet &Defs,
+ BlockSet &VisitedBBs) const {
+ if (VisitedBBs.count(MBB))
+ return;
+
+ VisitedBBs.insert(MBB);
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ return;
+
+ if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
+ Defs.insert(Def);
+ else
+ for (auto *Pred : MBB->predecessors())
+ getLiveOuts(Pred, PhysReg, Defs, VisitedBBs);
+}
+
+MachineInstr *
+ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ // If there's a local def before MI, return it.
+ MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg);
+ if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
+ return LocalDef;
+
+ SmallPtrSet<MachineInstr*, 2> Incoming;
+ MachineBasicBlock *Parent = MI->getParent();
+ for (auto *Pred : Parent->predecessors())
+ getLiveOuts(Pred, PhysReg, Incoming);
+
+ // Check that we have a single incoming value and that it does not
+ // come from the same block as MI - since it would mean that the def
+ // is executed after MI.
+ if (Incoming.size() == 1 && (*Incoming.begin())->getParent() != Parent)
+ return *Incoming.begin();
+ return nullptr;
+}
+
+MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
+ unsigned Idx) const {
+ assert(MI->getOperand(Idx).isReg() && "Expected register operand");
+ return getUniqueReachingMIDef(MI, MI->getOperand(Idx).getReg());
+}
+
+MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
+ MachineOperand &MO) const {
+ assert(MO.isReg() && "Expected register operand");
+ return getUniqueReachingMIDef(MI, MO.getReg());
+}
+
+bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+
+ // Yes if the register is live out of the basic block.
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ return true;
+
+ // Walk backwards through the block to see if the register is live at some
+ // point.
+ for (MachineInstr &Last :
+ instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
+ LiveRegs.stepBackward(Last);
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ return InstIds.lookup(&Last) > InstIds.lookup(MI);
+ }
+ return false;
+}
+
+bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last != MBB->end() &&
+ getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg))
+ return true;
+
+ if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
+ return Def == getReachingLocalMIDef(MI, PhysReg);
+
+ return false;
+}
+
+bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ return false;
+
+ auto Last = MBB->getLastNonDebugInstr();
+ int Def = getReachingDef(MI, PhysReg);
+ if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def)
+ return false;
+
+ // Finally check that the last instruction doesn't redefine the register.
+ for (auto &MO : Last->operands())
+ if (isValidRegDefOf(MO, PhysReg, TRI))
+ return false;
+
+ return true;
+}
+
+MachineInstr *
+ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ MCRegister PhysReg) const {
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ return nullptr;
+
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last == MBB->end())
+ return nullptr;
+
+ int Def = getReachingDef(&*Last, PhysReg);
+ for (auto &MO : Last->operands())
+ if (isValidRegDefOf(MO, PhysReg, TRI))
+ return &*Last;
+
+ return Def < 0 ? nullptr : getInstFromId(MBB, Def);
+}
+
+static bool mayHaveSideEffects(MachineInstr &MI) {
+ return MI.mayLoadOrStore() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects() || MI.isTerminator() ||
+ MI.isCall() || MI.isBarrier() || MI.isBranch() || MI.isReturn();
+}
+
+// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must
+// not define a register that is used by any instruction after, and including,
+// 'To'. These instructions also must not redefine any of From's operands.
+template<typename Iterator>
+bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From,
+ MachineInstr *To) const {
+ if (From->getParent() != To->getParent() || From == To)
+ return false;
+
+ SmallSet<int, 2> Defs;
+ // First check that From would compute the same value if moved.
+ for (auto &MO : From->operands()) {
+ if (!isValidReg(MO))
+ continue;
+ if (MO.isDef())
+ Defs.insert(MO.getReg());
+ else if (!hasSameReachingDef(From, To, MO.getReg()))
+ return false;
+ }
+
+ // Now walk checking that the rest of the instructions will compute the same
+ // value and that we're not overwriting anything. Don't move the instruction
+ // past any memory, control-flow or other ambiguous instructions.
+ for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) {
+ if (mayHaveSideEffects(*I))
+ return false;
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.getReg() && Defs.count(MO.getReg()))
+ return false;
+ }
+ return true;
+}
+
+bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From,
+ MachineInstr *To) const {
+ using Iterator = MachineBasicBlock::iterator;
+ // Walk forwards until we find the instruction.
+ for (auto I = Iterator(From), E = From->getParent()->end(); I != E; ++I)
+ if (&*I == To)
+ return isSafeToMove<Iterator>(From, To);
+ return false;
+}
+
+bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From,
+ MachineInstr *To) const {
+ using Iterator = MachineBasicBlock::reverse_iterator;
+ // Walk backwards until we find the instruction.
+ for (auto I = Iterator(From), E = From->getParent()->rend(); I != E; ++I)
+ if (&*I == To)
+ return isSafeToMove<Iterator>(From, To);
+ return false;
+}
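A caller would typically pair these checks with an actual code motion; a hedged sketch, where RDA is a populated analysis and From/To are hypothetical instructions in the same basic block (moving code invalidates the recorded instruction ids, so the analysis needs a reset() afterwards):

    MachineBasicBlock *MBB = From->getParent();
    if (RDA.isSafeToMoveForwards(From, To)) {
      // Sink 'From' to just before 'To'; nothing in between reads a register
      // 'From' defines, clobbers one of its inputs, or has side effects.
      MBB->splice(MachineBasicBlock::iterator(To), MBB,
                  MachineBasicBlock::iterator(From));
      RDA.reset();   // instruction numbering is stale after the move
    }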
+
+bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI,
+ InstSet &ToRemove) const {
+ SmallPtrSet<MachineInstr*, 1> Ignore;
+ SmallPtrSet<MachineInstr*, 2> Visited;
+ return isSafeToRemove(MI, Visited, ToRemove, Ignore);
+}
+
+bool
+ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &ToRemove,
+ InstSet &Ignore) const {
+ SmallPtrSet<MachineInstr*, 2> Visited;
+ return isSafeToRemove(MI, Visited, ToRemove, Ignore);
+}
+
+bool
+ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
+ InstSet &ToRemove, InstSet &Ignore) const {
+ if (Visited.count(MI) || Ignore.count(MI))
+ return true;
+ else if (mayHaveSideEffects(*MI)) {
+ // Unless told to ignore the instruction, don't remove anything which has
+ // side effects.
+ return false;
+ }
+
+ Visited.insert(MI);
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegDef(MO))
+ continue;
+
+ SmallPtrSet<MachineInstr*, 4> Uses;
+ getGlobalUses(MI, MO.getReg(), Uses);
+
+ for (auto *I : Uses) {
+ if (Ignore.count(I) || ToRemove.count(I))
+ continue;
+ if (!isSafeToRemove(I, Visited, ToRemove, Ignore))
+ return false;
+ }
+ }
+ ToRemove.insert(MI);
+ return true;
+}
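In other words, an instruction is removable when every user of its defs is itself removable (or explicitly ignored), and the whole dependent set comes back in ToRemove. A hedged usage sketch, assuming RDA is a populated analysis and MI is a hypothetical instruction whose results are no longer wanted:

    SmallPtrSet<MachineInstr *, 4> ToRemove;
    if (RDA.isSafeToRemove(MI, ToRemove)) {
      // ToRemove now holds MI plus every instruction that only exists to
      // consume its results; they can all be erased together.
      for (MachineInstr *Dead : ToRemove)
        Dead->eraseFromParent();
      RDA.reset();   // the def/use numbering is stale after deletion
    }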
+
+void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
+ InstSet &Dead) const {
+ Dead.insert(MI);
+ auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister PhysReg) {
+ if (mayHaveSideEffects(*Def))
+ return false;
+
+ unsigned LiveDefs = 0;
+ for (auto &MO : Def->operands()) {
+ if (!isValidRegDef(MO))
+ continue;
+ if (!MO.isDead())
+ ++LiveDefs;
+ }
+
+ if (LiveDefs > 1)
+ return false;
+
+ SmallPtrSet<MachineInstr*, 4> Uses;
+ getGlobalUses(Def, PhysReg, Uses);
+ return llvm::set_is_subset(Uses, Dead);
+ };
+
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegUse(MO))
+ continue;
+ if (MachineInstr *Def = getMIOperand(MI, MO))
+ if (IsDead(Def, MO.getReg()))
+ collectKilledOperands(Def, Dead);
+ }
+}
+
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ SmallPtrSet<MachineInstr*, 1> Ignore;
+ return isSafeToDefRegAt(MI, PhysReg, Ignore);
+}
+
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Ignore) const {
+ // Check for any uses of the register after MI.
+ if (isRegUsedAfter(MI, PhysReg)) {
+ if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) {
+ SmallPtrSet<MachineInstr*, 2> Uses;
+ getGlobalUses(Def, PhysReg, Uses);
+ if (!llvm::set_is_subset(Uses, Ignore))
+ return false;
+ } else
+ return false;
+ }
+
+ MachineBasicBlock *MBB = MI->getParent();
+ // Check for any defs after MI.
+ if (isRegDefinedAfter(MI, PhysReg)) {
+ auto I = MachineBasicBlock::iterator(MI);
+ for (auto E = MBB->end(); I != E; ++I) {
+ if (Ignore.count(&*I))
+ continue;
+ for (auto &MO : I->operands())
+ if (isValidRegDefOf(MO, PhysReg, TRI))
+ return false;
+ }
+ }
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 000000000000..900f0e9079d6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,192 @@
+//===- RegAllocBase.cpp - Register Allocator Base Class -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumNewQueued, "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::Hidden, cl::desc("Verify during register allocation"));
+
+const char RegAllocBase::TimerGroupName[] = "regalloc";
+const char RegAllocBase::TimerGroupDescription[] = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Pin the vtable to this file.
+void RegAllocBase::anchor() {}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
+ LiveRegMatrix &mat) {
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ Matrix = &mat;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ enqueue(&LIS->getInterval(Reg));
+ }
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (const LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg())) {
+ LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ aboutToRemoveInterval(*VirtReg);
+ LIS->removeInterval(VirtReg->reg());
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ Matrix->invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ LLVM_DEBUG(dbgs() << "\nselectOrSplit "
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg()))
+ << ':' << *VirtReg << " w=" << VirtReg->weight() << '\n');
+
+ using VirtRegVec = SmallVector<Register, 4>;
+
+ VirtRegVec SplitVRegs;
+ MCRegister AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ // Probably caused by an inline asm.
+ MachineInstr *MI = nullptr;
+ for (MachineRegisterInfo::reg_instr_iterator
+ I = MRI->reg_instr_begin(VirtReg->reg()),
+ E = MRI->reg_instr_end();
+ I != E;) {
+ MI = &*(I++);
+ if (MI->isInlineAsm())
+ break;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg->reg());
+ ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
+ if (AllocOrder.empty())
+ report_fatal_error("no registers from class available to allocate");
+ else if (MI && MI->isInlineAsm()) {
+ MI->emitError("inline assembly requires more registers than available");
+ } else if (MI) {
+ LLVMContext &Context =
+ MI->getParent()->getParent()->getMMI().getModule()->getContext();
+ Context.emitError("ran out of registers during register allocation");
+ } else {
+ report_fatal_error("ran out of registers during register allocation");
+ }
+
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front());
+ } else if (AvailablePhysReg)
+ Matrix->assign(*VirtReg, AvailablePhysReg);
+
+ for (Register Reg : SplitVRegs) {
+ assert(LIS->hasInterval(Reg));
+
+ LiveInterval *SplitVirtReg = &LIS->getInterval(Reg);
+ assert(!VRM->hasPhys(SplitVirtReg->reg()) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg())) {
+ assert(SplitVirtReg->empty() && "Non-empty but used interval");
+ LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ aboutToRemoveInterval(*SplitVirtReg);
+ LIS->removeInterval(SplitVirtReg->reg());
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(SplitVirtReg->reg().isVirtual() &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+void RegAllocBase::postOptimization() {
+ spiller().postOptimization();
+ for (auto *DeadInst : DeadRemats) {
+ LIS->RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
+
+void RegAllocBase::enqueue(const LiveInterval *LI) {
+ const Register Reg = LI->reg();
+
+ assert(Reg.isVirtual() && "Can only enqueue virtual registers");
+
+ if (VRM->hasPhys(Reg))
+ return;
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+ if (ShouldAllocateClass(*TRI, RC)) {
+ LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
+ enqueueImpl(LI);
+ } else {
+ LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI)
+ << " in skipped register class\n");
+ }
+}
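ShouldAllocateClass lets a pass restrict the allocator to a subset of register classes and leave the rest for a later run. Its exact signature lives in RegAllocCommon.h; assuming the two-argument form used in the call above, a filter could be built like this hedged sketch (the 16-byte policy is purely illustrative):

    // Allocate only register classes whose spill slot is at least 16 bytes;
    // everything else is skipped and must be handled by another allocator.
    RegClassFilterFunc OnlyWide = [](const TargetRegisterInfo &TRI,
                                     const TargetRegisterClass &RC) {
      return TRI.getSpillSize(RC) >= 16;
    };
    FunctionPass *RA = createBasicRegisterAllocator(OnlyWide);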
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..a8bf305a50c9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,131 @@
+//===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity and generated code performance are
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around
+// the LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H
+#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/RegAllocCommon.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineInstr;
+class MachineRegisterInfo;
+template<typename T> class SmallVectorImpl;
+class Spiller;
+class TargetRegisterInfo;
+class VirtRegMap;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+ virtual void anchor();
+
+protected:
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveIntervals *LIS = nullptr;
+ LiveRegMatrix *Matrix = nullptr;
+ RegisterClassInfo RegClassInfo;
+ const RegClassFilterFunc ShouldAllocateClass;
+
+  /// An instruction which is a def of an original reg, and whose defs are
+  /// already all dead after remat, is saved in DeadRemats. The deletion of
+  /// such an inst is postponed until all the allocations are done, so its
+  /// remat expr is always available for the remat of all the siblings of the
+  /// original reg.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
+ RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) :
+ ShouldAllocateClass(F) {}
+
+ virtual ~RegAllocBase() = default;
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
+
+  // The top-level driver. The output is a VirtRegMap that is updated with
+  // physical register assignments.
+ void allocatePhysRegs();
+
+  // Run spiller post-optimization and remove dead defs left behind by
+  // rematerialization.
+ virtual void postOptimization();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueueImpl(const LiveInterval *LI) = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ void enqueue(const LiveInterval *LI);
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual const LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+  // Each call must guarantee forward progress by returning an available PhysReg
+  // or a new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual MCRegister selectOrSplit(const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &splitLVRs) = 0;
+
+ // Use this group name for NamedRegionTimer.
+ static const char TimerGroupName[];
+ static const char TimerGroupDescription[];
+
+ /// Method called when the allocator is about to remove a LiveInterval.
+ virtual void aboutToRemoveInterval(const LiveInterval &LI) {}
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_REGALLOCBASE_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..666199139630
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,339 @@
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(const LiveInterval *A, const LiveInterval *B) const {
+ return A->weight() < B->weight();
+ }
+ };
+}
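Because std::priority_queue treats its comparator as "less" and pops the largest element, this ordering hands the interval with the highest spill weight to the allocator first and keeps cheap-to-spill intervals waiting. A tiny standalone illustration with made-up weights (not part of the pass):

    #include <queue>
    #include <vector>

    struct FakeInterval {
      float W;
      float weight() const { return W; }
    };

    struct CompSpillWeightSketch {
      bool operator()(const FakeInterval *A, const FakeInterval *B) const {
        return A->weight() < B->weight();   // "less" comparator => max-heap
      }
    };

    int main() {
      FakeInterval A{1.0f}, B{7.5f}, C{3.2f};
      std::priority_queue<const FakeInterval *,
                          std::vector<const FakeInterval *>,
                          CompSpillWeightSketch> Q;
      for (const FakeInterval *LI : {&A, &B, &C})
        Q.push(LI);
      // Q.top() is now B (weight 7.5); pops continue in order C, then A.
      return 0;
    }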
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+ // context
+ MachineFunction *MF = nullptr;
+
+ // state
+ std::unique_ptr<Spiller> SpillerInstance;
+ std::priority_queue<const LiveInterval *, std::vector<const LiveInterval *>,
+ CompSpillWeight>
+ Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
+
+public:
+ RABasic(const RegClassFilterFunc F = allocateAllRegClasses);
+
+ /// Return the pass name.
+ StringRef getPassName() const override { return "Basic Register Allocator"; }
+
+ /// RABasic analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void releaseMemory() override;
+
+ Spiller &spiller() override { return *SpillerInstance; }
+
+ void enqueueImpl(const LiveInterval *LI) override { Queue.push(LI); }
+
+ const LiveInterval *dequeue() override {
+ if (Queue.empty())
+ return nullptr;
+ const LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ MCRegister selectOrSplit(const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) override;
+
+ /// Perform register allocation.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+  // Helper for spilling all live virtual registers currently unified under
+  // PhysReg that interfere with VirtReg. Return true if spilling was
+  // successful, and append any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(const LiveInterval &VirtReg, MCRegister PhysReg,
+ SmallVectorImpl<Register> &SplitVRegs);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+char &llvm::RABasicID = RABasic::ID;
+
+INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
+ false)
+
+bool RABasic::LRE_CanEraseVirtReg(Register VirtReg) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (VRM->hasPhys(VirtReg)) {
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ // Nonetheless, clear the live-range so that the debug
+ // dump will show the right state for that VirtReg.
+ LI.clear();
+ return false;
+}
+
+void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
+
+RABasic::RABasic(RegClassFilterFunc F):
+ MachineFunctionPass(ID),
+ RegAllocBase(F) {
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset();
+}
+
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool RABasic::spillInterferences(const LiveInterval &VirtReg,
+ MCRegister PhysReg,
+ SmallVectorImpl<Register> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ SmallVector<const LiveInterval *, 8> Intfs;
+
+ // Collect interferences assigned to any alias of the physical register.
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
+ for (const auto *Intf : reverse(Q.interferingVRegs())) {
+ if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
+ return false;
+ Intfs.push_back(Intf);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI)
+ << " interferences with " << VirtReg << "\n");
+ assert(!Intfs.empty() && "expected interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ const LiveInterval &Spill = *Intfs[i];
+
+ // Skip duplicates.
+ if (!VRM->hasPhys(Spill.reg()))
+ continue;
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ Matrix->unassign(Spill);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ spiller().spill(LRE);
+ }
+ return true;
+}
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we find
+// an available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+MCRegister RABasic::selectOrSplit(const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<MCRegister, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ for (MCRegister PhysReg : Order) {
+ assert(PhysReg.isValid());
+ // Check for interference in PhysReg
+ switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+ case LiveRegMatrix::IK_Free:
+ // PhysReg is available, allocate it.
+ return PhysReg;
+
+ case LiveRegMatrix::IK_VirtReg:
+ // Only virtual registers in the way, we may be able to spill them.
+ PhysRegSpillCands.push_back(PhysReg);
+ continue;
+
+ default:
+ // RegMask or RegUnit interference.
+ continue;
+ }
+ }
+
+ // Try to spill another interfering reg with less spill weight.
+ for (MCRegister &PhysReg : PhysRegSpillCands) {
+ if (!spillInterferences(VirtReg, PhysReg, SplitVRegs))
+ continue;
+
+ assert(!Matrix->checkInterference(VirtReg, PhysReg) &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return PhysReg;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ LLVM_DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ LLVM_DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
+
+ MF = &mf;
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+ VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis<MachineLoopInfo>(),
+ getAnalysis<MachineBlockFrequencyInfo>());
+ VRAI.calculateSpillWeightsAndHints();
+
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));
+
+ allocatePhysRegs();
+ postOptimization();
+
+ // Diagnostic output before rewriting
+ LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ releaseMemory();
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator() {
+ return new RABasic();
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator(RegClassFilterFunc F) {
+ return new RABasic(F);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
new file mode 100644
index 000000000000..81f3d2c8099f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -0,0 +1,311 @@
+//===- RegAllocEvictionAdvisor.cpp - eviction advisor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the default eviction advisor and of the Analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocEvictionAdvisor.h"
+#include "AllocationOrder.h"
+#include "RegAllocGreedy.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
+ "regalloc-enable-advisor", cl::Hidden,
+ cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
+ cl::desc("Enable regalloc advisor mode"),
+ cl::values(
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default,
+ "default", "Default"),
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release,
+ "release", "precompiled"),
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development,
+ "development", "for training")));
+
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
+namespace llvm {
+cl::opt<unsigned> EvictInterferenceCutoff(
+ "regalloc-eviction-max-interference-cutoff", cl::Hidden,
+ cl::desc("Number of interferences after which we declare "
+ "an interference unevictable and bail out. This "
+ "is a compilation cost-saving consideration. To "
+ "disable, pass a very large number."),
+ cl::init(10));
+}
+
+#define DEBUG_TYPE "regalloc"
+#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
+#define LLVM_HAVE_TF_AOT
+#endif
+
+char RegAllocEvictionAdvisorAnalysis::ID = 0;
+INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict",
+ "Regalloc eviction policy", false, true)
+
+namespace {
+class DefaultEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ DefaultEvictionAdvisorAnalysis(bool NotAsRequested)
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Default),
+ NotAsRequested(NotAsRequested) {}
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Default;
+ }
+
+private:
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ return std::make_unique<DefaultEvictionAdvisor>(MF, RA);
+ }
+ bool doInitialization(Module &M) override {
+ if (NotAsRequested)
+ M.getContext().emitError("Requested regalloc eviction advisor analysis "
+ "could be created. Using default");
+ return RegAllocEvictionAdvisorAnalysis::doInitialization(M);
+ }
+ const bool NotAsRequested;
+};
+} // namespace
+
+template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
+ Pass *Ret = nullptr;
+ switch (Mode) {
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default:
+ Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false);
+ break;
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development:
+#if defined(LLVM_HAVE_TFLITE)
+ Ret = createDevelopmentModeAdvisor();
+#endif
+ break;
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release:
+ Ret = createReleaseModeAdvisor();
+ break;
+ }
+ if (Ret)
+ return Ret;
+ return new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ true);
+}
+
+StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
+ switch (getAdvisorMode()) {
+ case AdvisorMode::Default:
+ return "Default Regalloc Eviction Advisor";
+ case AdvisorMode::Release:
+ return "Release mode Regalloc Eviction Advisor";
+ case AdvisorMode::Development:
+ return "Development mode Regalloc Eviction Advisor";
+ }
+ llvm_unreachable("Unknown advisor kind");
+}
+
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
+ const RAGreedy &RA)
+ : MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()),
+ LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
+ MRI(&VRM->getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()),
+ RegClassInfo(RA.getRegClassInfo()), RegCosts(TRI->getRegisterCosts(MF)),
+ EnableLocalReassign(EnableLocalReassignment ||
+ MF.getSubtarget().enableRALocalReassignment(
+ MF.getTarget().getOptLevel())) {}
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool DefaultEvictionAdvisor::shouldEvict(const LiveInterval &A, bool IsHint,
+ const LiveInterval &B,
+ bool BreaksHint) const {
+ bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ if (A.weight() > B.weight()) {
+ LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
+ return true;
+ }
+ return false;
+}
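Condensed into a decision summary (a reading of the function above, not extra logic):

    // shouldEvict(A, IsHint, B, BreaksHint), summarized:
    //   B is still splittable && IsHint && !BreaksHint -> evict (follow the hint)
    //   otherwise                                      -> evict only when
    //                                                     A.weight() > B.weight()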
+
+/// canEvictHintInterference - return true if the interference for VirtReg
+/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
+bool DefaultEvictionAdvisor::canEvictHintInterference(
+ const LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const {
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
+ FixedRegisters);
+}
+
+/// canEvictInterferenceBasedOnCost - Return true if all interferences between
+/// VirtReg and PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
+ const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
+ bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
+
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
+
+ EvictionCost Cost;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
+ // If there are 10 or more interferences, chances are one is heavier.
+ const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff);
+ if (Interferences.size() >= EvictInterferenceCutoff)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (const LiveInterval *Intf : reverse(Interferences)) {
+ assert(Intf->reg().isVirtual() &&
+ "Only expecting virtual register interference from query");
+
+ // Do not allow eviction of a virtual register if we are in the middle
+ // of last-chance recoloring and this virtual register is one that we
+ // have scavenged a physical register for.
+ if (FixedRegisters.count(Intf->reg()))
+ return false;
+
+ // Never evict spill products. They cannot be split or spilled.
+ if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
+ if (Cascade == IntfCascade)
+ return false;
+
+ if (Cascade < IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ if (Urgent)
+ continue;
+ // Apply the eviction policy for non-urgent evictions.
+ if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ // If !MaxCost.isMax(), then we're just looking for a cheap register.
+ // Evicting another local live range in this case could lead to suboptimal
+ // coloring.
+ if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
+ return false;
+ }
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
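To make the cascade rule concrete, a worked example with made-up cascade numbers:

    // Suppose VirtReg has cascade 3. For each interfering vreg on a candidate
    // PhysReg:
    //   cascade 1 (older) -> evictable; shouldEvict() and the running
    //                        EvictionCost decide whether it is worthwhile.
    //   cascade 3 (equal) -> the candidate PhysReg is rejected outright; this
    //                        is what breaks eviction cycles.
    //   cascade 4 (newer) -> evictable only when the eviction is Urgent, and it
    //                        is then charged an extra BrokenHints += 10 so it
    //                        is picked only as a last resort.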
+
+MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost;
+ BestCost.setMax();
+ MCRegister BestPhys;
+ auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
+ if (!MaybeOrderLimit)
+ return MCRegister::NoRegister;
+ unsigned OrderLimit = *MaybeOrderLimit;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < uint8_t(~0u)) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight();
+ }
+
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I) {
+ MCRegister PhysReg = *I;
+ assert(PhysReg);
+ if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) ||
+ !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (I.isHint())
+ break;
+ }
+ return BestPhys;
+}
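One detail of the loop above that is easy to miss (a reading of the code, not new behavior):

    // BestCost is passed by reference into canEvictInterferenceBasedOnCost(),
    // which overwrites it with the computed cost whenever it returns true.
    // Each later PhysReg in the allocation order therefore has to be strictly
    // cheaper than the best candidate found so far, and the scan stops early
    // once an acceptable hint register is reached (I.isHint()).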
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
new file mode 100644
index 000000000000..52dd946a6854
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -0,0 +1,223 @@
+//===- RegAllocEvictionAdvisor.h - Interference resolution ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+class AllocationOrder;
+class LiveInterval;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineFunction;
+class MachineRegisterInfo;
+class RegisterClassInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+
+using SmallVirtRegSet = SmallSet<Register, 16>;
+
+// Live ranges pass through a number of stages as we try to allocate them.
+// Some of the stages may also create new live ranges:
+//
+// - Region splitting.
+// - Per-block splitting.
+// - Local splitting.
+// - Spilling.
+//
+// Ranges produced by one of the stages skip the previous stages when they are
+// dequeued. This improves performance because we can skip interference checks
+// that are unlikely to give any results. It also guarantees that the live
+// range splitting algorithm terminates, something that is otherwise hard to
+// ensure.
+enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// Live range is in memory. Because of other evictions, it might get moved
+ /// in a register in the end.
+ RS_Memory,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+};
+
+/// Cost of evicting interference - used by default advisor, and the eviction
+/// chain heuristic in RegAllocGreedy.
+// FIXME: this can probably be made an implementation detail of the default
+// advisor, if the eviction chain logic can be refactored.
+struct EvictionCost {
+ unsigned BrokenHints = 0; ///< Total number of broken hints.
+ float MaxWeight = 0; ///< Maximum spill weight evicted.
+
+ EvictionCost() = default;
+
+ bool isMax() const { return BrokenHints == ~0u; }
+
+ void setMax() { BrokenHints = ~0u; }
+
+ void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
+ bool operator<(const EvictionCost &O) const {
+ return std::tie(BrokenHints, MaxWeight) <
+ std::tie(O.BrokenHints, O.MaxWeight);
+ }
+};
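A minimal standalone sketch (mirroring, not reusing, the struct above) of what the lexicographic operator< implies:

    #include <cassert>
    #include <tuple>

    // Stand-in for EvictionCost, for illustration only.
    struct Cost {
      unsigned BrokenHints = 0;
      float MaxWeight = 0;
      bool operator<(const Cost &O) const {
        return std::tie(BrokenHints, MaxWeight) <
               std::tie(O.BrokenHints, O.MaxWeight);
      }
    };

    int main() {
      Cost A{0, 100.0f}; // breaks no hints but evicts a very heavy range
      Cost B{1, 0.5f};   // breaks one hint and evicts a light range
      assert(A < B);     // fewer broken hints always wins; MaxWeight only
      return 0;          // breaks ties between equal BrokenHints counts
    }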
+
+/// Interface to the eviction advisor, which is responsible for making a
+/// decision as to which live ranges should be evicted (if any).
+class RAGreedy;
+class RegAllocEvictionAdvisor {
+public:
+ RegAllocEvictionAdvisor(const RegAllocEvictionAdvisor &) = delete;
+ RegAllocEvictionAdvisor(RegAllocEvictionAdvisor &&) = delete;
+ virtual ~RegAllocEvictionAdvisor() = default;
+
+ /// Find a physical register that can be freed by evicting the FixedRegisters,
+ /// or return NoRegister. The eviction decision is assumed to be correct (i.e.
+ /// no fixed live ranges are evicted) and profitable.
+ virtual MCRegister tryFindEvictionCandidate(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const = 0;
+
+ /// Find out if we can evict the live ranges occupying the given PhysReg,
+ /// which is a hint (preferred register) for VirtReg.
+ virtual bool
+ canEvictHintInterference(const LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const = 0;
+
+ /// Returns true if the given \p PhysReg is a callee saved register and has
+ /// not been used for allocation yet.
+ bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
+
+protected:
+ RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
+
+ bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const;
+
+ // Get the upper limit of elements in the given Order we need to analyze.
+ // TODO: this is a heuristic; we could consider learning it.
+ std::optional<unsigned> getOrderLimit(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned CostPerUseLimit) const;
+
+ // Determine if it's worth trying to allocate this reg, given the
+ // CostPerUseLimit
+ // TODO: this is a heuristic component we could consider learning, too.
+ bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const;
+
+ const MachineFunction &MF;
+ const RAGreedy &RA;
+ LiveRegMatrix *const Matrix;
+ LiveIntervals *const LIS;
+ VirtRegMap *const VRM;
+ MachineRegisterInfo *const MRI;
+ const TargetRegisterInfo *const TRI;
+ const RegisterClassInfo &RegClassInfo;
+ const ArrayRef<uint8_t> RegCosts;
+
+ /// Whether to run the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ const bool EnableLocalReassign;
+};
+
+/// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it
+/// as an analysis to decouple the user from the implementation insofar as
+/// dependencies on other analyses go. The motivation for it being an
+/// immutable pass is twofold:
+/// - in the ML implementation case, the evaluator is stateless but (especially
+/// in the development mode) expensive to set up. With an immutable pass, we set
+/// it up once.
+/// - in the 'development' mode ML case, we want to capture the training log
+/// during allocation (this is a log of features encountered and decisions
+/// made), and then measure a score, potentially a few steps after allocation
+/// completes. So we need the properties of an immutable pass to keep the logger
+/// state around until we can make that measurement.
+///
+/// Because we need to offer additional services in 'development' mode, the
+/// implementations of this analysis need to implement RTTI support.
+class RegAllocEvictionAdvisorAnalysis : public ImmutablePass {
+public:
+ enum class AdvisorMode : int { Default, Release, Development };
+
+ RegAllocEvictionAdvisorAnalysis(AdvisorMode Mode)
+ : ImmutablePass(ID), Mode(Mode) {}
+ static char ID;
+
+ /// Get an advisor for the given context (i.e. machine function, etc)
+ virtual std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+ AdvisorMode getAdvisorMode() const { return Mode; }
+ virtual void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward) {}
+
+protected:
+ // This analysis preserves everything, and subclasses may have additional
+ // requirements.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+private:
+ StringRef getPassName() const override;
+ const AdvisorMode Mode;
+};
+
+/// Specialization for the API used by the analysis infrastructure to create
+/// an instance of the eviction advisor.
+template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>();
+
+RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor();
+
+RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
+
+// TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation
+// out of RegAllocGreedy.cpp
+class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
+public:
+ DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA)
+ : RegAllocEvictionAdvisor(MF, RA) {}
+
+private:
+ MCRegister tryFindEvictionCandidate(const LiveInterval &,
+ const AllocationOrder &, uint8_t,
+ const SmallVirtRegSet &) const override;
+ bool canEvictHintInterference(const LiveInterval &, MCRegister,
+ const SmallVirtRegSet &) const override;
+ bool canEvictInterferenceBasedOnCost(const LiveInterval &, MCRegister, bool,
+ EvictionCost &,
+ const SmallVirtRegSet &) const;
+ bool shouldEvict(const LiveInterval &A, bool, const LiveInterval &B,
+ bool) const;
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
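As a usage sketch of the interfaces declared in this header, here is a hypothetical advisor that never proposes evictions. The class name is made up and it is not registered anywhere; it only shows which methods a subclass has to override:

    // Assumes the header above is included.
    namespace {
    class NeverEvictAdvisor : public llvm::RegAllocEvictionAdvisor {
    public:
      NeverEvictAdvisor(const llvm::MachineFunction &MF, const llvm::RAGreedy &RA)
          : RegAllocEvictionAdvisor(MF, RA) {}

    private:
      llvm::MCRegister
      tryFindEvictionCandidate(const llvm::LiveInterval &,
                               const llvm::AllocationOrder &, uint8_t,
                               const llvm::SmallVirtRegSet &) const override {
        return llvm::MCRegister::NoRegister; // never free a register by eviction
      }
      bool canEvictHintInterference(const llvm::LiveInterval &, llvm::MCRegister,
                                    const llvm::SmallVirtRegSet &) const override {
        return false; // never evict to satisfy a hint either
      }
    };
    } // namespace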
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 000000000000..864beb8720f4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1673 @@
+//===- RegAllocFast.cpp - A fast register allocator for debug code --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This register allocator allocates registers to a basic block at a
+/// time, attempting to keep values in registers and reusing registers as
+/// appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocCommon.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <tuple>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCoalesced, "Number of copies coalesced");
+
+// FIXME: Remove this switch when all testcases are fixed!
+static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs",
+ cl::Hidden);
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+
+ class RegAllocFast : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses,
+ bool ClearVirtRegs_ = true) :
+ MachineFunctionPass(ID),
+ ShouldAllocateClass(F),
+ StackSlotForVirtReg(-1),
+ ClearVirtRegs(ClearVirtRegs_) {
+ }
+
+ private:
+ MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+ const RegClassFilterFunc ShouldAllocateClass;
+
+ /// Basic block currently being allocated.
+ MachineBasicBlock *MBB = nullptr;
+
+ /// Maps virtual regs to the frame index where these values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ bool ClearVirtRegs;
+
+ /// Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
+ Register VirtReg; ///< Virtual register number.
+ MCPhysReg PhysReg = 0; ///< Currently held here.
+ bool LiveOut = false; ///< Register is possibly live out.
+ bool Reloaded = false; ///< Register was reloaded.
+ bool Error = false; ///< Could not allocate.
+
+ explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
+
+ unsigned getSparseSetIndex() const {
+ return Register::virtReg2Index(VirtReg);
+ }
+ };
+
+ using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
+ /// This map contains entries for each virtual register that is currently
+ /// available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ /// Stores assigned virtual registers present in the bundle MI.
+ DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+
+ DenseMap<unsigned, SmallVector<MachineOperand *, 2>> LiveDbgValueMap;
+ /// List of DBG_VALUEs that we encountered without the vreg being assigned
+ /// because they were placed after the last use of the vreg.
+ DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
+
+ /// Has a bit set for every virtual register for which it was determined
+ /// that it is alive across blocks.
+ BitVector MayLiveAcrossBlocks;
+
+ /// State of a register unit.
+ enum RegUnitState {
+ /// A free register is not currently in use and can be allocated
+ /// immediately without checking aliases.
+ regFree,
+
+ /// A pre-assigned register has been assigned before register allocation
+ /// (e.g., setting up a call parameter).
+ regPreAssigned,
+
+ /// Used temporarily in reloadAtBegin() to mark register units that are
+ /// live-in to the basic block.
+ regLiveIn,
+
+ /// A register state may also be a virtual register number, indicating
+ /// that the physical register is currently allocated to a virtual
+ /// register. In that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ /// Maps each register unit to a RegUnitState enum or virtual register.
+ std::vector<unsigned> RegUnitStates;
+
+ SmallVector<MachineInstr *, 32> Coalesced;
+
+ using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
+ /// Set of register units that are used in the current instruction, and so
+ /// cannot be allocated.
+ RegUnitSet UsedInInstr;
+ RegUnitSet PhysRegUses;
+ SmallVector<uint16_t, 8> DefOperandIndexes;
+ // Register masks attached to the current instruction.
+ SmallVector<const uint32_t *> RegMasks;
+
+ void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+ bool isPhysRegFree(MCPhysReg PhysReg) const;
+
+ /// Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.insert(Unit);
+ }
+
+ // Check if physreg is clobbered by instruction's regmask(s).
+ bool isClobberedByRegMasks(MCPhysReg PhysReg) const {
+ return llvm::any_of(RegMasks, [PhysReg](const uint32_t *Mask) {
+ return MachineOperand::clobbersPhysReg(Mask, PhysReg);
+ });
+ }
+
+ /// Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
+ if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
+ return true;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (UsedInInstr.count(Unit))
+ return true;
+ if (LookAtPhysRegUses && PhysRegUses.count(Unit))
+ return true;
+ }
+ return false;
+ }
+
+ /// Mark physical register as being used in a register use operand.
+ /// This is only used by the special livethrough handling code.
+ void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ PhysRegUses.insert(Unit);
+ }
+
+ /// Remove mark of physical register being used in the instruction.
+ void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.erase(Unit);
+ }
+
+ enum : unsigned {
+ spillClean = 50,
+ spillDirty = 100,
+ spillPrefBonus = 20,
+ spillImpossible = ~0u
+ };
+
+ public:
+ StringRef getPassName() const override { return "Fast Register Allocator"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ MachineFunctionProperties getSetProperties() const override {
+ if (ClearVirtRegs) {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ return MachineFunctionProperties();
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ private:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void allocateBasicBlock(MachineBasicBlock &MBB);
+
+ void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const;
+
+ void findAndSortDefOperandIndexes(const MachineInstr &MI);
+
+ void allocateInstruction(MachineInstr &MI);
+ void handleDebugValue(MachineInstr &MI);
+ void handleBundle(MachineInstr &MI);
+
+ bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ void freePhysReg(MCPhysReg PhysReg);
+
+ unsigned calcSpillCost(MCPhysReg PhysReg) const;
+
+ LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
+ }
+
+ LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const {
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
+ }
+
+ void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
+ bool LookAtPhysRegUses = false);
+ void allocVirtRegUndef(MachineOperand &MO);
+ void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
+ MCPhysReg Reg);
+ bool defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg);
+ bool defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ bool LookAtPhysRegUses = false);
+ bool useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
+
+ MachineBasicBlock::iterator
+ getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
+ SmallSet<Register, 2> &PrologLiveIns) const;
+
+ void reloadAtBegin(MachineBasicBlock &MBB);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+
+ Register traceCopies(Register VirtReg) const;
+ Register traceCopyChain(Register Reg) const;
+
+ bool shouldAllocateRegister(const Register Reg) const;
+ int getStackSpaceFor(Register VirtReg);
+ void spill(MachineBasicBlock::iterator Before, Register VirtReg,
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut);
+ void reload(MachineBasicBlock::iterator Before, Register VirtReg,
+ MCPhysReg PhysReg);
+
+ bool mayLiveOut(Register VirtReg);
+ bool mayLiveIn(Register VirtReg);
+
+ void dumpState() const;
+ };
+
+} // end anonymous namespace
+
+char RegAllocFast::ID = 0;
+
+INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
+ false)
+
+bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
+ assert(Reg.isVirtual());
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+ return ShouldAllocateClass(*TRI, RC);
+}
+
+void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ RegUnitStates[Unit] = NewState;
+}
+
+bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (RegUnitStates[Unit] != regFree)
+ return false;
+ }
+ return true;
+}
+
+/// This allocates space for the specified virtual register to be held on the
+/// stack.
+int RegAllocFast::getStackSpaceFor(Register VirtReg) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ // Already has space allocated?
+ if (SS != -1)
+ return SS;
+
+ // Allocate a new stack object for this spill location...
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ unsigned Size = TRI->getSpillSize(RC);
+ Align Alignment = TRI->getSpillAlign(RC);
+ int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+static bool dominates(MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) {
+ auto MBBEnd = MBB.end();
+ if (B == MBBEnd)
+ return true;
+
+ MachineBasicBlock::const_iterator I = MBB.begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+
+ return &*I == A;
+}
+
+/// Returns false if \p VirtReg is known to not live out of the current block.
+bool RegAllocFast::mayLiveOut(Register VirtReg) {
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
+ // Cannot be live-out if there are no successors.
+ return !MBB->succ_empty();
+ }
+
+ const MachineInstr *SelfLoopDef = nullptr;
+
+ // If this block loops back to itself, it is necessary to check whether the
+ // use comes after the def.
+ if (MBB->isSuccessor(MBB)) {
+ // Find the first def in the self loop MBB.
+ for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+ if (DefInst.getParent() != MBB) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ } else {
+ if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
+ SelfLoopDef = &DefInst;
+ }
+ }
+ if (!SelfLoopDef) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
+ }
+
+ // See if the first \p Limit uses of the register are all in the current
+ // block.
+ static const unsigned Limit = 8;
+ unsigned C = 0;
+ for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
+ if (UseInst.getParent() != MBB || ++C >= Limit) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ // Cannot be live-out if there are no successors.
+ return !MBB->succ_empty();
+ }
+
+ if (SelfLoopDef) {
+ // Try to handle some simple cases to avoid spilling and reloading every
+ // value inside a self looping block.
+ if (SelfLoopDef == &UseInst ||
+ !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
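An illustrative trace of the self-loop handling, using a made-up block:

    // bb.1 (a block that is its own successor):
    //   %v = ...       ; the first def found in the block becomes SelfLoopDef
    //   ... = use %v   ; this use is dominated by the def -> can stay local
    //
    // If instead some use of %v is not dominated by SelfLoopDef (it appears
    // before the def, or on the def instruction itself), the value has to
    // survive the back edge, so the vreg is recorded in MayLiveAcrossBlocks
    // and mayLiveOut() returns true.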
+
+/// Returns false if \p VirtReg is known to not be live into the current block.
+bool RegAllocFast::mayLiveIn(Register VirtReg) {
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
+ return !MBB->pred_empty();
+
+ // See if the first \p Limit defs of the register are all in the current block.
+ static const unsigned Limit = 8;
+ unsigned C = 0;
+ for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+ if (DefInst.getParent() != MBB || ++C >= Limit) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return !MBB->pred_empty();
+ }
+ }
+
+ return false;
+}
+
+/// Insert spill instruction for \p AssignedReg before \p Before. Update
+/// DBG_VALUEs that have \p VirtReg operands to refer to the stack slot.
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut) {
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
+ << " in " << printReg(AssignedReg, TRI));
+ int FI = getStackSpaceFor(VirtReg);
+ LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI,
+ VirtReg);
+ ++NumStores;
+
+ MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator();
+
+ // When we spill a virtual register, we will have spill instructions behind
+ // every definition of it, meaning we can switch all the DBG_VALUEs over
+ // to just reference the stack slot.
+ SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
+ SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>
+ SpilledOperandsMap;
+ for (MachineOperand *MO : LRIDbgOperands)
+ SpilledOperandsMap[MO->getParent()].push_back(MO);
+ for (auto MISpilledOperands : SpilledOperandsMap) {
+ MachineInstr &DBG = *MISpilledOperands.first;
+ // We don't have enough support for tracking operands of DBG_VALUE_LISTs.
+ if (DBG.isDebugValueList())
+ continue;
+ MachineInstr *NewDV = buildDbgValueForSpill(
+ *MBB, Before, *MISpilledOperands.first, FI, MISpilledOperands.second);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
+ (void)NewDV;
+ LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+
+ if (LiveOut) {
+ // We need to insert a DBG_VALUE at the end of the block if the spill slot
+ // is live out, but there is another use of the value after the
+ // spill. This will allow LiveDebugValues to see the correct live out
+ // value to propagate to the successors.
+ MachineInstr *ClonedDV = MBB->getParent()->CloneMachineInstr(NewDV);
+ MBB->insert(FirstTerm, ClonedDV);
+ LLVM_DEBUG(dbgs() << "Cloning debug info due to live out spill\n");
+ }
+
+ // Rewrite unassigned dbg_values to use the stack slot.
+ // TODO We can potentially do this for list debug values as well if we know
+ // how the dbg_values are getting unassigned.
+ if (DBG.isNonListDebugValue()) {
+ MachineOperand &MO = DBG.getDebugOperand(0);
+ if (MO.isReg() && MO.getReg() == 0) {
+ updateDbgValueForSpill(DBG, FI, 0);
+ }
+ }
+ }
+ // Now that this register has been spilled, there should not be any DBG_VALUE
+ // pointing to it, because they all point to the spilled value now.
+ LRIDbgOperands.clear();
+}
+
+/// Insert reload instruction for \p PhysReg before \p Before.
+void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
+ MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+ << printReg(PhysReg, TRI) << '\n');
+ int FI = getStackSpaceFor(VirtReg);
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI, VirtReg);
+ ++NumLoads;
+}
+
+/// Get basic block begin insertion point.
+/// This is not just MBB.begin() because surprisingly we have EH_LABEL
+/// instructions marking the beginning of a basic block. This means we must
+/// insert new instructions after such labels...
+/// new instructions after such labels...
+MachineBasicBlock::iterator
+RegAllocFast::getMBBBeginInsertionPoint(
+ MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ if (I->isLabel()) {
+ ++I;
+ continue;
+ }
+
+ // Most reloads should be inserted after prolog instructions.
+ if (!TII->isBasicBlockPrologue(*I))
+ break;
+
+ // However if a prolog instruction reads a register that needs to be
+ // reloaded, the reload should be inserted before the prolog.
+ for (MachineOperand &MO : I->operands()) {
+ if (MO.isReg())
+ PrologLiveIns.insert(MO.getReg());
+ }
+
+ ++I;
+ }
+
+ return I;
+}
+
+/// Reload all currently assigned virtual registers.
+void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
+ if (LiveVirtRegs.empty())
+ return;
+
+ for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+ MCPhysReg Reg = P.PhysReg;
+ // Set state to live-in. This possibly overrides mappings to virtual
+ // registers but we don't care anymore at this point.
+ setPhysRegState(Reg, regLiveIn);
+ }
+
+
+ SmallSet<Register, 2> PrologLiveIns;
+
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ MachineBasicBlock::iterator InsertBefore
+ = getMBBBeginInsertionPoint(MBB, PrologLiveIns);
+ for (const LiveReg &LR : LiveVirtRegs) {
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg == 0)
+ continue;
+
+ MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
+ if (RegUnitStates[FirstUnit] == regLiveIn)
+ continue;
+
+ assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) &&
+ "no reload in start block. Missing vreg def?");
+
+ if (PrologLiveIns.count(PhysReg)) {
+ // FIXME: Theoretically this should use an insert point skipping labels
+ // but I'm not sure how labels should interact with prolog instructions
+ // that need reloads.
+ reload(MBB.begin(), LR.VirtReg, PhysReg);
+ } else
+ reload(InsertBefore, LR.VirtReg, PhysReg);
+ }
+ LiveVirtRegs.clear();
+}
+
+/// Handle the direct use of a physical register. Check that the register is
+/// not used by a virtreg. Kill the physreg, marking it free. This may add
+/// implicit kills to MO->getParent() and invalidate MO.
+bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+ assert(Register::isPhysicalRegister(Reg) && "expected physreg");
+ bool displacedAny = displacePhysReg(MI, Reg);
+ setPhysRegState(Reg, regPreAssigned);
+ markRegUsedInInstr(Reg);
+ return displacedAny;
+}
+
+bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+ bool displacedAny = displacePhysReg(MI, Reg);
+ setPhysRegState(Reg, regPreAssigned);
+ return displacedAny;
+}
+
+/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
+/// similar to defineVirtReg except the physreg is reserved instead of
+/// allocated.
+bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
+ bool displacedAny = false;
+
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
+ default: {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "datastructures in sync");
+ MachineBasicBlock::iterator ReloadBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ reload(ReloadBefore, VirtReg, LRI->PhysReg);
+
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
+ LRI->Reloaded = true;
+ displacedAny = true;
+ break;
+ }
+ case regPreAssigned:
+ RegUnitStates[Unit] = regFree;
+ displacedAny = true;
+ break;
+ case regFree:
+ break;
+ }
+ }
+ return displacedAny;
+}
+
+void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
+
+ MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
+ switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
+ case regFree:
+ LLVM_DEBUG(dbgs() << '\n');
+ return;
+ case regPreAssigned:
+ LLVM_DEBUG(dbgs() << '\n');
+ setPhysRegState(PhysReg, regFree);
+ return;
+ default: {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end());
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
+ }
+ return;
+ }
+}
+
+/// Return the cost of spilling to clear out PhysReg and its aliases so that
+/// the register is free for allocation. Returns 0 when PhysReg is free or
+/// disabled, with all aliases disabled - in that case it can be allocated
+/// directly.
+/// \returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
+ case regFree:
+ break;
+ case regPreAssigned:
+ LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned "
+ << printReg(PhysReg, TRI) << '\n');
+ return spillImpossible;
+ default: {
+ bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 ||
+ findLiveVirtReg(VirtReg)->LiveOut;
+ return SureSpill ? spillClean : spillDirty;
+ }
+ }
+ }
+ return 0;
+}
+
+void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
+ Register VirtReg, MCPhysReg Reg) {
+ auto UDBGValIter = DanglingDbgValues.find(VirtReg);
+ if (UDBGValIter == DanglingDbgValues.end())
+ return;
+
+ SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
+ for (MachineInstr *DbgValue : Dangling) {
+ assert(DbgValue->isDebugValue());
+ if (!DbgValue->hasDebugOperandForReg(VirtReg))
+ continue;
+
+ // Test whether the physreg survives from the definition to the DBG_VALUE.
+ MCPhysReg SetToReg = Reg;
+ unsigned Limit = 20;
+ for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()),
+ E = DbgValue->getIterator(); I != E; ++I) {
+ if (I->modifiesRegister(Reg, TRI) || --Limit == 0) {
+ LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+ << '\n');
+ SetToReg = 0;
+ break;
+ }
+ }
+ for (MachineOperand &MO : DbgValue->getDebugOperandsForReg(VirtReg)) {
+ MO.setReg(SetToReg);
+ if (SetToReg != 0)
+ MO.setIsRenamable();
+ }
+ }
+ Dangling.clear();
+}
+
+/// This method updates local state so that we know that PhysReg is the
+/// proper container for VirtReg now. The physical register must not be used
+/// for anything else when this is called.
+void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
+ MCPhysReg PhysReg) {
+ Register VirtReg = LR.VirtReg;
+ LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << '\n');
+ assert(LR.PhysReg == 0 && "Already assigned a physreg");
+ assert(PhysReg != 0 && "Trying to assign no register");
+ LR.PhysReg = PhysReg;
+ setPhysRegState(PhysReg, VirtReg);
+
+ assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
+}
+
+static bool isCoalescable(const MachineInstr &MI) {
+ return MI.isFullCopy();
+}
+
+Register RegAllocFast::traceCopyChain(Register Reg) const {
+ static const unsigned ChainLengthLimit = 3;
+ unsigned C = 0;
+ do {
+ if (Reg.isPhysical())
+ return Reg;
+ assert(Reg.isVirtual());
+
+ MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
+ if (!VRegDef || !isCoalescable(*VRegDef))
+ return 0;
+ Reg = VRegDef->getOperand(1).getReg();
+ } while (++C <= ChainLengthLimit);
+ return 0;
+}
+
+/// Check if any of \p VirtReg's definitions is a copy. If it is follow the
+/// chain of copies to check whether we reach a physical register we can
+/// coalesce with.
+Register RegAllocFast::traceCopies(Register VirtReg) const {
+ static const unsigned DefLimit = 3;
+ unsigned C = 0;
+ for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
+ if (isCoalescable(MI)) {
+ Register Reg = MI.getOperand(1).getReg();
+ Reg = traceCopyChain(Reg);
+ if (Reg.isValid())
+ return Reg;
+ }
+
+ if (++C >= DefLimit)
+ break;
+ }
+ return Register();
+}
+
+/// Allocates a physical register for VirtReg.
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
+ Register Hint0, bool LookAtPhysRegUses) {
+ const Register VirtReg = LR.VirtReg;
+ assert(LR.PhysReg == 0);
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
+ << " in class " << TRI->getRegClassName(&RC)
+ << " with hint " << printReg(Hint0, TRI) << '\n');
+
+ // Take hint when possible.
+ if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) &&
+ !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) {
+ // Take hint if the register is currently free.
+ if (isPhysRegFree(Hint0)) {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+ << '\n');
+ assignVirtToPhysReg(MI, LR, Hint0);
+ return;
+ } else {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI)
+ << " occupied\n");
+ }
+ } else {
+ Hint0 = Register();
+ }
+
+
+ // Try other hint.
+ Register Hint1 = traceCopies(VirtReg);
+ if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+ !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) {
+ // Take hint if the register is currently free.
+ if (isPhysRegFree(Hint1)) {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+ << '\n');
+ assignVirtToPhysReg(MI, LR, Hint1);
+ return;
+ } else {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI)
+ << " occupied\n");
+ }
+ } else {
+ Hint1 = Register();
+ }
+
+ MCPhysReg BestReg = 0;
+ unsigned BestCost = spillImpossible;
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ for (MCPhysReg PhysReg : AllocationOrder) {
+ LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
+ if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) {
+ LLVM_DEBUG(dbgs() << "already used in instr.\n");
+ continue;
+ }
+
+ unsigned Cost = calcSpillCost(PhysReg);
+ LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
+ // Immediately take a register with cost 0.
+ if (Cost == 0) {
+ assignVirtToPhysReg(MI, LR, PhysReg);
+ return;
+ }
+
+ if (PhysReg == Hint0 || PhysReg == Hint1)
+ Cost -= spillPrefBonus;
+
+ if (Cost < BestCost) {
+ BestReg = PhysReg;
+ BestCost = Cost;
+ }
+ }
+
+ if (!BestReg) {
+ // Nothing we can do: Report an error and keep going with an invalid
+ // allocation.
+ if (MI.isInlineAsm())
+ MI.emitError("inline assembly requires more registers than available");
+ else
+ MI.emitError("ran out of registers during register allocation");
+
+ LR.Error = true;
+ LR.PhysReg = 0;
+ return;
+ }
+
+ displacePhysReg(MI, BestReg);
+ assignVirtToPhysReg(MI, LR, BestReg);
+}
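A worked reading of the cost arithmetic above, using the constants declared earlier (the scenarios are hypothetical):

    // spillClean = 50, spillDirty = 100, spillPrefBonus = 20, spillImpossible = ~0u
    //   free register                          -> cost 0, taken immediately
    //   hinted register needing a dirty spill  -> 100 - 20 = 80, which still
    //                                             loses to a non-hinted register
    //                                             that only needs a clean
    //                                             re-spill (50)
    //   pre-assigned register                  -> spillImpossible, never chosen;
    //                                             if no candidate is cheaper than
    //                                             that, the error path above runs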
+
+void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
+ assert(MO.isUndef() && "expected undef use");
+ Register VirtReg = MO.getReg();
+ assert(VirtReg.isVirtual() && "Expected virtreg");
+ if (!shouldAllocateRegister(VirtReg))
+ return;
+
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ MCPhysReg PhysReg;
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ PhysReg = LRI->PhysReg;
+ } else {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ assert(!AllocationOrder.empty() && "Allocation order must not be empty");
+ PhysReg = AllocationOrder[0];
+ }
+
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
+ MO.setSubReg(0);
+ }
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(true);
+}
+
+/// Variation of defineVirtReg() with special handling for livethrough regs
+/// (tied or earlyclobber) that may interfere with preassigned uses.
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg) {
+ if (!shouldAllocateRegister(VirtReg))
+ return false;
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end()) {
+ MCPhysReg PrevReg = LRI->PhysReg;
+ if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) {
+ LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI)
+ << " (tied/earlyclobber resolution)\n");
+ freePhysReg(PrevReg);
+ LRI->PhysReg = 0;
+ allocVirtReg(MI, *LRI, 0, true);
+ MachineBasicBlock::iterator InsertBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to "
+ << printReg(PrevReg, TRI) << '\n');
+ BuildMI(*MBB, InsertBefore, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), PrevReg)
+ .addReg(LRI->PhysReg, llvm::RegState::Kill);
+ }
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (MO.getSubReg() && !MO.isUndef()) {
+ LRI->LastUse = &MI;
+ }
+ }
+ return defineVirtReg(MI, OpNum, VirtReg, true);
+}
+
+/// Allocates a register for VirtReg definition. Typically the register is
+/// already assigned from a use of the virtreg; however, we still need to
+/// perform an allocation if:
+/// - It is a dead definition without any uses.
+/// - The value is live out and all uses are in different basic blocks.
+///
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg, bool LookAtPhysRegUses) {
+ assert(VirtReg.isVirtual() && "Not a virtual register");
+ if (!shouldAllocateRegister(VirtReg))
+ return false;
+ MachineOperand &MO = MI.getOperand(OpNum);
+ LiveRegMap::iterator LRI;
+ bool New;
+ std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ if (New) {
+ if (!MO.isDead()) {
+ if (mayLiveOut(VirtReg)) {
+ LRI->LiveOut = true;
+ } else {
+ // It is a dead def without the dead flag; add the flag now.
+ MO.setIsDead(true);
+ }
+ }
+ }
+ if (LRI->PhysReg == 0)
+ allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses);
+ else {
+ assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) &&
+ "TODO: preassign mismatch");
+ LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI)
+ << " use existing assignment to "
+ << printReg(LRI->PhysReg, TRI) << '\n');
+ }
+
+ MCPhysReg PhysReg = LRI->PhysReg;
+ assert(PhysReg != 0 && "Register not assigned");
+ if (LRI->Reloaded || LRI->LiveOut) {
+ if (!MI.isImplicitDef()) {
+ MachineBasicBlock::iterator SpillBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
+ << LRI->Reloaded << '\n');
+ bool Kill = LRI->LastUse == nullptr;
+ spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut);
+
+ // We need to place additional spills for each indirect destination of an
+ // INLINEASM_BR.
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) {
+ int FI = StackSlotForVirtReg[VirtReg];
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isMBB()) {
+ MachineBasicBlock *Succ = MO.getMBB();
+ TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill,
+ FI, &RC, TRI, VirtReg);
+ ++NumStores;
+ Succ->addLiveIn(PhysReg);
+ }
+ }
+ }
+
+ LRI->LastUse = nullptr;
+ }
+ LRI->LiveOut = false;
+ LRI->Reloaded = false;
+ }
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = PhysReg;
+ }
+ markRegUsedInInstr(PhysReg);
+ return setPhysReg(MI, MO, PhysReg);
+}
+
+/// Allocates a register for a VirtReg use.
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg) {
+ assert(VirtReg.isVirtual() && "Not a virtual register");
+ if (!shouldAllocateRegister(VirtReg))
+ return false;
+ MachineOperand &MO = MI.getOperand(OpNum);
+ LiveRegMap::iterator LRI;
+ bool New;
+ std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ if (New) {
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (!MO.isKill()) {
+ if (mayLiveOut(VirtReg)) {
+ LRI->LiveOut = true;
+ } else {
+ // It is a last (killing) use without the kill flag; add the flag now.
+ MO.setIsKill(true);
+ }
+ }
+ } else {
+ assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag");
+ }
+
+ // If necessary allocate a register.
+ if (LRI->PhysReg == 0) {
+ assert(!MO.isTied() && "tied op should be allocated");
+ Register Hint;
+ if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
+ Hint = MI.getOperand(0).getReg();
+ if (Hint.isVirtual()) {
+ assert(!shouldAllocateRegister(Hint));
+ Hint = Register();
+ } else {
+ assert(Hint.isPhysical() &&
+ "Copy destination should already be assigned");
+ }
+ }
+ allocVirtReg(MI, *LRI, Hint, false);
+ if (LRI->Error) {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ return setPhysReg(MI, MO, *AllocationOrder.begin());
+ }
+ }
+
+ LRI->LastUse = &MI;
+
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
+ }
+ markRegUsedInInstr(LRI->PhysReg);
+ return setPhysReg(MI, MO, LRI->PhysReg);
+}
+
+/// Changes operand OpNum in MI to refer to PhysReg, considering subregs.
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+ MCPhysReg PhysReg) {
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(true);
+ return false;
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : MCRegister());
+ MO.setIsRenamable(true);
+ // Note: We leave the subreg number around a little longer in case of defs.
+ // This is so that the register freeing logic in allocateInstruction can still
+ // recognize this as a subregister def. The code there will clear the number.
+ if (!MO.isDef())
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI.addRegisterKilled(PhysReg, TRI, true);
+ // Conservatively assume implicit MOs were re-arranged
+ return true;
+ }
+
+ // A <def,read-undef> of a sub-register requires an implicit def of the full
+ // register.
+ if (MO.isDef() && MO.isUndef()) {
+ if (MO.isDead())
+ MI.addRegisterDead(PhysReg, TRI, true);
+ else
+ MI.addRegisterDefined(PhysReg, TRI);
+ // Conservatively assume implicit MOs were re-arranged
+ return true;
+ }
+ return false;
+}
+
+#ifndef NDEBUG
+
+void RegAllocFast::dumpState() const {
+ for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
+ ++Unit) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
+ case regFree:
+ break;
+ case regPreAssigned:
+ dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
+ break;
+ case regLiveIn:
+ llvm_unreachable("Should not have regLiveIn in map");
+ default: {
+ dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
+ if (I->LiveOut || I->Reloaded) {
+ dbgs() << '[';
+ if (I->LiveOut) dbgs() << 'O';
+ if (I->Reloaded) dbgs() << 'R';
+ dbgs() << ']';
+ }
+ assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
+ break;
+ }
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (const LiveReg &LR : LiveVirtRegs) {
+ Register VirtReg = LR.VirtReg;
+ assert(VirtReg.isVirtual() && "Bad map key");
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg != 0) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "mapped to physreg");
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ assert(RegUnitStates[Unit] == VirtReg && "inverse map valid");
+ }
+ }
+ }
+}
+#endif
+
+/// Count number of defs consumed from each register class by \p Reg
+void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const {
+ assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
+
+ if (Reg.isVirtual()) {
+ if (!shouldAllocateRegister(Reg))
+ return;
+ const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
+ for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+ RCIdx != RCIdxEnd; ++RCIdx) {
+ const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+ // FIXME: Consider aliasing sub/super registers.
+ if (OpRC->hasSubClassEq(IdxRC))
+ ++RegClassDefCounts[RCIdx];
+ }
+
+ return;
+ }
+
+ for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+ RCIdx != RCIdxEnd; ++RCIdx) {
+ const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+ for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
+ if (IdxRC->contains(*Alias)) {
+ ++RegClassDefCounts[RCIdx];
+ break;
+ }
+ }
+ }
+}
+
+/// Compute \ref DefOperandIndexes so it contains the indices of "def" operands
+/// that are to be allocated. Those are ordered in a way that small classes,
+/// early clobbers and livethroughs are allocated first.
+void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) {
+ DefOperandIndexes.clear();
+
+ // Track number of defs which may consume a register from the class.
+ std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+ assert(RegClassDefCounts[0] == 0);
+
+ LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MO.readsReg()) {
+ if (Reg.isPhysical()) {
+ LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) << '\n');
+ markPhysRegUsedInInstr(Reg);
+ }
+ }
+
+ if (MO.isDef()) {
+ if (Reg.isVirtual() && shouldAllocateRegister(Reg))
+ DefOperandIndexes.push_back(I);
+
+ addRegClassDefCounts(RegClassDefCounts, Reg);
+ }
+ }
+
+ llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+ const MachineOperand &MO0 = MI.getOperand(I0);
+ const MachineOperand &MO1 = MI.getOperand(I1);
+ Register Reg0 = MO0.getReg();
+ Register Reg1 = MO1.getReg();
+ const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
+ const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
+
+ // Identify register classes that are easy to use up completely just in this
+ // instruction.
+ unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
+ unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
+
+ bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
+ bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
+ if (SmallClass0 > SmallClass1)
+ return true;
+ if (SmallClass0 < SmallClass1)
+ return false;
+
+ // Allocate early clobbers and livethrough operands first.
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
+ (MO0.getSubReg() == 0 && !MO0.isUndef());
+ bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
+ (MO1.getSubReg() == 0 && !MO1.isUndef());
+ if (Livethrough0 > Livethrough1)
+ return true;
+ if (Livethrough0 < Livethrough1)
+ return false;
+
+ // Tie-break rule: operand index.
+ return I0 < I1;
+ });
+}
+
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+ // The basic algorithm here is:
+ // 1. Mark registers of def operands as free
+ // 2. Allocate registers to use operands and place reload instructions for
+ // registers displaced by the allocation.
+ //
+ // However we need to handle some corner cases:
+ // - pre-assigned defs and uses need to be handled before the other def/use
+ // operands are processed to avoid the allocation heuristics clashing with
+ // the pre-assignment.
+ // - The "free def operands" step has to come last instead of first for tied
+ // operands and early-clobbers.
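+ //
+ // For example, a tied def must keep its register occupied until the tied use
+ // has been allocated to the same physreg, and an early-clobber def must stay
+ // blocked until all uses are allocated so that no use ends up in the
+ // clobbered register.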
+
+ UsedInInstr.clear();
+ RegMasks.clear();
+ BundleVirtRegsMap.clear();
+
+ auto TiedOpIsUndef = [&](const MachineOperand &MO, unsigned Idx) {
+ assert(MO.isTied());
+ unsigned TiedIdx = MI.findTiedOperandIdx(Idx);
+ const MachineOperand &TiedMO = MI.getOperand(TiedIdx);
+ return TiedMO.isUndef();
+ };
+ // Scan for special cases; Apply pre-assigned register defs to state.
+ bool HasPhysRegUse = false;
+ bool HasRegMask = false;
+ bool HasVRegDef = false;
+ bool HasDef = false;
+ bool HasEarlyClobber = false;
+ bool NeedToAssignLiveThroughs = false;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ if (!shouldAllocateRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ HasDef = true;
+ HasVRegDef = true;
+ if (MO.isEarlyClobber()) {
+ HasEarlyClobber = true;
+ NeedToAssignLiveThroughs = true;
+ }
+ if ((MO.isTied() && !TiedOpIsUndef(MO, I)) ||
+ (MO.getSubReg() != 0 && !MO.isUndef()))
+ NeedToAssignLiveThroughs = true;
+ }
+ } else if (Reg.isPhysical()) {
+ if (!MRI->isReserved(Reg)) {
+ if (MO.isDef()) {
+ HasDef = true;
+ bool displacedAny = definePhysReg(MI, Reg);
+ if (MO.isEarlyClobber())
+ HasEarlyClobber = true;
+ if (!displacedAny)
+ MO.setIsDead(true);
+ }
+ if (MO.readsReg())
+ HasPhysRegUse = true;
+ }
+ }
+ } else if (MO.isRegMask()) {
+ HasRegMask = true;
+ RegMasks.push_back(MO.getRegMask());
+ }
+ }
+
+ // Allocate virtreg defs.
+ if (HasDef) {
+ if (HasVRegDef) {
+ // Note that Implicit MOs can get re-arranged by defineVirtReg(), so loop
+ // multiple times to ensure no operand is missed.
+ bool ReArrangedImplicitOps = true;
+
+ // Special handling for early clobbers, tied operands or subregister defs:
+ // Compared to "normal" defs these:
+ // - Must not use a register that is pre-assigned for a use operand.
+ // - In order to solve tricky inline assembly constraints we change the
+ // heuristic to figure out a good operand order before doing
+ // assignments.
+ if (NeedToAssignLiveThroughs) {
+ PhysRegUses.clear();
+
+ while (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps = false;
+ findAndSortDefOperandIndexes(MI);
+ for (uint16_t OpIdx : DefOperandIndexes) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
+ unsigned Reg = MO.getReg();
+ if (MO.isEarlyClobber() ||
+ (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
+ (MO.getSubReg() && !MO.isUndef())) {
+ ReArrangedImplicitOps = defineLiveThroughVirtReg(MI, OpIdx, Reg);
+ } else {
+ ReArrangedImplicitOps = defineVirtReg(MI, OpIdx, Reg);
+ }
+ if (ReArrangedImplicitOps) {
+ // Implicit operands of MI were re-arranged,
+ // re-compute DefOperandIndexes.
+ break;
+ }
+ }
+ }
+ } else {
+ // Assign virtual register defs.
+ while (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps = false;
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ ReArrangedImplicitOps = defineVirtReg(MI, I, Reg);
+ if (ReArrangedImplicitOps) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Free registers occupied by defs.
+ // Iterate operands in reverse order, so we see the implicit super register
+ // defs first (we added them earlier in case of <def,read-undef>).
+ for (signed I = MI.getNumOperands() - 1; I >= 0; --I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ Register Reg = MO.getReg();
+
+ // subreg defs don't free the full register. We left the subreg number
+ // around as a marker in setPhysReg() to recognize this case here.
+ if (Reg.isPhysical() && MO.getSubReg() != 0) {
+ MO.setSubReg(0);
+ continue;
+ }
+
+ assert((!MO.isTied() || !isClobberedByRegMasks(MO.getReg())) &&
+ "tied def assigned to clobbered register");
+
+ // Do not free tied operands and early clobbers.
+ if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
+ continue;
+ if (!Reg)
+ continue;
+ if (Reg.isVirtual()) {
+ assert(!shouldAllocateRegister(Reg));
+ continue;
+ }
+ assert(Reg.isPhysical());
+ if (MRI->isReserved(Reg))
+ continue;
+ freePhysReg(Reg);
+ unmarkRegUsedInInstr(Reg);
+ }
+ }
+
+ // Displace clobbered registers.
+ if (HasRegMask) {
+ assert(!RegMasks.empty() && "expected RegMask");
+ // MRI bookkeeping.
+ for (const auto *RM : RegMasks)
+ MRI->addPhysRegsUsedFromRegMask(RM);
+
+ // Displace clobbered registers.
+ for (const LiveReg &LR : LiveVirtRegs) {
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg != 0 && isClobberedByRegMasks(PhysReg))
+ displacePhysReg(MI, PhysReg);
+ }
+ }
+
+ // Apply pre-assigned register uses to state.
+ if (HasPhysRegUse) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
+ if (MRI->isReserved(Reg))
+ continue;
+ bool displacedAny = usePhysReg(MI, Reg);
+ if (!displacedAny)
+ MO.setIsKill(true);
+ }
+ }
+
+ // Allocate virtreg uses and insert reloads as necessary.
+ // Implicit MOs can get moved/removed by useVirtReg(), so loop multiple
+ // times to ensure no operand is missed.
+ bool HasUndefUse = false;
+ bool ReArrangedImplicitMOs = true;
+ while (ReArrangedImplicitMOs) {
+ ReArrangedImplicitMOs = false;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+ continue;
+
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ continue;
+ }
+
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
+
+ assert(!MO.isInternalRead() && "Bundles not supported");
+ assert(MO.readsReg() && "reading use");
+ ReArrangedImplicitMOs = useVirtReg(MI, I, Reg);
+ if (ReArrangedImplicitMOs)
+ break;
+ }
+ }
+
+ // Allocate undef operands. This is a separate step because in a situation
+ // like ` = OP undef %X, %X` both operands need to be assigned the same
+ // register, so the normal (non-undef) assignment has to be performed first.
+ if (HasUndefUse) {
+ for (MachineOperand &MO : MI.all_uses()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+ continue;
+
+ assert(MO.isUndef() && "Should only have undef virtreg uses left");
+ allocVirtRegUndef(MO);
+ }
+ }
+
+ // Free early clobbers.
+ if (HasEarlyClobber) {
+ for (MachineOperand &MO : llvm::reverse(MI.all_defs())) {
+ if (!MO.isEarlyClobber())
+ continue;
+ assert(!MO.getSubReg() && "should be already handled in def processing");
+
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg.isVirtual()) {
+ assert(!shouldAllocateRegister(Reg));
+ continue;
+ }
+ assert(Reg.isPhysical() && "should have register assigned");
+
+ // We sometimes get odd situations like:
+ // early-clobber %x0 = INSTRUCTION %x0
+ // which is semantically questionable as the early-clobber should
+ // apply before the use. But in practice we consider the use to
+ // happen before the early clobber now. Don't free the early clobber
+ // register in this case.
+ if (MI.readsRegister(Reg, TRI))
+ continue;
+
+ freePhysReg(Reg);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "<< " << MI);
+ if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getNumOperands() == 2) {
+ LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
+ Coalesced.push_back(&MI);
+ }
+}
+
+void RegAllocFast::handleDebugValue(MachineInstr &MI) {
+ // Ignore DBG_VALUEs that aren't based on virtual registers. These are
+ // mostly constants and frame indices.
+ for (Register Reg : MI.getUsedDebugRegs()) {
+ if (!Reg.isVirtual())
+ continue;
+ if (!shouldAllocateRegister(Reg))
+ continue;
+
+ // Already spilled to a stackslot?
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS, Reg);
+ LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
+ continue;
+ }
+
+ // See if this virtual register has already been allocated to a physical
+ // register or spilled to a stack slot.
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ SmallVector<MachineOperand *> DbgOps;
+ for (MachineOperand &Op : MI.getDebugOperandsForReg(Reg))
+ DbgOps.push_back(&Op);
+
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ // Update every use of Reg within MI.
+ for (auto &RegMO : DbgOps)
+ setPhysReg(MI, *RegMO, LRI->PhysReg);
+ } else {
+ DanglingDbgValues[Reg].push_back(&MI);
+ }
+
+ // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+ // that future spills of Reg will have DBG_VALUEs.
+ LiveDbgValueMap[Reg].append(DbgOps.begin(), DbgOps.end());
+ }
+}
+
+void RegAllocFast::handleBundle(MachineInstr &MI) {
+ MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
+ ++BundledMI;
+ while (BundledMI->isBundledWithPred()) {
+ for (MachineOperand &MO : BundledMI->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+ continue;
+
+ DenseMap<Register, MCPhysReg>::iterator DI;
+ DI = BundleVirtRegsMap.find(Reg);
+ assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
+
+ setPhysReg(MI, MO, DI->second);
+ }
+
+ ++BundledMI;
+ }
+}
+
+void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
+ this->MBB = &MBB;
+ LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
+
+ RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ for (const auto &LiveReg : MBB.liveouts())
+ setPhysRegState(LiveReg.PhysReg, regPreAssigned);
+
+ Coalesced.clear();
+
+ // Traverse block in reverse order allocating instructions one by one.
+ for (MachineInstr &MI : reverse(MBB)) {
+ LLVM_DEBUG(
+ dbgs() << "\n>> " << MI << "Regs:";
+ dumpState()
+ );
+
+ // Special handling for debug values. Note that they are not allowed to
+ // affect codegen of the other instructions in any way.
+ if (MI.isDebugValue()) {
+ handleDebugValue(MI);
+ continue;
+ }
+
+ allocateInstruction(MI);
+
+ // Once the BUNDLE header has been assigned registers, the same assignments
+ // need to be applied to the bundled MIs.
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ handleBundle(MI);
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "Begin Regs:";
+ dumpState()
+ );
+
+ // Insert reloads for the virtual registers that are live at the beginning of
+ // the block (allocation runs in reverse).
+ LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n");
+ reloadAtBegin(MBB);
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (MachineInstr *MI : Coalesced)
+ MBB.erase(MI);
+ NumCoalesced += Coalesced.size();
+
+ for (auto &UDBGPair : DanglingDbgValues) {
+ for (MachineInstr *DbgValue : UDBGPair.second) {
+ assert(DbgValue->isDebugValue() && "expected DBG_VALUE");
+ // Nothing to do if the vreg was spilled in the meantime.
+ if (!DbgValue->hasDebugOperandForReg(UDBGPair.first))
+ continue;
+ LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+ << '\n');
+ DbgValue->setDebugValueUndef();
+ }
+ }
+ DanglingDbgValues.clear();
+
+ LLVM_DEBUG(MBB.dump());
+}
+
+bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ MRI = &MF.getRegInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
+ MFI = &MF.getFrameInfo();
+ MRI->freezeReservedRegs(MF);
+ RegClassInfo.runOnMachineFunction(MF);
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(NumRegUnits);
+ PhysRegUses.clear();
+ PhysRegUses.setUniverse(NumRegUnits);
+
+ // Initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers.
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ StackSlotForVirtReg.resize(NumVirtRegs);
+ LiveVirtRegs.setUniverse(NumVirtRegs);
+ MayLiveAcrossBlocks.clear();
+ MayLiveAcrossBlocks.resize(NumVirtRegs);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineBasicBlock &MBB : MF)
+ allocateBasicBlock(MBB);
+
+ if (ClearVirtRegs) {
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+ }
+
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RegAllocFast();
+}
+
+FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor,
+ bool ClearVirtRegs) {
+ return new RegAllocFast(Ftor, ClearVirtRegs);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..48187e575494
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,2669 @@
+//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocGreedy.h"
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocPriorityAdvisor.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
+ "split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
+ cl::init(SplitEditor::SM_Speed));
+
+static cl::opt<unsigned>
+LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
+ cl::desc("Last chance recoloring max depth"),
+ cl::init(5));
+
+static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
+ "lcr-max-interf", cl::Hidden,
+ cl::desc("Last chance recoloring maximum number of considered"
+ " interference at a time"),
+ cl::init(8));
+
+static cl::opt<bool> ExhaustiveSearch(
+ "exhaustive-register-search", cl::NotHidden,
+ cl::desc("Exhaustive Search for registers bypassing the depth "
+ "and interference cutoffs of last chance recoloring"),
+ cl::Hidden);
+
+static cl::opt<bool> EnableDeferredSpilling(
+ "enable-deferred-spilling", cl::Hidden,
+ cl::desc("Instead of spilling a variable right away, defer the actual "
+ "code insertion to the end of the allocation. That way the "
+ "allocator might still find a suitable coloring for this "
+ "variable because of other evicted variables."),
+ cl::init(false));
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned>
+CSRFirstTimeCost("regalloc-csr-first-time-cost",
+ cl::desc("Cost for first time use of callee-saved register."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned long> GrowRegionComplexityBudget(
+ "grow-region-complexity-budget",
+ cl::desc("growRegion() does not scale with the number of BB edges, so "
+ "limit its budget and bail out once we reach the limit."),
+ cl::init(10000), cl::Hidden);
+
+static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
+ "greedy-regclass-priority-trumps-globalness",
+ cl::desc("Change the greedy register allocator's live range priority "
+ "calculation to make the AllocationPriority of the register class "
+ "more important then whether the range is global"),
+ cl::Hidden);
+
+static cl::opt<bool> GreedyReverseLocalAssignment(
+ "greedy-reverse-local-assignment",
+ cl::desc("Reverse allocation order of local live ranges, such that "
+ "shorter local live ranges will tend to be allocated first"),
+ cl::Hidden);
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+char RAGreedy::ID = 0;
+char &llvm::RAGreedyID = RAGreedy::ID;
+
+INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
+ "Greedy Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
+INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis)
+INITIALIZE_PASS_END(RAGreedy, "greedy",
+ "Greedy Register Allocator", false, false)
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_Assign",
+ "RS_Split",
+ "RS_Split2",
+ "RS_Spill",
+ "RS_Memory",
+ "RS_Done"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
+ return new RAGreedy(Ftor);
+}
+
+RAGreedy::RAGreedy(RegClassFilterFunc F):
+ MachineFunctionPass(ID),
+ RegAllocBase(F) {
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
+ AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (VRM->hasPhys(VirtReg)) {
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ // Nonetheless, clear the live-range so that the debug
+ // dump will show the right state for that VirtReg.
+ LI.clear();
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ RegAllocBase::enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
+ ExtraInfo->LRE_DidCloneVirtReg(New, Old);
+}
+
+void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
+ // Cloning a register we haven't even heard about yet? Just ignore it.
+ if (!Info.inBounds(Old))
+ return;
+
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. The new components are much smaller
+ // than the original, so they should get a new chance at being assigned;
+ // the clone inherits the same stage as the parent.
+ Info[Old].Stage = RS_Assign;
+ Info.grow(New.id());
+ Info[New] = Info[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset();
+ GlobalCand.clear();
+}
+
+void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
+
+void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
+ // Prioritize live ranges by the priority computed by the advisor, assigning
+ // higher-priority ranges first. The queue holds (priority, ~reg) pairs.
+ const Register Reg = LI->reg();
+ assert(Reg.isVirtual() && "Can only enqueue virtual registers");
+
+ auto Stage = ExtraInfo->getOrInitStage(Reg);
+ if (Stage == RS_New) {
+ Stage = RS_Assign;
+ ExtraInfo->setStage(Reg, Stage);
+ }
+
+ unsigned Ret = PriorityAdvisor->getPriority(*LI);
+
+ // The virtual register number is a tie breaker for same-priority ranges.
+ // Give lower vreg numbers higher priority to assign them first.
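+ // Pushing ~Reg means a lower vreg number produces a larger value, so it is
+ // popped first from the max-heap when priorities are equal.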
+ CurQueue.push(std::make_pair(Ret, ~Reg));
+}
+
+unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ const unsigned Size = LI.getSize();
+ const Register Reg = LI.reg();
+ unsigned Prio;
+ LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
+
+ if (Stage == RS_Split) {
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated.
+ Prio = Size;
+ } else if (Stage == RS_Memory) {
+ // Memory operands should be considered last.
+ // Change the priority such that memory operands are assigned in
+ // the reverse order that they came in.
+ // TODO: Make this a member variable and probably do something about hints.
+ static unsigned MemOp = 0;
+ Prio = MemOp++;
+ } else {
+ // Giant live ranges fall back to the global assignment heuristic, which
+ // prevents excessive spilling in pathological cases.
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+ bool ForceGlobal = RC.GlobalPriority ||
+ (!ReverseLocalAssignment &&
+ (Size / SlotIndex::InstrDist) >
+ (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
+ unsigned GlobalBit = 0;
+
+ if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
+ LIS->intervalIsInOneMBB(LI)) {
+ // Allocate original local ranges in linear instruction order. Since they
+ // are singly defined, this produces optimal coloring in the absence of
+ // global interference and other constraints.
+ if (!ReverseLocalAssignment)
+ Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
+ else {
+ // Allocating bottom up may allow many short live ranges to be assigned first
+ // to one of the cheap registers. This could be much faster for very
+ // large blocks on targets with many physical registers.
+ Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
+ }
+ } else {
+ // Allocate global and split ranges in long->short order. Long ranges that
+ // don't fit should be spilled (or split) ASAP so they don't create
+ // interference. Mark a bit to prioritize global above local ranges.
+ Prio = Size;
+ GlobalBit = 1;
+ }
+
+ // Priority bit layout:
+ // 31 RS_Assign priority
+ // 30 Preference priority
+ // if (RegClassPriorityTrumpsGlobalness)
+ // 29-25 AllocPriority
+ // 24 GlobalBit
+ // else
+ // 29 Global bit
+ // 28-24 AllocPriority
+ // 0-23 Size/Instr distance
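+ //
+ // For example (hypothetical values, RegClassPriorityTrumpsGlobalness off):
+ // a local RS_Assign range at instruction distance 100, in a class with
+ // AllocationPriority 2 and no register hint, ends up with
+ // Prio = (1u << 31) | (0u << 29) | (2u << 24) | 100.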
+
+ // Clamp the size to fit with the priority masking scheme
+ Prio = std::min(Prio, (unsigned)maxUIntN(24));
+ assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");
+
+ if (RegClassPriorityTrumpsGlobalness)
+ Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
+ else
+ Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
+
+ // Mark a higher bit to prioritize global and local above RS_Split.
+ Prio |= (1u << 31);
+
+ // Boost ranges that have a physical register hint.
+ if (VRM->hasKnownPreference(Reg))
+ Prio |= (1u << 30);
+ }
+
+ return Prio;
+}
+
+const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
+
+const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
+ if (CurQueue.empty())
+ return nullptr;
+ LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
+ CurQueue.pop();
+ return LI;
+}
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
+ MCRegister PhysReg;
+ for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
+ assert(*I);
+ if (!Matrix->checkInterference(VirtReg, *I)) {
+ if (I.isHint())
+ return *I;
+ else
+ PhysReg = *I;
+ }
+ }
+ if (!PhysReg.isValid())
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
+ if (Order.isHint(Hint)) {
+ MCRegister PhysHint = Hint.asMCReg();
+ LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
+
+ if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
+ FixedRegisters)) {
+ evictInterference(VirtReg, PhysHint, NewVRegs);
+ return PhysHint;
+ }
+ // Record the missed hint; we may be able to recover
+ // at the end if the surrounding allocation changes.
+ SetOfBrokenHints.insert(&VirtReg);
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ uint8_t Cost = RegCosts[PhysReg];
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
+ << (unsigned)Cost << '\n');
+ MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+bool RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
+ MCRegister FromReg) const {
+ auto HasRegUnitInterference = [&](MCRegUnit Unit) {
+ // Instantiate a "subquery", not to be confused with the Queries array.
+ LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
+ return SubQ.checkInterference();
+ };
+
+ for (MCRegister Reg :
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
+ if (Reg == FromReg)
+ continue;
+ // If no units have interference, reassignment is possible.
+ if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
+ LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
+ << printReg(FromReg, TRI) << " to "
+ << printReg(Reg, TRI) << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to Physreg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg,
+ SmallVectorImpl<Register> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
+
+ LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+
+ // Collect all interfering virtregs first.
+ SmallVector<const LiveInterval *, 8> Intfs;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
+ // We usually have the interfering VRegs cached, so collectInterferingVRegs()
+ // should be fast. We may need to recalculate when different physregs overlap
+ // the same register unit, because then different SubRanges were queried
+ // against it.
+ ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
+ Intfs.append(IVR.begin(), IVR.end());
+ }
+
+ // Evict them second. This will invalidate the queries.
+ for (const LiveInterval *Intf : Intfs) {
+ // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+ if (!VRM->hasPhys(Intf->reg()))
+ continue;
+
+ Matrix->unassign(*Intf);
+ assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraInfo->setCascade(Intf->reg(), Cascade);
+ ++NumEvicted;
+ NewVRegs.push_back(Intf->reg());
+ }
+}
+
+/// Returns true if the given \p PhysReg is a callee saved register and has not
+/// been used for allocation yet.
+bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
+ MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
+ if (!CSR)
+ return false;
+
+ return !Matrix->isPhysRegUsed(PhysReg);
+}
+
+std::optional<unsigned>
+RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned CostPerUseLimit) const {
+ unsigned OrderLimit = Order.getOrder().size();
+
+ if (CostPerUseLimit < uint8_t(~0u)) {
+ // Check if any registers in RC are below CostPerUseLimit.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
+ uint8_t MinCost = RegClassInfo.getMinCost(RC);
+ if (MinCost >= CostPerUseLimit) {
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
+ << MinCost << ", no cheaper registers to be found.\n");
+ return std::nullopt;
+ }
+
+ // It is normal for register classes to have a long tail of registers with
+ // the same cost. We don't need to look at them if they're too expensive.
+ if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
+ OrderLimit = RegClassInfo.getLastCostChange(RC);
+ LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
+ << " regs.\n");
+ }
+ }
+ return OrderLimit;
+}
+
+bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
+ MCRegister PhysReg) const {
+ if (RegCosts[PhysReg] >= CostPerUseLimit)
+ return false;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
+ LLVM_DEBUG(
+ dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
+ << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+ << '\n');
+ return false;
+ }
+ return true;
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) {
+ NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
+ TimePassesIsEnabled);
+
+ MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
+ VirtReg, Order, CostPerUseLimit, FixedRegisters);
+ if (BestPhys.isValid())
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ BlockFrequency &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ BlockFrequency StaticCost = 0;
+ for (unsigned I = 0; I != UseBlocks.size(); ++I) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = (BI.LiveOut &&
+ !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
+ ? SpillPlacement::PrefReg
+ : SpillPlacement::DontCare;
+ BC.ChangesValue = BI.FirstDef.isValid();
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
+ BC.Entry = SpillPlacement::MustSpill;
+ ++Ins;
+ } else if (Intf.first() < BI.FirstInstr) {
+ BC.Entry = SpillPlacement::PrefSpill;
+ ++Ins;
+ } else if (Intf.first() < BI.LastInstr) {
+ ++Ins;
+ }
+
+ // Abort if the spill cannot be inserted at the MBB's start.
+ if (((BC.Entry == SpillPlacement::MustSpill) ||
+ (BC.Entry == SpillPlacement::PrefSpill)) &&
+ SlotIndex::isEarlierInstr(BI.FirstInstr,
+ SA->getFirstSplitPoint(BC.Number)))
+ return false;
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
+ BC.Exit = SpillPlacement::MustSpill;
+ ++Ins;
+ } else if (Intf.last() > BI.LastInstr) {
+ BC.Exit = SpillPlacement::PrefSpill;
+ ++Ins;
+ } else if (Intf.last() > BI.FirstInstr) {
+ ++Ins;
+ }
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ while (Ins--)
+ StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+ // that may add a positive bias; it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
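+ // Constraints (BCS) and links (TBS) are flushed to the SpillPlacer in
+ // batches of GroupSize; partially filled batches are flushed after the loop.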
+
+ for (unsigned Number : Blocks) {
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(ArrayRef(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Abort if the spill cannot be inserted at the MBB's start.
+ MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+ auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
+ if (FirstNonDebugInstr != MBB->end() &&
+ SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
+ SA->getFirstSplitPoint(Number)))
+ return false;
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ SpillPlacer->addConstraints(ArrayRef(BCS, B));
+ B = 0;
+ }
+ }
+
+ SpillPlacer->addConstraints(ArrayRef(BCS, B));
+ SpillPlacer->addLinks(ArrayRef(TBS, T));
+ return true;
+}
+
+bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ unsigned long Budget = GrowRegionComplexityBudget;
+ while (true) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of PrefRegBundles.
+ for (unsigned Bundle : NewBundles) {
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ // Limit compilation time by bailing out after we use all our budget.
+ if (Blocks.size() >= Budget)
+ return false;
+ Budget -= Blocks.size();
+ for (unsigned Block : Blocks) {
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+
+ // Compute through constraints from the interference, or assume that all
+ // through blocks prefer spilling when forming compact regions.
+ auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
+ if (Cand.PhysReg) {
+ if (!addThroughConstraints(Cand.Intf, NewBlocks))
+ return false;
+ } else
+ // Provide a strong negative bias on through blocks to prevent unwanted
+ // liveness on loop backedges.
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ LLVM_DEBUG(dbgs() << ", v=" << Visited);
+ return true;
+}
+
+/// calcCompactRegion - Compute the set of edge bundles that should be live
+/// when splitting the current live range into compact regions. Compact
+/// regions can be computed without looking at interference. They are the
+/// regions formed by removing all the live-through blocks from the live range.
+///
+/// Returns false if the current live range is already compact, or if the
+/// compact regions would form single block regions anyway.
+bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
+ // Without any through blocks, the live range is already compact.
+ if (!SA->getNumThroughBlocks())
+ return false;
+
+ // Compact regions don't correspond to any physreg.
+ Cand.reset(IntfCache, MCRegister::NoRegister);
+
+ LLVM_DEBUG(dbgs() << "Compact region bundles");
+
+ // Use the spill placer to determine the live bundles. GrowRegion pretends
+ // that all the through blocks have interference when PhysReg is unset.
+ SpillPlacer->prepare(Cand.LiveBundles);
+
+ // The static split cost will be zero since Cand.Intf reports no interference.
+ BlockFrequency Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ LLVM_DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ return false;
+ }
+
+ SpillPlacer->finish();
+
+ if (!Cand.LiveBundles.any()) {
+ LLVM_DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ LLVM_DEBUG({
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
+ dbgs() << ".\n";
+ });
+ return true;
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+BlockFrequency RAGreedy::calcSpillCost() {
+ BlockFrequency Cost = 0;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return Cost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
+ const AllocationOrder &Order) {
+ BlockFrequency GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned I = 0; I != UseBlocks.size(); ++I) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
+ unsigned Ins = 0;
+
+ Cand.Intf.moveToBlock(BC.Number);
+
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+ while (Ins--)
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned Number : Cand.ActiveBlocks) {
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference()) {
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
+///
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
+///
+/// @param LREdit The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+/// must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+ ArrayRef<unsigned> UsedCands) {
+ // These are the intervals created for new global ranges. We may create more
+ // intervals for local ranges.
+ const unsigned NumGlobalIntvs = LREdit.size();
+ LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
+ << " globals.\n");
+ assert(NumGlobalIntvs && "No global intervals configured");
+
+ // Isolate even single instructions when dealing with a proper sub-class.
+ // That guarantees register class inflation for the stack interval because it
+ // is all copies.
+ Register Reg = SA->getParent().reg();
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
+ unsigned Number = BI.MBB->getNumber();
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+ if (BI.LiveIn) {
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+ }
+ if (BI.LiveOut) {
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ }
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!IntvIn && !IntvOut) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ continue;
+ }
+
+ if (IntvIn && IntvOut)
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ else if (IntvIn)
+ SE->splitRegInBlock(BI, IntvIn, IntfIn);
+ else
+ SE->splitRegOutBlock(BI, IntvOut, IntfOut);
+ }
+
+ // Handle live-through blocks. The relevant live-through blocks are stored in
+ // the ActiveBlocks list with each candidate. We need to filter out
+ // duplicates.
+ BitVector Todo = SA->getThroughBlocks();
+ for (unsigned UsedCand : UsedCands) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
+ for (unsigned Number : Blocks) {
+ if (!Todo.test(Number))
+ continue;
+ Todo.reset(Number);
+
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ if (!IntvIn && !IntvOut)
+ continue;
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ }
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
+
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
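+ //
+ // Remainder intervals are staged RS_Spill below; global intervals that still
+ // cover as many blocks as the original are capped at RS_Split2; everything
+ // else stays RS_New.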
+ for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
+ const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
+
+ // Ignore old intervals from DCE.
+ if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[I] == 0) {
+ ExtraInfo->setStage(Reg, RS_Spill);
+ continue;
+ }
+
+ // Global intervals. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[I] < NumGlobalIntvs) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safeguard against looping.
+ ExtraInfo->setStage(Reg, RS_Split2);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
+ if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
+ return MCRegister::NoRegister;
+ unsigned NumCands = 0;
+ BlockFrequency SpillCost = calcSpillCost();
+ BlockFrequency BestCost;
+
+ // Check if we can split this live range around a compact region.
+ bool HasCompact = calcCompactRegion(GlobalCand.front());
+ if (HasCompact) {
+ // Yes, keep GlobalCand[0] as the compact region candidate.
+ NumCands = 1;
+ BestCost = BlockFrequency::getMaxFrequency();
+ } else {
+ // No benefit from the compact region; our fallback will be per-block
+ // splitting. Make sure we find a solution that is cheaper than spilling.
+ BestCost = SpillCost;
+ LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
+ MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
+ }
+
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, false /*IgnoreCSR*/);
+
+ // No solutions found, fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
+ return MCRegister::NoRegister;
+
+ return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
+}
+
+unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands,
+ bool IgnoreCSR) {
+ unsigned BestCand = NoCand;
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
+ if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
+ continue;
+
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
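+ // The candidate covering the fewest live bundles is treated as the worst
+ // and is discarded to make room for the new candidate.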
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
+ if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
+ continue;
+ unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
+ if (Count < WorstCount) {
+ Worst = CandIndex;
+ WorstCount = Count;
+ }
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ BlockFrequency Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
+ MBFI->printBlockFreq(dbgs(), Cost));
+ if (Cost >= BestCost) {
+ LLVM_DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ continue;
+ }
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ LLVM_DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand, Order);
+ LLVM_DEBUG({
+ dbgs() << ", total = ";
+ MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Cost;
+ }
+ ++NumCands;
+ }
+
+ return BestCand;
+}
+
+unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact,
+ SmallVectorImpl<Register> &NewVRegs) {
+ SmallVector<unsigned, 8> UsedCands;
+ // Prepare split editor.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit, SplitSpillMode);
+
+ // Assign all edge bundles to the preferred candidate, or NoCand.
+ BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+ // Assign bundles for the best candidate region.
+ if (BestCand != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+ if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+ UsedCands.push_back(BestCand);
+ Cand.IntvIdx = SE->openIntv();
+ LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ // Assign bundles for the compact region.
+ if (HasCompact) {
+ GlobalSplitCandidate &Cand = GlobalCand.front();
+ assert(!Cand.PhysReg && "Compact region has no physreg");
+ if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+ UsedCands.push_back(0);
+ Cand.IntvIdx = SE->openIntv();
+ LLVM_DEBUG(dbgs() << "Split for compact region in " << B
+ << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ splitAroundRegion(LREdit, UsedCands);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
+ assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+ Register Reg = VirtReg.reg();
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit, SplitSpillMode);
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ }
+ // No blocks were split.
+ if (LREdit.empty())
+ return 0;
+
+ // We did split for some blocks.
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+
+ // Tell LiveDebugVariables about the new ranges.
+ DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
+
+ // Sort out the new intervals created by splitting. The remainder interval
+ // goes straight to spilling; the new local ranges get to stay RS_New.
+ for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
+ const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
+ if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
+ ExtraInfo->setStage(LI, RS_Spill);
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// Get the number of allocatable registers that match the constraints of \p Reg
+/// on \p MI and that are also in \p SuperRC.
+static unsigned getNumAllocatableRegsForConstraints(
+ const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
+ const RegisterClassInfo &RCI) {
+ assert(SuperRC && "Invalid register class");
+
+ const TargetRegisterClass *ConstrainedRC =
+ MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
+ /* ExploreBundle */ true);
+ if (!ConstrainedRC)
+ return 0;
+ return RCI.getNumAllocatableRegs(ConstrainedRC);
+}
+
+static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const MachineInstr &MI, Register Reg) {
+ LaneBitmask Mask;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 && MO.isUse()) {
+ Mask |= MRI.getMaxLaneMaskForVReg(Reg);
+ continue;
+ }
+
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef()) {
+ if (!MO.isUndef())
+ Mask |= ~SubRegMask;
+ } else
+ Mask |= SubRegMask;
+ }
+
+ return Mask;
+}
+
+/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
+/// VirtReg.
+static bool readsLaneSubset(const MachineRegisterInfo &MRI,
+ const MachineInstr *MI, const LiveInterval &VirtReg,
+ const TargetRegisterInfo *TRI, SlotIndex Use) {
+ // Early check the common case.
+ if (MI->isCopy() &&
+ MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
+ return false;
+
+ // FIXME: We're only considering uses, but should we consider defs too?
+ LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
+
+ LaneBitmask LiveAtMask;
+ for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
+ if (S.liveAt(Use))
+ LiveAtMask |= S.LaneMask;
+ }
+
+ // If the live lanes aren't different from the lanes used by the instruction,
+ // this doesn't help.
+ return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
+}
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
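+///
+/// Illustrative example (hypothetical register classes, not taken from a
+/// specific target): if a single use constrains the value to a small
+/// subclass while the other uses accept the full class, splitting around
+/// that use lets the remainder of the live range be allocated from the
+/// larger class, at the price of an extra copy.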
+unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
+ // There is no point to this if there are no larger sub-classes.
+
+ bool SplitSubClass = true;
+ if (!RegClassInfo.isProperSubClass(CurRC)) {
+ if (!VirtReg.hasSubRanges())
+ return 0;
+ SplitSubClass = false;
+ }
+
+ // Always enable split spill mode, since we're effectively spilling to a
+ // register.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit, SplitEditor::SM_Size);
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 1)
+ return 0;
+
+ LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
+ << " individual instrs.\n");
+
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(CurRC, *MF);
+ unsigned SuperRCNumAllocatableRegs =
+ RegClassInfo.getNumAllocatableRegs(SuperRC);
+ // Split around every non-copy instruction if this split will relax
+ // the constraints on the virtual register.
+ // Otherwise, splitting just inserts uncoalescable copies that do not help
+ // the allocation.
+ for (const SlotIndex Use : Uses) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
+ if (MI->isFullCopy() ||
+ (SplitSubClass &&
+ SuperRCNumAllocatableRegs ==
+ getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
+ TII, TRI, RegClassInfo)) ||
+ // TODO: Handle split for subranges with subclass constraints?
+ (!SplitSubClass && VirtReg.hasSubRanges() &&
+ !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
+ LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
+ continue;
+ }
+ }
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Use);
+ SlotIndex SegStop = SE->leaveIntvAfter(Use);
+ SE->useIntv(SegStart, SegStop);
+ }
+
+ if (LREdit.empty()) {
+ LLVM_DEBUG(dbgs() << "All uses were copies.\n");
+ return 0;
+ }
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
+ // Assign all new registers to RS_Spill. This was the last chance.
+ ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
+///
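+/// Illustrative example (hypothetical values): with three use slots A, B, C
+/// there are two gaps, A-B and B-C. An interfering segment with weight w that
+/// overlaps the instruction at B raises both GapWeight[0] and GapWeight[1] to
+/// at least w, because interference overlapping an instruction is counted in
+/// both surrounding gaps.
+///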
+void RAGreedy::calcGapWeights(MCRegister PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx =
+ BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+ SlotIndex StopIdx =
+ BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstInstr to
+ // LastInstr, so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI =
+ Matrix->getLiveUnions()[Unit].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight();
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+
+ // Add fixed interference.
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ const LiveRange &LR = LIS->getRegUnit(Unit);
+ LiveRange::const_iterator I = LR.find(StartIdx);
+ LiveRange::const_iterator E = LR.end();
+
+ // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+ for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+ while (Uses[Gap+1].getBoundaryIndex() < I->start)
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = huge_valf;
+ if (Uses[Gap+1].getBaseIndex() >= I->end)
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
+ // TODO: the function currently only handles a single UseBlock; it should be
+ // possible to generalize.
+ if (SA->getUseBlocks().size() != 1)
+ return 0;
+
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - a phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case; we simply assume
+ // that the interval is continuous from FirstInstr to LastInstr. We should
+ // make sure that we don't do anything illegal to such an interval, though.
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ LLVM_DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (const auto &Use : Uses)
+ dbgs() << ' ' << Use;
+ dbgs() << '\n';
+ });
+
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (Matrix->checkRegMaskInterference(VirtReg)) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned RI =
+ llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
+ unsigned RE = RMS.size();
+ for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
+ // Look for Uses[I] <= RMS <= Uses[I + 1].
+ assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
+ if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
+ break;
+ LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
+ << Uses[I + 1]);
+ RegMaskGaps.push_back(I);
+ // Advance RI to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
+ ++RI;
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have fewer instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() == RS_Split2. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Split2,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
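+ // Illustrative application of the rules (hypothetical numbers): a range at
+ // RS_Split2 with NumGaps == 3 only accepts splits whose new interval spans
+ // fewer than 3 gaps, while a range below RS_Split2 accepts any non-noop
+ // split; if the chosen split does not reduce the gap count, the new
+ // interval created around the uses is tagged RS_Split2 so the next round
+ // must make progress.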
+ bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq =
+ SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
+ (1.0f / MBFI->getEntryFreq());
+ SmallVector<float, 8> GapWeight;
+
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Remove any gaps with regmask clobbers.
+ if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
+ for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
+ GapWeight[RegMaskGaps[I]] = huge_valf;
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ while (true) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
+ << '-' << Uses[SplitAfter] << " I=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ LLVM_DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < huge_valf) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(
+ blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
+ 1);
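+ // Rough reading of the estimate above (illustrative; assumes
+ // normalizeSpillWeight divides use frequency by interval size): the new
+ // interval would be touched by NewGaps + 1 instructions at roughly
+ // blockFreq each, normalized by its estimated length in slots. The split
+ // is only kept if that weight beats MaxGap with some hysteresis.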
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ LLVM_DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ LLVM_DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ LLVM_DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
+ MaxGap = std::max(MaxGap, GapWeight[I]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ LLVM_DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
+ << Uses[BestAfter] << ", " << BestDiff << ", "
+ << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Split2 so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
+ if (IntvMap[I] == 1) {
+ ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
+ // Ranges must be Split2 or less.
+ if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
+ return 0;
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ return tryInstructionSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
+
+ SA->analyze(&VirtReg);
+
+ // First try to split around a region spanning multiple blocks. RS_Split2
+ // ranges already made dubious progress with region splitting, so they go
+ // straight to single block splitting.
+ if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
+ MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ }
+
+ // Then isolate blocks.
+ return tryBlockSplit(VirtReg, Order, NewVRegs);
+}
+
+//===----------------------------------------------------------------------===//
+// Last Chance Recoloring
+//===----------------------------------------------------------------------===//
+
+/// Return true if \p reg has any tied def operand.
+static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
+ for (const MachineOperand &MO : MRI->def_operands(reg))
+ if (MO.isTied())
+ return true;
+
+ return false;
+}
+
+/// Return true if the existing assignment of \p Intf overlaps, but is not the
+/// same as, \p PhysReg.
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
+ const VirtRegMap &VRM,
+ MCRegister PhysReg,
+ const LiveInterval &Intf) {
+ MCRegister AssignedReg = VRM.getPhys(Intf.reg());
+ if (PhysReg == AssignedReg)
+ return false;
+ return TRI.regsOverlap(PhysReg, AssignedReg);
+}
+
+/// mayRecolorAllInterferences - Check if the virtual registers that
+/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
+/// recolored to free \p PhysReg.
+/// When true is returned, \p RecoloringCandidates has been augmented with all
+/// the live intervals that need to be recolored in order to free \p PhysReg
+/// for \p VirtReg.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+bool RAGreedy::mayRecolorAllInterferences(
+ MCRegister PhysReg, const LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
+
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
+ // If there is LastChanceRecoloringMaxInterference or more interferences,
+ // chances are one would not be recolorable.
+ if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
+ LastChanceRecoloringMaxInterference &&
+ !ExhaustiveSearch) {
+ LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
+ CutOffInfo |= CO_Interf;
+ return false;
+ }
+ for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ // If Intf is done and sits on the same register class as VirtReg, it
+ // would not be recolorable as it is in the same state as VirtReg.
+ // However, there are at least two exceptions.
+ //
+ // If VirtReg has tied defs and Intf doesn't, then there is still a
+ // point in examining whether it can be recolored.
+ //
+ // Additionally, if the register class has overlapping tuple members, it
+ // may still be recolorable using a different tuple. This is more likely
+ // if the existing assignment aliases with the candidate.
+ //
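+ // Illustrative case for the second exception (hypothetical tuple
+ // registers): if Intf is assigned a tuple such as R0_R1 while the
+ // candidate PhysReg is the overlapping tuple R1_R2, Intf may still be
+ // movable to a non-overlapping tuple, so it is worth keeping it as a
+ // recoloring candidate.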
+ if (((ExtraInfo->getStage(*Intf) == RS_Done &&
+ MRI->getRegClass(Intf->reg()) == CurRC &&
+ !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
+ !(hasTiedDef(MRI, VirtReg.reg()) &&
+ !hasTiedDef(MRI, Intf->reg()))) ||
+ FixedRegisters.count(Intf->reg())) {
+ LLVM_DEBUG(
+ dbgs() << "Early abort: the interference is not recolorable.\n");
+ return false;
+ }
+ RecoloringCandidates.insert(Intf);
+ }
+ }
+ return true;
+}
+
+/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
+/// its interferences.
+/// Last chance recoloring chooses a color for \p VirtReg and recolors every
+/// virtual register that was using it. The recoloring process may recursively
+/// use the last chance recoloring. Therefore, when a virtual register has been
+/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
+/// be last-chance-recolored again during this recoloring "session".
+/// E.g.,
+/// Let
+/// vA can use {R1, R2 }
+/// vB can use { R2, R3}
+/// vC can use {R1 }
+/// Where vA, vB, and vC cannot be split anymore (they are reloads for
+/// instance) and they all interfere.
+///
+/// vA is assigned R1
+/// vB is assigned R2
+/// vC tries to evict vA but vA is already done.
+/// Regular register allocation fails.
+///
+/// Last chance recoloring kicks in:
+/// vC does as if vA was evicted => vC uses R1.
+/// vC is marked as fixed.
+/// vA needs to find a color.
+/// None are available.
+/// vA cannot evict vC: vC is a fixed virtual register now.
+/// vA does as if vB was evicted => vA uses R2.
+/// vB needs to find a color.
+/// R3 is available.
+/// Recoloring => vC = R1, vA = R2, vB = R3
+///
+/// \p Order defines the preferred allocation order for \p VirtReg.
+/// \p NewRegs will contain any new virtual registers that have been created
+/// (split, spill) during the process and that must be assigned.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+///
+/// \p RecolorStack tracks the original assignments of successfully recolored
+/// registers.
+///
+/// \p Depth gives the current depth of the last chance recoloring.
+/// \return a physical register that can be used for VirtReg or ~0u if none
+/// exists.
+unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
+ unsigned Depth) {
+ if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
+ return ~0u;
+
+ LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+
+ const ssize_t EntryStackSize = RecolorStack.size();
+
+ // Ranges must be Done.
+ assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
+ "Last chance recoloring should really be last chance");
+ // Set the max depth to LastChanceRecoloringMaxDepth.
+ // We may want to reconsider that if we end up with too large a search space
+ // for targets with hundreds of registers.
+ // Indeed, in that case we may want to cut the search space earlier.
+ if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
+ LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
+ CutOffInfo |= CO_Depth;
+ return ~0u;
+ }
+
+ // Set of Live intervals that will need to be recolored.
+ SmallLISet RecoloringCandidates;
+
+ // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
+ // this recoloring "session".
+ assert(!FixedRegisters.count(VirtReg.reg()));
+ FixedRegisters.insert(VirtReg.reg());
+ SmallVector<Register, 4> CurrentNewVRegs;
+
+ for (MCRegister PhysReg : Order) {
+ assert(PhysReg.isValid());
+ LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
+ << printReg(PhysReg, TRI) << '\n');
+ RecoloringCandidates.clear();
+ CurrentNewVRegs.clear();
+
+ // It is only possible to recolor virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) >
+ LiveRegMatrix::IK_VirtReg) {
+ LLVM_DEBUG(
+ dbgs() << "Some interferences are not with virtual registers.\n");
+
+ continue;
+ }
+
+ // Give up early on this PhysReg if it is obvious we cannot recolor all
+ // the interferences.
+ if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
+ FixedRegisters)) {
+ LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
+ continue;
+ }
+
+ // RecoloringCandidates contains all the virtual registers that interfere
+ // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
+ // recoloring and perform the actual recoloring.
+ PQueue RecoloringQueue;
+ for (const LiveInterval *RC : RecoloringCandidates) {
+ Register ItVirtReg = RC->reg();
+ enqueue(RecoloringQueue, RC);
+ assert(VRM->hasPhys(ItVirtReg) &&
+ "Interferences are supposed to be with allocated variables");
+
+ // Record the current allocation.
+ RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
+
+ // Unassign the interference from the matrix so its register appears free.
+ Matrix->unassign(*RC);
+ }
+
+ // Act as if VirtReg was assigned to PhysReg so that the underlying
+ // recoloring has the right information about the interferences and
+ // available colors.
+ Matrix->assign(VirtReg, PhysReg);
+
+ // Save the current recoloring state.
+ // If we cannot recolor all the interferences, we will have to start again
+ // at this point for the next physical register.
+ SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
+ if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
+ FixedRegisters, RecolorStack, Depth)) {
+ // Push the queued vregs into the main queue.
+ for (Register NewVReg : CurrentNewVRegs)
+ NewVRegs.push_back(NewVReg);
+ // Do not mess up the global assignment process.
+ // I.e., VirtReg must be unassigned.
+ Matrix->unassign(VirtReg);
+ return PhysReg;
+ }
+
+ LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
+ << printReg(PhysReg, TRI) << '\n');
+
+ // The recoloring attempt failed, undo the changes.
+ FixedRegisters = SaveFixedRegisters;
+ Matrix->unassign(VirtReg);
+
+ // For a newly created vreg which is also in RecoloringCandidates,
+ // don't add it to NewVRegs because its physical register will be restored
+ // below. Other vregs in CurrentNewVRegs are created by calling
+ // selectOrSplit and should be added into NewVRegs.
+ for (Register R : CurrentNewVRegs) {
+ if (RecoloringCandidates.count(&LIS->getInterval(R)))
+ continue;
+ NewVRegs.push_back(R);
+ }
+
+ // Roll back our unsuccessful recoloring. Also roll back any successful
+ // recolorings in any recursive recoloring attempts, since it's possible
+ // they would have introduced conflicts with assignments we will be
+ // restoring further up the stack. Perform all unassignments prior to
+ // reassigning, since sub-recolorings may have conflicted with the registers
+ // we are going to restore to their original assignments.
+ for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
+ const LiveInterval *LI;
+ MCRegister PhysReg;
+ std::tie(LI, PhysReg) = RecolorStack[I];
+
+ if (VRM->hasPhys(LI->reg()))
+ Matrix->unassign(*LI);
+ }
+
+ for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
+ const LiveInterval *LI;
+ MCRegister PhysReg;
+ std::tie(LI, PhysReg) = RecolorStack[I];
+ if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
+ Matrix->assign(*LI, PhysReg);
+ }
+
+ // Pop the stack of recoloring attempts.
+ RecolorStack.resize(EntryStackSize);
+ }
+
+ // Last chance recoloring did not work either; give up.
+ return ~0u;
+}
+
+/// tryRecoloringCandidates - Try to assign a new color to every register
+/// in \p RecoloringQueue.
+/// \p NewRegs will contain any new virtual register created during the
+/// recoloring process.
+/// \p FixedRegisters[in/out] contains all the registers that have been
+/// recolored.
+/// \return true if all virtual registers in RecoloringQueue were successfully
+/// recolored, false otherwise.
+bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
+ SmallVectorImpl<Register> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
+ unsigned Depth) {
+ while (!RecoloringQueue.empty()) {
+ const LiveInterval *LI = dequeue(RecoloringQueue);
+ LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
+ MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
+ RecolorStack, Depth + 1);
+ // When splitting happens, the live-range may actually be empty.
+ // In that case, it is okay to continue the recoloring even
+ // if we did not find an alternative color for it. Indeed,
+ // there will not be anything to color for LI in the end.
+ if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
+ return false;
+
+ if (!PhysReg) {
+ assert(LI->empty() && "Only empty live-range do not require a register");
+ LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
+ << " succeeded. Empty LI.\n");
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
+ << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
+
+ Matrix->assign(*LI, PhysReg);
+ FixedRegisters.insert(LI->reg());
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
+MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs) {
+ CutOffInfo = CO_None;
+ LLVMContext &Ctx = MF->getFunction().getContext();
+ SmallVirtRegSet FixedRegisters;
+ RecoloringStack RecolorStack;
+ MCRegister Reg =
+ selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
+ if (Reg == ~0U && (CutOffInfo != CO_None)) {
+ uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
+ if (CutOffEncountered == CO_Depth)
+ Ctx.emitError("register allocation failed: maximum depth for recoloring "
+ "reached. Use -fexhaustive-register-search to skip "
+ "cutoffs");
+ else if (CutOffEncountered == CO_Interf)
+ Ctx.emitError("register allocation failed: maximum interference for "
+ "recoloring reached. Use -fexhaustive-register-search "
+ "to skip cutoffs");
+ else if (CutOffEncountered == (CO_Depth | CO_Interf))
+ Ctx.emitError("register allocation failed: maximum interference and "
+ "depth for recoloring reached. Use "
+ "-fexhaustive-register-search to skip cutoffs");
+ }
+ return Reg;
+}
+
+/// Using a CSR for the first time has a cost because it causes push|pop
+/// to be added to prologue|epilogue. Splitting a cold section of the live
+/// range, or spilling the live range in the cold path, can have lower cost
+/// than using the CSR for the first time. Returns the physical register if
+/// we decide to use the CSR; otherwise returns 0.
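+///
+/// Illustrative trade-off (hypothetical frequencies): if the function entry
+/// is hot but the only part of the live range that would need the CSR sits in
+/// a cold path, the one-time prologue|epilogue push|pop cost (CSRCost) can
+/// exceed the cost of splitting or spilling that cold section, so we prefer
+/// the latter.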
+MCRegister RAGreedy::tryAssignCSRFirstTime(
+ const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
+ uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
+ if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
+ // We choose spill over using the CSR for the first time if the spill cost
+ // is lower than CSRCost.
+ SA->analyze(&VirtReg);
+ if (calcSpillCost() >= CSRCost)
+ return PhysReg;
+
+ // We are going to spill, set CostPerUseLimit to 1 to make sure that
+ // we will not use a callee-saved register in tryEvict.
+ CostPerUseLimit = 1;
+ return 0;
+ }
+ if (ExtraInfo->getStage(VirtReg) < RS_Split) {
+ // We choose pre-splitting over using the CSR for the first time if
+ // the cost of splitting is lower than CSRCost.
+ SA->analyze(&VirtReg);
+ unsigned NumCands = 0;
+ BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, true /*IgnoreCSR*/);
+ if (BestCand == NoCand)
+ // Use the CSR if we can't find a region split below CSRCost.
+ return PhysReg;
+
+ // Perform the actual pre-splitting.
+ doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
+ return 0;
+ }
+ return PhysReg;
+}
+
+void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
+ // Do not keep invalid information around.
+ SetOfBrokenHints.remove(&LI);
+}
+
+void RAGreedy::initializeCSRCost() {
+ // We use the larger of the command-line option and the value reported by
+ // TRI.
+ CSRCost = BlockFrequency(
+ std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
+ if (!CSRCost.getFrequency())
+ return;
+
+ // Raw cost is relative to Entry == 2^14; scale it appropriately.
+ uint64_t ActualEntry = MBFI->getEntryFreq();
+ if (!ActualEntry) {
+ CSRCost = 0;
+ return;
+ }
+ uint64_t FixedEntry = 1 << 14;
+ if (ActualEntry < FixedEntry)
+ CSRCost *= BranchProbability(ActualEntry, FixedEntry);
+ else if (ActualEntry <= UINT32_MAX)
+ // Invert the fraction and divide.
+ CSRCost /= BranchProbability(FixedEntry, ActualEntry);
+ else
+ // Can't use BranchProbability in general, since it takes 32-bit numbers.
+ CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
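+ // Worked example (illustrative numbers): with a raw CSRCost of 100 relative
+ // to the fixed entry frequency 2^14, an actual entry frequency of 2^13
+ // halves the cost (scale by BranchProbability(2^13, 2^14)), while an actual
+ // entry frequency of 2^15 doubles it.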
+}
+
+/// Collect the hint info for \p Reg.
+/// The results are stored into \p Out.
+/// \p Out is not cleared before being populated.
+void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
+ for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
+ if (!Instr.isFullCopy())
+ continue;
+ // Look for the other end of the copy.
+ Register OtherReg = Instr.getOperand(0).getReg();
+ if (OtherReg == Reg) {
+ OtherReg = Instr.getOperand(1).getReg();
+ if (OtherReg == Reg)
+ continue;
+ }
+ // Get the current assignment.
+ MCRegister OtherPhysReg =
+ OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
+ // Push the collected information.
+ Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
+ OtherPhysReg));
+ }
+}
+
+/// Using the given \p List, compute the cost of the broken hints if
+/// \p PhysReg was used.
+/// \return The cost of \p List for \p PhysReg.
+BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
+ MCRegister PhysReg) {
+ BlockFrequency Cost = 0;
+ for (const HintInfo &Info : List) {
+ if (Info.PhysReg != PhysReg)
+ Cost += Info.Freq;
+ }
+ return Cost;
+}
+
+/// Using the register assigned to \p VirtReg, try to recolor
+/// all the live ranges that are copy-related with \p VirtReg.
+/// The recoloring is then propagated to all the live-ranges that have
+/// been recolored and so on, until no more copies can be coalesced or
+/// it is not profitable.
+/// For a given live range, profitability is determined by the sum of the
+/// frequencies of the non-identity copies it would introduce with the old
+/// and new register.
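+///
+/// Illustrative profitability check (hypothetical frequencies): if the copies
+/// that remain non-identity under the current register have total frequency
+/// 10, while under PhysReg the remaining non-identity copies have total
+/// frequency 4, then OldCopiesCost (10) >= NewCopiesCost (4) and the live
+/// range is recolored to PhysReg.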
+void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
+ // We have a broken hint, check if it is possible to fix it by
+ // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
+ // some register and PhysReg may be available for the other live-ranges.
+ SmallSet<Register, 4> Visited;
+ SmallVector<unsigned, 2> RecoloringCandidates;
+ HintsInfo Info;
+ Register Reg = VirtReg.reg();
+ MCRegister PhysReg = VRM->getPhys(Reg);
+ // Start the recoloring algorithm from the input live-interval, then
+ // it will propagate to the ones that are copy-related with it.
+ Visited.insert(Reg);
+ RecoloringCandidates.push_back(Reg);
+
+ LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
+ << '(' << printReg(PhysReg, TRI) << ")\n");
+
+ do {
+ Reg = RecoloringCandidates.pop_back_val();
+
+ // We cannot recolor physical registers.
+ if (Reg.isPhysical())
+ continue;
+
+ // This may be a skipped class
+ if (!VRM->hasPhys(Reg)) {
+ assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
+ "We have an unallocated variable which should have been handled");
+ continue;
+ }
+
+ // Get the live interval mapped with this virtual register to be able
+ // to check for the interference with the new color.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ MCRegister CurrPhys = VRM->getPhys(Reg);
+ // Check that the new color matches the register class constraints and
+ // that it is free for this live range.
+ if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
+ Matrix->checkInterference(LI, PhysReg)))
+ continue;
+
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
+ << ") is recolorable.\n");
+
+ // Gather the hint info.
+ Info.clear();
+ collectHintInfo(Reg, Info);
+ // Check if recoloring the live-range will increase the cost of the
+ // non-identity copies.
+ if (CurrPhys != PhysReg) {
+ LLVM_DEBUG(dbgs() << "Checking profitability:\n");
+ BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
+ BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
+ LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
+ << "\nNew Cost: " << NewCopiesCost.getFrequency()
+ << '\n');
+ if (OldCopiesCost < NewCopiesCost) {
+ LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
+ continue;
+ }
+ // At this point, the cost is either cheaper or equal. If it is
+ // equal, we consider this profitable because it may expose
+ // more recoloring opportunities.
+ LLVM_DEBUG(dbgs() << "=> Profitable.\n");
+ // Recolor the live-range.
+ Matrix->unassign(LI);
+ Matrix->assign(LI, PhysReg);
+ }
+ // Push all copy-related live-ranges to keep reconciling the broken
+ // hints.
+ for (const HintInfo &HI : Info) {
+ if (Visited.insert(HI.Reg).second)
+ RecoloringCandidates.push_back(HI.Reg);
+ }
+ } while (!RecoloringCandidates.empty());
+}
+
+/// Try to recolor broken hints.
+/// Broken hints may be repaired by recoloring when an evicted variable
+/// freed up a register for a larger live-range.
+/// Consider the following example:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// ...
+/// = b
+/// = a
+/// Let us assume b gets split:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// = a
+/// Because of how the allocation works, b, c, and d may be assigned different
+/// colors. Now, if a gets evicted later:
+/// BB1:
+/// a =
+/// st a, SpillSlot
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// e = ld SpillSlot
+/// = e
+/// It is likely that we can assign the same register to b, c, and d,
+/// getting rid of 2 copies.
+void RAGreedy::tryHintsRecoloring() {
+ for (const LiveInterval *LI : SetOfBrokenHints) {
+ assert(LI->reg().isVirtual() &&
+ "Recoloring is possible only for virtual registers");
+ // Some dead defs may be around (e.g., because of debug uses).
+ // Ignore those.
+ if (!VRM->hasPhys(LI->reg()))
+ continue;
+ tryHintRecoloring(*LI);
+ }
+}
+
+MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
+ unsigned Depth) {
+ uint8_t CostPerUseLimit = uint8_t(~0u);
+ // First try assigning a free register.
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ if (MCRegister PhysReg =
+ tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
+ // When NewVRegs is not empty, we may have made decisions such as evicting
+ // a virtual register; go with the earlier decisions and use the physical
+ // register.
+ if (CSRCost.getFrequency() &&
+ EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
+ MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+ CostPerUseLimit, NewVRegs);
+ if (CSRReg || !NewVRegs.empty())
+ // Return now if we decide to use a CSR or create new vregs due to
+ // pre-splitting.
+ return CSRReg;
+ } else
+ return PhysReg;
+ }
+
+ LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
+ LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
+ << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Split ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Split)
+ if (Register PhysReg =
+ tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
+ FixedRegisters)) {
+ Register Hint = MRI->getSimpleHint(VirtReg.reg());
+ // If VirtReg has a hint and that hint is broken, record this
+ // virtual register as a recoloring candidate for broken hints.
+ // Indeed, since we evicted a variable in its neighborhood it is
+ // likely we can at least partially recolor some of the
+ // copy-related live-ranges.
+ if (Hint && Hint != PhysReg)
+ SetOfBrokenHints.insert(&VirtReg);
+ return PhysReg;
+ }
+
+ assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage < RS_Split) {
+ ExtraInfo->setStage(VirtReg, RS_Split);
+ LLVM_DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(VirtReg.reg());
+ return 0;
+ }
+
+ if (Stage < RS_Spill) {
+ // Try splitting VirtReg or interferences.
+ unsigned NewVRegSizeBefore = NewVRegs.size();
+ Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
+ if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
+ return PhysReg;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class will report it.
+ if (Stage >= RS_Done || !VirtReg.isSpillable()) {
+ return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
+ RecolorStack, Depth);
+ }
+
+ // Finally spill VirtReg itself.
+ if ((EnableDeferredSpilling ||
+ TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
+ ExtraInfo->getStage(VirtReg) < RS_Memory) {
+ // TODO: This is experimental and in particular, we do not model
+ // the live range splitting done by spilling correctly.
+ // We would need a deep integration with the spiller to do the
+ // right thing here. Anyway, that is still good for early testing.
+ ExtraInfo->setStage(VirtReg, RS_Memory);
+ LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
+ NewVRegs.push_back(VirtReg.reg());
+ } else {
+ NamedRegionTimer T("spill", "Spiller", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ spiller().spill(LRE);
+ ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+
+ // Tell LiveDebugVariables about the new ranges. Ranges not covered by
+ // the new regs are kept in LDV (still mapping to the old register) until
+ // we rewrite spilled locations in LDV at a later stage.
+ DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+ }
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
+ using namespace ore;
+ if (Spills) {
+ R << NV("NumSpills", Spills) << " spills ";
+ R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
+ }
+ if (FoldedSpills) {
+ R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+ R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
+ << " total folded spills cost ";
+ }
+ if (Reloads) {
+ R << NV("NumReloads", Reloads) << " reloads ";
+ R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
+ }
+ if (FoldedReloads) {
+ R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
+ << " total folded reloads cost ";
+ }
+ if (ZeroCostFoldedReloads)
+ R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
+ << " zero cost folded reloads ";
+ if (Copies) {
+ R << NV("NumVRCopies", Copies) << " virtual registers copies ";
+ R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
+ }
+}
+
+RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
+ RAGreedyStats Stats;
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ int FI;
+
+ auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
+ return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
+ A->getPseudoValue())->getFrameIndex());
+ };
+ auto isPatchpointInstr = [](const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT;
+ };
+ for (MachineInstr &MI : MBB) {
+ if (MI.isCopy()) {
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+ Register SrcReg = Src.getReg();
+ Register DestReg = Dest.getReg();
+ // Only count `COPY`s with a virtual register as source or destination.
+ if (SrcReg.isVirtual() || DestReg.isVirtual()) {
+ if (SrcReg.isVirtual()) {
+ SrcReg = VRM->getPhys(SrcReg);
+ if (SrcReg && Src.getSubReg())
+ SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
+ }
+ if (DestReg.isVirtual()) {
+ DestReg = VRM->getPhys(DestReg);
+ if (DestReg && Dest.getSubReg())
+ DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
+ }
+ if (SrcReg != DestReg)
+ ++Stats.Copies;
+ }
+ continue;
+ }
+
+ SmallVector<const MachineMemOperand *, 2> Accesses;
+ if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
+ ++Stats.Reloads;
+ continue;
+ }
+ if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
+ ++Stats.Spills;
+ continue;
+ }
+ if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess)) {
+ if (!isPatchpointInstr(MI)) {
+ Stats.FoldedReloads += Accesses.size();
+ continue;
+ }
+ // For statepoint there may be folded and zero cost folded stack reloads.
+ std::pair<unsigned, unsigned> NonZeroCostRange =
+ TII->getPatchpointUnfoldableRange(MI);
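+ // Illustrative split (an assumption about statepoint operand layout):
+ // frame-index operands inside NonZeroCostRange must be materialized, so
+ // their folded reloads are counted as real; frame-index operands outside it
+ // (e.g. deopt state) are treated as zero-cost folded reloads unless the same
+ // slot also appears inside the range.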
+ SmallSet<unsigned, 16> FoldedReloads;
+ SmallSet<unsigned, 16> ZeroCostFoldedReloads;
+ for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
+ continue;
+ if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
+ FoldedReloads.insert(MO.getIndex());
+ else
+ ZeroCostFoldedReloads.insert(MO.getIndex());
+ }
+ // If a stack slot is used in a folded reload, it is not zero cost.
+ for (unsigned Slot : FoldedReloads)
+ ZeroCostFoldedReloads.erase(Slot);
+ Stats.FoldedReloads += FoldedReloads.size();
+ Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
+ continue;
+ }
+ Accesses.clear();
+ if (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess)) {
+ Stats.FoldedSpills += Accesses.size();
+ }
+ }
+ // Weight the collected statistics by the relative frequency of this basic
+ // block.
+ float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
+ Stats.ReloadsCost = RelFreq * Stats.Reloads;
+ Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
+ Stats.SpillsCost = RelFreq * Stats.Spills;
+ Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
+ Stats.CopiesCost = RelFreq * Stats.Copies;
+ return Stats;
+}
+
+RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
+ RAGreedyStats Stats;
+
+ // Sum up the spills and reloads in subloops.
+ for (MachineLoop *SubLoop : *L)
+ Stats.add(reportStats(SubLoop));
+
+ for (MachineBasicBlock *MBB : L->getBlocks())
+ // Handle blocks that were not included in subloops.
+ if (Loops->getLoopFor(MBB) == L)
+ Stats.add(computeStats(*MBB));
+
+ if (!Stats.isEmpty()) {
+ using namespace ore;
+
+ ORE->emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
+ L->getStartLoc(), L->getHeader());
+ Stats.report(R);
+ R << "generated in loop";
+ return R;
+ });
+ }
+ return Stats;
+}
+
+void RAGreedy::reportStats() {
+ if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
+ return;
+ RAGreedyStats Stats;
+ for (MachineLoop *L : *Loops)
+ Stats.add(reportStats(L));
+ // Process non-loop blocks.
+ for (MachineBasicBlock &MBB : *MF)
+ if (!Loops->getLoopFor(&MBB))
+ Stats.add(computeStats(MBB));
+ if (!Stats.isEmpty()) {
+ using namespace ore;
+
+ ORE->emit([&]() {
+ DebugLoc Loc;
+ if (auto *SP = MF->getFunction().getSubprogram())
+ Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
+ &MF->front());
+ Stats.report(R);
+ R << "generated in function";
+ return R;
+ });
+ }
+}
+
+bool RAGreedy::hasVirtRegAlloc() {
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (!RC)
+ continue;
+ if (ShouldAllocateClass(*TRI, *RC))
+ return true;
+ }
+
+ return false;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
+
+ MF = &mf;
+ TII = MF->getSubtarget().getInstrInfo();
+
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+
+ // Early return if there is no virtual register to be allocated to a
+ // physical register.
+ if (!hasVirtRegAlloc())
+ return false;
+
+ Indexes = &getAnalysis<SlotIndexes>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+
+ initializeCSRCost();
+
+ RegCosts = TRI->getRegisterCosts(*MF);
+ RegClassPriorityTrumpsGlobalness =
+ GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
+ ? GreedyRegClassPriorityTrumpsGlobalness
+ : TRI->regClassPriorityTrumpsGlobalness(*MF);
+
+ ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
+ ? GreedyReverseLocalAssignment
+ : TRI->reverseLocalAssignment();
+
+ ExtraInfo.emplace();
+ EvictAdvisor =
+ getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
+ PriorityAdvisor =
+ getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
+
+ VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
+
+ VRAI->calculateSpillWeightsAndHints();
+
+ LLVM_DEBUG(LIS->dump());
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
+
+ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
+ GlobalCand.resize(32); // This will grow as needed.
+ SetOfBrokenHints.clear();
+
+ allocatePhysRegs();
+ tryHintsRecoloring();
+
+ if (VerifyEnabled)
+ MF->verify(this, "Before post optimization");
+ postOptimization();
+ reportStats();
+
+ releaseMemory();
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
new file mode 100644
index 000000000000..0f8f9a7d5811
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -0,0 +1,447 @@
+//==- RegAllocGreedy.h ------- greedy register allocator ----------*-C++-*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
+#define LLVM_CODEGEN_REGALLOCGREEDY_H_
+
+#include "InterferenceCache.h"
+#include "RegAllocBase.h"
+#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocPriorityAdvisor.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <queue>
+#include <utility>
+
+namespace llvm {
+class AllocationOrder;
+class AnalysisUsage;
+class EdgeBundles;
+class LiveDebugVariables;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineDominatorTree;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineOptimizationRemarkEmitter;
+class MachineOptimizationRemarkMissed;
+class SlotIndexes;
+class TargetInstrInfo;
+class VirtRegMap;
+
+class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+ // Interface to eviction advisers
+public:
+ /// Track allocation stage and eviction loop prevention during allocation.
+ class ExtraRegInfo final {
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage = RS_New;
+
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
+ unsigned Cascade = 0;
+
+ RegInfo() = default;
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> Info;
+ unsigned NextCascade = 1;
+
+ public:
+ ExtraRegInfo() {}
+ ExtraRegInfo(const ExtraRegInfo &) = delete;
+
+ LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; }
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ Info.grow(Reg.id());
+ Info[Reg].Stage = Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register, if present, otherwise
+ /// initialize it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ Info.grow(Reg.id());
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
+ Info.grow(Reg.id());
+ Info[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
+ }
+
+ template <typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ for (; Begin != End; ++Begin) {
+ Register Reg = *Begin;
+ Info.grow(Reg.id());
+ if (Info[Reg].Stage == RS_New)
+ Info[Reg].Stage = NewStage;
+ }
+ }
+ void LRE_DidCloneVirtReg(Register New, Register Old);
+ };
+
+ LiveRegMatrix *getInterferenceMatrix() const { return Matrix; }
+ LiveIntervals *getLiveIntervals() const { return LIS; }
+ VirtRegMap *getVirtRegMap() const { return VRM; }
+ const RegisterClassInfo &getRegClassInfo() const { return RegClassInfo; }
+ const ExtraRegInfo &getExtraInfo() const { return *ExtraInfo; }
+ size_t getQueueSize() const { return Queue.size(); }
+ // end (interface to eviction advisers)
+
+ // Interface to priority advisers
+ bool getRegClassPriorityTrumpsGlobalness() const {
+ return RegClassPriorityTrumpsGlobalness;
+ }
+ bool getReverseLocalAssignment() const { return ReverseLocalAssignment; }
+ // end (interface to priority advisers)
+
+private:
+ // Convenient shortcuts.
+ using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
+ using SmallLISet = SmallSetVector<const LiveInterval *, 4>;
+
+ // We need to track all tentative recolorings so we can roll back both
+ // successful and unsuccessful recoloring attempts.
+ using RecoloringStack =
+ SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+
+ // context
+ MachineFunction *MF = nullptr;
+
+ // Shortcuts to some useful interface.
+ const TargetInstrInfo *TII = nullptr;
+
+ // analyses
+ SlotIndexes *Indexes = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+ EdgeBundles *Bundles = nullptr;
+ SpillPlacement *SpillPlacer = nullptr;
+ LiveDebugVariables *DebugVars = nullptr;
+
+ // state
+ std::unique_ptr<Spiller> SpillerInstance;
+ PQueue Queue;
+ std::unique_ptr<VirtRegAuxInfo> VRAI;
+ std::optional<ExtraRegInfo> ExtraInfo;
+ std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor;
+
+ std::unique_ptr<RegAllocPriorityAdvisor> PriorityAdvisor;
+
+ // CutOffStage keeps track of whether register allocation failed because of
+ // the cutoffs encountered during last-chance recoloring.
+ // Note: this is used as a bitmask; any new value should be the next power
+ // of 2.
+ enum CutOffStage {
+ // No cutoffs encountered
+ CO_None = 0,
+
+ // lcr-max-depth cutoff encountered
+ CO_Depth = 1,
+
+ // lcr-max-interf cutoff encountered
+ CO_Interf = 2
+ };
+
+ uint8_t CutOffInfo = CutOffStage::CO_None;
+
+#ifndef NDEBUG
+ static const char *const StageName[];
+#endif
+
+ // splitting state.
+ std::unique_ptr<SplitAnalysis> SA;
+ std::unique_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
+ MCRegister PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
+ InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, MCRegister Reg) {
+ PhysReg = Reg;
+ IntvIdx = 0;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+
+ // Set B[I] = C for every live bundle where B[I] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (unsigned I : LiveBundles.set_bits())
+ if (B[I] == NoCand) {
+ B[I] = C;
+ Count++;
+ }
+ return Count;
+ }
+ };
+
+ /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+ enum : unsigned { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
+ /// Callee-save register cost, calculated once per machine function.
+ BlockFrequency CSRCost;
+
+ /// Set of broken hints that may be reconciled later because of eviction.
+ SmallSetVector<const LiveInterval *, 8> SetOfBrokenHints;
+
+ /// The register cost values. This list is recreated for each machine
+ /// function.
+ ArrayRef<uint8_t> RegCosts;
+
+ /// Flags for the live range priority calculation, determined once per
+ /// machine function.
+ bool RegClassPriorityTrumpsGlobalness = false;
+
+ bool ReverseLocalAssignment = false;
+
+public:
+ RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
+
+ /// Return the pass name.
+ StringRef getPassName() const override { return "Greedy Register Allocator"; }
+
+ /// RAGreedy analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ Spiller &spiller() override { return *SpillerInstance; }
+ void enqueueImpl(const LiveInterval *LI) override;
+ const LiveInterval *dequeue() override;
+ MCRegister selectOrSplit(const LiveInterval &,
+ SmallVectorImpl<Register> &) override;
+ void aboutToRemoveInterval(const LiveInterval &) override;
+
+ /// Perform register allocation.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ static char ID;
+
+private:
+ MCRegister selectOrSplitImpl(const LiveInterval &,
+ SmallVectorImpl<Register> &, SmallVirtRegSet &,
+ RecoloringStack &, unsigned = 0);
+
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
+ void LRE_DidCloneVirtReg(Register, Register) override;
+ void enqueue(PQueue &CurQueue, const LiveInterval *LI);
+ const LiveInterval *dequeue(PQueue &CurQueue);
+
+ bool hasVirtRegAlloc();
+ BlockFrequency calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &);
+ bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ bool growRegion(GlobalSplitCandidate &Cand);
+ BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
+ const AllocationOrder &Order);
+ bool calcCompactRegion(GlobalSplitCandidate &);
+ void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>);
+ void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
+ void evictInterference(const LiveInterval &, MCRegister,
+ SmallVectorImpl<Register> &);
+ bool mayRecolorAllInterferences(MCRegister PhysReg,
+ const LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters);
+
+ MCRegister tryAssign(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, const SmallVirtRegSet &);
+ MCRegister tryEvict(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, uint8_t,
+ const SmallVirtRegSet &);
+ MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ /// Calculate cost of region splitting.
+ unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands, bool IgnoreCSR);
+ /// Perform region splitting.
+ unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
+ /// Check other options before using a callee-saved register for the first
+ /// time.
+ MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg,
+ AllocationOrder &Order, MCRegister PhysReg,
+ uint8_t &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs);
+ void initializeCSRCost();
+ unsigned tryBlockSplit(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ unsigned tryInstructionSplit(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ unsigned tryLocalSplit(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
+ unsigned trySplit(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, const SmallVirtRegSet &);
+ unsigned tryLastChanceRecoloring(const LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, RecoloringStack &,
+ unsigned);
+ bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, RecoloringStack &, unsigned);
+ void tryHintRecoloring(const LiveInterval &);
+ void tryHintsRecoloring();
+
+ /// Model the information carried by one end of a copy.
+ struct HintInfo {
+ /// The frequency of the copy.
+ BlockFrequency Freq;
+ /// The virtual register or physical register.
+ Register Reg;
+ /// Its currently assigned register.
+ /// In case of a physical register Reg == PhysReg.
+ MCRegister PhysReg;
+
+ HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg)
+ : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
+ };
+ using HintsInfo = SmallVector<HintInfo, 4>;
+
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
+ void collectHintInfo(Register, HintsInfo &);
+
+ /// Greedy RA statistic to remark.
+ struct RAGreedyStats {
+ unsigned Reloads = 0;
+ unsigned FoldedReloads = 0;
+ unsigned ZeroCostFoldedReloads = 0;
+ unsigned Spills = 0;
+ unsigned FoldedSpills = 0;
+ unsigned Copies = 0;
+ float ReloadsCost = 0.0f;
+ float FoldedReloadsCost = 0.0f;
+ float SpillsCost = 0.0f;
+ float FoldedSpillsCost = 0.0f;
+ float CopiesCost = 0.0f;
+
+ bool isEmpty() {
+ return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
+ ZeroCostFoldedReloads || Copies);
+ }
+
+ void add(RAGreedyStats other) {
+ Reloads += other.Reloads;
+ FoldedReloads += other.FoldedReloads;
+ ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
+ Spills += other.Spills;
+ FoldedSpills += other.FoldedSpills;
+ Copies += other.Copies;
+ ReloadsCost += other.ReloadsCost;
+ FoldedReloadsCost += other.FoldedReloadsCost;
+ SpillsCost += other.SpillsCost;
+ FoldedSpillsCost += other.FoldedSpillsCost;
+ CopiesCost += other.CopiesCost;
+ }
+
+ void report(MachineOptimizationRemarkMissed &R);
+ };
+
+ /// Compute statistic for a basic block.
+ RAGreedyStats computeStats(MachineBasicBlock &MBB);
+
+ /// Compute and report statistic through a remark.
+ RAGreedyStats reportStats(MachineLoop *L);
+
+ /// Report the statistic for each loop.
+ void reportStats();
+};
+} // namespace llvm
+#endif // #ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 000000000000..925a0f085c4b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,954 @@
+//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Math.h"
+#include "llvm/CodeGen/PBQP/Solution.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <limits>
+#include <map>
+#include <memory>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+static RegisterRegAlloc
+RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+PBQPCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+PBQPDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
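+/// Each PBQP node corresponds to one virtual register; its options are the
+/// spill option (index 0) followed by the registers in its allowed set, and
+/// interference between two virtual registers becomes an edge whose cost
+/// matrix assigns infinite cost to overlapping register choices.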
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(char *cPassID = nullptr)
+ : MachineFunctionPass(ID), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ StringRef getPassName() const override { return "PBQP Register Allocator"; }
+
+ /// PBQP analysis usage.
+ void getAnalysisUsage(AnalysisUsage &au) const override;
+
+ /// Perform register allocation
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+private:
+ using RegSet = std::set<Register>;
+
+ char *customPassID;
+
+ RegSet VRegsToAlloc, EmptyIntervalVRegs;
+
+ /// Instructions that define an original register and whose defs are already
+ /// all dead after rematerialization are saved in DeadRemats. Deleting such
+ /// instructions is postponed until all allocations are done, so their remat
+ /// expressions remain available for rematerializing all the siblings of the
+ /// original register.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
+ /// Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
+
+ /// Constructs an initial graph.
+ void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
+
+ /// Spill the given VReg.
+ void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ Spiller &VRegSpiller);
+
+ /// Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller);
+
+ /// Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM) const;
+
+ void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
+};
+
+char RegAllocPBQP::ID = 0;
+
+/// Set spill costs for each node in the PBQP reg-alloc graph.
+class SpillCosts : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // A minimum spill cost, so that register constraints can be set without
+ // normalization in the [0.0, MinSpillCost) interval.
+ const PBQP::PBQPNum MinSpillCost = 10.0;
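+ // For example, an interval whose spill weight is 2.5 ends up with a spill
+ // cost of 12.5, while a zero-weight interval gets the smallest positive
+ // PBQPNum rather than exactly 0.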
+
+ for (auto NId : G.nodeIds()) {
+ PBQP::PBQPNum SpillCost =
+ LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
+ if (SpillCost == 0.0)
+ SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
+ else
+ SpillCost += MinSpillCost;
+ PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
+ NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
+ G.setNodeCosts(NId, std::move(NodeCosts));
+ }
+ }
+};
+
+/// Add interference edges between overlapping vregs.
+class Interference : public PBQPRAConstraint {
+private:
+ using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
+ using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
+ using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
+ using DisjointAllowedRegsCache = DenseSet<IKey>;
+ using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
+ using IEdgeCache = DenseSet<IEdgeKey>;
+
+ bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ const DisjointAllowedRegsCache &D) const {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ if (NRegs == MRegs)
+ return false;
+
+ if (NRegs < MRegs)
+ return D.contains(IKey(NRegs, MRegs));
+
+ return D.contains(IKey(MRegs, NRegs));
+ }
+
+ void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ DisjointAllowedRegsCache &D) {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
+
+ if (NRegs < MRegs)
+ D.insert(IKey(NRegs, MRegs));
+ else
+ D.insert(IKey(MRegs, NRegs));
+ }
+
+ // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
+ // for the fast interference graph construction algorithm. The last is there
+ // to save us from looking up node ids via the VRegToNode map in the graph
+ // metadata.
+ using IntervalInfo =
+ std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
+
+ static SlotIndex getStartPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].start;
+ }
+
+ static SlotIndex getEndPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].end;
+ }
+
+ static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
+ return std::get<2>(I);
+ }
+
+ static bool lowestStartPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ // Condition reversed because priority queue has the *highest* element at
+ // the front, rather than the lowest.
+ return getStartPoint(I1) > getStartPoint(I2);
+ }
+
+ static bool lowestEndPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ SlotIndex E1 = getEndPoint(I1);
+ SlotIndex E2 = getEndPoint(I2);
+
+ if (E1 < E2)
+ return true;
+
+ if (E1 > E2)
+ return false;
+
+ // If two intervals end at the same point, we need a way to break the tie or
+ // the set will assume they're actually equal and refuse to insert a
+ // "duplicate". Just compare the vregs - fast and guaranteed unique.
+ return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
+ }
+
+ static bool isAtLastSegment(const IntervalInfo &I) {
+ return std::get<1>(I) == std::get<0>(I)->size() - 1;
+ }
+
+ static IntervalInfo nextSegment(const IntervalInfo &I) {
+ return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
+ }
+
+public:
+ void apply(PBQPRAGraph &G) override {
+ // The following is loosely based on the linear scan algorithm introduced in
+ // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
+ // isn't linear, because the size of the active set isn't bounded by the
+ // number of registers, but rather the size of the largest clique in the
+ // graph. Still, we expect this to be better than N^2.
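+ // For example, with segments A = [0,10), B = [4,6) and C = [12,20), A and B
+ // are active at the same time and receive an interference edge, while C
+ // only enters the active set after both have been retired and interferes
+ // with neither.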
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // Interference matrices are incredibly regular - they're only a function of
+ // the allowed sets, so we cache them to avoid the overhead of constructing
+ // and uniquing them.
+ IMatrixCache C;
+
+ // Finding an edge is expensive in the worst case (O(max_clique(G))), so we
+ // locally cache the edges we have already seen.
+ IEdgeCache EC;
+
+ // Cache known disjoint allowed-register pairs.
+ DisjointAllowedRegsCache D;
+
+ using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
+ using IntervalQueue =
+ std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)>;
+ IntervalSet Active(lowestEndPoint);
+ IntervalQueue Inactive(lowestStartPoint);
+
+ // Start by building the inactive set.
+ for (auto NId : G.nodeIds()) {
+ Register VReg = G.getNodeMetadata(NId).getVReg();
+ LiveInterval &LI = LIS.getInterval(VReg);
+ assert(!LI.empty() && "PBQP graph contains node for empty interval");
+ Inactive.push(std::make_tuple(&LI, 0, NId));
+ }
+
+ while (!Inactive.empty()) {
+ // Tentatively grab the "next" interval - this choice may be overridden
+ // below.
+ IntervalInfo Cur = Inactive.top();
+
+ // Retire any active intervals that end before Cur starts.
+ IntervalSet::iterator RetireItr = Active.begin();
+ while (RetireItr != Active.end() &&
+ (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
+ // If this interval has subsequent segments, add the next one to the
+ // inactive list.
+ if (!isAtLastSegment(*RetireItr))
+ Inactive.push(nextSegment(*RetireItr));
+
+ ++RetireItr;
+ }
+ Active.erase(Active.begin(), RetireItr);
+
+ // One of the newly retired segments may actually start before the
+ // Cur segment, so re-grab the front of the inactive list.
+ Cur = Inactive.top();
+ Inactive.pop();
+
+ // At this point we know that Cur overlaps all active intervals. Add the
+ // interference edges.
+ PBQP::GraphBase::NodeId NId = getNodeId(Cur);
+ for (const auto &A : Active) {
+ PBQP::GraphBase::NodeId MId = getNodeId(A);
+
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
+ // Check that we haven't already added this edge
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
+ continue;
+
+ // This is a new edge - add it to the graph.
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
+ }
+
+ // Finally, add Cur to the Active set.
+ Active.insert(Cur);
+ }
+ }
+
+private:
+ // Create an interference edge and add it to the graph, unless it is a null
+ // matrix, meaning the nodes' allowed registers do not interfere at all. This
+ // case occurs frequently between integer and floating-point registers, for
+ // example.
+ // Returns true iff the two nodes interfere.
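+ // For instance, if both nodes may use {R0, R1} and each register only
+ // overlaps itself, the matrix (row/column 0 being the spill option) is
+ //
+ //            spill  R0    R1
+ //   spill  [   0     0     0  ]
+ //   R0     [   0    inf    0  ]
+ //   R1     [   0     0    inf ]
+ //
+ // which forbids identical assignments while leaving spilling available.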
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+ const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
+ const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
+
+ // Try looking the edge costs up in the IMatrixCache first.
+ IKey K(&NRegs, &MRegs);
+ IMatrixCache::iterator I = C.find(K);
+ if (I != C.end()) {
+ G.addEdgeBypassingCostAllocator(NId, MId, I->second);
+ return true;
+ }
+
+ PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
+ for (unsigned I = 0; I != NRegs.size(); ++I) {
+ MCRegister PRegN = NRegs[I];
+ for (unsigned J = 0; J != MRegs.size(); ++J) {
+ MCRegister PRegM = MRegs[J];
+ if (TRI.regsOverlap(PRegN, PRegM)) {
+ M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
+ }
+ }
+
+ if (!NodesInterfere)
+ return false;
+
+ PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
+ C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
+ }
+};
+
+class Coalescing : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ MachineFunction &MF = G.getMetadata().MF;
+ MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
+ CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
+
+ // Scan the machine function and apply a coalescing benefit whenever
+ // CoalescerPair approves the copy.
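+ // For example, a virtual-register copy in a block with relative frequency
+ // 3.0 subtracts 3.0 from every edge-cost entry where both vregs would be
+ // assigned the same physical register, biasing the solver toward coalescing
+ // without forbidding other assignments.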
+ for (const auto &MBB : MF) {
+ for (const auto &MI : MBB) {
+ // Skip not-coalescable or already coalesced copies.
+ if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
+ continue;
+
+ Register DstReg = CP.getDstReg();
+ Register SrcReg = CP.getSrcReg();
+
+ PBQP::PBQPNum CBenefit = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
+
+ if (CP.isPhys()) {
+ if (!MF.getRegInfo().isAllocatable(DstReg))
+ continue;
+
+ PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
+
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
+ G.getNodeMetadata(NId).getAllowedRegs();
+
+ unsigned PRegOpt = 0;
+ while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
+ ++PRegOpt;
+
+ if (PRegOpt < Allowed.size()) {
+ PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
+ NewCosts[PRegOpt + 1] -= CBenefit;
+ G.setNodeCosts(NId, std::move(NewCosts));
+ }
+ } else {
+ PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
+ PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
+ &G.getNodeMetadata(N1Id).getAllowedRegs();
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
+ &G.getNodeMetadata(N2Id).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
+ if (EId == G.invalidEdgeId()) {
+ PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
+ Allowed2->size() + 1, 0);
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.addEdge(N1Id, N2Id, std::move(Costs));
+ } else {
+ if (G.getEdgeNode1Id(EId) == N2Id) {
+ std::swap(N1Id, N2Id);
+ std::swap(Allowed1, Allowed2);
+ }
+ PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.updateEdgeCosts(EId, std::move(Costs));
+ }
+ }
+ }
+ }
+ }
+
+private:
+ void addVirtRegCoalesce(
+ PBQPRAGraph::RawMatrix &CostMat,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
+ PBQP::PBQPNum Benefit) {
+ assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
+ assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
+ for (unsigned I = 0; I != Allowed1.size(); ++I) {
+ MCRegister PReg1 = Allowed1[I];
+ for (unsigned J = 0; J != Allowed2.size(); ++J) {
+ MCRegister PReg2 = Allowed2[J];
+ if (PReg1 == PReg2)
+ CostMat[I + 1][J + 1] -= Benefit;
+ }
+ }
+ }
+};
+
+/// PBQP-specific implementation of weight normalization.
+class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
+ float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
+ // All intervals have a spill weight that is mostly proportional to the
+ // number of uses, with uses in loops having a bigger weight.
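+ // For example, an interval containing 8 instructions gets 8x the base
+ // normalized weight.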
+ return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
+ }
+
+public:
+ PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ const MachineLoopInfo &Loops,
+ const MachineBlockFrequencyInfo &MBFI)
+ : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
+};
+} // end anonymous namespace
+
+// Out-of-line destructor/anchor for PBQPRAConstraint.
+PBQPRAConstraint::~PBQPRAConstraint() = default;
+
+void PBQPRAConstraint::anchor() {}
+
+void PBQPRAConstraintList::anchor() {}
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AAResultsWrapperPass>();
+ au.addPreserved<AAResultsWrapperPass>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineBlockFrequencyInfo>();
+ au.addPreserved<MachineBlockFrequencyInfo>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<VirtRegMap>();
+ au.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
+ LiveIntervals &LIS) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Iterate over all live ranges.
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ VRegsToAlloc.insert(Reg);
+ }
+}
+
+static bool isACalleeSavedRegister(MCRegister Reg,
+ const TargetRegisterInfo &TRI,
+ const MachineFunction &MF) {
+ const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
+ for (unsigned i = 0; CSR[i] != 0; ++i)
+ if (TRI.regsOverlap(Reg, CSR[i]))
+ return true;
+ return false;
+}
+
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
+ MachineFunction &MF = G.getMetadata().MF;
+
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+
+ std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+
+ std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
+
+ while (!Worklist.empty()) {
+ Register VReg = Worklist.back();
+ Worklist.pop_back();
+
+ LiveInterval &VRegLI = LIS.getInterval(VReg);
+
+ // If this is an empty interval move it to the EmptyIntervalVRegs set then
+ // continue.
+ if (VRegLI.empty()) {
+ EmptyIntervalVRegs.insert(VRegLI.reg());
+ VRegsToAlloc.erase(VRegLI.reg());
+ continue;
+ }
+
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+
+ // Record any overlaps with regmask operands.
+ BitVector RegMaskOverlaps;
+ LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
+
+ // Compute an initial allowed set for the current vreg.
+ std::vector<MCRegister> VRegAllowed;
+ ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
+ for (MCPhysReg R : RawPRegOrder) {
+ MCRegister PReg(R);
+ if (MRI.isReserved(PReg))
+ continue;
+
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
+ continue;
+
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnit Unit : TRI.regunits(PReg)) {
+ if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
+ Interference = true;
+ break;
+ }
+ }
+ if (Interference)
+ continue;
+
+ // preg is usable for this virtual register.
+ VRegAllowed.push_back(PReg);
+ }
+
+ // Check for vregs that have no allowed registers. These should be
+ // pre-spilled and the new vregs added to the worklist.
+ if (VRegAllowed.empty()) {
+ SmallVector<Register, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ llvm::append_range(Worklist, NewVRegs);
+ continue;
+ }
+
+ VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
+ }
+
+ for (auto &KV : VRegAllowedMap) {
+ auto VReg = KV.first;
+
+ // Move empty intervals to the EmptyIntervalVReg set.
+ if (LIS.getInterval(VReg).empty()) {
+ EmptyIntervalVRegs.insert(VReg);
+ VRegsToAlloc.erase(VReg);
+ continue;
+ }
+
+ auto &VRegAllowed = KV.second;
+
+ PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
+
+ // Tweak the cost of callee-saved registers, as using them forces spilling
+ // and restoring them. This only happens in the prologue / epilogue though.
+ for (unsigned i = 0; i != VRegAllowed.size(); ++i)
+ if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
+ NodeCosts[1 + i] += 1.0;
+
+ PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
+ G.getNodeMetadata(NId).setVReg(VReg);
+ G.getNodeMetadata(NId).setAllowedRegs(
+ G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
+ G.getMetadata().setNodeIdForVReg(VReg, NId);
+ }
+}
+
+void RegAllocPBQP::spillVReg(Register VReg,
+ SmallVectorImpl<Register> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM, Spiller &VRegSpiller) {
+ VRegsToAlloc.erase(VReg);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
+ nullptr, &DeadRemats);
+ VRegSpiller.spill(LRE);
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+ LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight() << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (const Register &R : LRE) {
+ const LiveInterval &LI = LIS.getInterval(R);
+ assert(!LI.empty() && "Empty spill range.");
+ LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg());
+ }
+
+ LLVM_DEBUG(dbgs() << ")\n");
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
+ MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+
+ // Set to true if we have any spills
+ bool AnotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ VRM.clearAllVirt();
+
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
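+ // Option 0 is the spill option; option K (K >= 1) corresponds to
+ // AllowedRegs[K - 1] for that node.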
+ for (auto NId : G.nodeIds()) {
+ Register VReg = G.getNodeMetadata(NId).getVReg();
+ unsigned AllocOpt = Solution.getSelection(NId);
+
+ if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
+ MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
+ LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
+ << TRI.getName(PReg) << "\n");
+ assert(PReg != 0 && "Invalid preg selected.");
+ VRM.assignVirt2Phys(VReg, PReg);
+ } else {
+ // Spill VReg. If this introduces new intervals we'll need another round
+ // of allocation.
+ SmallVector<Register, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ AnotherRoundNeeded |= !NewVRegs.empty();
+ }
+ }
+
+ return !AnotherRoundNeeded;
+}
+
+void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // First allocate registers for the empty intervals.
+ for (const Register &R : EmptyIntervalVRegs) {
+ LiveInterval &LI = LIS.getInterval(R);
+
+ Register PReg = MRI.getSimpleHint(LI.reg());
+
+ if (PReg == 0) {
+ const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
+ const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
+ for (MCRegister CandidateReg : RawPRegOrder) {
+ if (!VRM.getRegInfo().isReserved(CandidateReg)) {
+ PReg = CandidateReg;
+ break;
+ }
+ }
+ assert(PReg &&
+ "No un-reserved physical registers in this register class");
+ }
+
+ VRM.assignVirt2Phys(LI.reg(), PReg);
+ }
+}
+
+void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
+ VRegSpiller.postOptimization();
+ /// Remove dead defs because of rematerialization.
+ for (auto *DeadInst : DeadRemats) {
+ LIS.RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<MachineBlockFrequencyInfo>();
+
+ VirtRegMap &VRM = getAnalysis<VirtRegMap>();
+
+ PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI);
+ VRAI.calculateSpillWeightsAndHints();
+
+ // FIXME: we create DefaultVRAI here to match existing behavior pre-passing
+ // the VRAI through the spiller to the live range editor. However, it probably
+ // makes more sense to pass the PBQP VRAI. The existing behavior had
+ // LiveRangeEdit make its own VirtRegAuxInfo object.
+ VirtRegAuxInfo DefaultVRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(),
+ MBFI);
+ std::unique_ptr<Spiller> VRegSpiller(
+ createInlineSpiller(*this, MF, VRM, DefaultVRAI));
+
+ MF.getRegInfo().freezeReservedRegs(MF);
+
+ LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is continued till no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc(MF, LIS);
+
+#ifndef NDEBUG
+ const Function &F = MF.getFunction();
+ std::string FullyQualifiedName =
+ F.getParent()->getModuleIdentifier() + "." + F.getName().str();
+#endif
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!VRegsToAlloc.empty()) {
+ const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
+ std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
+ std::make_unique<PBQPRAConstraintList>();
+ ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
+ ConstraintsRoot->addConstraint(std::make_unique<Interference>());
+ if (PBQPCoalescing)
+ ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
+ ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
+
+ bool PBQPAllocComplete = false;
+ unsigned Round = 0;
+
+ while (!PBQPAllocComplete) {
+ LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+ (void) Round;
+
+ PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
+ initializeGraph(G, VRM, *VRegSpiller);
+ ConstraintsRoot->apply(G);
+
+#ifndef NDEBUG
+ if (PBQPDumpGraphs) {
+ std::ostringstream RS;
+ RS << Round;
+ std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
+ ".pbqpgraph";
+ std::error_code EC;
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF);
+ LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
+ << GraphFileName << "\"\n");
+ G.dump(OS);
+ }
+#endif
+
+ PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
+ PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
+ ++Round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc(MF, LIS, VRM);
+ postOptimization(*VRegSpiller, LIS);
+ VRegsToAlloc.clear();
+ EmptyIntervalVRegs.clear();
+
+ LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
+
+ return true;
+}
+
+/// Create Printable object for node and register info.
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
+ const PBQP::RegAlloc::PBQPRAGraph &G) {
+ return Printable([NId, &G](raw_ostream &OS) {
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ Register VReg = G.getNodeMetadata(NId).getVReg();
+ const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
+ OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
+ });
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+ for (auto NId : nodeIds()) {
+ const Vector &Costs = getNodeCosts(NId);
+ assert(Costs.getLength() != 0 && "Empty vector in graph.");
+ OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
+ }
+ OS << '\n';
+
+ for (auto EId : edgeIds()) {
+ NodeId N1Id = getEdgeNode1Id(EId);
+ NodeId N2Id = getEdgeNode2Id(EId);
+ assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
+ const Matrix &M = getEdgeCosts(EId);
+ assert(M.getRows() != 0 && "No rows in matrix.");
+ assert(M.getCols() != 0 && "No cols in matrix.");
+ OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
+ OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
+ OS << M << '\n';
+ }
+}
+
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const {
+ dump(dbgs());
+}
+#endif
+
+void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
+ OS << "graph {\n";
+ for (auto NId : nodeIds()) {
+ OS << " node" << NId << " [ label=\""
+ << PrintNodeInfo(NId, *this) << "\\n"
+ << getNodeCosts(NId) << "\" ]\n";
+ }
+
+ OS << " edge [ len=" << nodeIds().size() << " ]\n";
+ for (auto EId : edgeIds()) {
+ OS << " node" << getEdgeNode1Id(EId)
+ << " -- node" << getEdgeNode2Id(EId)
+ << " [ label=\"";
+ const Matrix &EdgeCosts = getEdgeCosts(EId);
+ for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
+ OS << EdgeCosts.getRowAsVector(i) << "\\n";
+ }
+ OS << "\" ]\n";
+ }
+ OS << "}\n";
+}
+
+FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
+ return new RegAllocPBQP(customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ return createPBQPRegisterAllocator();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
new file mode 100644
index 000000000000..e031019a4c91
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -0,0 +1,112 @@
+//===- RegAllocPriorityAdvisor.cpp - live ranges priority advisor ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the default priority advisor and of the Analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocPriorityAdvisor.h"
+#include "RegAllocGreedy.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode(
+ "regalloc-enable-priority-advisor", cl::Hidden,
+ cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default),
+ cl::desc("Enable regalloc advisor mode"),
+ cl::values(
+ clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default,
+ "default", "Default"),
+ clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release,
+ "release", "precompiled"),
+ clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development,
+ "development", "for training")));
+
+char RegAllocPriorityAdvisorAnalysis::ID = 0;
+INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority",
+ "Regalloc priority policy", false, true)
+
+namespace {
+class DefaultPriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ DefaultPriorityAdvisorAnalysis(bool NotAsRequested)
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default),
+ NotAsRequested(NotAsRequested) {}
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Default;
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<SlotIndexes>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ return std::make_unique<DefaultPriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexes>());
+ }
+ bool doInitialization(Module &M) override {
+ if (NotAsRequested)
+ M.getContext().emitError("Requested regalloc priority advisor analysis "
+ "could be created. Using default");
+ return RegAllocPriorityAdvisorAnalysis::doInitialization(M);
+ }
+ const bool NotAsRequested;
+};
+} // namespace
+
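+// Create the priority advisor analysis requested on the command line, falling
+// back to the default advisor (and emitting an error in doInitialization)
+// when the requested one cannot be constructed, e.g. the development-mode
+// advisor when LLVM is built without TFLite.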
+template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
+ Pass *Ret = nullptr;
+ switch (Mode) {
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default:
+ Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false);
+ break;
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development:
+#if defined(LLVM_HAVE_TFLITE)
+ Ret = createDevelopmentModePriorityAdvisor();
+#endif
+ break;
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release:
+ Ret = createReleaseModePriorityAdvisor();
+ break;
+ }
+ if (Ret)
+ return Ret;
+ return new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ true);
+}
+
+StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const {
+ switch (getAdvisorMode()) {
+ case AdvisorMode::Default:
+ return "Default Regalloc Priority Advisor";
+ case AdvisorMode::Release:
+ return "Release mode Regalloc Priority Advisor";
+ case AdvisorMode::Development:
+ return "Development mode Regalloc Priority Advisor";
+ }
+ llvm_unreachable("Unknown advisor kind");
+}
+
+RegAllocPriorityAdvisor::RegAllocPriorityAdvisor(const MachineFunction &MF,
+ const RAGreedy &RA,
+ SlotIndexes *const Indexes)
+ : RA(RA), LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
+ MRI(&VRM->getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()),
+ RegClassInfo(RA.getRegClassInfo()), Indexes(Indexes),
+ RegClassPriorityTrumpsGlobalness(
+ RA.getRegClassPriorityTrumpsGlobalness()),
+ ReverseLocalAssignment(RA.getReverseLocalAssignment()) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
new file mode 100644
index 000000000000..1e9fa967214c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
@@ -0,0 +1,96 @@
+//===- RegAllocPriorityAdvisor.h - live ranges priority advisor -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
+#define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
+
+#include "RegAllocEvictionAdvisor.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class MachineFunction;
+class VirtRegMap;
+class RAGreedy;
+
+/// Interface to the priority advisor, which is responsible for prioritizing
+/// live ranges.
+class RegAllocPriorityAdvisor {
+public:
+ RegAllocPriorityAdvisor(const RegAllocPriorityAdvisor &) = delete;
+ RegAllocPriorityAdvisor(RegAllocPriorityAdvisor &&) = delete;
+ virtual ~RegAllocPriorityAdvisor() = default;
+
+ /// Find the priority value for a live range. Advisors may compute it as a
+ /// float internally (ML prefers floats) even though the interface returns an
+ /// unsigned value.
+ virtual unsigned getPriority(const LiveInterval &LI) const = 0;
+
+ RegAllocPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes);
+
+protected:
+ const RAGreedy &RA;
+ LiveIntervals *const LIS;
+ VirtRegMap *const VRM;
+ MachineRegisterInfo *const MRI;
+ const TargetRegisterInfo *const TRI;
+ const RegisterClassInfo &RegClassInfo;
+ SlotIndexes *const Indexes;
+ const bool RegClassPriorityTrumpsGlobalness;
+ const bool ReverseLocalAssignment;
+};
+
+class DefaultPriorityAdvisor : public RegAllocPriorityAdvisor {
+public:
+ DefaultPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes)
+ : RegAllocPriorityAdvisor(MF, RA, Indexes) {}
+
+private:
+ unsigned getPriority(const LiveInterval &LI) const override;
+};
+
+class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
+public:
+ enum class AdvisorMode : int { Default, Release, Development };
+
+ RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
+ : ImmutablePass(ID), Mode(Mode){};
+ static char ID;
+
+ /// Get an advisor for the given context (i.e. machine function, etc)
+ virtual std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+ AdvisorMode getAdvisorMode() const { return Mode; }
+ virtual void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward){};
+
+protected:
+ // This analysis preserves everything, and subclasses may have additional
+ // requirements.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+private:
+ StringRef getPassName() const override;
+ const AdvisorMode Mode;
+};
+
+/// Specialization for the API used by the analysis infrastructure to create
+/// an instance of the priority advisor.
+template <> Pass *callDefaultCtor<RegAllocPriorityAdvisorAnalysis>();
+
+RegAllocPriorityAdvisorAnalysis *createReleaseModePriorityAdvisor();
+
+RegAllocPriorityAdvisorAnalysis *createDevelopmentModePriorityAdvisor();
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
new file mode 100644
index 000000000000..e420283dfcfa
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -0,0 +1,121 @@
+//===- RegAllocScore.cpp - evaluate regalloc policy quality ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Calculate a measure of the register allocation policy quality. This is used
+/// to construct a reward for the training of the ML-driven allocation policy.
+/// Currently, the score is the sum of the machine basic block frequency-weighted
+/// number of loads, stores, copies, and remat instructions, each factored with
+/// a relative weight.
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocScore.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden);
+cl::opt<double> LoadWeight("regalloc-load-weight", cl::init(4.0), cl::Hidden);
+cl::opt<double> StoreWeight("regalloc-store-weight", cl::init(1.0), cl::Hidden);
+cl::opt<double> CheapRematWeight("regalloc-cheap-remat-weight", cl::init(0.2),
+ cl::Hidden);
+cl::opt<double> ExpensiveRematWeight("regalloc-expensive-remat-weight",
+ cl::init(1.0), cl::Hidden);
+#define DEBUG_TYPE "regalloc-score"
+
+RegAllocScore &RegAllocScore::operator+=(const RegAllocScore &Other) {
+ CopyCounts += Other.copyCounts();
+ LoadCounts += Other.loadCounts();
+ StoreCounts += Other.storeCounts();
+ LoadStoreCounts += Other.loadStoreCounts();
+ CheapRematCounts += Other.cheapRematCounts();
+ ExpensiveRematCounts += Other.expensiveRematCounts();
+ return *this;
+}
+
+bool RegAllocScore::operator==(const RegAllocScore &Other) const {
+ return copyCounts() == Other.copyCounts() &&
+ loadCounts() == Other.loadCounts() &&
+ storeCounts() == Other.storeCounts() &&
+ loadStoreCounts() == Other.loadStoreCounts() &&
+ cheapRematCounts() == Other.cheapRematCounts() &&
+ expensiveRematCounts() == Other.expensiveRematCounts();
+}
+
+bool RegAllocScore::operator!=(const RegAllocScore &Other) const {
+ return !(*this == Other);
+}
+
+double RegAllocScore::getScore() const {
+ double Ret = 0.0;
+ Ret += CopyWeight * copyCounts();
+ Ret += LoadWeight * loadCounts();
+ Ret += StoreWeight * storeCounts();
+ Ret += (LoadWeight + StoreWeight) * loadStoreCounts();
+ Ret += CheapRematWeight * cheapRematCounts();
+ Ret += ExpensiveRematWeight * expensiveRematCounts();
+
+ return Ret;
+}
+
+RegAllocScore
+llvm::calculateRegAllocScore(const MachineFunction &MF,
+ const MachineBlockFrequencyInfo &MBFI) {
+ return calculateRegAllocScore(
+ MF,
+ [&](const MachineBasicBlock &MBB) {
+ return MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
+ },
+ [&](const MachineInstr &MI) {
+ return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable(
+ MI);
+ });
+}
+
+RegAllocScore llvm::calculateRegAllocScore(
+ const MachineFunction &MF,
+ llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq,
+ llvm::function_ref<bool(const MachineInstr &)>
+ IsTriviallyRematerializable) {
+ RegAllocScore Total;
+
+ for (const MachineBasicBlock &MBB : MF) {
+ double BlockFreqRelativeToEntrypoint = GetBBFreq(MBB);
+ RegAllocScore MBBScore;
+
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isDebugInstr() || MI.isKill() || MI.isInlineAsm()) {
+ continue;
+ }
+ if (MI.isCopy()) {
+ MBBScore.onCopy(BlockFreqRelativeToEntrypoint);
+ } else if (IsTriviallyRematerializable(MI)) {
+ if (MI.getDesc().isAsCheapAsAMove()) {
+ MBBScore.onCheapRemat(BlockFreqRelativeToEntrypoint);
+ } else {
+ MBBScore.onExpensiveRemat(BlockFreqRelativeToEntrypoint);
+ }
+ } else if (MI.mayLoad() && MI.mayStore()) {
+ MBBScore.onLoadStore(BlockFreqRelativeToEntrypoint);
+ } else if (MI.mayLoad()) {
+ MBBScore.onLoad(BlockFreqRelativeToEntrypoint);
+ } else if (MI.mayStore()) {
+ MBBScore.onStore(BlockFreqRelativeToEntrypoint);
+ }
+ }
+ Total += MBBScore;
+ }
+ return Total;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
new file mode 100644
index 000000000000..b80adae29f23
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
@@ -0,0 +1,73 @@
+//==- RegAllocScore.h - evaluate regalloc policy quality ----------*-C++-*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Calculate a measure of the register allocation policy quality. This is used
+/// to construct a reward for the training of the ML-driven allocation policy.
+/// Currently, the score is the sum of the machine basic block frequency-weighted
+/// number of loads, stores, copies, and remat instructions, each factored with
+/// a relative weight.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCSCORE_H_
+#define LLVM_CODEGEN_REGALLOCSCORE_H_
+
+#include "llvm/ADT/STLFunctionalExtras.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineInstr;
+
+/// Regalloc score.
+class RegAllocScore final {
+ double CopyCounts = 0.0;
+ double LoadCounts = 0.0;
+ double StoreCounts = 0.0;
+ double CheapRematCounts = 0.0;
+ double LoadStoreCounts = 0.0;
+ double ExpensiveRematCounts = 0.0;
+
+public:
+ RegAllocScore() = default;
+ RegAllocScore(const RegAllocScore &) = default;
+
+ double copyCounts() const { return CopyCounts; }
+ double loadCounts() const { return LoadCounts; }
+ double storeCounts() const { return StoreCounts; }
+ double loadStoreCounts() const { return LoadStoreCounts; }
+ double expensiveRematCounts() const { return ExpensiveRematCounts; }
+ double cheapRematCounts() const { return CheapRematCounts; }
+
+ void onCopy(double Freq) { CopyCounts += Freq; }
+ void onLoad(double Freq) { LoadCounts += Freq; }
+ void onStore(double Freq) { StoreCounts += Freq; }
+ void onLoadStore(double Freq) { LoadStoreCounts += Freq; }
+ void onExpensiveRemat(double Freq) { ExpensiveRematCounts += Freq; }
+ void onCheapRemat(double Freq) { CheapRematCounts += Freq; }
+
+ RegAllocScore &operator+=(const RegAllocScore &Other);
+ bool operator==(const RegAllocScore &Other) const;
+ bool operator!=(const RegAllocScore &Other) const;
+ double getScore() const;
+};
+
+/// Calculate a score. When comparing two scores for the same function but
+/// different policies, the better policy has the smaller score.
+/// The implementation is the overload below (which is also easily unittestable).
+RegAllocScore calculateRegAllocScore(const MachineFunction &MF,
+ const MachineBlockFrequencyInfo &MBFI);
+
+/// Implementation of the above, which is also more easily unittestable.
+RegAllocScore calculateRegAllocScore(
+ const MachineFunction &MF,
+ llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq,
+ llvm::function_ref<bool(const MachineInstr &)> IsTriviallyRematerializable);
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCSCORE_H_
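
A hedged sketch of how the lambda-based overload declared above might be driven from a unit test. It assumes an llvm::MachineFunction MF obtained elsewhere (for example from parsed MIR) and an include path for this private lib/CodeGen header; both lambdas are deliberately trivial stand-ins for real frequency and rematerialization queries.

// Hypothetical test helper; MF must come from an already-built MachineFunction
// and the include path for this private header is an assumption.
#include "RegAllocScore.h"
#include "llvm/CodeGen/MachineFunction.h"

double scoreWithFlatFrequencies(const llvm::MachineFunction &MF) {
  llvm::RegAllocScore Score = llvm::calculateRegAllocScore(
      MF,
      // Pretend every block runs exactly as often as the entry block.
      [](const llvm::MachineBasicBlock &) { return 1.0; },
      // Pretend nothing is trivially rematerializable.
      [](const llvm::MachineInstr &) { return false; });
  return Score.getScore();
}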
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
new file mode 100644
index 000000000000..6657cf3c1ef4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -0,0 +1,215 @@
+//===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass is a simple MachineFunction pass that collects register usage
+/// details by iterating over each physical register and checking, via the
+/// MachineRegisterInfo, whether it is clobbered, then creates a RegMask based
+/// on those details.
+/// The pass then stores this RegMask in PhysicalRegisterUsageInfo.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+STATISTIC(NumCSROpt,
+ "Number of functions optimized for callee saved registers");
+
+namespace {
+
+class RegUsageInfoCollector : public MachineFunctionPass {
+public:
+ RegUsageInfoCollector() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeRegUsageInfoCollectorPass(Registry);
+ }
+
+ StringRef getPassName() const override {
+ return "Register Usage Information Collector Pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ // Call getCalleeSaves and then also set the bits for subregs and
+ // fully saved superregs.
+ static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
+
+ static char ID;
+};
+
+} // end of anonymous namespace
+
+char RegUsageInfoCollector::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoCollector, "RegUsageInfoCollector",
+ "Register Usage Information Collector", false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoCollector, "RegUsageInfoCollector",
+ "Register Usage Information Collector", false, false)
+
+FunctionPass *llvm::createRegUsageInfoCollector() {
+ return new RegUsageInfoCollector();
+}
+
+// TODO: Move to hook somewhere?
+
+// Return true if it is useful to track the used registers for IPRA / no CSR
+// optimizations. This is not useful for entry points, and computing the
+// register usage information is expensive.
+static bool isCallableFunction(const MachineFunction &MF) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_KERNEL:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const LLVMTargetMachine &TM = MF.getTarget();
+
+ LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
+ << " -------------------- \nFunction Name : "
+ << MF.getName() << '\n');
+
+ // Analyzing the register usage may be expensive on some targets.
+ if (!isCallableFunction(MF)) {
+ LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
+ return false;
+ }
+
+ // If there are no callers, there's no point in computing more precise
+ // register usage here.
+ if (MF.getFunction().use_empty()) {
+ LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
+ return false;
+ }
+
+ std::vector<uint32_t> RegMask;
+
+  // Compute the size of the bit vector to represent all the registers.
+  // The bit vector is broken into 32-bit chunks, so its size is the number
+  // of registers divided by 32, rounded up.
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ RegMask.resize(RegMaskSize, ~((uint32_t)0));
+
+ const Function &F = MF.getFunction();
+
+ PhysicalRegisterUsageInfo &PRUI = getAnalysis<PhysicalRegisterUsageInfo>();
+ PRUI.setTargetMachine(TM);
+
+ LLVM_DEBUG(dbgs() << "Clobbered Registers: ");
+
+ BitVector SavedRegs;
+ computeCalleeSavedRegs(SavedRegs, MF);
+
+ const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
+ auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
+ RegMask[Reg / 32] &= ~(1u << Reg % 32);
+ };
+
+ // Some targets can clobber registers "inside" a call, typically in
+ // linker-generated code.
+ for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+
+ // Scan all the physical registers. When a register is defined in the current
+ // function set it and all the aliasing registers as defined in the regmask.
+ // FIXME: Rewrite to use regunits.
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ // Don't count registers that are saved and restored.
+ if (SavedRegs.test(PReg))
+ continue;
+    // If a register is defined by an instruction, mark it as defined together
+    // with all its unsaved aliases.
+ if (!MRI->def_empty(PReg)) {
+ for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
+ if (!SavedRegs.test(*AI))
+ SetRegAsDefined(*AI);
+ continue;
+ }
+    // If a register is in the UsedPhysRegsMask set then mark it as defined.
+    // All clobbered aliases will also be in the set, so we can skip marking
+    // all the aliases as defined here.
+ if (UsedPhysRegsMask.test(PReg))
+ SetRegAsDefined(PReg);
+ }
+
+ if (TargetFrameLowering::isSafeForNoCSROpt(F) &&
+ MF.getSubtarget().getFrameLowering()->isProfitableForNoCSROpt(F)) {
+ ++NumCSROpt;
+ LLVM_DEBUG(dbgs() << MF.getName()
+ << " function optimized for not having CSR.\n");
+ }
+
+ LLVM_DEBUG(
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
+ dbgs() << printReg(PReg, TRI) << " ";
+ }
+
+ dbgs() << " \n----------------------------------------\n";
+ );
+
+ PRUI.storeUpdateRegUsageInfo(F, RegMask);
+
+ return false;
+}
+
+void RegUsageInfoCollector::
+computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // Target will return the set of registers that it saves/restores as needed.
+ SavedRegs.clear();
+ TFI.getCalleeSaves(MF, SavedRegs);
+ if (SavedRegs.none())
+ return;
+
+ // Insert subregs.
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ MCPhysReg Reg = CSRegs[i];
+ if (SavedRegs.test(Reg)) {
+ // Save subregisters
+ for (MCPhysReg SR : TRI.subregs(Reg))
+ SavedRegs.set(SR);
+ }
+ }
+}
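
The regmask convention used by this pass is easy to get backwards, so here is a standalone illustration (plain C++, not LLVM code): the mask starts all-ones meaning every register is preserved, SetRegAsDefined clears the bit of a clobbered register, and a query analogous to MachineOperand::clobbersPhysReg reports a register as clobbered exactly when its bit is clear. The register numbers below are arbitrary and not tied to any real target.

// Standalone illustration of the regmask bit convention used above.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const unsigned NumRegs = 100;
  // Ceil(NumRegs / 32) 32-bit chunks, all-ones == "every register preserved".
  std::vector<uint32_t> RegMask((NumRegs + 31) / 32, ~uint32_t(0));

  auto SetRegAsDefined = [&RegMask](unsigned Reg) {
    RegMask[Reg / 32] &= ~(1u << (Reg % 32)); // clear bit => clobbered
  };
  auto ClobbersPhysReg = [&RegMask](unsigned Reg) {
    return !(RegMask[Reg / 32] & (1u << (Reg % 32)));
  };

  SetRegAsDefined(37);          // pretend register 37 is defined in the function
  assert(ClobbersPhysReg(37));  // so a call through this mask clobbers it
  assert(!ClobbersPhysReg(36)); // untouched registers stay preserved
  return 0;
}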
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
new file mode 100644
index 000000000000..d356962e0d78
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -0,0 +1,154 @@
+//=--- RegUsageInfoPropagate.cpp - Register Usage Information Propagation ---=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass iterates through the MachineInstrs in a given MachineFunction
+/// and at each call site queries RegisterUsageInfo for the RegMask (computed
+/// from the callee's actual register allocation) of the callee function. If
+/// that RegMask detail is available, the pass updates the RegMask of the call
+/// instruction. This updated RegMask is then used by the register allocator
+/// while allocating the current MachineFunction.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+#define RUIP_NAME "Register Usage Information Propagation"
+
+namespace {
+
+class RegUsageInfoPropagation : public MachineFunctionPass {
+public:
+ RegUsageInfoPropagation() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeRegUsageInfoPropagationPass(Registry);
+ }
+
+ StringRef getPassName() const override { return RUIP_NAME; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+private:
+ static void setRegMask(MachineInstr &MI, ArrayRef<uint32_t> RegMask) {
+ assert(RegMask.size() ==
+ MachineOperand::getRegMaskSize(MI.getParent()->getParent()
+ ->getRegInfo().getTargetRegisterInfo()
+ ->getNumRegs())
+ && "expected register mask size");
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask())
+ MO.setRegMask(RegMask.data());
+ }
+ }
+};
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoPropagation, "reg-usage-propagation",
+ RUIP_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoPropagation, "reg-usage-propagation",
+ RUIP_NAME, false, false)
+
+char RegUsageInfoPropagation::ID = 0;
+
+// Assumes call instructions have a single reference to a function.
+static const Function *findCalledFunction(const Module &M,
+ const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isGlobal())
+ return dyn_cast<const Function>(MO.getGlobal());
+
+ if (MO.isSymbol())
+ return M.getFunction(MO.getSymbolName());
+ }
+
+ return nullptr;
+}
+
+bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) {
+ const Module &M = *MF.getFunction().getParent();
+ PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+
+ LLVM_DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
+ << " ++++++++++++++++++++ \n");
+ LLVM_DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasCalls() && !MFI.hasTailCall())
+ return false;
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isCall())
+ continue;
+ LLVM_DEBUG(
+ dbgs()
+ << "Call Instruction Before Register Usage Info Propagation : \n"
+ << MI << "\n");
+
+ auto UpdateRegMask = [&](const Function &F) {
+ const ArrayRef<uint32_t> RegMask = PRUI->getRegUsageInfo(F);
+ if (RegMask.empty())
+ return;
+ setRegMask(MI, RegMask);
+ Changed = true;
+ };
+
+ if (const Function *F = findCalledFunction(M, MI)) {
+ if (F->isDefinitionExact()) {
+ UpdateRegMask(*F);
+ } else {
+ LLVM_DEBUG(dbgs() << "Function definition is not exact\n");
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Failed to find call target function\n");
+ }
+
+ LLVM_DEBUG(
+ dbgs()
+ << "Call Instruction After Register Usage Info Propagation : \n"
+ << MI << '\n');
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+ "++++++ \n");
+ return Changed;
+}
+
+FunctionPass *llvm::createRegUsageInfoPropPass() {
+ return new RegUsageInfoPropagation();
+}
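
To see why replacing the call-site regmask is worthwhile, here is a made-up, standalone illustration: a conservative default mask preserves only the callee-saved registers, while a mask computed from the callee's actual allocation additionally preserves every register the callee never touches, so the caller's allocator can keep more values live across the call. The 32-register target and the mask values are invented for illustration.

// Made-up 32-register target: bit set == preserved across the call.
#include <bitset>
#include <cstdint>
#include <iostream>

int main() {
  // Conservative default: only hypothetical callee-saved registers 8-15.
  uint32_t DefaultMask = 0x0000FF00u;
  // Precise mask from the callee's actual allocation: the callee also never
  // touches registers 20-31, so those are preserved as well.
  uint32_t PreciseMask = 0xFFF0FF00u;

  // The precise mask only ever adds preserved registers: everything preserved
  // by the default mask is still preserved by the precise one.
  bool Refines = (PreciseMask & DefaultMask) == DefaultMask;
  std::cout << "precise mask preserves "
            << std::bitset<32>(PreciseMask).count() << " regs vs "
            << std::bitset<32>(DefaultMask).count()
            << " (refines: " << Refines << ")\n";
  return 0;
}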
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
new file mode 100644
index 000000000000..8e0a0b0dc282
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
@@ -0,0 +1,112 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBank class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "registerbank"
+
+using namespace llvm;
+
+const unsigned RegisterBank::InvalidID = UINT_MAX;
+
+RegisterBank::RegisterBank(unsigned ID, const char *Name,
+ const uint32_t *CoveredClasses,
+ unsigned NumRegClasses)
+ : ID(ID), Name(Name) {
+ ContainedRegClasses.resize(NumRegClasses);
+ ContainedRegClasses.setBitsInMask(CoveredClasses);
+}
+
+bool RegisterBank::verify(const RegisterBankInfo &RBI,
+ const TargetRegisterInfo &TRI) const {
+ assert(isValid() && "Invalid register bank");
+ for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
+ const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
+
+ if (!covers(RC))
+ continue;
+ // Verify that the register bank covers all the sub classes of the
+ // classes it covers.
+
+ // Use a different (slow in that case) method than
+ // RegisterBankInfo to find the subclasses of RC, to make sure
+ // both agree on the covers.
+ for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) {
+      const TargetRegisterClass &SubRC = *TRI.getRegClass(SubRCId);
+
+ if (!RC.hasSubClassEq(&SubRC))
+ continue;
+
+ // Verify that the Size of the register bank is big enough to cover
+ // all the register classes it covers.
+ assert(RBI.getMaximumSize(getID()) >= TRI.getRegSizeInBits(SubRC) &&
+ "Size is not big enough for all the subclasses!");
+ assert(covers(SubRC) && "Not all subclasses are covered");
+ }
+ }
+ return true;
+}
+
+bool RegisterBank::covers(const TargetRegisterClass &RC) const {
+ assert(isValid() && "RB hasn't been initialized yet");
+ return ContainedRegClasses.test(RC.getID());
+}
+
+bool RegisterBank::isValid() const {
+ return ID != InvalidID && Name != nullptr &&
+ // A register bank that does not cover anything is useless.
+ !ContainedRegClasses.empty();
+}
+
+bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
+ // There must be only one instance of a given register bank alive
+ // for the whole compilation.
+ // The RegisterBankInfo is supposed to enforce that.
+ assert((OtherRB.getID() != getID() || &OtherRB == this) &&
+ "ID does not uniquely identify a RegisterBank");
+ return &OtherRB == this;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+ print(dbgs(), /* IsForDebug */ true, TRI);
+}
+#endif
+
+void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
+ const TargetRegisterInfo *TRI) const {
+ OS << getName();
+ if (!IsForDebug)
+ return;
+ OS << "(ID:" << getID() << ")\n"
+ << "isValid:" << isValid() << '\n'
+ << "Number of Covered register classes: " << ContainedRegClasses.count()
+ << '\n';
+ // Print all the subclasses if we can.
+  // The register classes may not be properly initialized yet.
+ if (!TRI || ContainedRegClasses.empty())
+ return;
+ assert(ContainedRegClasses.size() == TRI->getNumRegClasses() &&
+ "TRI does not match the initialization process?");
+ OS << "Covered register classes:\n";
+ ListSeparator LS;
+ for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) {
+ const TargetRegisterClass &RC = *TRI->getRegClass(RCId);
+
+ if (covers(RC))
+ OS << LS << TRI->getRegClassName(&RC);
+ }
+}
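
covers() above is simply a bit test against the set of register-class IDs fixed at construction. A minimal standalone analogue, with invented class IDs and no LLVM dependencies:

// Minimal analogue of RegisterBank::covers(): a bank holds a set of register
// class IDs and covers() is membership. The class IDs here are invented.
#include <cassert>
#include <vector>

struct ToyRegisterBank {
  std::vector<bool> ContainedRegClasses; // indexed by register class ID
  bool covers(unsigned RCId) const { return ContainedRegClasses[RCId]; }
};

int main() {
  ToyRegisterBank GPRBank{std::vector<bool>(8, false)};
  GPRBank.ContainedRegClasses[0] = true; // e.g. "GPR32"
  GPRBank.ContainedRegClasses[1] = true; // e.g. "GPR64"
  assert(GPRBank.covers(1));
  assert(!GPRBank.covers(5));            // e.g. a vector class it does not cover
  return 0;
}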
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
new file mode 100644
index 000000000000..658a09fd8700
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -0,0 +1,817 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBankInfo class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm> // For std::max.
+
+#define DEBUG_TYPE "registerbankinfo"
+
+using namespace llvm;
+
+STATISTIC(NumPartialMappingsCreated,
+ "Number of partial mappings dynamically created");
+STATISTIC(NumPartialMappingsAccessed,
+ "Number of partial mappings dynamically accessed");
+STATISTIC(NumValueMappingsCreated,
+ "Number of value mappings dynamically created");
+STATISTIC(NumValueMappingsAccessed,
+ "Number of value mappings dynamically accessed");
+STATISTIC(NumOperandsMappingsCreated,
+ "Number of operands mappings dynamically created");
+STATISTIC(NumOperandsMappingsAccessed,
+ "Number of operands mappings dynamically accessed");
+STATISTIC(NumInstructionMappingsCreated,
+ "Number of instruction mappings dynamically created");
+STATISTIC(NumInstructionMappingsAccessed,
+ "Number of instruction mappings dynamically accessed");
+
+const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
+const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
+
+//------------------------------------------------------------------------------
+// RegisterBankInfo implementation.
+//------------------------------------------------------------------------------
+RegisterBankInfo::RegisterBankInfo(const RegisterBank **RegBanks,
+ unsigned NumRegBanks, const unsigned *Sizes,
+ unsigned HwMode)
+ : RegBanks(RegBanks), NumRegBanks(NumRegBanks), Sizes(Sizes),
+ HwMode(HwMode) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+ assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
+ assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid");
+ }
+#endif // NDEBUG
+}
+
+bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+ const RegisterBank &RegBank = getRegBank(Idx);
+ assert(Idx == RegBank.getID() &&
+ "ID does not match the index in the array");
+ LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n');
+ assert(RegBank.verify(*this, TRI) && "RegBank is invalid");
+ }
+#endif // NDEBUG
+ return true;
+}
+
+const RegisterBank *
+RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ if (!Reg.isVirtual()) {
+ // FIXME: This was probably a copy to a virtual register that does have a
+ // type we could use.
+ const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg, TRI);
+ return RC ? &getRegBankFromRegClass(*RC, LLT()) : nullptr;
+ }
+
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (auto *RB = dyn_cast_if_present<const RegisterBank *>(RegClassOrBank))
+ return RB;
+ if (auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank))
+ return &getRegBankFromRegClass(*RC, MRI.getType(Reg));
+ return nullptr;
+}
+
+const TargetRegisterClass *
+RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
+ const TargetRegisterInfo &TRI) const {
+ assert(Reg.isPhysical() && "Reg must be a physreg");
+ const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
+ if (RegRCIt != PhysRegMinimalRCs.end())
+ return RegRCIt->second;
+ const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClassLLT(Reg, LLT());
+ PhysRegMinimalRCs[Reg] = PhysRC;
+ return PhysRC;
+}
+
+const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
+ const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+
+ // The mapping of the registers may be available via the
+ // register class constraints.
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI);
+
+ if (!RC)
+ return nullptr;
+
+ Register Reg = MI.getOperand(OpIdx).getReg();
+ const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg));
+ // Check that the target properly implemented getRegBankFromRegClass.
+ assert(RegBank.covers(*RC) &&
+ "The mapping of the register bank does not make sense");
+ return &RegBank;
+}
+
+const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
+ Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+
+ // If the register already has a class, fallback to MRI::constrainRegClass.
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (isa<const TargetRegisterClass *>(RegClassOrBank))
+ return MRI.constrainRegClass(Reg, &RC);
+
+ const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
+ // Otherwise, all we can do is ensure the bank covers the class, and set it.
+ if (RB && !RB->covers(RC))
+ return nullptr;
+
+ // If nothing was set or the class is simply compatible, set it.
+ MRI.setRegClass(Reg, &RC);
+ return &RC;
+}
+
+/// Check whether or not \p MI should be treated like a copy
+/// for the mappings.
+/// Copy-like instructions are special for mapping because
+/// they don't have actual register constraints. Moreover,
+/// they sometimes have register classes assigned and we can
+/// just use those instead of failing to provide a generic mapping.
+static bool isCopyLike(const MachineInstr &MI) {
+ return MI.isCopy() || MI.isPHI() ||
+ MI.getOpcode() == TargetOpcode::REG_SEQUENCE;
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
+ // For copies we want to walk over the operands and try to find one
+ // that has a register bank since the instruction itself will not get
+ // us any constraint.
+ bool IsCopyLike = isCopyLike(MI);
+  // For copy-like instructions, only the mapping of the definition
+  // is important. The rest is not constrained.
+ unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
+
+ const MachineFunction &MF = *MI.getMF();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ // We may need to query the instruction encoding to guess the mapping.
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+  // Before doing anything complicated, check whether the mapping is
+  // directly available.
+ bool CompleteMapping = true;
+
+ SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping);
+ for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+      // The register bank of Reg is just a side effect of the current
+      // execution and, in particular, there is no reason to believe this
+      // is the best default mapping for the current instruction. Keep
+      // it as an alternative register bank if we cannot figure out
+      // anything better.
+ const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+      // For copy-like instructions, we want to reuse the register bank
+      // that is already set on Reg, if any, since those instructions do
+      // not have any constraints.
+ const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr;
+ if (!CurRegBank) {
+ // If this is a target specific instruction, we can deduce
+ // the register bank from the encoding constraints.
+ CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI);
+ if (!CurRegBank) {
+ // All our attempts failed, give up.
+ CompleteMapping = false;
+
+ if (!IsCopyLike)
+ // MI does not carry enough information to guess the mapping.
+ return getInvalidInstructionMapping();
+ continue;
+ }
+ }
+
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank);
+ if (IsCopyLike) {
+ if (!OperandsMapping[0]) {
+ if (MI.isRegSequence()) {
+ // For reg_sequence, the result size does not match the input.
+ unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(),
+ MRI, TRI);
+ OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank);
+ } else {
+ OperandsMapping[0] = ValMapping;
+ }
+ }
+
+ // The default handling assumes any register bank can be copied to any
+ // other. If this isn't the case, the target should specially deal with
+ // reg_sequence/phi. There may also be unsatisfiable copies.
+ for (; OpIdx != EndIdx; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+ if (AltRegBank &&
+ cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI)))
+ return getInvalidInstructionMapping();
+ }
+
+ CompleteMapping = true;
+ break;
+ }
+
+ OperandsMapping[OpIdx] = ValMapping;
+ }
+
+ if (IsCopyLike && !CompleteMapping) {
+ // No way to deduce the type from what we have.
+ return getInvalidInstructionMapping();
+ }
+
+  assert(CompleteMapping && "Setting an incomplete mapping");
+ return getInstructionMapping(
+ DefaultMappingID, /*Cost*/ 1,
+ /*OperandsMapping*/ getOperandsMapping(OperandsMapping),
+ NumOperandsForMapping);
+}
+
+/// Hashing function for PartialMapping.
+static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank *RegBank) {
+ return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0);
+}
+
+/// Overloaded version of hash_value for a PartialMapping.
+hash_code
+llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) {
+ return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length,
+ PartMapping.RegBank);
+}
+
+const RegisterBankInfo::PartialMapping &
+RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const {
+ ++NumPartialMappingsAccessed;
+
+ hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank);
+ const auto &It = MapOfPartialMappings.find(Hash);
+ if (It != MapOfPartialMappings.end())
+ return *It->second;
+
+ ++NumPartialMappingsCreated;
+
+ auto &PartMapping = MapOfPartialMappings[Hash];
+ PartMapping = std::make_unique<PartialMapping>(StartIdx, Length, RegBank);
+ return *PartMapping;
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const {
+ return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1);
+}
+
+static hash_code
+hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown,
+ unsigned NumBreakDowns) {
+ if (LLVM_LIKELY(NumBreakDowns == 1))
+ return hash_value(*BreakDown);
+ SmallVector<size_t, 8> Hashes(NumBreakDowns);
+ for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx)
+ Hashes.push_back(hash_value(BreakDown[Idx]));
+ return hash_combine_range(Hashes.begin(), Hashes.end());
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
+ unsigned NumBreakDowns) const {
+ ++NumValueMappingsAccessed;
+
+ hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns);
+ const auto &It = MapOfValueMappings.find(Hash);
+ if (It != MapOfValueMappings.end())
+ return *It->second;
+
+ ++NumValueMappingsCreated;
+
+ auto &ValMapping = MapOfValueMappings[Hash];
+ ValMapping = std::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
+ return *ValMapping;
+}
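
getPartialMapping() and getValueMapping() above follow a hash-consing pattern: hash the mapping's contents, look the hash up in a map of uniquely owned objects, and create the object only on the first request so that later requests return the same canonical instance. Below is a standalone sketch of the same pattern, using std::unordered_map in place of LLVM's DenseMap; the types and hash combiner are simplified stand-ins.

// Standalone sketch of the hash-consing cache used for mappings.
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <unordered_map>

struct PartialMapping {
  unsigned StartIdx, Length, BankID;
};

static std::size_t hashPartialMapping(unsigned StartIdx, unsigned Length,
                                      unsigned BankID) {
  // Any reasonable combiner works; the real code uses llvm::hash_combine.
  std::size_t H = std::hash<unsigned>{}(StartIdx);
  H = H * 31 + std::hash<unsigned>{}(Length);
  return H * 31 + std::hash<unsigned>{}(BankID);
}

const PartialMapping &getPartialMapping(
    std::unordered_map<std::size_t, std::unique_ptr<PartialMapping>> &Cache,
    unsigned StartIdx, unsigned Length, unsigned BankID) {
  std::size_t Hash = hashPartialMapping(StartIdx, Length, BankID);
  auto &Slot = Cache[Hash];
  if (!Slot) // first request: create and remember the canonical object
    Slot.reset(new PartialMapping{StartIdx, Length, BankID});
  return *Slot;
}

int main() {
  std::unordered_map<std::size_t, std::unique_ptr<PartialMapping>> Cache;
  const PartialMapping &A = getPartialMapping(Cache, 0, 32, 1);
  const PartialMapping &B = getPartialMapping(Cache, 0, 32, 1);
  std::cout << "same object reused: " << (&A == &B) << "\n"; // prints 1
  return 0;
}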
+
+template <typename Iterator>
+const RegisterBankInfo::ValueMapping *
+RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
+
+ ++NumOperandsMappingsAccessed;
+
+ // The addresses of the value mapping are unique.
+ // Therefore, we can use them directly to hash the operand mapping.
+ hash_code Hash = hash_combine_range(Begin, End);
+ auto &Res = MapOfOperandsMappings[Hash];
+ if (Res)
+ return Res.get();
+
+ ++NumOperandsMappingsCreated;
+
+ // Create the array of ValueMapping.
+ // Note: this array will not hash to this instance of operands
+ // mapping, because we use the pointer of the ValueMapping
+ // to hash and we expect them to uniquely identify an instance
+ // of value mapping.
+ Res = std::make_unique<ValueMapping[]>(std::distance(Begin, End));
+ unsigned Idx = 0;
+ for (Iterator It = Begin; It != End; ++It, ++Idx) {
+ const ValueMapping *ValMap = *It;
+ if (!ValMap)
+ continue;
+ Res[Idx] = *ValMap;
+ }
+ return Res.get();
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+ const SmallVectorImpl<const RegisterBankInfo::ValueMapping *> &OpdsMapping)
+ const {
+ return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+ std::initializer_list<const RegisterBankInfo::ValueMapping *> OpdsMapping)
+ const {
+ return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
+}
+
+static hash_code
+hashInstructionMapping(unsigned ID, unsigned Cost,
+ const RegisterBankInfo::ValueMapping *OperandsMapping,
+ unsigned NumOperands) {
+ return hash_combine(ID, Cost, OperandsMapping, NumOperands);
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstructionMappingImpl(
+ bool IsInvalid, unsigned ID, unsigned Cost,
+ const RegisterBankInfo::ValueMapping *OperandsMapping,
+ unsigned NumOperands) const {
+ assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 &&
+ OperandsMapping == nullptr && NumOperands == 0) ||
+ !IsInvalid) &&
+ "Mismatch argument for invalid input");
+ ++NumInstructionMappingsAccessed;
+
+ hash_code Hash =
+ hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands);
+ const auto &It = MapOfInstructionMappings.find(Hash);
+ if (It != MapOfInstructionMappings.end())
+ return *It->second;
+
+ ++NumInstructionMappingsCreated;
+
+ auto &InstrMapping = MapOfInstructionMappings[Hash];
+ InstrMapping = std::make_unique<InstructionMapping>(
+ ID, Cost, OperandsMapping, NumOperands);
+ return *InstrMapping;
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ llvm_unreachable("The target must implement this");
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
+ InstructionMappings PossibleMappings;
+ const auto &Mapping = getInstrMapping(MI);
+ if (Mapping.isValid()) {
+ // Put the default mapping first.
+ PossibleMappings.push_back(&Mapping);
+ }
+
+ // Then the alternative mapping, if any.
+ InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
+ append_range(PossibleMappings, AltMappings);
+#ifndef NDEBUG
+ for (const InstructionMapping *Mapping : PossibleMappings)
+ assert(Mapping->verify(MI) && "Mapping is invalid");
+#endif
+ return PossibleMappings;
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
+ // No alternative for MI.
+ return InstructionMappings();
+}
+
+void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
+ MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+ LLVM_DEBUG(dbgs() << "Applying default-like mapping\n");
+ for (unsigned OpIdx = 0,
+ EndIdx = OpdMapper.getInstrMapping().getNumOperands();
+ OpIdx != EndIdx; ++OpIdx) {
+ LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx);
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n");
+ continue;
+ }
+ if (!MO.getReg()) {
+ LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n");
+ continue;
+ }
+ LLT Ty = MRI.getType(MO.getReg());
+ if (!Ty.isValid())
+ continue;
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
+ 0 &&
+ "Invalid mapping");
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
+ 1 &&
+ "This mapping is too complex for this function");
+ iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs =
+ OpdMapper.getVRegs(OpIdx);
+ if (NewRegs.empty()) {
+ LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
+ continue;
+ }
+ Register OrigReg = MO.getReg();
+ Register NewReg = *NewRegs.begin();
+ LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
+ MO.setReg(NewReg);
+ LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
+
+ // The OperandsMapper creates plain scalar, we may have to fix that.
+ // Check if the types match and if not, fix that.
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT NewTy = MRI.getType(NewReg);
+ if (OrigTy != NewTy) {
+ // The default mapping is not supposed to change the size of
+ // the storage. However, right now we don't necessarily bump all
+ // the types to storage size. For instance, we can consider
+ // s16 G_AND legal whereas the storage size is going to be 32.
+ assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
+ "Types with difference size cannot be handled by the default "
+ "mapping");
+ LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
+ << OrigTy);
+ MRI.setType(NewReg, OrigTy);
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+}
+
+unsigned RegisterBankInfo::getSizeInBits(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ if (Reg.isPhysical()) {
+ // The size is not directly available for physical registers.
+ // Instead, we need to access a register class that contains Reg and
+ // get the size of that register class.
+    // Because this is expensive, we'll cache the register class by calling
+    // getMinimalPhysRegClass.
+ auto *RC = getMinimalPhysRegClass(Reg, TRI);
+ assert(RC && "Expecting Register class");
+ return TRI.getRegSizeInBits(*RC);
+ }
+ return TRI.getRegSizeInBits(Reg, MRI);
+}
+
+//------------------------------------------------------------------------------
+// Helper classes implementation.
+//------------------------------------------------------------------------------
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+bool RegisterBankInfo::PartialMapping::verify(
+ const RegisterBankInfo &RBI) const {
+ assert(RegBank && "Register bank not set");
+ assert(Length && "Empty mapping");
+ assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
+ // Check if the minimum width fits into RegBank.
+ assert(RBI.getMaximumSize(RegBank->getID()) >= Length &&
+ "Register bank too small for Mask");
+ return true;
+}
+
+void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
+ OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = ";
+ if (RegBank)
+ OS << *RegBank;
+ else
+ OS << "nullptr";
+}
+
+bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
+ if (NumBreakDowns < 2)
+ return true;
+
+ const PartialMapping *First = begin();
+ for (const PartialMapping *Part = First + 1; Part != end(); ++Part) {
+ if (Part->Length != First->Length || Part->RegBank != First->RegBank)
+ return false;
+ }
+
+ return true;
+}
+
+bool RegisterBankInfo::ValueMapping::verify(const RegisterBankInfo &RBI,
+ unsigned MeaningfulBitWidth) const {
+ assert(NumBreakDowns && "Value mapped nowhere?!");
+ unsigned OrigValueBitWidth = 0;
+ for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
+ // Check that each register bank is big enough to hold the partial value:
+ // this check is done by PartialMapping::verify
+ assert(PartMap.verify(RBI) && "Partial mapping is invalid");
+ // The original value should completely be mapped.
+ // Thus the maximum accessed index + 1 is the size of the original value.
+ OrigValueBitWidth =
+ std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1);
+ }
+ assert(OrigValueBitWidth >= MeaningfulBitWidth &&
+ "Meaningful bits not covered by the mapping");
+ APInt ValueMask(OrigValueBitWidth, 0);
+ for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
+ // Check that the union of the partial mappings covers the whole value,
+ // without overlaps.
+ // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1.
+ APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx,
+ PartMap.getHighBitIdx() + 1);
+ ValueMask ^= PartMapMask;
+ assert((ValueMask & PartMapMask) == PartMapMask &&
+ "Some partial mappings overlap");
+ }
+ assert(ValueMask.isAllOnes() && "Value is not fully mapped");
+ return true;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
+ OS << "#BreakDown: " << NumBreakDowns << " ";
+ bool IsFirst = true;
+ for (const PartialMapping &PartMap : *this) {
+ if (!IsFirst)
+ OS << ", ";
+ OS << '[' << PartMap << ']';
+ IsFirst = false;
+ }
+}
+
+bool RegisterBankInfo::InstructionMapping::verify(
+ const MachineInstr &MI) const {
+ // Check that all the register operands are properly mapped.
+ // Check the constructor invariant.
+ // For PHI, we only care about mapping the definition.
+ assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) &&
+ "NumOperands must match, see constructor");
+ assert(MI.getParent() && MI.getMF() &&
+ "MI must be connected to a MachineFunction");
+ const MachineFunction &MF = *MI.getMF();
+ const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
+ (void)RBI;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ const MachineOperand &MO = MI.getOperand(Idx);
+ if (!MO.isReg()) {
+ assert(!getOperandMapping(Idx).isValid() &&
+ "We should not care about non-reg mapping");
+ continue;
+ }
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ continue;
+ assert(getOperandMapping(Idx).isValid() &&
+ "We must have a mapping for reg operands");
+ const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
+ (void)MOMapping;
+ // Register size in bits.
+ // This size must match what the mapping expects.
+ assert(MOMapping.verify(*RBI, RBI->getSizeInBits(
+ Reg, MF.getRegInfo(),
+ *MF.getSubtarget().getRegisterInfo())) &&
+ "Value mapping is invalid");
+ }
+ return true;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
+ OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
+
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ const ValueMapping &ValMapping = getOperandMapping(OpIdx);
+ if (OpIdx)
+ OS << ", ";
+ OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}';
+ }
+}
+
+const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1;
+
+RegisterBankInfo::OperandsMapper::OperandsMapper(
+ MachineInstr &MI, const InstructionMapping &InstrMapping,
+ MachineRegisterInfo &MRI)
+ : MRI(MRI), MI(MI), InstrMapping(InstrMapping) {
+ unsigned NumOpds = InstrMapping.getNumOperands();
+ OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx);
+ assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
+}
+
+iterator_range<SmallVectorImpl<Register>::iterator>
+RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+ unsigned NumPartialVal =
+ getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
+ int StartIdx = OpToNewVRegIdx[OpIdx];
+
+ if (StartIdx == OperandsMapper::DontKnowIdx) {
+ // This is the first time we try to access OpIdx.
+ // Create the cells that will hold all the partial values at the
+ // end of the list of NewVReg.
+ StartIdx = NewVRegs.size();
+ OpToNewVRegIdx[OpIdx] = StartIdx;
+ for (unsigned i = 0; i < NumPartialVal; ++i)
+ NewVRegs.push_back(0);
+ }
+ SmallVectorImpl<Register>::iterator End =
+ getNewVRegsEnd(StartIdx, NumPartialVal);
+
+ return make_range(&NewVRegs[StartIdx], End);
+}
+
+SmallVectorImpl<Register>::const_iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+ unsigned NumVal) const {
+ return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
+}
+SmallVectorImpl<Register>::iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+ unsigned NumVal) {
+ assert((NewVRegs.size() == StartIdx + NumVal ||
+ NewVRegs.size() > StartIdx + NumVal) &&
+ "NewVRegs too small to contain all the partial mapping");
+ return NewVRegs.size() <= StartIdx + NumVal ? NewVRegs.end()
+ : &NewVRegs[StartIdx + NumVal];
+}
+
+void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+ iterator_range<SmallVectorImpl<Register>::iterator> NewVRegsForOpIdx =
+ getVRegsMem(OpIdx);
+ const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
+ const PartialMapping *PartMap = ValMapping.begin();
+ for (Register &NewVReg : NewVRegsForOpIdx) {
+ assert(PartMap != ValMapping.end() && "Out-of-bound access");
+ assert(NewVReg == 0 && "Register has already been created");
+ // The new registers are always bound to scalar with the right size.
+ // The actual type has to be set when the target does the mapping
+ // of the instruction.
+ // The rationale is that this generic code cannot guess how the
+ // target plans to split the input type.
+ NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length));
+ MRI.setRegBank(NewVReg, *PartMap->RegBank);
+ ++PartMap;
+ }
+}
+
+void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
+ unsigned PartialMapIdx,
+ Register NewVReg) {
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+ assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns >
+ PartialMapIdx &&
+ "Out-of-bound access for partial mapping");
+ // Make sure the memory is initialized for that operand.
+ (void)getVRegsMem(OpIdx);
+ assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 &&
+ "This value is already set");
+ NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg;
+}
+
+iterator_range<SmallVectorImpl<Register>::const_iterator>
+RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
+ bool ForDebug) const {
+ (void)ForDebug;
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+ int StartIdx = OpToNewVRegIdx[OpIdx];
+
+ if (StartIdx == OperandsMapper::DontKnowIdx)
+ return make_range(NewVRegs.end(), NewVRegs.end());
+
+ unsigned PartMapSize =
+ getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
+ SmallVectorImpl<Register>::const_iterator End =
+ getNewVRegsEnd(StartIdx, PartMapSize);
+ iterator_range<SmallVectorImpl<Register>::const_iterator> Res =
+ make_range(&NewVRegs[StartIdx], End);
+#ifndef NDEBUG
+ for (Register VReg : Res)
+ assert((VReg || ForDebug) && "Some registers are uninitialized");
+#endif
+ return Res;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const {
+ print(dbgs(), true);
+ dbgs() << '\n';
+}
+#endif
+
+void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
+ bool ForDebug) const {
+ unsigned NumOpds = getInstrMapping().getNumOperands();
+ if (ForDebug) {
+ OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n';
+ // Print out the internal state of the index table.
+ OS << "Populated indices (CellNumber, IndexInNewVRegs): ";
+ bool IsFirst = true;
+ for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+ if (OpToNewVRegIdx[Idx] != DontKnowIdx) {
+ if (!IsFirst)
+ OS << ", ";
+ OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')';
+ IsFirst = false;
+ }
+ }
+ OS << '\n';
+ } else
+ OS << "Mapping ID: " << getInstrMapping().getID() << ' ';
+
+ OS << "Operand Mapping: ";
+ // If we have a function, we can pretty print the name of the registers.
+ // Otherwise we will print the raw numbers.
+ const TargetRegisterInfo *TRI =
+ getMI().getParent() && getMI().getMF()
+ ? getMI().getMF()->getSubtarget().getRegisterInfo()
+ : nullptr;
+ bool IsFirst = true;
+ for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+ if (OpToNewVRegIdx[Idx] == DontKnowIdx)
+ continue;
+ if (!IsFirst)
+ OS << ", ";
+ IsFirst = false;
+ OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
+ bool IsFirstNewVReg = true;
+ for (Register VReg : getVRegs(Idx)) {
+ if (!IsFirstNewVReg)
+ OS << ", ";
+ IsFirstNewVReg = false;
+ OS << printReg(VReg, TRI);
+ }
+ OS << "])";
+ }
+}
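
The disjoint-coverage check in ValueMapping::verify() earlier in this file XOR-accumulates one bit mask per partial mapping: disjoint parts flip disjoint bits, so the accumulated mask ends up all-ones exactly when the parts tile the value with no overlap. Here is the same check on a fixed 64-bit value, standalone and without APInt; the two-part split is an arbitrary example.

// The same disjoint-coverage check as ValueMapping::verify, on a 64-bit value.
#include <cassert>
#include <cstdint>

// Bits [StartIdx, StartIdx + Length) set, everything else clear.
static uint64_t partMask(unsigned StartIdx, unsigned Length) {
  uint64_t M = (Length == 64) ? ~uint64_t(0) : ((uint64_t(1) << Length) - 1);
  return M << StartIdx;
}

int main() {
  // Two partial mappings splitting a 64-bit value into two 32-bit halves.
  struct { unsigned StartIdx, Length; } Parts[] = {{0, 32}, {32, 32}};

  uint64_t ValueMask = 0;
  for (auto &P : Parts) {
    uint64_t PartMapMask = partMask(P.StartIdx, P.Length);
    ValueMask ^= PartMapMask;
    // If this fails, an earlier part already claimed one of these bits.
    assert((ValueMask & PartMapMask) == PartMapMask && "parts overlap");
  }
  // All-ones: every bit of the original value is mapped exactly once.
  assert(ValueMask == ~uint64_t(0) && "value not fully mapped");
  return 0;
}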
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 000000000000..fba8c35ecec2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,236 @@
+//===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee-saved vs. caller-saved and
+// reserved registers depend on calling conventions and other dynamic
+// information, so some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
+RegisterClassInfo::RegisterClassInfo() = default;
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ auto &STI = MF->getSubtarget();
+
+ // Allocate new array the first time we see a new target.
+ if (STI.getRegisterInfo() != TRI) {
+ TRI = STI.getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ Update = true;
+ }
+
+ // Test if CSRs have changed from the previous function.
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const MCPhysReg *CSR = MRI.getCalleeSavedRegs();
+ bool CSRChanged = true;
+ if (!Update) {
+ CSRChanged = false;
+ size_t LastSize = LastCalleeSavedRegs.size();
+ for (unsigned I = 0;; ++I) {
+ if (CSR[I] == 0) {
+ CSRChanged = I != LastSize;
+ break;
+ }
+ if (I >= LastSize) {
+ CSRChanged = true;
+ break;
+ }
+ if (CSR[I] != LastCalleeSavedRegs[I]) {
+ CSRChanged = true;
+ break;
+ }
+ }
+ }
+
+ // Get the callee saved registers.
+ if (CSRChanged) {
+ LastCalleeSavedRegs.clear();
+ // Build a CSRAlias map. Every CSR alias saves the last
+ // overlapping CSR.
+ CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
+ for (const MCPhysReg *I = CSR; *I; ++I) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CalleeSavedAliases[*AI] = *I;
+ LastCalleeSavedRegs.push_back(*I);
+ }
+
+ Update = true;
+ }
+
+  // Even if the CSR list is the same, we could have had a different allocation
+  // order if ignoreCSRForAllocationOrder is evaluated differently.
+ BitVector CSRHintsForAllocOrder(TRI->getNumRegs());
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI);
+ if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() ||
+ IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) {
+ Update = true;
+ IgnoreCSRForAllocOrder = CSRHintsForAllocOrder;
+ }
+
+ RegCosts = TRI->getRegisterCosts(*MF);
+
+ // Different reserved registers?
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
+ Update = true;
+ Reserved = RR;
+ }
+
+ // Invalidate cached information from previous function.
+ if (Update) {
+ unsigned NumPSets = TRI->getNumRegPressureSets();
+ PSetLimits.reset(new unsigned[NumPSets]);
+ std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);
+ ++Tag;
+ }
+}
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ assert(RC && "no register class given");
+ RCInfo &RCI = RegClass[RC->getID()];
+ auto &STI = MF->getSubtarget();
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new MCPhysReg[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<MCPhysReg, 16> CSRAlias;
+ uint8_t MinCost = uint8_t(~0u);
+ uint8_t LastCost = uint8_t(~0u);
+ unsigned LastCostChange = 0;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned PhysReg : RawOrder) {
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ uint8_t Cost = RegCosts[PhysReg];
+ MinCost = std::min(MinCost, Cost);
+
+ if (CalleeSavedAliases[PhysReg] &&
+ !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else {
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+ assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+ // CSR aliases go after the volatile registers, preserve the target's order.
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
+ unsigned PhysReg = CSRAlias[i];
+ uint8_t Cost = RegCosts[PhysReg];
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
+ // Check if RC is a proper sub-class.
+ if (const TargetRegisterClass *Super =
+ TRI->getLargestLegalSuperClass(RC, *MF))
+ if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
+ RCI.ProperSubClass = true;
+
+ RCI.MinCost = MinCost;
+ RCI.LastCostChange = LastCostChange;
+
+ LLVM_DEBUG({
+ dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << printReg(RCI.Order[I], TRI);
+ dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
+
+/// This is not accurate because two overlapping register sets may have some
+/// nonoverlapping reserved registers. However, computing the allocation order
+/// for all register classes would be too expensive.
+unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
+ const TargetRegisterClass *RC = nullptr;
+ unsigned NumRCUnits = 0;
+ for (const TargetRegisterClass *C : TRI->regclasses()) {
+ const int *PSetID = TRI->getRegClassPressureSets(C);
+ for (; *PSetID != -1; ++PSetID) {
+ if ((unsigned)*PSetID == Idx)
+ break;
+ }
+ if (*PSetID == -1)
+ continue;
+
+ // Found a register class that counts against this pressure set.
+ // For efficiency, only compute the set order for the largest set.
+ unsigned NUnits = TRI->getRegClassWeight(C).WeightLimit;
+ if (!RC || NUnits > NumRCUnits) {
+ RC = C;
+ NumRCUnits = NUnits;
+ }
+ }
+ assert(RC && "Failed to find register class");
+ compute(RC);
+ unsigned NAllocatableRegs = getNumAllocatableRegs(RC);
+ unsigned RegPressureSetLimit = TRI->getRegPressureSetLimit(*MF, Idx);
+ // If all the regs are reserved, return raw RegPressureSetLimit.
+ // One example is VRSAVERC in PowerPC.
+  // Avoid returning zero; getRegPressureSetLimit(Idx) assumes computePSetLimit
+  // returns a non-zero value.
+ if (NAllocatableRegs == 0)
+ return RegPressureSetLimit;
+ unsigned NReserved = RC->getNumRegs() - NAllocatableRegs;
+ return RegPressureSetLimit - TRI->getRegClassWeight(RC).RegWeight * NReserved;
+}
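
The tail of computePSetLimit() is plain arithmetic: the target's raw pressure-set limit is reduced by the register-class weight contributed by the reserved registers that were filtered out of the allocation order. A worked example with invented numbers:

// Invented numbers: a pressure set with a raw limit of 32 units, a register
// class of 32 registers each weighing 1 unit, of which 4 are reserved.
#include <iostream>

int main() {
  unsigned RegPressureSetLimit = 32; // TRI->getRegPressureSetLimit(MF, Idx)
  unsigned NumRegs = 32;             // RC->getNumRegs()
  unsigned NAllocatableRegs = 28;    // after filtering reserved registers
  unsigned RegWeight = 1;            // TRI->getRegClassWeight(RC).RegWeight

  unsigned NReserved = NumRegs - NAllocatableRegs;              // 4
  unsigned Limit = RegPressureSetLimit - RegWeight * NReserved; // 28
  std::cout << "effective pressure-set limit: " << Limit << "\n";
  return 0;
}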
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 000000000000..e49885b6ad96
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,4220 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface used by all clients and
+// implementations of register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
+
+static cl::opt<bool> EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(false), cl::Hidden);
+
+/// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+/// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+static cl::opt<unsigned> LateRematUpdateThreshold(
+ "late-remat-update-threshold", cl::Hidden,
+ cl::desc("During rematerialization for a copy, if the def instruction has "
+ "many other copy uses to be rematerialized, delay the multiple "
+ "separate live interval update work and do them all at once after "
+ "all those rematerialization are done. It will save a lot of "
+ "repeated work. "),
+ cl::init(100));
+
+static cl::opt<unsigned> LargeIntervalSizeThreshold(
+ "large-interval-size-threshold", cl::Hidden,
+ cl::desc("If the valnos size of an interval is larger than the threshold, "
+ "it is regarded as a large interval. "),
+ cl::init(100));
+
+static cl::opt<unsigned> LargeIntervalFreqThreshold(
+ "large-interval-freq-threshold", cl::Hidden,
+ cl::desc("For a large interval, if it is coalesed with other live "
+ "intervals many times more than the threshold, stop its "
+ "coalescing to control the compile time. "),
+ cl::init(256));
+
+namespace {
+
+ class JoinVals;
+
+ class RegisterCoalescer : public MachineFunctionPass,
+ private LiveRangeEdit::Delegate {
+ MachineFunction* MF = nullptr;
+ MachineRegisterInfo* MRI = nullptr;
+ const TargetRegisterInfo* TRI = nullptr;
+ const TargetInstrInfo* TII = nullptr;
+ LiveIntervals *LIS = nullptr;
+ const MachineLoopInfo* Loops = nullptr;
+ AliasAnalysis *AA = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+ /// Position and VReg of a PHI instruction during coalescing.
+ struct PHIValPos {
+ SlotIndex SI; ///< Slot where this PHI occurs.
+ Register Reg; ///< VReg the PHI occurs in.
+ unsigned SubReg; ///< Qualifying subregister for Reg.
+ };
+
+ /// Map from debug instruction number to PHI position during coalescing.
+ DenseMap<unsigned, PHIValPos> PHIValToPos;
+ /// For each VReg, an index of which debug instruction numbers and
+ /// corresponding PHIs are sensitive to coalescing. Each VReg may have
+ /// multiple PHI defs, at different positions.
+ DenseMap<Register, SmallVector<unsigned, 2>> RegToPHIIdx;
+
+ /// Debug variable location tracking -- for each VReg, maintain an
+ /// ordered-by-slot-index set of DBG_VALUEs, to help quickly identify
+ /// whether coalescing may change location validity.
+ using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
+ DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
+
+ /// A LaneMask to remember on which subregister live ranges we need to call
+ /// shrinkToUses() later.
+ LaneBitmask ShrinkMask;
+
+ /// True if the main range of the currently coalesced intervals should be
+ /// checked for smaller live intervals.
+ bool ShrinkMainRange = false;
+
+ /// True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies = false;
+
+ /// True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges = false;
+
+ /// Copy instructions yet to be coalesced.
+ SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
+
+ /// Set of instruction pointers that have been erased, and
+ /// that may be present in WorkList.
+ SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
+
+ /// Dead instructions that are about to be deleted.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ /// Virtual registers to be considered for register class inflation.
+ SmallVector<Register, 8> InflateRegs;
+
+ /// The collection of live intervals which should have been updated
+ /// immediately after rematerialization but delayed until
+ /// lateLiveIntervalUpdate is called.
+ DenseSet<Register> ToBeUpdated;
+
+ /// Record how many times we have tried to join a large live interval
+ /// (one with many valnos) with other live intervals.
+ DenseMap<Register, unsigned long> LargeLIVisitCounter;
+
+ /// Recursively eliminate dead defs in DeadDefs.
+ void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
+
+ /// LiveRangeEdit callback for eliminateDeadDefs().
+ void LRE_WillEraseInstruction(MachineInstr *MI) override;
+
+ /// Coalesce the LocalWorkList.
+ void coalesceLocals();
+
+ /// Join compatible live intervals
+ void joinAllIntervals();
+
+ /// Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into WorkList.
+ void copyCoalesceInMBB(MachineBasicBlock *MBB);
+
+ /// Tries to coalesce all copies in CurrList. Returns true if any progress
+ /// was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+
+ /// If one def has many copy-like uses, and those copy uses are all
+ /// rematerialized, the live interval updates needed for those
+ /// rematerializations are delayed and done all at once instead
+ /// of being done multiple times. This saves compile time because
+ /// live interval updates are costly.
+ void lateLiveIntervalUpdate();
+
+ /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
+ /// has no value defined in the predecessors. If the incoming value is the
+ /// same as defined by the copy itself, the value is considered undefined.
+ bool copyValueUndefInPredecessors(LiveRange &S,
+ const MachineBasicBlock *MBB,
+ LiveQueryResult SLRQ);
+
+ /// Set necessary undef flags on subregister uses after pruning out undef
+ /// lane segments from the subrange.
+ void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
+ LaneBitmask PrunedLanes);
+
+ /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
+ /// src/dst of the copy instruction CopyMI. This returns true if the copy
+ /// was successfully coalesced away. If it is not currently possible to
+ /// coalesce this interval, but it may be possible if other things get
+ /// coalesced, then it returns true by reference in 'Again'.
+ bool joinCopy(MachineInstr *CopyMI, bool &Again);
+
+ /// Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we
+ /// can use this information below to update aliases.
+ bool joinIntervals(CoalescerPair &CP);
+
+ /// Attempt joining two virtual registers. Return true on success.
+ bool joinVirtRegs(CoalescerPair &CP);
+
+ /// If a live interval has many valnos and is coalesced with other
+ /// live intervals many times, we regard such a live interval as having
+ /// a high compile-time cost.
+ bool isHighCostLiveInterval(LiveInterval &LI);
+
+ /// Attempt joining with a reserved physreg.
+ bool joinReservedPhysReg(CoalescerPair &CP);
+
+ /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
+ /// Subranges in @p LI which only partially interfere with the desired
+ /// LaneMask are split as necessary. @p LaneMask are the lanes that
+ /// @p ToMerge will occupy in the coalesced register. @p LI has its subrange
+ /// lanemasks already adjusted to the coalesced register.
+ void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ LaneBitmask LaneMask, CoalescerPair &CP,
+ unsigned DstIdx);
+
+ /// Join the liveranges of two subregisters. Joins @p RRange into
+ /// @p LRange, @p RRange may be invalid afterwards.
+ void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask, const CoalescerPair &CP);
+
+ /// We found a non-trivially-coalescable copy. If the source value number is
+ /// defined by a copy from the destination reg see if we can merge these two
+ /// destination reg valno# into a single value number, eliminating a copy.
+ /// This returns true if an interval was modified.
+ bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValno val# of IntA.
+ bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ /// This returns a pair of two flags:
+ /// - the first element is true if an interval was modified,
+ /// - the second element is true if the destination interval needs
+ /// to be shrunk after deleting the copy.
+ std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI);
+
+ /// We found a copy which can be moved to its less frequent predecessor.
+ bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
+
+ /// If the source of a copy is defined by a
+ /// trivial computation, replace the copy by rematerialize the definition.
+ bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
+ bool &IsDefCopy);
+
+ /// Return true if a copy involving a physreg should be joined.
+ bool canJoinPhys(const CoalescerPair &CP);
+
+ /// Replace all defs and uses of SrcReg to DstReg and update the subregister
+ /// number if it is not zero. If DstReg is a physical register and the
+ /// existing subregister number of the def / use being updated is not zero,
+ /// make sure to set it to the correct physical subregister.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+
+ /// If the given machine operand reads only undefined lanes, add an undef
+ /// flag.
+ /// This can happen when undef uses were previously concealed by a copy
+ /// which we coalesced. Example:
+ /// %0:sub0<def,read-undef> = ...
+ /// %1 = COPY %0 <-- Coalescing COPY reveals undef
+ /// = use %1:sub1 <-- hidden undef use
+ void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
+ MachineOperand &MO, unsigned SubRegIdx);
+
+ /// Handle copies of undef values. If the undef value is an incoming
+ /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
+ /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
+ /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
+ MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
+
+ /// Check whether or not we should apply the terminal rule on the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+ /// terminal rule consists in checking that at least one of the
+ /// interfering nodes, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst will not both be able to be coalesced
+ /// with Src. Since Dst2 exposes more coalescing opportunities than
+ /// Dst, we can drop \p Copy.
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
+ /// Wrapper method for \see LiveIntervals::shrinkToUses.
+ /// This method does the proper fixing of the live-ranges when the
+ /// aforementioned method returns true.
+ void shrinkToUses(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ NumShrinkToUses++;
+ if (LIS->shrinkToUses(LI, Dead)) {
+ /// Check whether or not \p LI is composed of multiple connected
+ /// components and if that is the case, fix that.
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS->splitSeparateComponents(*LI, SplitLIs);
+ }
+ }
+
+ /// Wrapper method to do all the necessary work when an instruction is
+ /// deleted.
+ /// Optimizations should use this to make sure that deleted instructions
+ /// are always accounted for.
+ void deleteInstr(MachineInstr* MI) {
+ ErasedInstrs.insert(MI);
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+
+ /// Walk over function and initialize the DbgVRegToValues map.
+ void buildVRegToDbgValueMap(MachineFunction &MF);
+
+ /// Test whether, after merging, any DBG_VALUEs would refer to a
+ /// different value number than before merging, and whether this can
+ /// be resolved. If not, mark the DBG_VALUE as being undef.
+ void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
+ JoinVals &LHSVals, LiveRange &RHS,
+ JoinVals &RHSVals);
+
+ void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
+ LiveRange &RegRange, JoinVals &Vals2);
+
+ public:
+ static char ID; ///< Class identification, replacement for typeinfo
+
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void releaseMemory() override;
+
+ /// This is the pass entry point.
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ /// Implement the dump method.
+ void print(raw_ostream &O, const Module* = nullptr) const override;
+ };
+
+} // end anonymous namespace
+
+char RegisterCoalescer::ID = 0;
+
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
+ "Register Coalescer", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
+ "Register Coalescer", false, false)
+
+[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
+ const MachineInstr *MI, Register &Src,
+ Register &Dst, unsigned &SrcSub,
+ unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
+}
+
+/// Return true if this block should be vacated by the coalescer to eliminate
+/// branches. The important cases to handle in the coalescer are critical edges
+/// split during phi elimination which contain only copies. Simple blocks that
+/// contain non-branches should also be vacated, but this can be handled by an
+/// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (const auto &MI : *MBB) {
+ if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
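
A self-contained toy version of the predicate above, for readers who want the shape of the check without the MachineBasicBlock API (the Toy* types are hypothetical):

#include <vector>

struct ToyInstr {
  bool IsCopyLike;      // COPY or copy-like pseudo
  bool IsUncondBranch;  // unconditional branch
};

struct ToyBlock {
  unsigned NumPreds = 0, NumSuccs = 0;
  std::vector<ToyInstr> Instrs;
};

// A block is a vacatable split edge if it has a single predecessor, a single
// successor, and contains nothing but copies and unconditional branches.
static bool isSplitEdgeToy(const ToyBlock &B) {
  if (B.NumPreds != 1 || B.NumSuccs != 1)
    return false;
  for (const ToyInstr &I : B.Instrs)
    if (!I.IsCopyLike && !I.IsUncondBranch)
      return false;
  return true;
}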
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ SrcReg = DstReg = Register();
+ SrcIdx = DstIdx = 0;
+ NewRC = nullptr;
+ Flipped = CrossClass = false;
+
+ Register Src, Dst;
+ unsigned SrcSub = 0, DstSub = 0;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ Partial = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (Src.isPhysical()) {
+ if (Dst.isPhysical())
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ Flipped = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+
+ if (Dst.isPhysical()) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = TRI.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // Copies between different sub-registers are never coalescable.
+ if (Src == Dst && SrcSub != DstSub)
+ return false;
+
+ NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
+ SrcIdx, DstIdx);
+ if (!NewRC)
+ return false;
+ } else if (DstSub) {
+ // SrcReg will be merged with a sub-register of DstReg.
+ SrcIdx = DstSub;
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ } else if (SrcSub) {
+ // DstReg will be merged with a sub-register of SrcReg.
+ DstIdx = SrcSub;
+ NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
+ } else {
+ // This is a straight copy without sub-registers.
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
+ }
+
+ // The combined constraint may be impossible to satisfy.
+ if (!NewRC)
+ return false;
+
+ // Prefer SrcReg to be a sub-register of DstReg.
+ // FIXME: Coalescer should support subregs symmetrically.
+ if (DstIdx && !SrcIdx) {
+ std::swap(Src, Dst);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ }
+
+ CrossClass = NewRC != DstRC || NewRC != SrcRC;
+ }
+ // Check our invariants
+ assert(Src.isVirtual() && "Src must be virtual");
+ assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
+ SrcReg = Src;
+ DstReg = Dst;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (DstReg.isPhysical())
+ return false;
+ std::swap(SrcReg, DstReg);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ Register Src, Dst;
+ unsigned SrcSub = 0, DstSub = 0;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is SrcReg.
+ if (Dst == SrcReg) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != SrcReg) {
+ return false;
+ }
+
+ // Now check that Dst matches DstReg.
+ if (DstReg.isPhysical()) {
+ if (!Dst.isPhysical())
+ return false;
+ assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = TRI.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return DstReg == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
+ } else {
+ // DstReg is virtual.
+ if (DstReg != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+ TRI.composeSubRegIndices(DstIdx, DstSub);
+ }
+}
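
For orientation, a sketch of how a client typically drives CoalescerPair for a single instruction. It assumes the declarations from the private RegisterCoalescer.h header that lives next to this file (not in include/llvm); the classifyCopy helper itself is hypothetical.

#include "RegisterCoalescer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

// Returns true if MI is a copy-like instruction the coalescer could consider,
// reporting the resolved source and destination registers.
static bool classifyCopy(const llvm::TargetRegisterInfo &TRI,
                         const llvm::MachineInstr &MI,
                         llvm::Register &SrcOut, llvm::Register &DstOut) {
  llvm::CoalescerPair CP(TRI);
  if (!CP.setRegisters(&MI))
    return false;               // not a COPY/SUBREG_TO_REG we can handle
  // After setRegisters(), SrcReg is always virtual; DstReg may be physical,
  // and the pair may have been flipped to enforce that invariant.
  SrcOut = CP.getSrcReg();
  DstOut = CP.getDstReg();
  return true;
}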
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
+ if (Edit) {
+ Edit->eliminateDeadDefs(DeadDefs);
+ return;
+ }
+ SmallVector<Register, 8> NewRegs;
+ LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
+ nullptr, this).eliminateDeadDefs(DeadDefs);
+}
+
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // MI may be in WorkList. Make sure we don't visit it.
+ ErasedInstrs.insert(MI);
+}
+
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPartial() && "This doesn't work for partial copies.");
+ assert(!CP.isPhys() && "This doesn't work for physreg copies.");
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+
+ // We have a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a copy from B, see if we can
+ // merge these two pieces of B into a single value number, eliminating a copy.
+ // For example:
+ //
+ // A3 = B0
+ // ...
+ // B1 = A3 <- this copy
+ //
+ // In this case, B0 can be extended to where the B1 copy lives, allowing the
+ // B1 value number to be replaced with B0 (which simplifies the B
+ // liveinterval).
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
+ if (BS == IntB.end()) return false;
+ VNInfo *BValNo = BS->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (BValNo->def != CopyIdx) return false;
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
+ LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
+ // The live segment might not exist after fun with physreg coalescing.
+ if (AS == IntA.end()) return false;
+ VNInfo *AValNo = AS->valno;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ // Don't allow any partial copies, even if isCoalescable() allows them.
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
+ return false;
+
+ // Get the Segment in IntB that this value number starts with.
+ LiveInterval::iterator ValS =
+ IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
+ if (ValS == IntB.end())
+ return false;
+
+ // Make sure that the end of the live segment is inside the same block as
+ // CopyMI.
+ MachineInstr *ValSEndInst =
+ LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
+ if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValS ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live segments between them
+ // in IntB, we can merge them.
+ if (ValS+1 != BS) return false;
+
+ LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
+
+ SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
+ // We are about to delete CopyMI, so we need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValS.end, BS.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValS->valno)
+ IntB.MergeValueNumberInto(BValNo, ValS->valno);
+
+ // Do the same for the subregister segments.
+ for (LiveInterval::SubRange &S : IntB.subranges()) {
+ // Check for SubRange Segments of the form [1234r,1234d:0) which can be
+ // removed to prevent creating bogus SubRange Segments.
+ LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
+ if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
+ S.removeSegment(*SS, true);
+ continue;
+ }
+ // The subrange may have ended before FillerStart. If so, extend it.
+ if (!S.getVNInfoAt(FillerStart)) {
+ SlotIndex BBStart =
+ LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
+ S.extendInBlock(BBStart, FillerStart);
+ }
+ VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+ S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
+ VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
+ if (SubBValNo != SubValSNo)
+ S.MergeValueNumberInto(SubBValNo, SubValSNo);
+ }
+
+ LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
+ if (UIdx != -1) {
+ ValSEndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // Rewrite the copy.
+ CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
+ // If the copy instruction was killing the destination register or any
+ // subrange before the merge trim the live range.
+ bool RecomputeLiveRange = AS->end == CopyIdx;
+ if (!RecomputeLiveRange) {
+ for (LiveInterval::SubRange &S : IntA.subranges()) {
+ LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
+ if (SS != S.end() && SS->end == CopyIdx) {
+ RecomputeLiveRange = true;
+ break;
+ }
+ }
+ }
+ if (RecomputeLiveRange)
+ shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
+ for (LiveRange::Segment &ASeg : IntA.segments) {
+ if (ASeg.valno != AValNo) continue;
+ LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
+ if (BI != IntB.begin())
+ --BI;
+ for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= ASeg.start && BI->end > ASeg.start)
+ return true;
+ if (BI->start > ASeg.start && BI->start < ASeg.end)
+ return true;
+ }
+ }
+ return false;
+}
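
The overlap test above reduces, for non-empty segments, to the standard half-open interval check; a self-contained toy version with hypothetical types (not LLVM's LiveRange/VNInfo):

#include <vector>

struct ToySeg {
  unsigned Start, End;  // half-open [Start, End)
  unsigned ValNo;       // toy value number
};

// Returns true if any B segment carrying a value number other than BVal
// overlaps an A segment carrying value number AVal.
static bool hasOtherReachingDefsToy(const std::vector<ToySeg> &A,
                                    const std::vector<ToySeg> &B,
                                    unsigned AVal, unsigned BVal) {
  for (const ToySeg &AS : A) {
    if (AS.ValNo != AVal)
      continue;
    for (const ToySeg &BS : B) {
      if (BS.ValNo == BVal)
        continue;
      if (BS.Start < AS.End && AS.Start < BS.End)  // half-open overlap
        return true;
    }
  }
  return false;
}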
+
+/// Copy segments with value number @p SrcValNo from liverange @p Src to live
+ /// range @p Dst and use value number @p DstValNo there.
+static std::pair<bool,bool>
+addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
+ const VNInfo *SrcValNo) {
+ bool Changed = false;
+ bool MergedWithDead = false;
+ for (const LiveRange::Segment &S : Src.segments) {
+ if (S.valno != SrcValNo)
+ continue;
+ // This is adding a segment from Src that ends in a copy that is about
+ // to be removed. This segment is going to be merged with a pre-existing
+ // segment in Dst. This works, except in cases when the corresponding
+ // segment in Dst is dead. For example: adding [192r,208r:1) from Src
+ // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
+ // Recognize such cases, so that the segments can be shrunk.
+ LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
+ LiveRange::Segment &Merged = *Dst.addSegment(Added);
+ if (Merged.end.isDead())
+ MergedWithDead = true;
+ Changed = true;
+ }
+ return std::make_pair(Changed, MergedWithDead);
+}
+
+std::pair<bool,bool>
+RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPhys());
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // We found a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a commutable instruction and
+ // its other operand is coalesced to the copy dest register, see if we can
+ // transform the copy into a noop by commuting the definition. For example,
+ //
+ // A3 = op A2 killed B0
+ // ...
+ // B1 = A3 <- this copy
+ // ...
+ // = op A3 <- more uses
+ //
+ // ==>
+ //
+ // B2 = op B0 killed A2
+ // ...
+ // B1 = B2 <- now an identity copy
+ // ...
+ // = op B2 <- more uses
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ assert(BValNo != nullptr && BValNo->def == CopyIdx);
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (AValNo->isPHIDef())
+ return { false, false };
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return { false, false };
+ if (!DefMI->isCommutable())
+ return { false, false };
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return { false, false };
+
+ // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
+ // commutable operand which is expressed by the 'CommuteAnyOperandIndex' value
+ // passed to the method. That _other_ operand is chosen by
+ // the findCommutedOpIndices() method.
+ //
+ // That is obviously an area for improvement in case of instructions having
+ // more than 2 operands. For example, if some instruction has 3 commutable
+ // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
+ // op#2<->op#3) of commute transformation should be considered/tried here.
+ unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
+ return { false, false };
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ Register NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
+ return { false, false };
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return { false, false };
+
+ // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
+ MachineInstr *UseMI = MO.getParent();
+ unsigned OpNo = &MO - &UseMI->getOperand(0);
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
+ LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+ if (US == IntA.end() || US->valno != AValNo)
+ continue;
+ // If this use is tied to a def, we can't rewrite the register.
+ if (UseMI->isRegTiedToDefOperand(OpNo))
+ return { false, false };
+ }
+
+ LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI =
+ TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
+ if (!NewMI)
+ return { false, false };
+ if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
+ !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
+ return { false, false };
+ if (NewMI != DefMI) {
+ LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
+ MBB->erase(DefMI);
+ }
+
+ // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = killed A
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
+ if (UseMO.isUndef())
+ continue;
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI->isDebugInstr()) {
+ // FIXME: These don't have an instruction index. It is not clear we have
+ // enough info to decide whether to do this replacement or not. For now, do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
+ LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+ assert(US != IntA.end() && "Use must be live");
+ if (US->valno != AValNo)
+ continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
+ if (NewReg.isPhysical())
+ UseMO.substPhysReg(NewReg, *TRI);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg() ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getRegSlot();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
+ for (LiveInterval::SubRange &S : IntB.subranges()) {
+ VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
+ if (!SubDVNI)
+ continue;
+ VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+ assert(SubBValNo->def == CopyIdx);
+ S.MergeValueNumberInto(SubDVNI, SubBValNo);
+ }
+
+ deleteInstr(UseMI);
+ }
+
+ // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
+ // is updated.
+ bool ShrinkB = false;
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
+ if (!IntA.hasSubRanges()) {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
+ IntA.createSubRangeFrom(Allocator, Mask, IntA);
+ } else if (!IntB.hasSubRanges()) {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
+ IntB.createSubRangeFrom(Allocator, Mask, IntB);
+ }
+ SlotIndex AIdx = CopyIdx.getRegSlot(true);
+ LaneBitmask MaskA;
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+ for (LiveInterval::SubRange &SA : IntA.subranges()) {
+ VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
+ // Even if we are dealing with a full copy, some lanes can
+ // still be undefined.
+ // E.g.,
+ // undef A.subLow = ...
+ // B = COPY A <== A.subHigh is undefined here and does
+ // not have a value number.
+ if (!ASubValNo)
+ continue;
+ MaskA |= SA.LaneMask;
+
+ IntB.refineSubRanges(
+ Allocator, SA.LaneMask,
+ [&Allocator, &SA, CopyIdx, ASubValNo,
+ &ShrinkB](LiveInterval::SubRange &SR) {
+ VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
+ : SR.getVNInfoAt(CopyIdx);
+ assert(BSubValNo != nullptr);
+ auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ ShrinkB |= P.second;
+ if (P.first)
+ BSubValNo->def = ASubValNo->def;
+ },
+ Indexes, *TRI);
+ }
+ // Go over all subranges of IntB that have not been covered by IntA,
+ // and delete the segments starting at CopyIdx. This can happen if
+ // IntA has undef lanes that are defined in IntB.
+ for (LiveInterval::SubRange &SB : IntB.subranges()) {
+ if ((SB.LaneMask & MaskA).any())
+ continue;
+ if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
+ if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
+ SB.removeSegment(*S, true);
+ }
+ }
+
+ BValNo->def = AValNo->def;
+ auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ ShrinkB |= P.second;
+ LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ LIS->removeVRegDefAt(IntA, AValNo->def);
+
+ LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return { true, ShrinkB };
+}
+
+/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
+/// predecessor of BB2, and if B is not redefined on the way from A = B
+/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
+/// execution goes through the path from BB0 to BB2. We may move B = A
+ /// to the predecessor that lacks such a reversed copy.
+/// So we will transform the program from:
+/// BB0:
+/// A = B; BB1:
+/// ... ...
+/// / \ /
+/// BB2:
+/// ...
+/// B = A;
+///
+/// to:
+///
+/// BB0: BB1:
+/// A = B; ...
+/// ... B = A;
+/// / \ /
+/// BB2:
+/// ...
+///
+/// A special case is when BB0 and BB2 are the same BB which is the only
+/// BB in a loop:
+/// BB1:
+/// ...
+/// BB0/BB2: ----
+/// B = A; |
+/// ... |
+/// A = B; |
+/// |-------
+/// |
+/// We may hoist B = A from BB0/BB2 to BB1.
+///
+ /// The major preconditions for correctly removing such partial
+/// redundancy include:
+/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
+/// the PHI is defined by the reversed copy A = B in BB0.
+/// 2. No B is referenced from the start of BB2 to B = A.
+/// 3. No B is defined from A = B to the end of BB0.
+/// 4. BB1 has only one successor.
+///
+/// 2 and 4 implicitly ensure B is not live at the end of BB1.
+/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
+ /// colder place, which not only prevents endless loops, but also makes sure
+ /// the movement of the copy is beneficial.
+bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
+ MachineInstr &CopyMI) {
+ assert(!CP.isPhys());
+ if (!CopyMI.isFullCopy())
+ return false;
+
+ MachineBasicBlock &MBB = *CopyMI.getParent();
+ // If this block is the target of an invoke/inlineasm_br, moving the copy into
+ // the predecessor is trickier, and we don't handle it.
+ if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget())
+ return false;
+
+ if (MBB.pred_size() != 2)
+ return false;
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // A is defined by PHI at the entry of MBB.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (!AValNo->isPHIDef())
+ return false;
+
+ // No B is referenced before CopyMI in MBB.
+ if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
+ return false;
+
+ // MBB has two predecessors: one contains A = B so no copy will be inserted
+ // for it. The other one will have a copy moved from MBB.
+ bool FoundReverseCopy = false;
+ MachineBasicBlock *CopyLeftBB = nullptr;
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
+ if (!DefMI || !DefMI->isFullCopy()) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // Check DefMI is a reverse copy and it is in BB Pred.
+ if (DefMI->getOperand(0).getReg() != IntA.reg() ||
+ DefMI->getOperand(1).getReg() != IntB.reg() ||
+ DefMI->getParent() != Pred) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // If there is any other def of B after DefMI and before the end of Pred,
+ // we need to keep the copy of B = A at the end of Pred if we remove
+ // B = A from MBB.
+ bool ValB_Changed = false;
+ for (auto *VNI : IntB.valnos) {
+ if (VNI->isUnused())
+ continue;
+ if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
+ ValB_Changed = true;
+ break;
+ }
+ }
+ if (ValB_Changed) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ FoundReverseCopy = true;
+ }
+
+ // If no reverse copy is found in predecessors, nothing to do.
+ if (!FoundReverseCopy)
+ return false;
+
+ // If CopyLeftBB is nullptr, it means every predecessor of MBB contains a
+ // reverse copy, so CopyMI can be removed trivially once IntA/IntB are updated.
+ // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
+ // update IntA/IntB.
+ //
+ // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
+ // MBB is hotter than CopyLeftBB.
+ if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
+ return false;
+
+ // Now (almost sure it's) ok to move copy.
+ if (CopyLeftBB) {
+ // Position in CopyLeftBB where we should insert new copy.
+ auto InsPos = CopyLeftBB->getFirstTerminator();
+
+ // Make sure that B isn't referenced in the terminators (if any) at the end
+ // of the predecessor since we're about to insert a new definition of B
+ // before them.
+ if (InsPos != CopyLeftBB->end()) {
+ SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
+ if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
+ << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
+
+ // Insert new copy to CopyLeftBB.
+ MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IntB.reg())
+ .addReg(IntA.reg());
+ SlotIndex NewCopyIdx =
+ LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
+ IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+ for (LiveInterval::SubRange &SR : IntB.subranges())
+ SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+
+ // If the newly created instruction has the address of an instruction that was
+ // deleted before (the object was recycled by the allocator), it needs to be
+ // removed from the deleted list.
+ ErasedInstrs.erase(NewCopyMI);
+ } else {
+ LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
+ << printMBBReference(MBB) << '\t' << CopyMI);
+ }
+
+ // Remove CopyMI.
+ // Note: It is fine to remove the copy before updating the live-ranges.
+ // While updating the live-ranges, we only look at slot indices and
+ // never go back to the instruction.
+ // Mark instructions as deleted.
+ deleteInstr(&CopyMI);
+
+ // Update the liveness.
+ SmallVector<SlotIndex, 8> EndPoints;
+ VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
+ LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
+ &EndPoints);
+ BValNo->markUnused();
+ // Extend IntB to the EndPoints of its original live interval.
+ LIS->extendToIndices(IntB, EndPoints);
+
+ // Now, do the same for its subranges.
+ for (LiveInterval::SubRange &SR : IntB.subranges()) {
+ EndPoints.clear();
+ VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
+ assert(BValNo && "All sublanes should be live");
+ LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
+ BValNo->markUnused();
+ // We can have a situation where the result of the original copy is live,
+ // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
+ // the copy appear as an endpoint from pruneValue(), but we don't want it
+ // to because the copy has been removed. We can go ahead and remove that
+ // endpoint; there is no other situation here in which there could be a use
+ // at the same place, as we know that the copy is a full copy.
+ for (unsigned I = 0; I != EndPoints.size(); ) {
+ if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
+ EndPoints[I] = EndPoints.back();
+ EndPoints.pop_back();
+ continue;
+ }
+ ++I;
+ }
+ SmallVector<SlotIndex, 8> Undefs;
+ IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
+ *LIS->getSlotIndexes());
+ LIS->extendToIndices(SR, EndPoints, Undefs);
+ }
+ // If any dead defs were extended, truncate them.
+ shrinkToUses(&IntB);
+
+ // Finally, update the live-range of IntA.
+ shrinkToUses(&IntA);
+ return true;
+}
+
+/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
+/// defining a subregister.
+static bool definesFullReg(const MachineInstr &MI, Register Reg) {
+ assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
+
+ for (const MachineOperand &Op : MI.all_defs()) {
+ if (Op.getReg() != Reg)
+ continue;
+ // Return true if we define the full register or don't care about the value
+ // inside other subregisters.
+ if (Op.getSubReg() == 0 || Op.isUndef())
+ return true;
+ }
+ return false;
+}
+
+bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI,
+ bool &IsDefCopy) {
+ IsDefCopy = false;
+ Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
+ Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
+ if (SrcReg.isPhysical())
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
+ VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
+ if (!ValNo)
+ return false;
+ if (ValNo->isPHIDef() || ValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ if (DefMI->isCopyLike()) {
+ IsDefCopy = true;
+ return false;
+ }
+ if (!TII->isAsCheapAsAMove(*DefMI))
+ return false;
+
+ SmallVector<Register, 8> NewRegs;
+ LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
+ if (!Edit.checkRematerializable(ValNo, DefMI))
+ return false;
+
+ if (!definesFullReg(*DefMI, SrcReg))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(AA, SawStore))
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ Register CopyDstReg = DstOperand.getReg();
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
+
+ // If both SrcIdx and DstIdx are set, correct rematerialization would widen
+ // the register substantially (beyond both source and dest size). This is bad
+ // for performance since it can cascade through a function, introducing many
+ // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
+ // around after a few subreg copies).
+ if (SrcIdx && DstIdx)
+ return false;
+
+ const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ if (!DefMI->isImplicitDef()) {
+ if (DstReg.isPhysical()) {
+ Register NewDstReg = DstReg;
+
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
+ DefMI->getOperand(0).getSubReg());
+ if (NewDstIdx)
+ NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
+
+ // Finally, make sure that the physical subregister that will be
+ // constructed later is permitted for the instruction.
+ if (!DefRC->contains(NewDstReg))
+ return false;
+ } else {
+ // Theoretically, some stack frame reference could exist. Just make sure
+ // it hasn't actually happened.
+ assert(DstReg.isVirtual() &&
+ "Only expect to deal with virtual or physical registers");
+ }
+ }
+
+ LiveRangeEdit::Remat RM(ValNo);
+ RM.OrigMI = DefMI;
+ if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
+ return false;
+
+ DebugLoc DL = CopyMI->getDebugLoc();
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ std::next(MachineBasicBlock::iterator(CopyMI));
+ Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
+ MachineInstr &NewMI = *std::prev(MII);
+ NewMI.setDebugLoc(DL);
+
+ // In a situation like the following:
+ // %0:subreg = instr ; DefMI, subreg = DstIdx
+ // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
+ // instead of widening %1 to the register class of %0 simply do:
+ // %1 = instr
+ const TargetRegisterClass *NewRC = CP.getNewRC();
+ if (DstIdx != 0) {
+ MachineOperand &DefMO = NewMI.getOperand(0);
+ if (DefMO.getSubReg() == DstIdx) {
+ assert(SrcIdx == 0 && CP.isFlipped()
+ && "Shouldn't have SrcIdx+DstIdx at this point");
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ const TargetRegisterClass *CommonRC =
+ TRI->getCommonSubClass(DefRC, DstRC);
+ if (CommonRC != nullptr) {
+ NewRC = CommonRC;
+
+ // Instruction might contain "undef %0:subreg" as use operand:
+ // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
+ //
+ // Need to check all operands.
+ for (MachineOperand &MO : NewMI.operands()) {
+ if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
+ MO.setSubReg(0);
+ }
+ }
+
+ DstIdx = 0;
+ DefMO.setIsUndef(false); // Only subregs can have def+undef.
+ }
+ }
+ }
+
+ // CopyMI may have implicit operands; save them so that we can transfer them
+ // over to the newly materialized instruction after CopyMI is removed.
+ SmallVector<MachineOperand, 4> ImplicitOps;
+ ImplicitOps.reserve(CopyMI->getNumOperands() -
+ CopyMI->getDesc().getNumOperands());
+ for (unsigned I = CopyMI->getDesc().getNumOperands(),
+ E = CopyMI->getNumOperands();
+ I != E; ++I) {
+ MachineOperand &MO = CopyMI->getOperand(I);
+ if (MO.isReg()) {
+ assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (MO.getReg().isPhysical())
+ ImplicitOps.push_back(MO);
+ }
+ }
+
+ CopyMI->eraseFromParent();
+ ErasedInstrs.insert(CopyMI);
+
+ // NewMI may have dead implicit defs (e.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<MCRegister, 4> NewMIImplDefs;
+ for (unsigned i = NewMI.getDesc().getNumOperands(),
+ e = NewMI.getNumOperands();
+ i != e; ++i) {
+ MachineOperand &MO = NewMI.getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical());
+ NewMIImplDefs.push_back(MO.getReg().asMCReg());
+ }
+ }
+
+ if (DstReg.isVirtual()) {
+ unsigned NewIdx = NewMI.getOperand(0).getSubReg();
+
+ if (DefRC != nullptr) {
+ if (NewIdx)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
+ else
+ NewRC = TRI->getCommonSubClass(NewRC, DefRC);
+ assert(NewRC && "subreg chosen for remat incompatible with instruction");
+ }
+ // Remap subranges to new lanemask and change register class.
+ LiveInterval &DstInt = LIS->getInterval(DstReg);
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
+ }
+ MRI->setRegClass(DstReg, NewRC);
+
+ // Update machine operands and add flags.
+ updateRegDefsUses(DstReg, DstReg, DstIdx);
+ NewMI.getOperand(0).setSubReg(NewIdx);
+ // updateRegDefsUses can add an "undef" flag to the definition, since
+ // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
+ // sure that "undef" is not set.
+ if (NewIdx == 0)
+ NewMI.getOperand(0).setIsUndef(false);
+ // Add dead subregister definitions if we are defining the whole register
+ // but only part of it is live.
+ // This could happen if the rematerialization instruction is rematerializing
+ // more than is actually used in the register.
+ // An example would be:
+ // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
+ // ; Copying only part of the register here, but the rest is undef.
+ // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
+ // ==>
+ // ; Materialize all the constants but only using one
+ // %2 = LOAD_CONSTANTS 5, 8
+ //
+ // at this point, for the part that wasn't defined before, we could have
+ // subranges missing the definition.
+ if (NewIdx == 0 && DstInt.hasSubRanges()) {
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
+ SlotIndex DefIndex =
+ CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
+ VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ if (!SR.liveAt(DefIndex))
+ SR.createDeadDef(DefIndex, Alloc);
+ MaxMask &= ~SR.LaneMask;
+ }
+ if (MaxMask.any()) {
+ LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
+ SR->createDeadDef(DefIndex, Alloc);
+ }
+ }
+
+ // Make sure that the subrange for resultant undef is removed
+ // For example:
+ // %1:sub1<def,read-undef> = LOAD CONSTANT 1
+ // %2 = COPY %1
+ // ==>
+ // %2:sub1<def, read-undef> = LOAD CONSTANT 1
+ // ; Correct but need to remove the subrange for %2:sub0
+ // ; as it is now undef
+ if (NewIdx != 0 && DstInt.hasSubRanges()) {
+ // The affected subregister segments can be removed.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
+ bool UpdatedSubRanges = false;
+ SlotIndex DefIndex =
+ CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
+ VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ if ((SR.LaneMask & DstMask).none()) {
+ LLVM_DEBUG(dbgs()
+ << "Removing undefined SubRange "
+ << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
+
+ if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
+ // VNI is in ValNo - remove any segments in this SubRange that have
+ // this ValNo
+ SR.removeValNo(RmValNo);
+ }
+
+ // We may not have a defined value at this point, but still need to
+ // clear out any empty subranges tentatively created by
+ // updateRegDefsUses. The original subrange def may have only undefed
+ // some lanes.
+ UpdatedSubRanges = true;
+ } else {
+ // We know that this lane is defined by this instruction,
+ // but at this point it may be empty because it is not used by
+ // anything. This happens when updateRegDefsUses adds the missing
+ // lanes. Assign that lane a dead def so that the interferences
+ // are properly modeled.
+ if (SR.empty())
+ SR.createDeadDef(DefIndex, Alloc);
+ }
+ }
+ if (UpdatedSubRanges)
+ DstInt.removeEmptySubRanges();
+ }
+ } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
+ // The new instruction may be defining a sub-register of what's actually
+ // been asked for. If so it must implicitly define the whole thing.
+ assert(DstReg.isPhysical() &&
+ "Only expect virtual or physical registers in remat");
+ NewMI.getOperand(0).setIsDead(true);
+ NewMI.addOperand(MachineOperand::CreateReg(
+ CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
+ // Record small dead def live-ranges for all the subregisters
+ // of the destination register.
+ // Otherwise, variables that live through may miss some
+ // interferences, thus creating invalid allocation.
+ // E.g., i386 code:
+ // %1 = somedef ; %1 GR8
+ // %2 = remat ; %2 GR32
+ // CL = COPY %2.sub_8bit
+ // = somedef %1 ; %1 GR8
+ // =>
+ // %1 = somedef ; %1 GR8
+ // dead ECX = remat ; implicit-def CL
+ // = somedef %1 ; %1 GR8
+ // %1 will see the interferences with CL but not with CH since
+ // no live-ranges would have been created for ECX.
+ // Fix that!
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
+ LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ }
+
+ if (NewMI.getOperand(0).getSubReg())
+ NewMI.getOperand(0).setIsUndef();
+
+ // Transfer over implicit operands to the rematerialized instruction.
+ for (MachineOperand &MO : ImplicitOps)
+ NewMI.addOperand(MO);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ MCRegister Reg = NewMIImplDefs[i];
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
+ LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ }
+
+ LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
+ ++NumReMats;
+
+ // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+ // to describe DstReg instead.
+ if (MRI->use_nodbg_empty(SrcReg)) {
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI->isDebugInstr()) {
+ if (DstReg.isPhysical())
+ UseMO.substPhysReg(DstReg, *TRI);
+ else
+ UseMO.setReg(DstReg);
+ // Move the debug value directly after the def of the rematerialized
+ // value in DstReg.
+ MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
+ LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
+ }
+ }
+ }
+
+ if (ToBeUpdated.count(SrcReg))
+ return true;
+
+ unsigned NumCopyUses = 0;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+ if (UseMO.getParent()->isCopyLike())
+ NumCopyUses++;
+ }
+ if (NumCopyUses < LateRematUpdateThreshold) {
+ // The source interval can become smaller because we removed a use.
+ shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs(&Edit);
+ } else {
+ ToBeUpdated.insert(SrcReg);
+ }
+ return true;
+}
+
+MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values; it only
+ // removes local variables. When we have a copy like:
+ //
+ // %1 = COPY undef %2
+ //
+ // We delete the copy and remove the corresponding value number from %1.
+ // Any uses of that value number are marked as <undef>.
+
+ // Note that we do not query CoalescerPair here but redo isMoveInstr as the
+ // CoalescerPair may have a new register class with adjusted subreg indices
+ // at this point.
+ Register SrcReg, DstReg;
+ unsigned SrcSubIdx = 0, DstSubIdx = 0;
+ if (!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ return nullptr;
+
+ SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
+ const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+ // CopyMI is undef iff SrcReg is not live before the instruction.
+ if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
+ LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
+ for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
+ if ((SR.LaneMask & SrcMask).none())
+ continue;
+ if (SR.liveAt(Idx))
+ return nullptr;
+ }
+ } else if (SrcLI.liveAt(Idx))
+ return nullptr;
+
+ // If the undef copy defines a live-out value (i.e. an input to a PHI def),
+ // then replace it with an IMPLICIT_DEF.
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ SlotIndex RegIndex = Idx.getRegSlot();
+ LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
+ assert(Seg != nullptr && "No segment for defining instruction");
+ VNInfo *V = DstLI.getVNInfoAt(Seg->end);
+
+ // The source interval may also have been on an undef use, in which case the
+ // copy introduced a live value.
+ if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
+ CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
+ MachineOperand &MO = CopyMI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse())
+ CopyMI->removeOperand(i-1);
+ }
+ LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
+ "implicit def\n");
+ return CopyMI;
+ }
+
+ // Remove any DstReg segments starting at the instruction.
+ LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
+
+ // Remove value or merge with previous one in case of a subregister def.
+ if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
+ VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
+ DstLI.MergeValueNumberInto(VNI, PrevVNI);
+
+ // The affected subregister segments can be removed.
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & DstMask).none())
+ continue;
+
+ VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
+ assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
+ SR.removeValNo(SVNI);
+ }
+ DstLI.removeEmptySubRanges();
+ } else
+ LIS->removeVRegDefAt(DstLI, RegIndex);
+
+ // Mark uses as undef.
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
+ if (MO.isDef() /*|| MO.isUndef()*/)
+ continue;
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI);
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ bool isLive;
+ if (!UseMask.all() && DstLI.hasSubRanges()) {
+ isLive = false;
+ for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & UseMask).none())
+ continue;
+ if (SR.liveAt(UseIdx)) {
+ isLive = true;
+ break;
+ }
+ }
+ } else
+ isLive = DstLI.liveAt(UseIdx);
+ if (isLive)
+ continue;
+ MO.setIsUndef(true);
+ LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
+ }
+
+ // A def of a subregister may be a use of the other subregisters, so
+ // deleting a def of a subregister may also remove uses. Since CopyMI
+ // is still part of the function (but about to be erased), mark all
+ // defs of DstReg in it as <undef>, so that shrinkToUses would
+ // ignore them.
+ for (MachineOperand &MO : CopyMI->all_defs())
+ if (MO.getReg() == DstReg)
+ MO.setIsUndef(true);
+ LIS->shrinkToUses(&DstLI);
+
+ return CopyMI;
+}
+
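+// Set the <undef> flag on MO when none of the lanes it reads from Int are live
+// at UseIdx. For a sub-register def the lanes read are the complement of the
+// lanes written, since a partial def is a read-modify-write of the remaining
+// lanes. Illustrative example (vreg and subreg names are arbitrary):
+//
+//   %0:sub1<def,read-undef> = FOO   ; only sub1 of %0 ever carries a value
+//   ... = BAR %0.sub0               ; no sub0 subrange is live here, so this
+//                                   ; use operand gets the <undef> flag
+//
+// If the whole register has no value leaving UseIdx, the main range may need
+// shrinking afterwards; that is requested via ShrinkMainRange.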
+void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
+ MachineOperand &MO, unsigned SubRegIdx) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ if (MO.isDef())
+ Mask = ~Mask;
+ bool IsUndef = true;
+ for (const LiveInterval::SubRange &S : Int.subranges()) {
+ if ((S.LaneMask & Mask).none())
+ continue;
+ if (S.liveAt(UseIdx)) {
+ IsUndef = false;
+ break;
+ }
+ }
+ if (IsUndef) {
+ MO.setIsUndef(true);
+ // We found out some subregister use is actually reading an undefined
+ // value. In some cases the whole vreg has become undefined at this
+ // point so we have to potentially shrink the main range if the
+ // use was ending a live segment there.
+ LiveQueryResult Q = Int.Query(UseIdx);
+ if (Q.valueOut() == nullptr)
+ ShrinkMainRange = true;
+ }
+}
+
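+// Rewrite every operand of SrcReg to use DstReg composed with SubIdx, fixing
+// <undef> flags along the way and lazily creating subranges on DstReg when
+// sub-register liveness is tracked. A minimal sketch of the rewrite, with
+// arbitrary vregs and assuming SubIdx == sub0:
+//
+//   %2 = FOO %1        ; before: operand uses SrcReg (%1)
+//   ==>
+//   %2 = FOO %3.sub0   ; after: operand uses DstReg (%3) through SubIdx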
+void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = DstReg.isPhysical();
+ LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
+
+ if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
+ for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 || MO.isUndef())
+ continue;
+ MachineInstr &MI = *MO.getParent();
+ if (MI.isDebugInstr())
+ continue;
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubReg);
+ }
+ }
+
+ SmallPtrSet<MachineInstr*, 8> Visited;
+ for (MachineRegisterInfo::reg_instr_iterator
+ I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
+ I != E; ) {
+ MachineInstr *UseMI = &*(I++);
+
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI).second)
+ continue;
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+
+ // If SrcReg wasn't read, it may still be the case that DstReg is live-in
+ // because SrcReg is a sub-register.
+ if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+
+ // Adjust <undef> flags in case of sub-register joins. We don't want to
+ // turn a full def into a read-modify-write sub-register def and vice
+ // versa.
+ if (SubIdx && MO.isDef())
+ MO.setIsUndef(!Reads);
+
+ // A subreg use of a partially undef (super) register may be a complete
+ // undef use now and then has to be marked that way.
+ if (MO.isUse() && !DstIsPhys) {
+ unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
+ if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ if (!DstInt->hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+ DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
+ // The unused lanes are just empty live-ranges at this point.
+ // It is the caller's responsibility to set the proper
+ // dead segments if there is an actual dead def of the
+ // unused lanes. This may happen with rematerialization.
+ DstInt->createSubRange(Allocator, UnusedLanes);
+ }
+ SlotIndex MIIdx = UseMI->isDebugInstr()
+ ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
+ : LIS->getInstructionIndex(*UseMI);
+ SlotIndex UseIdx = MIIdx.getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
+ }
+ }
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *TRI);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugInstr())
+ dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
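+// Gate joins that involve a physical register: only a virtual register with a
+// single value may be merged into a reserved physreg (e.g. a stack pointer).
+// Anything more complex is rejected here; the caller may still rematerialize
+// the defining instruction instead.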
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
+ // Always join simple intervals that are defined by a single copy from a
+ // reserved register. This doesn't increase register pressure, so it is
+ // always beneficial.
+ if (!MRI->isReserved(CP.getDstReg())) {
+ LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
+ return false;
+ }
+
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+ if (JoinVInt.containsOneValue())
+ return true;
+
+ LLVM_DEBUG(
+ dbgs() << "\tCannot join complex intervals into reserved register.\n");
+ return false;
+}
+
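+// Return true if the sub-lane value flowing into MBB is undefined, i.e. no
+// predecessor provides a live-out value for this subrange. A self loop that
+// merely feeds back the value defined here is tolerated.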
+bool RegisterCoalescer::copyValueUndefInPredecessors(
+ LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
+ if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
+ // If this is a self loop, we may be reading the same value.
+ if (V->id != SLRQ.valueOutOrDead()->id)
+ return false;
+ }
+ }
+
+ return true;
+}
+
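+// After pruning undef incoming lanes at an identity copy, mark the remaining
+// sub-register reads of those lanes as <undef> and shrink the live range so
+// the recorded liveness matches what the IR still reads.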
+void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
+ Register Reg,
+ LaneBitmask PrunedLanes) {
+ // If we had other instructions in the segment reading the undef sublane
+ // value, we need to mark them with undef.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx == 0 || MO.isUndef())
+ continue;
+
+ LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
+ MO.setIsUndef();
+ break;
+ }
+ }
+ }
+
+ LI.removeEmptySubRanges();
+
+ // A def of a subregister may be a use of other register lanes. Replacing
+ // such a def with a def of a different register will eliminate the use,
+ // and may cause the recorded live range to be larger than the actual
+ // liveness in the program IR.
+ LIS->shrinkToUses(&LI);
+}
+
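+// Attempt to coalesce a single COPY. Roughly: set up the CoalescerPair, weed
+// out dead and <undef> copies, fold identity copies, then either join with a
+// reserved physreg or join the two virtual intervals. On failure, fall back to
+// rematerialization, adjustCopiesBackFrom, removeCopyByCommutingDef or
+// removePartialRedundancy; Again is set when a later retry might succeed.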
+bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
+ Again = false;
+ LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*TRI);
+ if (!CP.setRegisters(CopyMI)) {
+ LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ if (CP.getNewRC()) {
+ auto SrcRC = MRI->getRegClass(CP.getSrcReg());
+ auto DstRC = MRI->getRegClass(CP.getDstReg());
+ unsigned SrcIdx = CP.getSrcIdx();
+ unsigned DstIdx = CP.getDstIdx();
+ if (CP.isFlipped()) {
+ std::swap(SrcIdx, DstIdx);
+ std::swap(SrcRC, DstRC);
+ }
+ if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
+ CP.getNewRC(), *LIS)) {
+ LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
+ return false;
+ }
+ }
+
+ // Dead code elimination. This really should be handled by MachineDCE, but
+ // sometimes dead copies slip through, and we can't generate invalid live
+ // ranges.
+ if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
+ LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
+ DeadDefs.push_back(CopyMI);
+ eliminateDeadDefs();
+ return true;
+ }
+
+ // Eliminate undefs.
+ if (!CP.isPhys()) {
+ // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
+ if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
+ if (UndefMI->isImplicitDef())
+ return false;
+ deleteInstr(CopyMI);
+ return false; // Not coalescable.
+ }
+ }
+
+ // Coalesced copies are normally removed immediately, but transformations
+ // like removeCopyByCommutingDef() can inadvertently create identity copies.
+ // When that happens, just join the values and remove the copy.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
+ LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
+ LiveQueryResult LRQ = LI.Query(CopyIdx);
+ if (VNInfo *DefVNI = LRQ.valueDefined()) {
+ VNInfo *ReadVNI = LRQ.valueIn();
+ assert(ReadVNI && "No value before copy and no <undef> flag.");
+ assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
+
+ // Track incoming undef lanes we need to eliminate from the subrange.
+ LaneBitmask PrunedLanes;
+ MachineBasicBlock *MBB = CopyMI->getParent();
+
+ // Process subregister liveranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveQueryResult SLRQ = S.Query(CopyIdx);
+ if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
+ if (VNInfo *SReadVNI = SLRQ.valueIn())
+ SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
+
+ // If this copy introduced an undef subrange from an incoming value,
+ // we need to eliminate the undef live in values from the subrange.
+ if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
+ LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
+ PrunedLanes |= S.LaneMask;
+ S.removeValNo(SDefVNI);
+ }
+ }
+ }
+
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ if (PrunedLanes.any()) {
+ LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
+ << PrunedLanes << '\n');
+ setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
+ }
+
+ LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ }
+ deleteInstr(CopyMI);
+ return true;
+ }
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ LLVM_DEBUG(dbgs() << "\tConsidering merging "
+ << printReg(CP.getSrcReg(), TRI) << " with "
+ << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
+ if (!canJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ bool IsDefCopy = false;
+ if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
+ return true;
+ if (IsDefCopy)
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ } else {
+ // When possible, let DstReg be the larger interval.
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
+ LIS->getInterval(CP.getDstReg()).size())
+ CP.flip();
+
+ LLVM_DEBUG({
+ dbgs() << "\tConsidering merging to "
+ << TRI->getRegClassName(CP.getNewRC()) << " with ";
+ if (CP.getDstIdx() && CP.getSrcIdx())
+ dbgs() << printReg(CP.getDstReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
+ << printReg(CP.getSrcReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
+ else
+ dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
+ << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ });
+ }
+
+ ShrinkMask = LaneBitmask::getNone();
+ ShrinkMainRange = false;
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!joinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ bool IsDefCopy = false;
+ if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
+ return true;
+
+ // If we can eliminate the copy without merging the live segments, do so
+ // now.
+ if (!CP.isPartial() && !CP.isPhys()) {
+ bool Changed = adjustCopiesBackFrom(CP, CopyMI);
+ bool Shrink = false;
+ if (!Changed)
+ std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
+ if (Changed) {
+ deleteInstr(CopyMI);
+ if (Shrink) {
+ Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ shrinkToUses(&DstLI);
+ LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
+ }
+ LLVM_DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Try and see if we can partially eliminate the copy by moving the copy to
+ // its predecessor.
+ if (!CP.isPartial() && !CP.isPhys())
+ if (removePartialRedundancy(CP, *CopyMI))
+ return true;
+
+ // Otherwise, we are unable to join the intervals.
+ LLVM_DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Removing sub-register copies can ease the register class constraints.
+ // Make sure we attempt to inflate the register class of DstReg.
+ if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
+ InflateRegs.push_back(CP.getDstReg());
+
+ // CopyMI has been erased by joinIntervals at this point. Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
+
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+
+ // Shrink subregister ranges if necessary.
+ if (ShrinkMask.any()) {
+ LiveInterval &LI = LIS->getInterval(CP.getDstReg());
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & ShrinkMask).none())
+ continue;
+ LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
+ << ")\n");
+ LIS->shrinkToUses(S, LI.reg());
+ ShrinkMainRange = true;
+ }
+ LI.removeEmptySubRanges();
+ }
+
+ // CP.getSrcReg()'s live interval has been merged into CP.getDstReg()'s live
+ // interval. Since CP.getSrcReg() is in the ToBeUpdated set and its live
+ // interval is not up-to-date, we need to update the merged live interval
+ // here.
+ if (ToBeUpdated.count(CP.getSrcReg()))
+ ShrinkMainRange = true;
+
+ if (ShrinkMainRange) {
+ LiveInterval &LI = LIS->getInterval(CP.getDstReg());
+ shrinkToUses(&LI);
+ }
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ LIS->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+
+ LLVM_DEBUG({
+ dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
+ << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
+ dbgs() << "\tResult = ";
+ if (CP.isPhys())
+ dbgs() << printReg(CP.getDstReg(), TRI);
+ else
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
+ });
+
+ ++numJoins;
+ return true;
+}
+
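+// Join a virtual register with a reserved physical register. The reserved
+// register's live range is just a set of dead defs, so no value merging is
+// needed: the identity copy is deleted, and when a vreg is copied into the
+// physreg, the physreg's dead def is moved up to the instruction that defined
+// the vreg.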
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ Register DstReg = CP.getDstReg();
+ Register SrcReg = CP.getSrcReg();
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(MRI->isReserved(DstReg) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(SrcReg);
+ LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
+
+ assert(RHS.containsOneValue() && "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of DstReg.
+ // The live range of the reserved register will look like a set of dead defs
+ // - we don't properly track the live range of reserved registers.
+
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges looking like dead defs.
+ if (!MRI->isConstantPhysReg(DstReg)) {
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
+ // Abort if not all the regunits are reserved.
+ for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
+ if (!MRI->isReserved(*RI))
+ return false;
+ }
+ if (RHS.overlaps(LIS->getRegUnit(Unit))) {
+ LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
+ << '\n');
+ return false;
+ }
+ }
+
+ // We must also check for overlaps with regmask clobbers.
+ BitVector RegMaskUsable;
+ if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
+ !RegMaskUsable.test(DstReg)) {
+ LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
+ return false;
+ }
+ }
+
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // Delete the identity copy.
+ MachineInstr *CopyMI;
+ if (CP.isFlipped()) {
+ // Physreg is copied into vreg
+ // %y = COPY %physreg_x
+ // ... //< no other def of %physreg_x here
+ // use %y
+ // =>
+ // ...
+ // use %physreg_x
+ CopyMI = MRI->getVRegDef(SrcReg);
+ deleteInstr(CopyMI);
+ } else {
+ // VReg is copied into physreg:
+ // %y = def
+ // ... //< no other def or use of %physreg_x here
+ // %physreg_x = COPY %y
+ // =>
+ // %physreg_x = def
+ // ...
+ if (!MRI->hasOneNonDBGUse(SrcReg)) {
+ LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
+ return false;
+ }
+
+ if (!LIS->intervalIsInOneMBB(RHS)) {
+ LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
+ return false;
+ }
+
+ MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
+ CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
+ SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+
+ if (!MRI->isConstantPhysReg(DstReg)) {
+ // We checked above that there are no interfering defs of the physical
+ // register. However, for this case, where we intend to move up the def of
+ // the physical register, we also need to check for interfering uses.
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
+ SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(SI);
+ if (MI->readsRegister(DstReg, TRI)) {
+ LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
+ return false;
+ }
+ }
+ }
+
+ // We're going to remove the copy which defines a physical reserved
+ // register, so remove its valno, etc.
+ LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
+ << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
+
+ LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
+ deleteInstr(CopyMI);
+
+ // Create a new dead def at the new def location.
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
+ LiveRange &LR = LIS->getRegUnit(Unit);
+ LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
+ }
+ }
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg. The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+ // range as well as a mapping from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1 = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst = FOO
+// %src = BAR
+// %dst:ssub1 = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
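+//
+// The JoinVals machinery below implements this analysis. As a rough sketch of
+// the flow driven by the joining code: mapValues() classifies every value
+// number into one of the ConflictResolution kinds, resolveConflicts() tries to
+// downgrade CR_Unresolved conflicts by scanning the block for reads of the
+// clobbered lanes, pruneValues() trims the live ranges that lose, and
+// eraseInstrs() finally deletes the instructions made redundant by the join.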
+
+namespace {
+
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair (or one for each lane of a side of the coalescer
+/// pair).
+class JoinVals {
+ /// Live range we work on.
+ LiveRange &LR;
+
+ /// (Main) register we work on.
+ const Register Reg;
+
+ /// Reg (and therefore the values in this liverange) will end up as
+ /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
+ /// CP.SrcIdx.
+ const unsigned SubIdx;
+
+ /// The LaneMask that this liverange will occupy in the coalesced register.
+ /// May be smaller than the lanemask produced by SubIdx when merging
+ /// subranges.
+ const LaneBitmask LaneMask;
+
+ /// This is true when joining sub register ranges, false when joining main
+ /// ranges.
+ const bool SubRangeJoin;
+
+ /// Whether the current LiveInterval tracks subregister liveness.
+ const bool TrackSubRegLiveness;
+
+ /// Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ /// Value number assignments. Maps value numbers in LI to entries in
+ /// NewVNInfo. This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ public:
+ /// Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ /// No overlap, simply keep this value.
+ CR_Keep,
+
+ /// Merge this value into OtherVNI and erase the defining instruction.
+ /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ /// values.
+ CR_Erase,
+
+ /// Merge this value into OtherVNI but keep the defining instruction.
+ /// This is for the special case where OtherVNI is defined by the same
+ /// instruction.
+ CR_Merge,
+
+ /// Keep this value, and have it replace OtherVNI where possible. This
+ /// complicates value mapping since OtherVNI maps to two different values
+ /// before and after this def.
+ /// Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ /// Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ /// Unresolvable conflict. Abort the join.
+ CR_Impossible
+ };
+
+ private:
+ /// Per-value info for LI. The lane bit masks are all relative to the final
+ /// joined register, so they can be compared directly between SrcReg and
+ /// DstReg.
+ struct Val {
+ ConflictResolution Resolution = CR_Keep;
+
+ /// Lanes written by this def, 0 for unanalyzed values.
+ LaneBitmask WriteLanes;
+
+ /// Lanes with defined values in this register. Other lanes are undef and
+ /// safe to clobber.
+ LaneBitmask ValidLanes;
+
+ /// Value in LI being redefined by this def.
+ VNInfo *RedefVNI = nullptr;
+
+ /// Value in the other live range that overlaps this def, if any.
+ VNInfo *OtherVNI = nullptr;
+
+ /// Is this value an IMPLICIT_DEF that can be erased?
+ ///
+ /// IMPLICIT_DEF values should only exist at the end of a basic block that
+ /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ /// safely erased if they are overlapping a live value in the other live
+ /// interval.
+ ///
+ /// Weird control flow graphs and incomplete PHI handling in
+ /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ /// longer live ranges. Such IMPLICIT_DEF values should be treated like
+ /// normal values.
+ bool ErasableImplicitDef = false;
+
+ /// True when the live range of this value will be pruned because of an
+ /// overlapping CR_Replace value in the other live range.
+ bool Pruned = false;
+
+ /// True once Pruned above has been computed.
+ bool PrunedComputed = false;
+
+ /// True if this value is determined to be identical to OtherVNI
+ /// (in valuesIdentical). This is used with CR_Erase where the erased
+ /// copy is redundant, i.e. the source value is already the same as
+ /// the destination. In such cases the subranges need to be updated
+ /// properly. See comment at pruneSubRegValues for more info.
+ bool Identical = false;
+
+ Val() = default;
+
+ bool isAnalyzed() const { return WriteLanes.any(); }
+ };
+
+ /// One entry per value number in LI.
+ SmallVector<Val, 8> Vals;
+
+ /// Compute the bitmask of lanes actually written by DefMI.
+ /// Set Redef if there are any partial register definitions that depend on the
+ /// previous value of the register.
+ LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+
+ /// Find the ultimate value that VNI was copied from.
+ std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
+
+ bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
+
+ /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+ /// Return a conflict resolution when possible, but leave the hard cases as
+ /// CR_Unresolved.
+ /// Recursively calls computeAssignment() on this and Other, guaranteeing that
+ /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+ /// The recursion always goes upwards in the dominator tree, making loops
+ /// impossible.
+ ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+
+ /// Compute the value assignment for ValNo in LR.
+ /// This may be called recursively by analyzeValue(), but never for a ValNo on
+ /// the stack.
+ void computeAssignment(unsigned ValNo, JoinVals &Other);
+
+ /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
+ /// the extent of the tainted lanes in the block.
+ ///
+ /// Multiple values in Other.LR can be affected since partial redefinitions
+ /// can preserve previously tainted lanes.
+ ///
+ /// 1 %dst = VLOAD <-- Define all lanes in %dst
+ /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+ /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+ /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+ ///
+ /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+ /// entry to TaintedVals.
+ ///
+ /// Returns false if the tainted lanes extend beyond the basic block.
+ bool
+ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
+
+ /// Return true if MI uses any of the given Lanes from Reg.
+ /// This does not include partial redefinitions of Reg.
+ bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
+
+ /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
+ /// be pruned:
+ ///
+ /// %dst = COPY %src
+ /// %src = COPY %dst <-- This value to be pruned.
+ /// %dst = COPY %src <-- This value is a copy of a pruned value.
+ bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+ JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
+ SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
+ LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
+ bool TrackSubRegLiveness)
+ : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
+ SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
+ NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
+ TRI(TRI), Assignments(LR.getNumValNums(), -1),
+ Vals(LR.getNumValNums()) {}
+
+ /// Analyze defs in LR and compute a value mapping in NewVNInfo.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool mapValues(JoinVals &Other);
+
+ /// Try to resolve conflicts that require all values to be mapped.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool resolveConflicts(JoinVals &Other);
+
+ /// Prune the live range of values in Other.LR where they would conflict with
+ /// CR_Replace values in LR. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
+ bool changeInstrs);
+
+ /// Removes subranges starting at copies that get removed. This sometimes
+ /// happens when undefined subranges are copied around. These ranges contain
+ /// no useful information and can be removed.
+ void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
+
+ /// Pruning values in subranges can lead to removing segments in these
+ /// subranges started by IMPLICIT_DEFs. The corresponding segments in
+ /// the main range also need to be removed. This function will mark
+ /// the corresponding values in the main range as pruned, so that
+ /// eraseInstrs can do the final cleanup.
+ /// The parameter @p LI must be the interval whose main range is the
+ /// live range LR.
+ void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
+
+ /// Erase any machine instructions that have been coalesced away.
+ /// Add erased instructions to ErasedInstrs.
+ /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+ /// the erased instrs.
+ void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
+ SmallVectorImpl<Register> &ShrinkRegs,
+ LiveInterval *LI = nullptr);
+
+ /// Remove liverange defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
+ /// Get the value assignments suitable for passing to LiveInterval::join.
+ const int *getAssignments() const { return Assignments.data(); }
+
+ /// Get the conflict resolution for a value number.
+ ConflictResolution getResolution(unsigned Num) const {
+ return Vals[Num].Resolution;
+ }
+};
+
+} // end anonymous namespace
+
+LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+ const {
+ LaneBitmask L;
+ for (const MachineOperand &MO : DefMI->all_defs()) {
+ if (MO.getReg() != Reg)
+ continue;
+ L |= TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
+ if (MO.readsReg())
+ Redef = true;
+ }
+ return L;
+}
+
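+// Walk backwards through full copies of virtual registers to find the value
+// VNI ultimately originates from. Illustrative chain (arbitrary names):
+//
+//   %0 = FOO
+//   %1 = COPY %0
+//   %2 = COPY %1   ; followCopyChain on %2's value yields FOO's value in %0
+//
+// The walk stops at PHI defs, partial copies, physreg sources, and undefined
+// inputs.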
+std::pair<const VNInfo *, Register>
+JoinVals::followCopyChain(const VNInfo *VNI) const {
+ Register TrackReg = Reg;
+
+ while (!VNI->isPHIDef()) {
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No defining instruction");
+ if (!MI->isFullCopy())
+ return std::make_pair(VNI, TrackReg);
+ Register SrcReg = MI->getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ return std::make_pair(VNI, TrackReg);
+
+ const LiveInterval &LI = LIS->getInterval(SrcReg);
+ const VNInfo *ValueIn;
+ // No subrange involved.
+ if (!SubRangeJoin || !LI.hasSubRanges()) {
+ LiveQueryResult LRQ = LI.Query(Def);
+ ValueIn = LRQ.valueIn();
+ } else {
+ // Query subranges. Ensure that all matching ones take us to the same def
+ // (allowing some of them to be undef).
+ ValueIn = nullptr;
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ // Transform lanemask to a mask in the joined live interval.
+ LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+ if ((SMask & LaneMask).none())
+ continue;
+ LiveQueryResult LRQ = S.Query(Def);
+ if (!ValueIn) {
+ ValueIn = LRQ.valueIn();
+ continue;
+ }
+ if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
+ return std::make_pair(VNI, TrackReg);
+ }
+ }
+ if (ValueIn == nullptr) {
+ // Reaching an undefined value is legitimate, for example:
+ //
+ // 1 undef %0.sub1 = ... ;; %0.sub0 == undef
+ // 2 %1 = COPY %0 ;; %1 is defined here.
+ // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
+ // ;; but it's equivalent to "undef".
+ return std::make_pair(nullptr, SrcReg);
+ }
+ VNI = ValueIn;
+ TrackReg = SrcReg;
+ }
+ return std::make_pair(VNI, TrackReg);
+}
+
+bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
+ const JoinVals &Other) const {
+ const VNInfo *Orig0;
+ Register Reg0;
+ std::tie(Orig0, Reg0) = followCopyChain(Value0);
+ if (Orig0 == Value1 && Reg0 == Other.Reg)
+ return true;
+
+ const VNInfo *Orig1;
+ Register Reg1;
+ std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
+ // If both values are undefined, and the source registers are the same
+ // register, the values are identical. Filter out cases where only one
+ // value is defined.
+ if (Orig0 == nullptr || Orig1 == nullptr)
+ return Orig0 == Orig1 && Reg0 == Reg1;
+
+ // The values are equal if they are defined at the same place and use the
+ // same register. Note that we cannot compare VNInfos directly as some of
+ // them might be from a copy created in mergeSubRangeInto() while the other
+ // is from the original LiveInterval.
+ return Orig0->def == Orig1->def && Reg0 == Reg1;
+}
+
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ assert(!V.isAnalyzed() && "Value has already been analyzed!");
+ VNInfo *VNI = LR.getValNumInfo(ValNo);
+ if (VNI->isUnused()) {
+ V.WriteLanes = LaneBitmask::getAll();
+ return CR_Keep;
+ }
+
+ // Get the instruction defining this value, compute the lanes written.
+ const MachineInstr *DefMI = nullptr;
+ if (VNI->isPHIDef()) {
+ // Conservatively assume that all lanes in a PHI are valid.
+ LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
+ : TRI->getSubRegIndexLaneMask(SubIdx);
+ V.ValidLanes = V.WriteLanes = Lanes;
+ } else {
+ DefMI = Indexes->getInstructionFromIndex(VNI->def);
+ assert(DefMI != nullptr);
+ if (SubRangeJoin) {
+ // We don't care about the lanes when joining subregister ranges.
+ V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = LaneBitmask::getNone();
+ V.ErasableImplicitDef = true;
+ }
+ } else {
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1 = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LR.Query(VNI->def).valueIn();
+ assert((TrackSubRegLiveness || V.RedefVNI) &&
+ "Instruction is reading nonexistent value");
+ if (V.RedefVNI != nullptr) {
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
+ }
+
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ //
+ // Clearing the valid lanes is deferred until it is sure this can be
+ // erased.
+ V.ErasableImplicitDef = true;
+ }
+ }
+ }
+
+ // Find the value in Other that overlaps VNI->def, if any.
+ LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
+ // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
+ // is assigned.
+ if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if ((V.ValidLanes & OtherV.ValidLanes).any())
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
+
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ if (OtherV.ErasableImplicitDef) {
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ MachineBasicBlock *OtherMBB = Indexes->getMBBFromIndex(V.OtherVNI->def);
+ if (DefMI && DefMI->getParent() != OtherMBB) {
+ LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into "
+ << printMBBReference(*DefMI->getParent())
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ } else if (OtherMBB->hasEHPadSuccessor()) {
+ // If OtherV is defined in a basic block that has EH pad successors then
+ // we get the same problem not just if OtherV is live beyond its basic
+ // block, but beyond the last call instruction in its basic block. Handle
+ // this case conservatively.
+ LLVM_DEBUG(
+ dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " may be live into EH pad successors, keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ } else {
+ // We deferred clearing these lanes in case we needed to save them
+ OtherV.ValidLanes &= ~OtherV.WriteLanes;
+ }
+ }
+
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef())
+ return CR_Erase;
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
+ }
+
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+ return CR_Keep;
+
+ // Handle the case where VNI and OtherVNI can be proven to be identical:
+ //
+ // %other = COPY %ext
+ // %this = COPY %ext <-- Erase this copy
+ //
+ if (DefMI->isFullCopy() && !CP.isPartial() &&
+ valuesIdentical(VNI, V.OtherVNI, Other)) {
+ V.Identical = true;
+ return CR_Erase;
+ }
+
+ // The remaining checks apply to the lanes, which aren't tracked here when
+ // joining subregister ranges. That case was already decided to be OK, so
+ // resolve it as CR_Replace.
+ if (SubRangeJoin)
+ return CR_Replace;
+
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
+ // 4 BAZ killed %dst
+ // 5 QUUX killed %src
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes).none())
+ return CR_Replace;
+
+ // If the other live range is killed by DefMI and the live ranges are still
+ // overlapping, it must be because we're looking at an early clobber def:
+ //
+ // %dst<def,early-clobber> = ASM killed %src
+ //
+ // In this case, it is illegal to merge the two live ranges since the early
+ // clobber def would clobber %src before it was read.
+ if (OtherLRQ.isKill()) {
+ // This case where the def doesn't overlap the kill is handled above.
+ assert(VNI->def.isEarlyClobber() &&
+ "Only early clobber defs can overlap a kill");
+ return CR_Impossible;
+ }
+
+ // VNI is clobbering live lanes in OtherVNI, but there is still the
+ // possibility that no instructions actually read the clobbered lanes.
+ // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+ // Otherwise Other.LR wouldn't be live here.
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
+ return CR_Impossible;
+
+ if (TrackSubRegLiveness) {
+ auto &OtherLI = LIS->getInterval(Other.Reg);
+ // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
+ // share the same live range, so we just need to check whether they have
+ // any conflict bit in their LaneMask.
+ if (!OtherLI.hasSubRanges()) {
+ LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
+ return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
+ }
+
+ // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
+ // impossible to resolve the conflict. Otherwise, we can just replace
+ // OtherVNI because of no real conflict.
+ for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
+ LaneBitmask OtherMask =
+ TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
+ if ((OtherMask & V.WriteLanes).none())
+ continue;
+
+ auto OtherSRQ = OtherSR.Query(VNI->def);
+ if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
+ // VNI is clobbering some lanes of OtherVNI, they have real conflict.
+ return CR_Impossible;
+ }
+ }
+
+ // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
+ return CR_Replace;
+ }
+
+ // We need to verify that no instructions are reading the clobbered lanes.
+ // To save compile time, we'll only check that locally. Don't allow the
+ // tainted value to escape the basic block.
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+ return CR_Impossible;
+
+ // There are still some things that could go wrong besides clobbered lanes
+ // being read, for example OtherVNI may be only partially redefined in MBB,
+ // and some clobbered lanes could escape the block. Save this analysis for
+ // resolveConflicts() when all values have been mapped. We need to know
+ // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+ // that now - the recursive analyzeValue() calls must go upwards in the
+ // dominator tree.
+ return CR_Unresolved;
+}
+
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.isAnalyzed()) {
+ // Recursion should always move up the dominator tree, so ValNo is not
+ // supposed to reappear before it has been assigned.
+ assert(Assignments[ValNo] != -1 && "Bad recursion?");
+ return;
+ }
+ switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+ case CR_Erase:
+ case CR_Merge:
+ // Merge this ValNo into OtherVNI.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+ assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+ Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+ LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
+ << LR.getValNumInfo(ValNo)->def << " into "
+ << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ break;
+ case CR_Replace:
+ case CR_Unresolved: {
+ // The other value is going to be pruned if this join is successful.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+ // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
+ // its lanes.
+ if (OtherV.ErasableImplicitDef &&
+ TrackSubRegLiveness &&
+ (OtherV.ValidLanes & ~V.ValidLanes).any()) {
+ LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
+
+ OtherV.ErasableImplicitDef = false;
+ // The valid lanes written by the implicit_def were speculatively cleared
+ // before, so make this more conservative. It may be better to track this,
+ // but I haven't found a testcase where it matters.
+ OtherV.ValidLanes = LaneBitmask::getAll();
+ }
+
+ OtherV.Pruned = true;
+ [[fallthrough]];
+ }
+ default:
+ // This value number needs to go in the final joined live range.
+ Assignments[ValNo] = NewVNInfo.size();
+ NewVNInfo.push_back(LR.getValNumInfo(ValNo));
+ break;
+ }
+}
+
+bool JoinVals::mapValues(JoinVals &Other) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ computeAssignment(i, Other);
+ if (Vals[i].Resolution == CR_Impossible) {
+ LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
+ << '@' << LR.getValNumInfo(i)->def << '\n');
+ return false;
+ }
+ }
+ return true;
+}
+
+bool JoinVals::
+taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
+ VNInfo *VNI = LR.getValNumInfo(ValNo);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+ // Scan Other.LR from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
+ assert(OtherI != Other.LR.end() && "No conflict?");
+ do {
+ // OtherI is pointing to a tainted value. Abort the join if the tainted
+ // lanes escape the block.
+ SlotIndex End = OtherI->end;
+ if (End >= MBBEnd) {
+ LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << " to "
+ << End << '\n');
+ // A dead def is not a problem.
+ if (End.isDead())
+ break;
+ TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+ // Check for another def in the MBB.
+ if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
+ break;
+
+ // Lanes written by the new def are no longer tainted.
+ const Val &OV = Other.Vals[OtherI->valno->id];
+ TaintedLanes &= ~OV.WriteLanes;
+ if (!OV.RedefVNI)
+ break;
+ } while (TaintedLanes.any());
+ return true;
+}
+
+bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
+ LaneBitmask Lanes) const {
+ if (MI.isDebugOrPseudoInstr())
+ return false;
+ for (const MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg() != Reg)
+ continue;
+ if (!MO.readsReg())
+ continue;
+ unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
+ if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
+ return true;
+ }
+ return false;
+}
+
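+// Try to downgrade the remaining CR_Unresolved conflicts. A conflict is
+// resolvable when the lanes this value clobbers are never read before they are
+// rewritten (or the tainted range ends) within the block; such values become
+// CR_Replace. Otherwise the join fails. Sub-range joins simply give up here.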
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
+ if (V.Resolution != CR_Unresolved)
+ continue;
+ LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
+ << LR.getValNumInfo(i)->def
+ << ' ' << PrintLaneMask(LaneMask) << '\n');
+ if (SubRangeJoin)
+ return false;
+
+ ++NumLaneConflicts;
+ assert(V.OtherVNI && "Inconsistent conflict resolution.");
+ VNInfo *VNI = LR.getValNumInfo(i);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+ // join, those lanes will be tainted with a wrong value. Get the extent of
+ // the tainted lanes.
+ LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
+ if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+ // Tainted lanes would extend beyond the basic block.
+ return false;
+
+ assert(!TaintExtent.empty() && "There should be at least one conflict.");
+
+ // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ MachineBasicBlock::iterator MI = MBB->begin();
+ if (!VNI->isPHIDef()) {
+ MI = Indexes->getInstructionFromIndex(VNI->def);
+ if (!VNI->def.isEarlyClobber()) {
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
+ }
+ }
+ assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+ "Interference ends on VNI->def. Should have been handled earlier");
+ MachineInstr *LastMI =
+ Indexes->getInstructionFromIndex(TaintExtent.front().first);
+ assert(LastMI && "Range must end at a proper instruction");
+ unsigned TaintNum = 0;
+ while (true) {
+ assert(MI != MBB->end() && "Bad LastMI");
+ if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
+ LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+ return false;
+ }
+ // LastMI is the last instruction to use the current value.
+ if (&*MI == LastMI) {
+ if (++TaintNum == TaintExtent.size())
+ break;
+ LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+ assert(LastMI && "Range must end at a proper instruction");
+ TaintedLanes = TaintExtent[TaintNum].second;
+ }
+ ++MI;
+ }
+
+ // The tainted lanes are unused.
+ V.Resolution = CR_Replace;
+ ++NumLaneResolves;
+ }
+ return true;
+}
+
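+// Return true if the value numbered ValNo is ultimately a copy of a value
+// that has been pruned, following CR_Erase/CR_Merge copies up the dominator
+// tree.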
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.Pruned || V.PrunedComputed)
+ return V.Pruned;
+
+ if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+ return V.Pruned;
+
+ // Follow copies up the dominator tree and check if any intermediate value
+ // has been pruned.
+ V.PrunedComputed = true;
+ V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+ return V.Pruned;
+}
+
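+// Remove liveness that would conflict after the join: for CR_Replace values
+// prune the overlapping part of Other.LR, and prune LR itself for values that
+// are copies of pruned values. Removed end points are collected in EndPoints
+// so liveness can be restored after joining. With changeInstrs set, the
+// <def,read-undef> and dead flags on the defining instructions are updated
+// as well.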
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints,
+ bool changeInstrs) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LR.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ break;
+ case CR_Replace: {
+ // This value takes precedence over the value in Other.LR.
+ LIS->pruneValue(Other.LR, Def, &EndPoints);
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
+ if (!Def.isBlock()) {
+ if (changeInstrs) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove dead flags since the joined live range will
+ // continue past this instruction.
+ for (MachineOperand &MO :
+ Indexes->getInstructionFromIndex(Def)->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
+ if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
+ MO.setIsUndef(false);
+ MO.setIsDead(false);
+ }
+ }
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
+ }
+ LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
+ << ": " << Other.LR << '\n');
+ break;
+ }
+ case CR_Erase:
+ case CR_Merge:
+ if (isPrunedValue(i, Other)) {
+ // This value is ultimately a copy of a pruned value in LR or Other.LR.
+ // We can no longer trust the value mapping computed by
+ // computeAssignment(), the value that was originally copied could have
+ // been replaced.
+ LIS->pruneValue(LR, Def, &EndPoints);
+ LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
+ << Def << ": " << LR << '\n');
+ }
+ break;
+ case CR_Unresolved:
+ case CR_Impossible:
+ llvm_unreachable("Unresolved conflicts");
+ }
+ }
+}
+
+// Check if the segment consists of a copied live-through value, i.e. the copy
+// in the block only extended the liveness of an undef value, which we may need
+// to handle.
+static bool isLiveThrough(const LiveQueryResult Q) {
+ return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
+}
+
+/// Consider the following situation when coalescing the copy between
+/// %31 and %45 at 800. (The vertical lines represent live range segments.)
+///
+/// Main range Subrange 0004 (sub2)
+/// %31 %45 %31 %45
+/// 544 %45 = COPY %28 + +
+/// | v1 | v1
+/// 560B bb.1: + +
+/// 624 = %45.sub2 | v2 | v2
+/// 800 %31 = COPY %45 + + + +
+/// | v0 | v0
+/// 816 %31.sub1 = ... + |
+/// 880 %30 = COPY %31 | v1 +
+/// 928 %45 = COPY %30 | + +
+/// | | v0 | v0 <--+
+/// 992B ; backedge -> bb.1 | + + |
+/// 1040 = %31.sub0 + |
+/// This value must remain
+/// live-out!
+///
+/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
+/// redundant, since it copies the value from %45 back into it. The
+/// conflict resolution for the main range determines that %45.v0 is
+/// to be erased, which is ok since %31.v1 is identical to it.
+/// The problem happens with the subrange for sub2: it has to be live
+/// on exit from the block, but since 928 was actually a point of
+/// definition of %45.sub2, %45.sub2 was not live immediately prior
+/// to that definition. As a result, when 928 was erased, the value v0
+/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
+/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
+/// providing an incorrect value to the use at 624.
+///
+/// Since the main-range values %31.v1 and %45.v0 were proved to be
+/// identical, the corresponding values in subranges must also be the
+/// same. A redundant copy is removed because it's not needed, and not
+/// because it copied an undefined value, so any liveness that originated
+/// from that copy cannot disappear. When pruning a value that started
+/// at the removed copy, the corresponding identical value must be
+/// extended to replace it.
+void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
+ // Look for values being erased.
+ bool DidPrune = false;
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ // We should trigger in all cases in which eraseInstrs() does something; the
+ // debug message printed below helps diagnose any mismatch with eraseInstrs().
+ if (V.Resolution != CR_Erase &&
+ (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
+ continue;
+
+ // Check subranges at the point where the copy will be removed.
+ SlotIndex Def = LR.getValNumInfo(i)->def;
+ SlotIndex OtherDef;
+ if (V.Identical)
+ OtherDef = V.OtherVNI->def;
+
+ // Print message so mismatches with eraseInstrs() can be diagnosed.
+ LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
+ << '\n');
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveQueryResult Q = S.Query(Def);
+
+ // If a subrange starts at the copy then an undefined value has been
+ // copied and we must remove that subrange value as well.
+ VNInfo *ValueOut = Q.valueOutOrDead();
+ if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
+ (V.Identical && V.Resolution == CR_Erase &&
+ ValueOut->def == Def))) {
+ LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
+ << " at " << Def << "\n");
+ SmallVector<SlotIndex,8> EndPoints;
+ LIS->pruneValue(S, Def, &EndPoints);
+ DidPrune = true;
+ // Mark value number as unused.
+ ValueOut->markUnused();
+
+ if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
+ // If V is identical to V.OtherVNI (and S was live at OtherDef),
+ // then we can't simply prune V from S. V needs to be replaced
+ // with V.OtherVNI.
+ LIS->extendToIndices(S, EndPoints);
+ }
+
+ // We may need to eliminate the subrange if the copy introduced a live
+ // out undef value.
+ if (ValueOut->isPHIDef())
+ ShrinkMask |= S.LaneMask;
+ continue;
+ }
+
+ // If a subrange ends at the copy, then a value was copied but only
+ // partially used later. Shrink the subregister range appropriately.
+ //
+ // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
+ // conservatively correct.
+ if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
+ (V.Resolution == CR_Erase && isLiveThrough(Q))) {
+ LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
+ << PrintLaneMask(S.LaneMask) << " at " << Def
+ << "\n");
+ ShrinkMask |= S.LaneMask;
+ }
+ }
+ }
+ if (DidPrune)
+ LI.removeEmptySubRanges();
+}
+
+/// Check if any of the subranges of @p LI contain a definition at @p Def.
+static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
+ if (VNI->def == Def)
+ return true;
+ }
+ return false;
+}
+
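+// Mark CR_Keep values of the main range as pruned when no subrange has a
+// corresponding definition; such stale main-range defs are left behind by
+// subrange pruning and require the main range to be shrunk afterwards.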
+void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
+ assert(&static_cast<LiveRange&>(LI) == &LR);
+
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ if (Vals[i].Resolution != CR_Keep)
+ continue;
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
+ continue;
+ Vals[i].Pruned = true;
+ ShrinkMainRange = true;
+ }
+}
+
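+// Drop value numbers for pruned, erasable IMPLICIT_DEFs that were resolved as
+// CR_Keep; they no longer serve any purpose in this live range.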
+void JoinVals::removeImplicitDefs() {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
+ continue;
+
+ VNInfo *VNI = LR.getValNumInfo(i);
+ VNI->markUnused();
+ LR.removeValNo(VNI);
+ }
+}
+
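+// Erase the instructions behind CR_Erase values and remove kept-but-pruned
+// IMPLICIT_DEF value numbers. Source registers of erased copies that are not
+// part of the coalesced pair are queued in ShrinkRegs for later shrinking.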
+void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
+ SmallVectorImpl<Register> &ShrinkRegs,
+ LiveInterval *LI) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ // Get the def location before markUnused() below invalidates it.
+ VNInfo *VNI = LR.getValNumInfo(i);
+ SlotIndex Def = VNI->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep: {
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LR.
+ // For intervals with subranges, removing a segment from the main range
+ // may require extending the previous segment: for each definition of
+ // a subregister, there will be a corresponding def in the main range.
+ // That def may fall in the middle of a segment from another subrange.
+ // In such cases, removing this def from the main range must be
+ // complemented by extending the main range to account for the liveness
+ // of the other subrange.
+ // The new end point of the main range segment to be extended.
+ SlotIndex NewEnd;
+ if (LI != nullptr) {
+ LiveRange::iterator I = LR.FindSegmentContaining(Def);
+ assert(I != LR.end());
+ // Do not extend beyond the end of the segment being removed.
+ // The segment may have been pruned in preparation for joining
+ // live ranges.
+ NewEnd = I->end;
+ }
+
+ LR.removeValNo(VNI);
+ // Note that this VNInfo is reused and still referenced in NewVNInfo,
+ // make it appear like an unused value number.
+ VNI->markUnused();
+
+ if (LI != nullptr && LI->hasSubRanges()) {
+ assert(static_cast<LiveRange*>(LI) == &LR);
+ // Determine the end point based on the subrange information:
+ // minimum of (earliest def of next segment,
+ // latest end point of containing segment)
+ SlotIndex ED, LE;
+ for (LiveInterval::SubRange &SR : LI->subranges()) {
+ LiveRange::iterator I = SR.find(Def);
+ if (I == SR.end())
+ continue;
+ if (I->start > Def)
+ ED = ED.isValid() ? std::min(ED, I->start) : I->start;
+ else
+ LE = LE.isValid() ? std::max(LE, I->end) : I->end;
+ }
+ if (LE.isValid())
+ NewEnd = std::min(NewEnd, LE);
+ if (ED.isValid())
+ NewEnd = std::min(NewEnd, ED);
+
+ // We only want to do the extension if there was a subrange that
+ // was live across Def.
+ if (LE.isValid()) {
+ LiveRange::iterator S = LR.find(Def);
+ if (S != LR.begin())
+ std::prev(S)->end = NewEnd;
+ }
+ }
+ LLVM_DEBUG({
+ dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
+ if (LI != nullptr)
+ dbgs() << "\t\t LHS = " << *LI << '\n';
+ });
+ [[fallthrough]];
+ }
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ Register Reg = MI->getOperand(1).getReg();
+ if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
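+// Join the subregister live ranges LRange and RRange for the lanes covered by
+// LaneMask. Conflicts must be resolvable here because joining the main ranges
+// has already succeeded.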
+void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask,
+ const CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
+ NewVNInfo, CP, LIS, TRI, true, true);
+ JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
+ NewVNInfo, CP, LIS, TRI, true, true);
+
+ // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
+ // We should be able to resolve all conflicts here as we could successfully do
+ // so on the main range already. There is, however, a problem when multiple
+ // ranges get mapped to the "overflow" lane mask bit, which creates unexpected
+ // interferences.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
+ }
+ if (!LHSVals.resolveConflicts(RHSVals) ||
+ !RHSVals.resolveConflicts(LHSVals)) {
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
+ }
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints, false);
+ RHSVals.pruneValues(LHSVals, EndPoints, false);
+
+ LHSVals.removeImplicitDefs();
+ RHSVals.removeImplicitDefs();
+
+ LRange.verify();
+ RRange.verify();
+
+ // Join RRange into LHS.
+ LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
+ NewVNInfo);
+
+ LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
+ << ' ' << LRange << "\n");
+ if (EndPoints.empty())
+ return;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ LLVM_DEBUG({
+ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
+ for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
+ dbgs() << EndPoints[i];
+ if (i != n-1)
+ dbgs() << ',';
+ }
+ dbgs() << ": " << LRange << '\n';
+ });
+ LIS->extendToIndices(LRange, EndPoints);
+}
+
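+// Merge the live range ToMerge into the subranges of LI covering LaneMask,
+// either seeding an empty subrange with it or joining it into an existing
+// one.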
+void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+ const LiveRange &ToMerge,
+ LaneBitmask LaneMask,
+ CoalescerPair &CP,
+ unsigned ComposeSubRegIdx) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ LI.refineSubRanges(
+ Allocator, LaneMask,
+ [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
+ if (SR.empty()) {
+ SR.assign(ToMerge, Allocator);
+ } else {
+ // joinSubRegRanges() destroys the merged range, so we need a copy.
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
+ }
+ },
+ *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
+}
+
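+// Consider LI too expensive to join once it has at least
+// LargeIntervalSizeThreshold value numbers and has already been visited
+// LargeIntervalFreqThreshold times; this bounds compile time on pathological
+// inputs.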
+bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
+ if (LI.valnos.size() < LargeIntervalSizeThreshold)
+ return false;
+ auto &Counter = LargeLIVisitCounter[LI.reg()];
+ if (Counter < LargeIntervalFreqThreshold) {
+ Counter++;
+ return false;
+ }
+ return true;
+}
+
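+// Join two virtual registers: map value numbers, resolve conflicts, merge
+// subranges, prune conflicting liveness, erase redundant instructions, and
+// finally join the live ranges and update debug-info tracking.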
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+ bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
+ JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
+ NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
+ JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
+ NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
+
+ LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
+
+ if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
+ return false;
+
+ // First compute NewVNInfo and the simple value mappings.
+ // Detect impossible conflicts early.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ return false;
+
+ // Some conflicts can only be resolved after all values have been mapped.
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ return false;
+
+ // All clear, the live ranges can be merged.
+ if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+
+ // Transform lanemasks from the LHS to masks in the coalesced register and
+ // create initial subranges if necessary.
+ unsigned DstIdx = CP.getDstIdx();
+ if (!LHS.hasSubRanges()) {
+ LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(DstIdx);
+ // LHS must support subregs or we wouldn't be in this codepath.
+ assert(Mask.any());
+ LHS.createSubRangeFrom(Allocator, Mask, LHS);
+ } else if (DstIdx != 0) {
+ // Transform LHS lanemasks to new register class if necessary.
+ for (LiveInterval::SubRange &R : LHS.subranges()) {
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
+ R.LaneMask = Mask;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
+ << '\n');
+
+ // Determine lanemasks of RHS in the coalesced register and merge subranges.
+ unsigned SrcIdx = CP.getSrcIdx();
+ if (!RHS.hasSubRanges()) {
+ LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(SrcIdx);
+ mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
+ } else {
+ // Pair up subranges and merge.
+ for (LiveInterval::SubRange &R : RHS.subranges()) {
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
+ mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+
+ // Pruning implicit defs from subranges may result in the main range
+ // having stale segments.
+ LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ }
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints, true);
+ RHSVals.pruneValues(LHSVals, EndPoints, true);
+
+ // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+ // registers to require trimming.
+ SmallVector<Register, 8> ShrinkRegs;
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
+ RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ while (!ShrinkRegs.empty())
+ shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+ // Scan and mark undef any DBG_VALUEs that would refer to a different value.
+ checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
+
+ // If the RHS covers any PHI locations that were tracked for debug-info, we
+ // must update tracking information to reflect the join.
+ auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
+ if (RegIt != RegToPHIIdx.end()) {
+ // Iterate over all the debug instruction numbers assigned this register.
+ for (unsigned InstID : RegIt->second) {
+ auto PHIIt = PHIValToPos.find(InstID);
+ assert(PHIIt != PHIValToPos.end());
+ const SlotIndex &SI = PHIIt->second.SI;
+
+ // Does the RHS cover the position of this PHI?
+ auto LII = RHS.find(SI);
+ if (LII == RHS.end() || LII->start > SI)
+ continue;
+
+ // Accept two kinds of subregister movement:
+ // * When we merge from one register class into a larger register:
+ // %1:gr16 = some-inst
+ // ->
+ // %2:gr32.sub_16bit = some-inst
+ // * When the PHI is already in a subregister, and the larger class
+ // is coalesced:
+ // %2:gr32.sub_16bit = some-inst
+ // %3:gr32 = COPY %2
+ // ->
+ // %3:gr32.sub_16bit = some-inst
+ // Test for subregister move:
+ if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
+ // If we're moving between different subregisters, ignore this join.
+ // The PHI will not get a location, dropping variable locations.
+ if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
+ continue;
+
+ // Update our tracking of where the PHI is.
+ PHIIt->second.Reg = CP.getDstReg();
+
+ // If we merge into a sub-register of a larger class (test above),
+ // update SubReg.
+ if (CP.getSrcIdx() != 0)
+ PHIIt->second.SubReg = CP.getSrcIdx();
+ }
+
+ // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
+ // different VRegs now. Copy old collection of debug instruction numbers and
+ // erase the old one:
+ auto InstrNums = RegIt->second;
+ RegToPHIIdx.erase(RegIt);
+
+ // There might already be PHIs being tracked in the destination VReg. Insert
+ // into an existing tracking collection, or insert a new one.
+ RegIt = RegToPHIIdx.find(CP.getDstReg());
+ if (RegIt != RegToPHIIdx.end())
+ RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
+ InstrNums.end());
+ else
+ RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
+ }
+
+ // Join RHS into LHS.
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
+
+ // Kill flags are going to be wrong if the live ranges were overlapping.
+ // Eventually, we should simply clear all kill flags when computing live
+ // ranges. They are reinserted after register allocation.
+ MRI->clearKillFlags(LHS.reg());
+ MRI->clearKillFlags(RHS.reg());
+
+ if (!EndPoints.empty()) {
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ LLVM_DEBUG({
+ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
+ for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
+ dbgs() << EndPoints[i];
+ if (i != n-1)
+ dbgs() << ',';
+ }
+ dbgs() << ": " << LHS << '\n';
+ });
+ LIS->extendToIndices((LiveRange&)LHS, EndPoints);
+ }
+
+ return true;
+}
+
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
+void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
+{
+ const SlotIndexes &Slots = *LIS->getSlotIndexes();
+ SmallVector<MachineInstr *, 8> ToInsert;
+
+ // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
+ // vreg => DbgValueLoc map.
+ auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
+ for (auto *X : ToInsert) {
+ for (const auto &Op : X->debug_operands()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DbgVRegToValues[Op.getReg()].push_back({Slot, X});
+ }
+ }
+
+ ToInsert.clear();
+ };
+
+ // Iterate over all instructions, collecting them into the ToInsert vector.
+ // Once a non-debug instruction is found, record the slot index of the
+ // collected DBG_VALUEs.
+ for (auto &MBB : MF) {
+ SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
+
+ for (auto &MI : MBB) {
+ if (MI.isDebugValue()) {
+ if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isVirtual();
+ }))
+ ToInsert.push_back(&MI);
+ } else if (!MI.isDebugOrPseudoInstr()) {
+ CurrentSlot = Slots.getInstructionIndex(MI);
+ CloseNewDVRange(CurrentSlot);
+ }
+ }
+
+ // Close range of DBG_VALUEs at the end of blocks.
+ CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
+ }
+
+ // Sort all DBG_VALUEs we've seen by slot number.
+ for (auto &Pair : DbgVRegToValues)
+ llvm::sort(Pair.second);
+}
+
+void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
+ LiveRange &LHS,
+ JoinVals &LHSVals,
+ LiveRange &RHS,
+ JoinVals &RHSVals) {
+ auto ScanForDstReg = [&](Register Reg) {
+ checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
+ };
+
+ auto ScanForSrcReg = [&](Register Reg) {
+ checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
+ };
+
+ // Scan for unsound updates of both the source and destination register.
+ ScanForSrcReg(CP.getSrcReg());
+ ScanForDstReg(CP.getDstReg());
+}
+
+void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
+ LiveRange &OtherLR,
+ LiveRange &RegLR,
+ JoinVals &RegVals) {
+ // Are there any DBG_VALUEs to examine?
+ auto VRegMapIt = DbgVRegToValues.find(Reg);
+ if (VRegMapIt == DbgVRegToValues.end())
+ return;
+
+ auto &DbgValueSet = VRegMapIt->second;
+ auto DbgValueSetIt = DbgValueSet.begin();
+ auto SegmentIt = OtherLR.begin();
+
+ bool LastUndefResult = false;
+ SlotIndex LastUndefIdx;
+
+ // If the "Other" register is live at a slot Idx, test whether Reg can
+ // safely be merged with it, or should be marked undef.
+ auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
+ &LastUndefIdx](SlotIndex Idx) -> bool {
+ // Our worst-case performance typically happens with asan, causing very
+ // many DBG_VALUEs of the same location. Cache a copy of the most recent
+ // result for this edge-case.
+ if (LastUndefIdx == Idx)
+ return LastUndefResult;
+
+ // If the other range was live, and Reg's was not, the register coalescer
+ // will not have tried to resolve any conflicts. We don't know whether
+ // the DBG_VALUE will refer to the same value number, so it must be made
+ // undef.
+ auto OtherIt = RegLR.find(Idx);
+ if (OtherIt == RegLR.end())
+ return true;
+
+ // Both the registers were live: examine the conflict resolution record for
+ // the value number Reg refers to. CR_Keep meant that this value number
+ // "won" and the merged register definitely refers to that value. CR_Erase
+ // means the value number was a redundant copy of the other value, which
+ // was coalesced and Reg deleted. It's safe to refer to the other register
+ // (which will be the source of the copy).
+ auto Resolution = RegVals.getResolution(OtherIt->valno->id);
+ LastUndefResult = Resolution != JoinVals::CR_Keep &&
+ Resolution != JoinVals::CR_Erase;
+ LastUndefIdx = Idx;
+ return LastUndefResult;
+ };
+
+ // Iterate over both the live-range of the "Other" register, and the set of
+ // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
+ // slot index. This relies on the DbgValueSet being ordered.
+ while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
+ if (DbgValueSetIt->first < SegmentIt->end) {
+ // "Other" is live and there is a DBG_VALUE of Reg: test if we should
+ // set it undef.
+ if (DbgValueSetIt->first >= SegmentIt->start) {
+ bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
+ bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
+ if (HasReg && ShouldUndefReg) {
+ // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
+ DbgValueSetIt->second->setDebugValueUndef();
+ continue;
+ }
+ }
+ ++DbgValueSetIt;
+ } else {
+ ++SegmentIt;
+ }
+ }
+}
+
+namespace {
+
+/// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+
+} // end anonymous namespace
+
+/// C-style comparator that sorts first based on the loop depth of the basic
+/// block (the unsigned), and then on the MBB number.
+///
+/// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const MBBPriorityInfo *LHS,
+ const MBBPriorityInfo *RHS) {
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ if (Copy->getOperand(1).isUndef())
+ return false;
+
+ Register SrcReg = Copy->getOperand(1).getReg();
+ Register DstReg = Copy->getOperand(0).getReg();
+ if (SrcReg.isPhysical() || DstReg.isPhysical())
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
+}
+
+void RegisterCoalescer::lateLiveIntervalUpdate() {
+ for (Register reg : ToBeUpdated) {
+ if (!LIS->hasInterval(reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(reg);
+ shrinkToUses(&LI, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ }
+ ToBeUpdated.clear();
+}
+
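+// Try to join every copy in CurrList. Entries that were joined, already
+// erased, or are not worth retrying are set to null. Returns true if any copy
+// was successfully joined.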
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
+ bool Progress = false;
+ for (MachineInstr *&MI : CurrList) {
+ if (!MI)
+ continue;
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.count(MI)) {
+ MI = nullptr;
+ continue;
+ }
+ bool Again = false;
+ bool Success = joinCopy(MI, Again);
+ Progress |= Success;
+ if (Success || !Again)
+ MI = nullptr;
+ }
+ return Progress;
+}
+
+/// Check if DstReg is a terminal node.
+/// I.e., it does not have any affinity other than \p Copy.
+static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
+ const MachineRegisterInfo *MRI) {
+ assert(Copy.isCopyLike());
+ // Check if the destination of this copy has any other affinity.
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
+ if (&MI != &Copy && MI.isCopyLike())
+ return false;
+ return true;
+}
+
+bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
+ assert(Copy.isCopyLike());
+ if (!UseTerminalRule)
+ return false;
+ Register SrcReg, DstReg;
+ unsigned SrcSubReg = 0, DstSubReg = 0;
+ if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return false;
+ // Check if the destination of this copy has any other affinity.
+ if (DstReg.isPhysical() ||
+ // If SrcReg is a physical register, the copy won't be coalesced.
+ // Ignoring it may have other side effects (like missing
+ // rematerialization). So keep it.
+ SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
+ return false;
+
+ // DstReg is a terminal node. Check if it interferes with any other
+ // copy involving SrcReg.
+ const MachineBasicBlock *OrigBB = Copy.getParent();
+ const LiveInterval &DstLI = LIS->getInterval(DstReg);
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
+ // Technically we should check if the weight of the new copy is
+ // interesting compared to the other one and update the weight
+ // of the copies accordingly. However, this would only work if
+ // we would gather all the copies first then coalesce, whereas
+ // right now we interleave both actions.
+ // For now, just consider the copies that are in the same block.
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+ continue;
+ Register OtherSrcReg, OtherReg;
+ unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
+ if (!isMoveInstr(*TRI, &MI, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ OtherSubReg))
+ return false;
+ if (OtherReg == SrcReg)
+ OtherReg = OtherSrcReg;
+ // Check if OtherReg is a non-terminal.
+ if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
+ continue;
+ // Check whether OtherReg interferes with DstReg.
+ if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+ LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
+ << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet, it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ if (JoinGlobalCopies) {
+ SmallVector<MachineInstr*, 2> LocalTerminals;
+ SmallVector<MachineInstr*, 2> GlobalTerminals;
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isCopyLike())
+ continue;
+ bool ApplyTerminalRule = applyTerminalRule(MI);
+ if (isLocalCopy(&MI, LIS)) {
+ if (ApplyTerminalRule)
+ LocalTerminals.push_back(&MI);
+ else
+ LocalWorkList.push_back(&MI);
+ } else {
+ if (ApplyTerminalRule)
+ GlobalTerminals.push_back(&MI);
+ else
+ WorkList.push_back(&MI);
+ }
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
+ WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
+ }
+ else {
+ SmallVector<MachineInstr*, 2> Terminals;
+ for (MachineInstr &MII : *MBB)
+ if (MII.isCopyLike()) {
+ if (applyTerminalRule(MII))
+ Terminals.push_back(&MII);
+ else
+ WorkList.push_back(&MII);
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ WorkList.append(Terminals.begin(), Terminals.end());
+ }
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ nullptr), WorkList.end());
+}
+
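+// Coalesce the copies on the local work list; entries that could not be
+// joined yet are moved to the global work list for another attempt.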
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
+void RegisterCoalescer::joinAllIntervals() {
+ LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineBasicBlock &MBB : *MF) {
+ MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
+ JoinSplitEdges && isSplitEdge(&MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = std::numeric_limits<unsigned>::max();
+ for (MBBPriorityInfo &MBB : MBBs) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBB.Depth;
+ }
+ copyCoalesceInMBB(MBB.MBB);
+ }
+ lateLiveIntervalUpdate();
+ coalesceLocals();
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ while (copyCoalesceWorkList(WorkList))
+ /* empty */ ;
+ lateLiveIntervalUpdate();
+}
+
+void RegisterCoalescer::releaseMemory() {
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
+ LargeLIVisitCounter.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
+ << "********** Function: " << fn.getName() << '\n');
+
+ // Variables changed between a setjmp and a longjump can have undefined value
+ // after the longjmp. This behaviour can be observed if such a variable is
+ // spilled, so longjmp won't restore the value in the spill slot.
+ // RegisterCoalescer should not run in functions with a setjmp to avoid
+ // merging such undefined variables with predictable ones.
+ //
+ // TODO: Could specifically disable coalescing registers live across setjmp
+ // calls
+ if (fn.exposesReturnsTwice()) {
+ LLVM_DEBUG(
+ dbgs() << "* Skipped as it exposes functions that returns twice.\n");
+ return false;
+ }
+
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ const TargetSubtargetInfo &STI = fn.getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = STI.enableJoinGlobalCopies();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // If there are PHIs tracked by debug-info, they will need updating during
+ // coalescing. Build an index of those PHIs to ease updating.
+ SlotIndexes *Slots = LIS->getSlotIndexes();
+ for (const auto &DebugPHI : MF->DebugPHIPositions) {
+ MachineBasicBlock *MBB = DebugPHI.second.MBB;
+ Register Reg = DebugPHI.second.Reg;
+ unsigned SubReg = DebugPHI.second.SubReg;
+ SlotIndex SI = Slots->getMBBStartIdx(MBB);
+ PHIValPos P = {SI, Reg, SubReg};
+ PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
+ RegToPHIIdx[Reg].push_back(DebugPHI.first);
+ }
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
+ if (VerifyCoalescing)
+ MF->verify(this, "Before register coalescing");
+
+ DbgVRegToValues.clear();
+ buildVRegToDbgValueMap(fn);
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining)
+ joinAllIntervals();
+
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
+ << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ Register Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg)) {
+ LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+ ++NumInflated;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ // If the inflated register class does not support subregisters anymore
+ // remove the subranges.
+ if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+ LI.clearSubRanges();
+ } else {
+#ifndef NDEBUG
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ // If subranges are still supported, then the same subregs
+ // should still be supported.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ assert((S.LaneMask & ~MaxMask).none());
+ }
+#endif
+ }
+ }
+ }
+ }
+
+ // After coalescing, update any PHIs that are being tracked by debug-info
+ // with their new VReg locations.
+ for (auto &p : MF->DebugPHIPositions) {
+ auto it = PHIValToPos.find(p.first);
+ assert(it != PHIValToPos.end());
+ p.second.Reg = it->second.Reg;
+ p.second.SubReg = it->second.SubReg;
+ }
+
+ PHIValToPos.clear();
+ RegToPHIIdx.clear();
+
+ LLVM_DEBUG(dump());
+ if (VerifyCoalescing)
+ MF->verify(this, "After register coalescing");
+ return true;
+}
+
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ LIS->print(O, m);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 000000000000..f265d93fb0d6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,114 @@
+//===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+
+#include "llvm/CodeGen/Register.h"
+
+namespace llvm {
+
+class MachineInstr;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+ /// A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetRegisterInfo &TRI;
+
+ /// The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ Register DstReg;
+
+ /// The virtual register that will be coalesced into DstReg.
+ Register SrcReg;
+
+ /// The sub-register index of the old DstReg in the new coalesced register.
+ unsigned DstIdx = 0;
+
+ /// The sub-register index of the old SrcReg in the new coalesced register.
+ unsigned SrcIdx = 0;
+
+ /// True when the original copy was a partial subregister copy.
+ bool Partial = false;
+
+ /// True when both regs are virtual and newRC is constrained.
+ bool CrossClass = false;
+
+ /// True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped = false;
+
+ /// The register class of the coalesced register, or NULL if DstReg
+ /// is a physreg. This register class may be a super-register of both
+ /// SrcReg and DstReg.
+ const TargetRegisterClass *NewRC = nullptr;
+
+ public:
+ CoalescerPair(const TargetRegisterInfo &tri) : TRI(tri) {}
+
+ /// Create a CoalescerPair representing a virtreg-to-physreg copy.
+ /// No need to call setRegisters().
+ CoalescerPair(Register VirtReg, MCRegister PhysReg,
+ const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
+
+ /// Set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
+ bool flip();
+
+ /// Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// Return true if DstReg is a physical register.
+ bool isPhys() const { return !NewRC; }
+
+ /// Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
+
+ /// Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
+
+ /// Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return Flipped; }
+
+ /// Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ Register getDstReg() const { return DstReg; }
+
+ /// Return the virtual register that will be coalesced away.
+ Register getSrcReg() const { return SrcReg; }
+
+ /// Return the subregister index that DstReg will be coalesced into, or 0.
+ unsigned getDstIdx() const { return DstIdx; }
+
+ /// Return the subregister index that SrcReg will be coalesced into, or 0.
+ unsigned getSrcIdx() const { return SrcIdx; }
+
+ /// Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return NewRC; }
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
new file mode 100644
index 000000000000..f86aa3a16720
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -0,0 +1,1392 @@
+//===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterPressure class which can be used to track
+// MachineInstr level register pressure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const MachineRegisterInfo &MRI, unsigned Reg,
+ LaneBitmask PrevMask, LaneBitmask NewMask) {
+ assert((PrevMask & ~NewMask).none() && "Must not remove bits");
+ if (PrevMask.any() || NewMask.none())
+ return;
+
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI)
+ CurrSetPressure[*PSetI] += Weight;
+}
+
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
+static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const MachineRegisterInfo &MRI, Register Reg,
+ LaneBitmask PrevMask, LaneBitmask NewMask) {
+ // assert((NewMask & ~PrevMask).none() && "Must not add bits");
+ if (NewMask.any() || PrevMask.none())
+ return;
+
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSetI] -= Weight;
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
+ const TargetRegisterInfo *TRI) {
+ bool Empty = true;
+ for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+ if (SetPressure[i] != 0) {
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+ Empty = false;
+ }
+ }
+ if (Empty)
+ dbgs() << "\n";
+}
+
+LLVM_DUMP_METHOD
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+ dbgs() << "Max Pressure: ";
+ dumpRegSetPressure(MaxSetPressure, TRI);
+ dbgs() << "Live In: ";
+ for (const RegisterMaskPair &P : LiveInRegs) {
+ dbgs() << printVRegOrUnit(P.RegUnit, TRI);
+ if (!P.LaneMask.all())
+ dbgs() << ':' << PrintLaneMask(P.LaneMask);
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (const RegisterMaskPair &P : LiveOutRegs) {
+ dbgs() << printVRegOrUnit(P.RegUnit, TRI);
+ if (!P.LaneMask.all())
+ dbgs() << ':' << PrintLaneMask(P.LaneMask);
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+}
+
+LLVM_DUMP_METHOD
+void RegPressureTracker::dump() const {
+ if (!isTopClosed() || !isBottomClosed()) {
+ dbgs() << "Curr Pressure: ";
+ dumpRegSetPressure(CurrSetPressure, TRI);
+ }
+ P.dump(TRI);
+}
+
+LLVM_DUMP_METHOD
+void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+ const char *sep = "";
+ for (const PressureChange &Change : *this) {
+ if (!Change.isValid())
+ break;
+ dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet())
+ << " " << Change.getUnitInc();
+ sep = " ";
+ }
+ dbgs() << '\n';
+}
+
+LLVM_DUMP_METHOD
+void PressureChange::dump() const {
+ dbgs() << "[" << getPSetOrMax() << ", " << getUnitInc() << "]\n";
+}
+
+void RegPressureDelta::dump() const {
+ dbgs() << "[Excess=";
+ Excess.dump();
+ dbgs() << ", CriticalMax=";
+ CriticalMax.dump();
+ dbgs() << ", CurrentMax=";
+ CurrentMax.dump();
+ dbgs() << "]\n";
+}
+
+#endif
+
+void RegPressureTracker::increaseRegPressure(Register RegUnit,
+ LaneBitmask PreviousMask,
+ LaneBitmask NewMask) {
+ if (PreviousMask.any() || NewMask.none())
+ return;
+
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ CurrSetPressure[*PSetI] += Weight;
+ P.MaxSetPressure[*PSetI] =
+ std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
+ }
+}
+
+void RegPressureTracker::decreaseRegPressure(Register RegUnit,
+ LaneBitmask PreviousMask,
+ LaneBitmask NewMask) {
+ decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask);
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is not less than or equal to the next index, open it.
+/// We happen to need the SlotIndex for the next top for pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is not greater than the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it.
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
+ if (BottomPos != PrevBottom)
+ return;
+ BottomPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+void LiveRegSet::init(const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned NumRegUnits = TRI.getNumRegs();
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ Regs.setUniverse(NumRegUnits + NumVirtRegs);
+ this->NumRegUnits = NumRegUnits;
+}
+
+void LiveRegSet::clear() {
+ Regs.clear();
+}
+
+static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) {
+ if (Register::isVirtualRegister(Reg))
+ return &LIS.getInterval(Reg);
+ return LIS.getCachedRegUnit(Reg);
+}
+
+void RegPressureTracker::reset() {
+ MBB = nullptr;
+ LIS = nullptr;
+
+ CurrSetPressure.clear();
+ LiveThruPressure.clear();
+ P.MaxSetPressure.clear();
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+
+ LiveRegs.clear();
+ UntiedDefs.clear();
+}
+
+/// Setup the RegPressureTracker.
+///
+/// TODO: Add support for pressure without LiveIntervals.
+void RegPressureTracker::init(const MachineFunction *mf,
+ const RegisterClassInfo *rci,
+ const LiveIntervals *lis,
+ const MachineBasicBlock *mbb,
+ MachineBasicBlock::const_iterator pos,
+ bool TrackLaneMasks, bool TrackUntiedDefs) {
+ reset();
+
+ MF = mf;
+ TRI = MF->getSubtarget().getRegisterInfo();
+ RCI = rci;
+ MRI = &MF->getRegInfo();
+ MBB = mbb;
+ this->TrackUntiedDefs = TrackUntiedDefs;
+ this->TrackLaneMasks = TrackLaneMasks;
+
+ if (RequireIntervals) {
+ assert(lis && "IntervalPressure requires LiveIntervals");
+ LIS = lis;
+ }
+
+ CurrPos = pos;
+ CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
+
+ P.MaxSetPressure = CurrSetPressure;
+
+ LiveRegs.init(*MRI);
+ if (TrackUntiedDefs)
+ UntiedDefs.setUniverse(MRI->getNumVirtRegs());
+}
+
+/// Does this pressure result have a valid top position and live ins.
+bool RegPressureTracker::isTopClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).TopIdx.isValid();
+ return (static_cast<RegionPressure&>(P).TopPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Does this pressure result have a valid bottom position and live outs.
+bool RegPressureTracker::isBottomClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).BottomIdx.isValid();
+ return (static_cast<RegionPressure&>(P).BottomPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+SlotIndex RegPressureTracker::getCurrSlot() const {
+ MachineBasicBlock::const_iterator IdxPos =
+ skipDebugInstructionsForward(CurrPos, MBB->end());
+ if (IdxPos == MBB->end())
+ return LIS->getMBBEndIdx(MBB);
+ return LIS->getInstructionIndex(*IdxPos).getRegSlot();
+}
+
+/// Set the boundary for the top of the region and summarize live ins.
+void RegPressureTracker::closeTop() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).TopPos = CurrPos;
+
+ assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
+ P.LiveInRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveInRegs);
+}
+
+/// Set the boundary for the bottom of the region and summarize live outs.
+void RegPressureTracker::closeBottom() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).BottomPos = CurrPos;
+
+ assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
+ P.LiveOutRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveOutRegs);
+}
+
+/// Finalize the region boundaries and record live ins and live outs.
+void RegPressureTracker::closeRegion() {
+ if (!isTopClosed() && !isBottomClosed()) {
+ assert(LiveRegs.size() == 0 && "no region boundary");
+ return;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+ else if (!isTopClosed())
+ closeTop();
+ // If both top and bottom are closed, do nothing.
+}
+
+/// The register tracker is unaware of global liveness so ignores normal
+/// live-thru ranges. However, two-address or coalesced chains can also lead
+/// to live ranges with no holes. Count these to inform heuristics that we
+/// can never drop below this pressure.
+void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
+ LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
+ assert(isBottomClosed() && "need bottom-up tracking to initialize.");
+ for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
+ Register RegUnit = Pair.RegUnit;
+ if (RegUnit.isVirtual() && !RPTracker.hasUntiedDef(RegUnit))
+ increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
+ LaneBitmask::getNone(), Pair.LaneMask);
+ }
+}
+
+static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
+ Register RegUnit) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end())
+ return LaneBitmask::getNone();
+ return I->LaneMask;
+}
+
+static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ RegisterMaskPair Pair) {
+ Register RegUnit = Pair.RegUnit;
+ assert(Pair.LaneMask.any());
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end()) {
+ RegUnits.push_back(Pair);
+ } else {
+ I->LaneMask |= Pair.LaneMask;
+ }
+}
+
+static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ Register RegUnit) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end()) {
+ RegUnits.push_back(RegisterMaskPair(RegUnit, LaneBitmask::getNone()));
+ } else {
+ I->LaneMask = LaneBitmask::getNone();
+ }
+}
+
+static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ RegisterMaskPair Pair) {
+ Register RegUnit = Pair.RegUnit;
+ assert(Pair.LaneMask.any());
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I != RegUnits.end()) {
+ I->LaneMask &= ~Pair.LaneMask;
+ if (I->LaneMask.none())
+ RegUnits.erase(I);
+ }
+}
+
+static LaneBitmask
+getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
+ LaneBitmask SafeDefault,
+ bool (*Property)(const LiveRange &LR, SlotIndex Pos)) {
+ if (RegUnit.isVirtual()) {
+ const LiveInterval &LI = LIS.getInterval(RegUnit);
+ LaneBitmask Result;
+ if (TrackLaneMasks && LI.hasSubRanges()) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (Property(SR, Pos))
+ Result |= SR.LaneMask;
+ }
+ } else if (Property(LI, Pos)) {
+ Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit)
+ : LaneBitmask::getAll();
+ }
+
+ return Result;
+ } else {
+ const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
+    // Be prepared for missing live ranges: we usually do not compute live
+    // ranges for physical registers on targets with many registers (GPUs).
+ if (LR == nullptr)
+ return SafeDefault;
+ return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
+ }
+}
+
+static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit,
+ SlotIndex Pos) {
+ return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getAll(),
+ [](const LiveRange &LR, SlotIndex Pos) {
+ return LR.liveAt(Pos);
+ });
+}
+
+namespace {
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+///
+/// FIXME: always ignore tied opers
+class RegisterOperandsCollector {
+ friend class llvm::RegisterOperands;
+
+ RegisterOperands &RegOpers;
+ const TargetRegisterInfo &TRI;
+ const MachineRegisterInfo &MRI;
+ bool IgnoreDead;
+
+ RegisterOperandsCollector(RegisterOperands &RegOpers,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI, bool IgnoreDead)
+ : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {}
+
+ void collectInstr(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ collectOperand(*OperI);
+
+ // Remove redundant physreg dead defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ removeRegLanes(RegOpers.DeadDefs, P);
+ }
+
+ void collectInstrLanes(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ collectOperandLanes(*OperI);
+
+ // Remove redundant physreg dead defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ removeRegLanes(RegOpers.DeadDefs, P);
+ }
+
+ /// Push this operand's register onto the correct vectors.
+ void collectOperand(const MachineOperand &MO) const {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ Register Reg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MO.isUndef() && !MO.isInternalRead())
+ pushReg(Reg, RegOpers.Uses);
+ } else {
+ assert(MO.isDef());
+ // Subregister definitions may imply a register read.
+ if (MO.readsReg())
+ pushReg(Reg, RegOpers.Uses);
+
+ if (MO.isDead()) {
+ if (!IgnoreDead)
+ pushReg(Reg, RegOpers.DeadDefs);
+ } else
+ pushReg(Reg, RegOpers.Defs);
+ }
+ }
+
+ void pushReg(Register Reg,
+ SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
+ if (Reg.isVirtual()) {
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ addRegLanes(RegUnits, RegisterMaskPair(Unit, LaneBitmask::getAll()));
+ }
+ }
+
+ void collectOperandLanes(const MachineOperand &MO) const {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ Register Reg = MO.getReg();
+ unsigned SubRegIdx = MO.getSubReg();
+ if (MO.isUse()) {
+ if (!MO.isUndef() && !MO.isInternalRead())
+ pushRegLanes(Reg, SubRegIdx, RegOpers.Uses);
+ } else {
+ assert(MO.isDef());
+ // Treat read-undef subreg defs as definitions of the whole register.
+ if (MO.isUndef())
+ SubRegIdx = 0;
+
+ if (MO.isDead()) {
+ if (!IgnoreDead)
+ pushRegLanes(Reg, SubRegIdx, RegOpers.DeadDefs);
+ } else
+ pushRegLanes(Reg, SubRegIdx, RegOpers.Defs);
+ }
+ }
+
+ void pushRegLanes(Register Reg, unsigned SubRegIdx,
+ SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
+ if (Reg.isVirtual()) {
+ LaneBitmask LaneMask = SubRegIdx != 0
+ ? TRI.getSubRegIndexLaneMask(SubRegIdx)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ addRegLanes(RegUnits, RegisterMaskPair(Unit, LaneBitmask::getAll()));
+ }
+ }
+};
+
+} // end anonymous namespace
+
+void RegisterOperands::collect(const MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, bool IgnoreDead) {
+ RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
+ if (TrackLaneMasks)
+ Collector.collectInstrLanes(MI);
+ else
+ Collector.collectInstr(MI);
+}
+
+void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ SlotIndex SlotIdx = LIS.getInstructionIndex(MI);
+ for (auto *RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
+ Register Reg = RI->RegUnit;
+ const LiveRange *LR = getLiveRange(LIS, Reg);
+ if (LR != nullptr) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ if (LRQ.isDeadDef()) {
+        // LiveIntervals knows this is a dead def even though its MachineOperand
+        // is not flagged as such.
+ DeadDefs.push_back(*RI);
+ RI = Defs.erase(RI);
+ continue;
+ }
+ }
+ ++RI;
+ }
+}
+
+void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ SlotIndex Pos,
+ MachineInstr *AddFlagsMI) {
+ for (auto *I = Defs.begin(); I != Defs.end();) {
+ LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
+ Pos.getDeadSlot());
+ // If the def is all that is live after the instruction, then in case
+ // of a subregister def we need a read-undef flag.
+ Register RegUnit = I->RegUnit;
+ if (RegUnit.isVirtual() && AddFlagsMI != nullptr &&
+ (LiveAfter & ~I->LaneMask).none())
+ AddFlagsMI->setRegisterDefReadUndef(RegUnit);
+
+ LaneBitmask ActualDef = I->LaneMask & LiveAfter;
+ if (ActualDef.none()) {
+ I = Defs.erase(I);
+ } else {
+ I->LaneMask = ActualDef;
+ ++I;
+ }
+ }
+ for (auto *I = Uses.begin(); I != Uses.end();) {
+ LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
+ Pos.getBaseIndex());
+ LaneBitmask LaneMask = I->LaneMask & LiveBefore;
+ if (LaneMask.none()) {
+ I = Uses.erase(I);
+ } else {
+ I->LaneMask = LaneMask;
+ ++I;
+ }
+ }
+ if (AddFlagsMI != nullptr) {
+ for (const RegisterMaskPair &P : DeadDefs) {
+ Register RegUnit = P.RegUnit;
+ if (!RegUnit.isVirtual())
+ continue;
+ LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
+ Pos.getDeadSlot());
+ if (LiveAfter.none())
+ AddFlagsMI->setRegisterDefReadUndef(RegUnit);
+ }
+ }
+}
+
+/// Initialize an array of N PressureDiffs.
+void PressureDiffs::init(unsigned N) {
+ Size = N;
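+  // Reuse (and zero) the existing allocation when it is already large enough.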
+ if (N <= Max) {
+ memset(PDiffArray, 0, N * sizeof(PressureDiff));
+ return;
+ }
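+  // Otherwise free the old array and allocate a zero-initialized one of size N.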
+ Max = Size;
+ free(PDiffArray);
+ PDiffArray = static_cast<PressureDiff*>(safe_calloc(N, sizeof(PressureDiff)));
+}
+
+void PressureDiffs::addInstruction(unsigned Idx,
+ const RegisterOperands &RegOpers,
+ const MachineRegisterInfo &MRI) {
+ PressureDiff &PDiff = (*this)[Idx];
+ assert(!PDiff.begin()->isValid() && "stale PDiff");
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ PDiff.addPressureChange(P.RegUnit, true, &MRI);
+
+ for (const RegisterMaskPair &P : RegOpers.Uses)
+ PDiff.addPressureChange(P.RegUnit, false, &MRI);
+}
+
+/// Add a change in pressure to the pressure diff of a given instruction.
+void PressureDiff::addPressureChange(Register RegUnit, bool IsDec,
+ const MachineRegisterInfo *MRI) {
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+ int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ // Find an existing entry in the pressure diff for this PSet.
+ PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
+ for (; I != E && I->isValid(); ++I) {
+ if (I->getPSet() >= *PSetI)
+ break;
+ }
+ // If all pressure sets are more constrained, skip the remaining PSets.
+ if (I == E)
+ break;
+ // Insert this PressureChange.
+ if (!I->isValid() || I->getPSet() != *PSetI) {
+ PressureChange PTmp = PressureChange(*PSetI);
+ for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
+ std::swap(*J, PTmp);
+ }
+ // Update the units for this pressure set.
+ unsigned NewUnitInc = I->getUnitInc() + Weight;
+ if (NewUnitInc != 0) {
+ I->setUnitInc(NewUnitInc);
+ } else {
+ // Remove entry
+ PressureDiff::iterator J;
+ for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
+ *I = *J;
+ *I = PressureChange();
+ }
+ }
+}
+
+/// Force liveness of registers.
+void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) {
+ for (const RegisterMaskPair &P : Regs) {
+ LaneBitmask PrevMask = LiveRegs.insert(P);
+ LaneBitmask NewMask = PrevMask | P.LaneMask;
+ increaseRegPressure(P.RegUnit, PrevMask, NewMask);
+ }
+}
+
+void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
+ SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
+ assert(Pair.LaneMask.any());
+
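+  // Record the new lanes in the live-in/out list and account for them in the
+  // max pressure only; the caller adjusts the current pressure.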
+ Register RegUnit = Pair.RegUnit;
+ auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ LaneBitmask PrevMask;
+ LaneBitmask NewMask;
+ if (I == LiveInOrOut.end()) {
+ PrevMask = LaneBitmask::getNone();
+ NewMask = Pair.LaneMask;
+ LiveInOrOut.push_back(Pair);
+ } else {
+ PrevMask = I->LaneMask;
+ NewMask = PrevMask | Pair.LaneMask;
+ I->LaneMask = NewMask;
+ }
+ increaseSetPressure(P.MaxSetPressure, *MRI, RegUnit, PrevMask, NewMask);
+}
+
+void RegPressureTracker::discoverLiveIn(RegisterMaskPair Pair) {
+ discoverLiveInOrOut(Pair, P.LiveInRegs);
+}
+
+void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) {
+ discoverLiveInOrOut(Pair, P.LiveOutRegs);
+}
+
+void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
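+  // Temporarily raise pressure by the dead-def lanes so the peak is reflected
+  // in the max pressure.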
+ for (const RegisterMaskPair &P : DeadDefs) {
+ Register Reg = P.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ increaseRegPressure(Reg, LiveMask, BumpedMask);
+ }
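+  // Then lower it back down, leaving the current pressure unchanged.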
+ for (const RegisterMaskPair &P : DeadDefs) {
+ Register Reg = P.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ decreaseRegPressure(Reg, BumpedMask, LiveMask);
+ }
+}
+
+/// Recede across the previous instruction. If LiveUses is provided, record any
+/// RegUnits that are made live by the current instruction's uses. This includes
+/// registers that are both defined and used by the instruction. If a pressure
+/// difference pointer is provided, record the changes in pressure caused by this
+/// instruction independent of liveness.
+void RegPressureTracker::recede(const RegisterOperands &RegOpers,
+ SmallVectorImpl<RegisterMaskPair> *LiveUses) {
+ assert(!CurrPos->isDebugOrPseudoInstr());
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ // TODO: consider earlyclobbers?
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ Register Reg = Def.RegUnit;
+
+ LaneBitmask PreviousMask = LiveRegs.erase(Def);
+ LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
+
+ LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask;
+ if (LiveOut.any()) {
+ discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
+ // Retroactively model effects on pressure of the live out lanes.
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, LaneBitmask::getNone(),
+ LiveOut);
+ PreviousMask = LiveOut;
+ }
+
+ if (NewMask.none()) {
+ // Add a 0 entry to LiveUses as a marker that the complete vreg has become
+ // dead.
+ if (TrackLaneMasks && LiveUses != nullptr)
+ setRegZero(*LiveUses, Reg);
+ }
+
+ decreaseRegPressure(Reg, PreviousMask, NewMask);
+ }
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+
+ // Generate liveness for uses.
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ Register Reg = Use.RegUnit;
+ assert(Use.LaneMask.any());
+ LaneBitmask PreviousMask = LiveRegs.insert(Use);
+ LaneBitmask NewMask = PreviousMask | Use.LaneMask;
+ if (NewMask == PreviousMask)
+ continue;
+
+ // Did the register just become live?
+ if (PreviousMask.none()) {
+ if (LiveUses != nullptr) {
+ if (!TrackLaneMasks) {
+ addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ } else {
+ auto I =
+ llvm::find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
+ bool IsRedef = I != LiveUses->end();
+ if (IsRedef) {
+ // ignore re-defs here...
+ assert(I->LaneMask.none());
+ removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ } else {
+ addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ }
+ }
+ }
+
+      // Discover live outs if this may be the first occurrence of this register.
+ if (RequireIntervals) {
+ LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx);
+ if (LiveOut.any())
+ discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
+ }
+ }
+
+ increaseRegPressure(Reg, PreviousMask, NewMask);
+ }
+ if (TrackUntiedDefs) {
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ Register RegUnit = Def.RegUnit;
+ if (RegUnit.isVirtual() &&
+ (LiveRegs.contains(RegUnit) & Def.LaneMask).none())
+ UntiedDefs.insert(RegUnit);
+ }
+ }
+}
+
+void RegPressureTracker::recedeSkipDebugValues() {
+ assert(CurrPos != MBB->begin());
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ CurrPos = prev_nodbg(CurrPos, MBB->begin());
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals && !CurrPos->isDebugOrPseudoInstr())
+ SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+}
+
+void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
+ recedeSkipDebugValues();
+ if (CurrPos->isDebugInstr() || CurrPos->isPseudoProbe()) {
+ // It's possible to only have debug_value and pseudo probe instructions and
+ // hit the start of the block.
+ assert(CurrPos == MBB->begin());
+ return;
+ }
+
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ } else if (RequireIntervals) {
+ RegOpers.detectDeadDefs(MI, *LIS);
+ }
+
+ recede(RegOpers, LiveUses);
+}
+
+/// Advance across the current instruction.
+void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
+ assert(!TrackUntiedDefs && "unsupported mode");
+ assert(CurrPos != MBB->end());
+ if (!isTopClosed())
+ closeTop();
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = getCurrSlot();
+
+ // Open the bottom of the region using slot indexes.
+ if (isBottomClosed()) {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).openBottom(SlotIdx);
+ else
+ static_cast<RegionPressure&>(P).openBottom(CurrPos);
+ }
+
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ Register Reg = Use.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
+ if (LiveIn.any()) {
+ discoverLiveIn(RegisterMaskPair(Reg, LiveIn));
+ increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn);
+ LiveRegs.insert(RegisterMaskPair(Reg, LiveIn));
+ }
+ // Kill liveness at last uses.
+ if (RequireIntervals) {
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask.any()) {
+ LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask));
+ decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask);
+ }
+ }
+ }
+
+ // Generate liveness for defs.
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ LaneBitmask PreviousMask = LiveRegs.insert(Def);
+ LaneBitmask NewMask = PreviousMask | Def.LaneMask;
+ increaseRegPressure(Def.RegUnit, PreviousMask, NewMask);
+ }
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Find the next instruction.
+ CurrPos = next_nodbg(CurrPos, MBB->end());
+}
+
+void RegPressureTracker::advance() {
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = getCurrSlot();
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ }
+ advance(RegOpers);
+}
+
+/// Find the max change in excess pressure across all sets.
+static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
+ ArrayRef<unsigned> NewPressureVec,
+ RegPressureDelta &Delta,
+ const RegisterClassInfo *RCI,
+ ArrayRef<unsigned> LiveThruPressureVec) {
+ Delta.Excess = PressureChange();
+ for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldPressureVec[i];
+ unsigned PNew = NewPressureVec[i];
+ int PDiff = (int)PNew - (int)POld;
+ if (!PDiff) // No change in this set in the common case.
+ continue;
+ // Only consider change beyond the limit.
+ unsigned Limit = RCI->getRegPressureSetLimit(i);
+ if (!LiveThruPressureVec.empty())
+ Limit += LiveThruPressureVec[i];
+
+ if (Limit > POld) {
+ if (Limit > PNew)
+ PDiff = 0; // Under the limit
+ else
+ PDiff = PNew - Limit; // Just exceeded limit.
+ } else if (Limit > PNew)
+ PDiff = Limit - POld; // Just obeyed limit.
+
+ if (PDiff) {
+ Delta.Excess = PressureChange(i);
+ Delta.Excess.setUnitInc(PDiff);
+ break;
+ }
+ }
+}
+
+/// Find the max change in max pressure that either surpasses a critical PSet
+/// limit or exceeds the current MaxPressureLimit.
+///
+/// FIXME: comparing each element of the old and new MaxPressure vectors here is
+/// silly. It's done now to demonstrate the concept but will go away with a
+/// RegPressureTracker API change to work with pressure differences.
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureChange();
+ Delta.CurrentMax = PressureChange();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ if (!Delta.CriticalMax.isValid()) {
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ if (PDiff > 0) {
+ Delta.CriticalMax = PressureChange(i);
+ Delta.CriticalMax.setUnitInc(PDiff);
+ }
+ }
+ }
+ // Find the first increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) {
+ Delta.CurrentMax = PressureChange(i);
+ Delta.CurrentMax.setUnitInc(PNew - POld);
+ if (CritIdx == CritEnd || Delta.CriticalMax.isValid())
+ break;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true);
+ assert(RegOpers.DeadDefs.size() == 0);
+ if (TrackLaneMasks)
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ else if (RequireIntervals)
+ RegOpers.detectDeadDefs(*MI, *LIS);
+
+  // Boost max pressure for all dead defs together: bumpDeadDefs raises and then
+  // restores CurrSetPressure, so only MaxSetPressure keeps the increase.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs) {
+ Register Reg = P.RegUnit;
+ LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
+ LaneBitmask DefLanes = P.LaneMask;
+ LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes;
+ decreaseRegPressure(Reg, LiveLanes, LiveAfter);
+ }
+ // Generate liveness for uses.
+ for (const RegisterMaskPair &P : RegOpers.Uses) {
+ Register Reg = P.RegUnit;
+ LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
+ increaseRegPressure(Reg, LiveLanes, LiveAfter);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// This is expensive for an on-the-fly query because it calls
+/// bumpUpwardPressure to recompute the pressure sets based on current
+/// liveness. This mainly exists to verify correctness, e.g. with
+/// -verify-misched. getUpwardPressureDelta is the fast version of this query
+/// that uses the per-SUnit cache of the PressureDiff.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
+ RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
+ LiveThruPressure);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+ Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+
+#ifndef NDEBUG
+ if (!PDiff)
+ return;
+
+ // Check if the alternate algorithm yields the same result.
+ RegPressureDelta Delta2;
+ getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit);
+ if (Delta != Delta2) {
+ dbgs() << "PDiff: ";
+ PDiff->dump(*TRI);
+ dbgs() << "DELTA: " << *MI;
+ if (Delta.Excess.isValid())
+ dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet())
+ << " " << Delta.Excess.getUnitInc() << "\n";
+ if (Delta.CriticalMax.isValid())
+ dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet())
+ << " " << Delta.CriticalMax.getUnitInc() << "\n";
+ if (Delta.CurrentMax.isValid())
+ dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet())
+ << " " << Delta.CurrentMax.getUnitInc() << "\n";
+ if (Delta2.Excess.isValid())
+ dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet())
+ << " " << Delta2.Excess.getUnitInc() << "\n";
+ if (Delta2.CriticalMax.isValid())
+ dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet())
+ << " " << Delta2.CriticalMax.getUnitInc() << "\n";
+ if (Delta2.CurrentMax.isValid())
+ dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet())
+ << " " << Delta2.CurrentMax.getUnitInc() << "\n";
+ llvm_unreachable("RegP Delta Mismatch");
+ }
+#endif
+}
+
+/// This is the fast version of querying register pressure that does not
+/// directly depend on current liveness.
+///
+/// @param Delta captures information needed for heuristics.
+///
+/// @param CriticalPSets Are the pressure sets that are known to exceed some
+/// limit within the region, not necessarily at the current position.
+///
+/// @param MaxPressureLimit Is the max pressure within the region, not
+/// necessarily at the current position.
+void RegPressureTracker::
+getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
+ RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) const {
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (PressureDiff::const_iterator
+ PDiffI = PDiff.begin(), PDiffE = PDiff.end();
+ PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) {
+
+ unsigned PSetID = PDiffI->getPSet();
+ unsigned Limit = RCI->getRegPressureSetLimit(PSetID);
+ if (!LiveThruPressure.empty())
+ Limit += LiveThruPressure[PSetID];
+
+ unsigned POld = CurrSetPressure[PSetID];
+ unsigned MOld = P.MaxSetPressure[PSetID];
+ unsigned MNew = MOld;
+ // Ignore DeadDefs here because they aren't captured by PressureChange.
+ unsigned PNew = POld + PDiffI->getUnitInc();
+ assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld)
+ && "PSet overflow/underflow");
+ if (PNew > MOld)
+ MNew = PNew;
+ // Check if current pressure has exceeded the limit.
+ if (!Delta.Excess.isValid()) {
+ unsigned ExcessInc = 0;
+ if (PNew > Limit)
+ ExcessInc = POld > Limit ? PNew - POld : PNew - Limit;
+ else if (POld > Limit)
+ ExcessInc = Limit - POld;
+ if (ExcessInc) {
+ Delta.Excess = PressureChange(PSetID);
+ Delta.Excess.setUnitInc(ExcessInc);
+ }
+ }
+ // Check if max pressure has exceeded a critical pressure set max.
+ if (MNew == MOld)
+ continue;
+ if (!Delta.CriticalMax.isValid()) {
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
+ int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ if (CritInc > 0 && CritInc <= std::numeric_limits<int16_t>::max()) {
+ Delta.CriticalMax = PressureChange(PSetID);
+ Delta.CriticalMax.setUnitInc(CritInc);
+ }
+ }
+ }
+ // Check if max pressure has exceeded the current max.
+ if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) {
+ Delta.CurrentMax = PressureChange(PSetID);
+ Delta.CurrentMax.setUnitInc(MNew - MOld);
+ }
+ }
+}
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+/// The query starts with a lane bitmask which gets lanes/bits removed for every
+/// use we find.
+static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals *LIS) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
+ const MachineInstr *MI = MO.getParent();
+ SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) {
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ LastUseMask &= ~UseMask;
+ if (LastUseMask.none())
+ return LaneBitmask::getNone();
+ }
+ }
+ return LastUseMask;
+}
+
+LaneBitmask RegPressureTracker::getLiveLanesAt(Register RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getAll(),
+ [](const LiveRange &LR, SlotIndex Pos) {
+ return LR.liveAt(Pos);
+ });
+}
+
+LaneBitmask RegPressureTracker::getLastUsedLanes(Register RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
+ Pos.getBaseIndex(), LaneBitmask::getNone(),
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->end == Pos.getRegSlot();
+ });
+}
+
+LaneBitmask RegPressureTracker::getLiveThroughAt(Register RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getNone(),
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->start < Pos.getRegSlot(true) &&
+ S->end != Pos.getDeadSlot();
+ });
+}
+
+/// Record the downward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks)
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+
+ if (RequireIntervals) {
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ Register Reg = Use.RegUnit;
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask.none())
+ continue;
+      // The LastUseMask is queried from the liveness information of an
+      // instruction which may be further down the schedule. Some lanes may
+      // actually not be last uses for the current position.
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = getCurrSlot();
+ LastUseMask
+ = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS);
+ if (LastUseMask.none())
+ continue;
+
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask NewMask = LiveMask & ~LastUseMask;
+ decreaseRegPressure(Reg, LiveMask, NewMask);
+ }
+ }
+
+ // Generate liveness for defs.
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ Register Reg = Def.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask NewMask = LiveMask | Def.LaneMask;
+ increaseRegPressure(Reg, LiveMask, NewMask);
+ }
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// top-down. Find the register class with the most change in its pressure limit
+/// based on the tracker's current pressure, and return the number of excess
+/// register units of that pressure set introduced by this instruction.
+///
+/// This assumes that the current LiveIn set is sufficient.
+///
+/// This is expensive for an on-the-fly query because it calls
+/// bumpDownwardPressure to recompute the pressure sets based on current
+/// liveness. We don't yet have a fast version of downward pressure tracking
+/// analogous to getUpwardPressureDelta.
+void RegPressureTracker::
+getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
+ LiveThruPressure);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+ Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Get the pressure of each PSet after traversing this instruction bottom-up.
+void RegPressureTracker::
+getUpwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
+
+/// Get the pressure of each PSet after traversing this instruction top-down.
+void RegPressureTracker::
+getDownwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 000000000000..c00d3fde6426
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,686 @@
+//===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the machine register scavenger. It can provide
+/// information, such as unused registers, at any point in a machine basic
+/// block. It also provides a mechanism to make registers available by evicting
+/// them to spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "reg-scavenging"
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
+void RegScavenger::setRegUsed(Register Reg, LaneBitmask LaneMask) {
+ LiveUnits.addRegMasked(Reg, LaneMask);
+}
+
+void RegScavenger::init(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveUnits.init(*TRI);
+
+ assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
+ "Target changed?");
+
+ // Self-initialize.
+ if (!this->MBB) {
+ NumRegUnits = TRI->getNumRegUnits();
+ KillRegUnits.resize(NumRegUnits);
+ DefRegUnits.resize(NumRegUnits);
+ TmpRegUnits.resize(NumRegUnits);
+ }
+ this->MBB = &MBB;
+
+ for (ScavengedInfo &SI : Scavenged) {
+ SI.Reg = 0;
+ SI.Restore = nullptr;
+ }
+
+ Tracking = false;
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+ init(MBB);
+ LiveUnits.addLiveIns(MBB);
+}
+
+void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
+ init(MBB);
+ LiveUnits.addLiveOuts(MBB);
+
+  // Move the internal iterator to the last instruction of the block.
+ if (!MBB.empty()) {
+ MBBI = std::prev(MBB.end());
+ Tracking = true;
+ }
+}
+
+void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) {
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ BV.set(Unit);
+}
+
+void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) {
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ BV.reset(Unit);
+}
+
+void RegScavenger::determineKillsAndDefs() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ MachineInstr &MI = *MBBI;
+ assert(!MI.isDebugInstr() && "Debug values have no kills or defs");
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ KillRegUnits.reset();
+ DefRegUnits.reset();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ TmpRegUnits.reset();
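+      // A register unit is clobbered if the mask clobbers any of its roots.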
+ for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
+ for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
+ if (MO.clobbersPhysReg(*RURI)) {
+ TmpRegUnits.set(RU);
+ break;
+ }
+ }
+ }
+
+ // Apply the mask.
+ KillRegUnits |= TmpRegUnits;
+ }
+ if (!MO.isReg())
+ continue;
+ if (!MO.getReg().isPhysical() || isReserved(MO.getReg()))
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+
+ if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
+ if (MO.isKill())
+ addRegUnits(KillRegUnits, Reg);
+ } else {
+ assert(MO.isDef());
+ if (MO.isDead())
+ addRegUnits(KillRegUnits, Reg);
+ else
+ addRegUnits(DefRegUnits, Reg);
+ }
+ }
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = std::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr &MI = *MBBI;
+
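+  // Expire scavenge spill frameindex uses.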
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore != &MI)
+ continue;
+
+ I.Reg = 0;
+ I.Restore = nullptr;
+ }
+
+ if (MI.isDebugOrPseudoInstr())
+ return;
+
+ determineKillsAndDefs();
+
+ // Verify uses and defs.
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isRegUsed(Reg)) {
+ // Check if it's partial live: e.g.
+ // D0 = insert_subreg undef D0, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 is using a partially undef value. This is not *incorrect* since
+        // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ if (none_of(TRI->subregs(Reg),
+ [&](MCPhysReg SR) { return isRegUsed(SR); }) &&
+ none_of(TRI->superregs(Reg),
+ [&](MCPhysReg SR) { return isRegUsed(SR); })) {
+ MBB->getParent()->verify(nullptr, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
+ }
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+#endif // NDEBUG
+
+ // Commit the changes.
+ setUnused(KillRegUnits);
+ setUsed(DefRegUnits);
+}
+
+void RegScavenger::backward() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ const MachineInstr &MI = *MBBI;
+ LiveUnits.stepBackward(MI);
+
+ // Expire scavenge spill frameindex uses.
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore == &MI) {
+ I.Reg = 0;
+ I.Restore = nullptr;
+ }
+ }
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(nullptr);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
+bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const {
+ if (isReserved(Reg))
+ return includeReserved;
+ return !LiveUnits.available(Reg);
+}
+
+Register RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (Register Reg : *RC) {
+ if (!isRegUsed(Reg)) {
+ LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
+ << "\n");
+ return Reg;
+ }
+ }
+ return 0;
+}
+
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (Register Reg : *RC)
+ if (!isRegUsed(Reg))
+ Mask.set(Reg);
+ return Mask;
+}
+
+/// Given the live register units in \p LiveOut at position \p From, search
+/// backwards to find a register from \p AllocationOrder that is not
+/// used/clobbered until the point \p To. If there are multiple candidates,
+/// continue searching and pick the one that stays unused/unclobbered for the
+/// longest time.
+/// Returns the register and the earliest position we know it to be free, or
+/// the position MBB.end() if no register is available.
+static std::pair<MCPhysReg, MachineBasicBlock::iterator>
+findSurvivorBackwards(const MachineRegisterInfo &MRI,
+ MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
+ const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder,
+ bool RestoreAfter) {
+ bool FoundTo = false;
+ MCPhysReg Survivor = 0;
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock &MBB = *From->getParent();
+ unsigned InstrLimit = 25;
+ unsigned InstrCountDown = InstrLimit;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LiveRegUnits Used(TRI);
+
+ assert(From->getParent() == To->getParent() &&
+ "Target instruction is in other than current basic block, use "
+ "enterBasicBlockEnd first");
+
+ for (MachineBasicBlock::iterator I = From;; --I) {
+ const MachineInstr &MI = *I;
+
+ Used.accumulate(MI);
+
+ if (I == To) {
+ // See if one of the registers in RC wasn't used so far.
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg) &&
+ LiveOut.available(Reg))
+ return std::make_pair(Reg, MBB.end());
+ }
+ // Otherwise we will continue up to InstrLimit instructions to find
+ // the register which is not defined/used for the longest time.
+ FoundTo = true;
+ Pos = To;
+      // Note: It was fine so far to start our search at From; however, now
+      // that we have to spill and can only place the restore after From, also
+      // add the regs used/defined by std::next(From) to the set.
+ if (RestoreAfter)
+ Used.accumulate(*std::next(From));
+ }
+ if (FoundTo) {
+ // Don't search to FrameSetup instructions if we were searching from
+ // Non-FrameSetup instructions. Otherwise, the spill position may point
+ // before FrameSetup instructions.
+ if (!From->getFlag(MachineInstr::FrameSetup) &&
+ MI.getFlag(MachineInstr::FrameSetup))
+ break;
+
+ if (Survivor == 0 || !Used.available(Survivor)) {
+        MCPhysReg AvailableReg = 0;
+        for (MCPhysReg Reg : AllocationOrder) {
+          if (!MRI.isReserved(Reg) && Used.available(Reg)) {
+            AvailableReg = Reg;
+            break;
+          }
+        }
+        if (AvailableReg == 0)
+          break;
+        Survivor = AvailableReg;
+ }
+ if (--InstrCountDown == 0)
+ break;
+
+      // Keep searching when we find a vreg, since the spilled register will
+      // be useful for this other vreg as well later.
+ bool FoundVReg = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ FoundVReg = true;
+ break;
+ }
+ }
+ if (FoundVReg) {
+ InstrCountDown = InstrLimit;
+ Pos = I;
+ }
+ if (I == MBB.begin())
+ break;
+ }
+ assert(I != MBB.begin() && "Did not find target instruction while "
+ "iterating backwards");
+ }
+
+ return std::make_pair(Survivor, Pos);
+}
+
+static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
+
+RegScavenger::ScavengedInfo &
+RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
+ MachineBasicBlock::iterator Before,
+ MachineBasicBlock::iterator &UseMI) {
+ // Find an available scavenging slot with size and alignment matching
+ // the requirements of the class RC.
+ const MachineFunction &MF = *Before->getMF();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned NeedSize = TRI->getSpillSize(RC);
+ Align NeedAlign = TRI->getSpillAlign(RC);
+
+ unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
+ int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
+ for (unsigned I = 0; I < Scavenged.size(); ++I) {
+ if (Scavenged[I].Reg != 0)
+ continue;
+ // Verify that this slot is valid for this register.
+ int FI = Scavenged[I].FrameIndex;
+ if (FI < FIB || FI >= FIE)
+ continue;
+ unsigned S = MFI.getObjectSize(FI);
+ Align A = MFI.getObjectAlign(FI);
+ if (NeedSize > S || NeedAlign > A)
+ continue;
+ // Avoid wasting slots with large size and/or large alignment. Pick one
+ // that is the best fit for this register class (in street metric).
+ // Picking a larger slot than necessary could happen if a slot for a
+ // larger register is reserved before a slot for a smaller one. When
+ // trying to spill a smaller register, the large slot would be found
+ // first, thus making it impossible to spill the larger register later.
+ unsigned D = (S - NeedSize) + (A.value() - NeedAlign.value());
+ if (D < Diff) {
+ SI = I;
+ Diff = D;
+ }
+ }
+
+ if (SI == Scavenged.size()) {
+    // We need to scavenge a register but have no spill slot; the target
+    // must know how to do it (if not, we'll report a fatal error below).
+ Scavenged.push_back(ScavengedInfo(FIE));
+ }
+
+ // Avoid infinite regress
+ Scavenged[SI].Reg = Reg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) {
+ // Spill the scavenged register before \p Before.
+ int FI = Scavenged[SI].FrameIndex;
+ if (FI < FIB || FI >= FIE) {
+ report_fatal_error(Twine("Error while trying to spill ") +
+ TRI->getName(Reg) + " from class " +
+ TRI->getRegClassName(&RC) +
+ ": Cannot scavenge register without an emergency "
+ "spill slot!");
+ }
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI, Register());
+ MachineBasicBlock::iterator II = std::prev(Before);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(*II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI, Register());
+ II = std::prev(UseMI);
+
+ FIOperandNum = getFrameIndexOperandNum(*II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+ }
+ return Scavenged[SI];
+}
+
+Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
+ MachineBasicBlock::iterator To,
+ bool RestoreAfter, int SPAdj,
+ bool AllowSpill) {
+ const MachineBasicBlock &MBB = *To->getParent();
+ const MachineFunction &MF = *MBB.getParent();
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+ findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder,
+ RestoreAfter);
+ MCPhysReg Reg = P.first;
+ MachineBasicBlock::iterator SpillBefore = P.second;
+ // Found an available register?
+ if (Reg != 0 && SpillBefore == MBB.end()) {
+ LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
+ << '\n');
+ return Reg;
+ }
+
+ if (!AllowSpill)
+ return 0;
+
+ assert(Reg != 0 && "No register left to scavenge!");
+
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ if (ReloadBefore != MBB.end())
+ LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+ << " until " << *SpillBefore);
+ return Reg;
+}
+
+/// Allocate a register for the virtual register \p VReg. The last use of
+/// \p VReg is around the current position of the register scavenger \p RS.
+/// \p ReserveAfter controls whether the scavenged register needs to be reserved
+/// after the current instruction; otherwise, it will only be reserved before the
+/// current instruction.
+static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ Register VReg, bool ReserveAfter) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+#ifndef NDEBUG
+ // Verify that all definitions and uses are in the same basic block.
+ const MachineBasicBlock *CommonMBB = nullptr;
+ // Real definition for the reg, re-definitions are not considered.
+ const MachineInstr *RealDef = nullptr;
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) {
+ MachineBasicBlock *MBB = MO.getParent()->getParent();
+ if (CommonMBB == nullptr)
+ CommonMBB = MBB;
+ assert(MBB == CommonMBB && "All defs+uses must be in the same basic block");
+ if (MO.isDef()) {
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.readsRegister(VReg, &TRI)) {
+ assert((!RealDef || RealDef == &MI) &&
+ "Can have at most one definition which is not a redefinition");
+ RealDef = &MI;
+ }
+ }
+ }
+ assert(RealDef != nullptr && "Must have at least 1 Def");
+#endif
+
+  // We should only have one definition of the register. However, to
+  // accommodate the requirements of two-address code we also allow definitions
+  // in subsequent instructions provided they also read the register. That way
+  // we get a single contiguous lifetime.
+  //
+  // Definitions in MRI.def_begin() are unordered; search for the first.
+ MachineRegisterInfo::def_iterator FirstDef = llvm::find_if(
+ MRI.def_operands(VReg), [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
+ assert(FirstDef != MRI.def_end() &&
+ "Must have one definition that does not redefine vreg");
+ MachineInstr &DefMI = *FirstDef->getParent();
+
+ // The register scavenger will report a free register inserting an emergency
+ // spill/reload if necessary.
+ int SPAdj = 0;
+ const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
+ Register SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ ReserveAfter, SPAdj);
+ MRI.replaceRegWith(VReg, SReg);
+ ++NumScavengedRegs;
+ return SReg;
+}
+
+/// Allocate (scavenge) vregs inside a single basic block.
+/// Returns true if the target spill callback created new vregs and a 2nd pass
+/// is necessary.
+static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
+ RegScavenger &RS,
+ MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ RS.enterBasicBlockEnd(MBB);
+
+ unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
+ bool NextInstructionReadsVReg = false;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ --I;
+ // Move RegScavenger to the position between *I and *std::next(I).
+ RS.backward(I);
+
+ // Look for unassigned vregs in the uses of *std::next(I).
+ if (NextInstructionReadsVReg) {
+ MachineBasicBlock::iterator N = std::next(I);
+ const MachineInstr &NMI = *N;
+ for (const MachineOperand &MO : NMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ // We only care about virtual registers and ignore virtual registers
+ // created by the target callbacks in the process (those will be handled
+ // in a scavenging round).
+ if (!Reg.isVirtual() ||
+ Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ if (!MO.readsReg())
+ continue;
+
+ Register SReg = scavengeVReg(MRI, RS, Reg, true);
+ N->addRegisterKilled(SReg, &TRI, false);
+ RS.setRegUsed(SReg);
+ }
+ }
+
+ // Look for unassigned vregs in the defs of *I.
+ NextInstructionReadsVReg = false;
+ const MachineInstr &MI = *I;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ // Only vregs, no newly created vregs (see above).
+ if (!Reg.isVirtual() ||
+ Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ // We have to look at all operands anyway so we can precalculate here
+      // whether there is a reading operand. This allows us to skip the use
+ // step in the next iteration if there was none.
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ if (MO.readsReg()) {
+ NextInstructionReadsVReg = true;
+ }
+ if (MO.isDef()) {
+ Register SReg = scavengeVReg(MRI, RS, Reg, false);
+ I->addRegisterDead(SReg, &TRI, false);
+ }
+ }
+ }
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MBB.front().operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ assert(!MO.readsReg() && "Vreg use in first instruction not allowed");
+ }
+#endif
+
+ return MRI.getNumVirtRegs() != InitialNumVirtRegs;
+}
+
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+  // operands for which eliminateFrameIndex needs a new scratch reg.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Shortcut.
+ if (MRI.getNumVirtRegs() == 0) {
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ return;
+ }
+
+ // Run through the instructions and find any virtual registers.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ if (Again) {
+ LLVM_DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
+ Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ // The target required a 2nd run (because it created new vregs while
+      // spilling). Refuse to do another pass to keep compile time in check.
+ if (Again)
+ report_fatal_error("Incomplete scavenging after 2nd pass");
+ }
+ }
+
+ MRI.clearVirtRegs();
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+namespace {
+
+/// This class runs register scavenging independently of the
+/// PrologEpilogInserter. It is used for testing.
+class ScavengerTest : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ScavengerTest() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFL = *STI.getFrameLowering();
+
+ RegScavenger RS;
+    // Let's hope that calling those outside of PrologEpilogInserter works
+    // well enough to initialize the scavenger with some emergency spill slots
+    // for the target.
+ BitVector SavedRegs;
+ TFL.determineCalleeSaves(MF, SavedRegs, &RS);
+ TFL.processFunctionBeforeFrameFinalized(MF, &RS);
+
+ // Let's scavenge the current function
+ scavengeFrameVirtualRegs(MF, RS);
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+char ScavengerTest::ID;
+
+INITIALIZE_PASS(ScavengerTest, "scavenger-test",
+ "Scavenge virtual registers inside basic blocks", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
new file mode 100644
index 000000000000..51bac3fc0a23
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -0,0 +1,99 @@
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<bool> DumpRegUsage(
+ "print-regusage", cl::init(false), cl::Hidden,
+ cl::desc("print register usage details collected for analysis."));
+
+INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
+ "Register Usage Information Storage", false, true)
+
+char PhysicalRegisterUsageInfo::ID = 0;
+
+void PhysicalRegisterUsageInfo::setTargetMachine(const LLVMTargetMachine &TM) {
+ this->TM = &TM;
+}
+
+bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
+ RegMasks.grow(M.size());
+ return false;
+}
+
+bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
+ if (DumpRegUsage)
+ print(errs());
+
+ RegMasks.shrink_and_clear();
+ return false;
+}
+
+void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
+ const Function &FP, ArrayRef<uint32_t> RegMask) {
+ RegMasks[&FP] = RegMask;
+}
+
+ArrayRef<uint32_t>
+PhysicalRegisterUsageInfo::getRegUsageInfo(const Function &FP) {
+ auto It = RegMasks.find(&FP);
+ if (It != RegMasks.end())
+ return ArrayRef<uint32_t>(It->second);
+ return ArrayRef<uint32_t>();
+}
+
+void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
+ using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>;
+
+ SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
+
+  // Create a vector of pointers to RegMasks entries.
+ for (const auto &RegMask : RegMasks)
+ FPRMPairVector.push_back(&RegMask);
+
+  // Sort the vector to print the analysis in alphabetical order of function
+  // name.
+ llvm::sort(
+ FPRMPairVector,
+ [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
+ return A->first->getName() < B->first->getName();
+ });
+
+ for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) {
+ OS << FPRMPair->first->getName() << " "
+ << "Clobbered Registers: ";
+ const TargetRegisterInfo *TRI
+ = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
+ .getRegisterInfo();
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
+ OS << printReg(PReg, TRI) << " ";
+ }
+ OS << "\n";
+ }
+}
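The storage pass above only records and prints regmasks; interprocedural register allocation clients read them back through getRegUsageInfo(). Below is a minimal sketch of such a client, assuming the usual legacy-pass-manager idiom; the pass name RegMaskQueryExample and the queried register number are illustrative and not part of this change.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
#include <cstdint>

namespace {
// Hypothetical consumer; not part of the file above.
class RegMaskQueryExample : public llvm::MachineFunctionPass {
public:
  static char ID;
  RegMaskQueryExample() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::PhysicalRegisterUsageInfo>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(llvm::MachineFunction &MF) override {
    auto &PRUI = getAnalysis<llvm::PhysicalRegisterUsageInfo>();
    // An empty ArrayRef means no usage info was recorded for this function.
    llvm::ArrayRef<uint32_t> Mask = PRUI.getRegUsageInfo(MF.getFunction());
    if (!Mask.empty()) {
      // The same query print() performs above; register number 1 is arbitrary.
      bool Clobbered = llvm::MachineOperand::clobbersPhysReg(Mask.data(), 1);
      (void)Clobbered;
    }
    return false;
  }
};
char RegMaskQueryExample::ID = 0;
} // end anonymous namespace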
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
new file mode 100644
index 000000000000..feb31e59f5fd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -0,0 +1,227 @@
+//===- RemoveRedundantDebugValues.cpp - Remove Redundant Debug Value MIs --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+
+/// \file RemoveRedundantDebugValues.cpp
+///
+/// The RemoveRedundantDebugValues pass removes redundant DBG_VALUEs that
+/// appear in MIR after the register allocator.
+
+#define DEBUG_TYPE "removeredundantdebugvalues"
+
+using namespace llvm;
+
+STATISTIC(NumRemovedBackward, "Number of DBG_VALUEs removed (backward scan)");
+STATISTIC(NumRemovedForward, "Number of DBG_VALUEs removed (forward scan)");
+
+namespace {
+
+class RemoveRedundantDebugValues : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RemoveRedundantDebugValues();
+
+ bool reduceDbgValues(MachineFunction &MF);
+
+ /// Remove redundant debug value MIs for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char RemoveRedundantDebugValues::ID = 0;
+
+char &llvm::RemoveRedundantDebugValuesID = RemoveRedundantDebugValues::ID;
+
+INITIALIZE_PASS(RemoveRedundantDebugValues, DEBUG_TYPE,
+ "Remove Redundant DEBUG_VALUE analysis", false, false)
+
+/// Default construct and initialize the pass.
+RemoveRedundantDebugValues::RemoveRedundantDebugValues()
+ : MachineFunctionPass(ID) {
+ initializeRemoveRedundantDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+// This analysis aims to remove redundant DBG_VALUEs by scanning forward
+// through a basic block, treating the first DBG_VALUE for a variable as
+// valid until its (location) operand is clobbered/modified.
+// For example:
+// (1) DBG_VALUE $edi, !"var1", ...
+// (2) <block of code that does not affect $edi>
+// (3) DBG_VALUE $edi, !"var1", ...
+// ...
+// in this case, we can remove (3).
+// TODO: Support DBG_VALUE_LIST and other debug instructions.
+static bool reduceDbgValsForwardScan(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "\n == Forward Scan == \n");
+
+ SmallVector<MachineInstr *, 8> DbgValsToBeRemoved;
+ DenseMap<DebugVariable, std::pair<MachineOperand *, const DIExpression *>>
+ VariableMap;
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+
+ for (auto &MI : MBB) {
+ if (MI.isDebugValue()) {
+ DebugVariable Var(MI.getDebugVariable(), std::nullopt,
+ MI.getDebugLoc()->getInlinedAt());
+ auto VMI = VariableMap.find(Var);
+ // Just stop tracking this variable, until we cover DBG_VALUE_LIST.
+ // 1 DBG_VALUE $rax, "x", DIExpression()
+ // ...
+ // 2 DBG_VALUE_LIST "x", DIExpression(...), $rax, $rbx
+ // ...
+ // 3 DBG_VALUE $rax, "x", DIExpression()
+ if (MI.isDebugValueList() && VMI != VariableMap.end()) {
+ VariableMap.erase(VMI);
+ continue;
+ }
+
+ MachineOperand &Loc = MI.getDebugOperand(0);
+ if (!Loc.isReg()) {
+        // If it's not a register, just stop tracking this variable.
+ if (VMI != VariableMap.end())
+ VariableMap.erase(VMI);
+ continue;
+ }
+
+ // We have found a new value for a variable.
+ if (VMI == VariableMap.end() ||
+ VMI->second.first->getReg() != Loc.getReg() ||
+ VMI->second.second != MI.getDebugExpression()) {
+ VariableMap[Var] = {&Loc, MI.getDebugExpression()};
+ continue;
+ }
+
+ // Found an identical DBG_VALUE, so it can be considered
+ // for later removal.
+ DbgValsToBeRemoved.push_back(&MI);
+ }
+
+ if (MI.isMetaInstruction())
+ continue;
+
+ // Stop tracking any location that is clobbered by this instruction.
+ for (auto &Var : VariableMap) {
+ auto &LocOp = Var.second.first;
+ if (MI.modifiesRegister(LocOp->getReg(), TRI))
+ VariableMap.erase(Var.first);
+ }
+ }
+
+ for (auto &Instr : DbgValsToBeRemoved) {
+ LLVM_DEBUG(dbgs() << "removing "; Instr->dump());
+ Instr->eraseFromParent();
+ ++NumRemovedForward;
+ }
+
+ return !DbgValsToBeRemoved.empty();
+}
+
+// This analysis aims to remove redundant DBG_VALUEs by going backward
+// in the basic block and removing all but the last DBG_VALUE for any
+// given variable in a set of consecutive DBG_VALUE instructions.
+// For example:
+// (1) DBG_VALUE $edi, !"var1", ...
+// (2) DBG_VALUE $esi, !"var2", ...
+// (3) DBG_VALUE $edi, !"var1", ...
+// ...
+// in this case, we can remove (1).
+static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "\n == Backward Scan == \n");
+ SmallVector<MachineInstr *, 8> DbgValsToBeRemoved;
+ SmallDenseSet<DebugVariable> VariableSet;
+
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugValue()) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ auto R = VariableSet.insert(Var);
+ // If it is a DBG_VALUE describing a constant as:
+ // DBG_VALUE 0, ...
+ // we just don't consider such instructions as candidates
+ // for redundant removal.
+ if (MI.isNonListDebugValue()) {
+ MachineOperand &Loc = MI.getDebugOperand(0);
+ if (!Loc.isReg()) {
+ // If we have already encountered this variable, just stop
+ // tracking it.
+ if (!R.second)
+ VariableSet.erase(Var);
+ continue;
+ }
+ }
+
+ // We have already encountered the value for this variable,
+ // so this one can be deleted.
+ if (!R.second)
+ DbgValsToBeRemoved.push_back(&MI);
+ continue;
+ }
+
+ // If we encountered a non-DBG_VALUE, try to find the next
+ // sequence with consecutive DBG_VALUE instructions.
+ VariableSet.clear();
+ }
+
+ for (auto &Instr : DbgValsToBeRemoved) {
+ LLVM_DEBUG(dbgs() << "removing "; Instr->dump());
+ Instr->eraseFromParent();
+ ++NumRemovedBackward;
+ }
+
+ return !DbgValsToBeRemoved.empty();
+}
+
+bool RemoveRedundantDebugValues::reduceDbgValues(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "\nDebug Value Reduction\n");
+
+ bool Changed = false;
+
+ for (auto &MBB : MF) {
+ Changed |= reduceDbgValsBackwardScan(MBB);
+ Changed |= reduceDbgValsForwardScan(MBB);
+ }
+
+ return Changed;
+}
+
+bool RemoveRedundantDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ // Skip functions without debugging information.
+ if (!MF.getFunction().getSubprogram())
+ return false;
+
+ // Skip functions from NoDebug compilation units.
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return false;
+
+ bool Changed = reduceDbgValues(MF);
+ return Changed;
+}
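Only the pass ID (RemoveRedundantDebugValuesID) is exported above, so a codegen pipeline schedules the pass by ID rather than through a create*() function. A minimal sketch under that assumption follows; the subclass name and the choice of hook are illustrative, not taken from this change.

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

// Hypothetical pass-config subclass; only the addPass() call is the point.
class ExamplePassConfig : public llvm::TargetPassConfig {
  // Constructor and the rest of the target configuration omitted.
  void addPreEmitPass() override {
    // addPass(AnalysisID) resolves the ID against the registry entry that the
    // INITIALIZE_PASS macro above created.
    addPass(&llvm::RemoveRedundantDebugValuesID);
  }
};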
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
new file mode 100644
index 000000000000..bc3ef1c0329a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -0,0 +1,405 @@
+//===-- RenameIndependentSubregs.cpp - Rename Independent Subregisters ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// The RenameIndependentSubregs pass looks for virtual registers with
+/// independently used subregisters and renames them to new virtual registers.
+/// Example: In the following:
+///   %0:sub0<read-undef> = ...
+///   %0:sub1 = ...
+///   use %0:sub0
+///   %0:sub0 = ...
+///   use %0:sub0
+///   use %0:sub1
+/// sub0 and sub1 are never used together, and we have two independent sub0
+/// definitions. This pass will rename to:
+///   %0:sub0<read-undef> = ...
+///   %1:sub1<read-undef> = ...
+///   use %0:sub0
+///   %2:sub0<read-undef> = ...
+///   use %2:sub0
+///   use %1:sub1
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeUtils.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "rename-independent-subregs"
+
+namespace {
+
+class RenameIndependentSubregs : public MachineFunctionPass {
+public:
+ static char ID;
+ RenameIndependentSubregs() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rename Disconnected Subregister Components";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ struct SubRangeInfo {
+ ConnectedVNInfoEqClasses ConEQ;
+ LiveInterval::SubRange *SR;
+ unsigned Index;
+
+ SubRangeInfo(LiveIntervals &LIS, LiveInterval::SubRange &SR,
+ unsigned Index)
+ : ConEQ(LIS), SR(&SR), Index(Index) {}
+ };
+
+ /// Split unrelated subregister components and rename them to new vregs.
+ bool renameComponents(LiveInterval &LI) const;
+
+ /// Build a vector of SubRange infos and a union find set of
+ /// equivalence classes.
+ /// Returns true if more than 1 equivalence class was found.
+ bool findComponents(IntEqClasses &Classes,
+ SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ LiveInterval &LI) const;
+
+ /// Distribute the LiveInterval segments into the new LiveIntervals
+ /// belonging to their class.
+ void distribute(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+  /// Construct the main live range and add missing undef+dead flags.
+ void computeMainRangesFixFlags(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+ /// Rewrite Machine Operands to use the new vreg belonging to their class.
+ void rewriteOperands(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+
+ LiveIntervals *LIS = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+};
+
+} // end anonymous namespace
+
+char RenameIndependentSubregs::ID;
+
+char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
+
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE,
+ "Rename Independent Subregisters", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE,
+ "Rename Independent Subregisters", false, false)
+
+bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
+ // Shortcut: We cannot have split components with a single definition.
+ if (LI.valnos.size() < 2)
+ return false;
+
+ SmallVector<SubRangeInfo, 4> SubRangeInfos;
+ IntEqClasses Classes;
+ if (!findComponents(Classes, SubRangeInfos, LI))
+ return false;
+
+ // Create a new VReg for each class.
+ Register Reg = LI.reg();
+ const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+ SmallVector<LiveInterval*, 4> Intervals;
+ Intervals.push_back(&LI);
+ LLVM_DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
+ << " equivalence classes.\n");
+ LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
+ for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
+ ++I) {
+ Register NewVReg = MRI->createVirtualRegister(RegClass);
+ LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
+ Intervals.push_back(&NewLI);
+ LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg));
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+
+ rewriteOperands(Classes, SubRangeInfos, Intervals);
+ distribute(Classes, SubRangeInfos, Intervals);
+ computeMainRangesFixFlags(Classes, SubRangeInfos, Intervals);
+ return true;
+}
+
+bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
+ SmallVectorImpl<RenameIndependentSubregs::SubRangeInfo> &SubRangeInfos,
+ LiveInterval &LI) const {
+ // First step: Create connected components for the VNInfos inside the
+ // subranges and count the global number of such components.
+ unsigned NumComponents = 0;
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRangeInfos.push_back(SubRangeInfo(*LIS, SR, NumComponents));
+ ConnectedVNInfoEqClasses &ConEQ = SubRangeInfos.back().ConEQ;
+
+ unsigned NumSubComponents = ConEQ.Classify(SR);
+ NumComponents += NumSubComponents;
+ }
+ // Shortcut: With only 1 subrange, the normal separate component tests are
+ // enough and we do not need to perform the union-find on the subregister
+ // segments.
+ if (SubRangeInfos.size() < 2)
+ return false;
+
+ // Next step: Build union-find structure over all subranges and merge classes
+ // across subranges when they are affected by the same MachineOperand.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ Classes.grow(NumComponents);
+ Register Reg = LI.reg();
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ unsigned MergedID = ~0u;
+ for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) {
+ const LiveInterval::SubRange &SR = *SRInfo.SR;
+ if ((SR.LaneMask & LaneMask).none())
+ continue;
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+ : Pos.getBaseIndex();
+ const VNInfo *VNI = SR.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ continue;
+
+      // Map to local representative ID.
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
+ // Global ID
+ unsigned ID = LocalID + SRInfo.Index;
+ // Merge other sets
+ MergedID = MergedID == ~0u ? ID : Classes.join(MergedID, ID);
+ }
+ }
+
+ // Early exit if we ended up with a single equivalence class.
+ Classes.compress();
+ unsigned NumClasses = Classes.getNumClasses();
+ return NumClasses > 1;
+}
+
+void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ unsigned Reg = Intervals[0]->reg();
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ) {
+ MachineOperand &MO = *I++;
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+
+ auto *MI = MO.getParent();
+ SlotIndex Pos = LIS->getInstructionIndex(*MI);
+ Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+ : Pos.getBaseIndex();
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+
+ unsigned ID = ~0u;
+ for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+ const LiveInterval::SubRange &SR = *SRInfo.SR;
+ if ((SR.LaneMask & LaneMask).none())
+ continue;
+ const VNInfo *VNI = SR.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ continue;
+
+      // Map to local representative ID.
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
+ // Global ID
+ ID = Classes[LocalID + SRInfo.Index];
+ break;
+ }
+
+ unsigned VReg = Intervals[ID]->reg();
+ MO.setReg(VReg);
+
+ if (MO.isTied() && Reg != VReg) {
+ /// Undef use operands are not tracked in the equivalence class,
+ /// but need to be updated if they are tied; take care to only
+ /// update the tied operand.
+ unsigned OperandNo = MO.getOperandNo();
+ unsigned TiedIdx = MI->findTiedOperandIdx(OperandNo);
+ MI->getOperand(TiedIdx).setReg(VReg);
+
+ // above substitution breaks the iterator, so restart.
+ I = MRI->reg_nodbg_begin(Reg);
+ }
+ }
+ // TODO: We could attempt to recompute new register classes while visiting
+ // the operands: Some of the split register may be fine with less constraint
+ // classes than the original vreg.
+}
+
+void RenameIndependentSubregs::distribute(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ unsigned NumClasses = Classes.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+ LiveInterval::SubRange &SR = *SRInfo.SR;
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumClasses-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ unsigned ID = Classes[LocalID + SRInfo.Index];
+ VNIMapping.push_back(ID);
+ if (ID > 0 && SubRanges[ID-1] == nullptr)
+ SubRanges[ID-1] = Intervals[ID]->createSubRange(Allocator, SR.LaneMask);
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
+ }
+}
+
+static bool subRangeLiveAt(const LiveInterval &LI, SlotIndex Pos) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (SR.liveAt(Pos))
+ return true;
+ }
+ return false;
+}
+
+void RenameIndependentSubregs::computeMainRangesFixFlags(
+ const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+ for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
+ LiveInterval &LI = *Intervals[I];
+ Register Reg = LI.reg();
+
+ LI.removeEmptySubRanges();
+
+    // There must be a def (or live-in) before every use. Splitting vregs may
+    // violate this principle as the split vreg may not have a definition on
+    // every path. Fix this by creating IMPLICIT_DEF instructions as necessary.
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ // Search for "PHI" value numbers in the subranges. We must find a live
+ // value in each predecessor block, add an IMPLICIT_DEF where it is
+ // missing.
+ for (unsigned I = 0; I < SR.valnos.size(); ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ if (VNI.isUnused() || !VNI.isPHIDef())
+ continue;
+
+ SlotIndex Def = VNI.def;
+ MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(Def);
+ for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ SlotIndex PredEnd = Indexes.getMBBEndIdx(PredMBB);
+ if (subRangeLiveAt(LI, PredEnd.getPrevSlot()))
+ continue;
+
+ MachineBasicBlock::iterator InsertPos =
+ llvm::findPHICopyInsertPoint(PredMBB, &MBB, Reg);
+ const MCInstrDesc &MCDesc = TII->get(TargetOpcode::IMPLICIT_DEF);
+ MachineInstrBuilder ImpDef = BuildMI(*PredMBB, InsertPos,
+ DebugLoc(), MCDesc, Reg);
+ SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
+ SlotIndex RegDefIdx = DefIdx.getRegSlot();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
+ SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
+ }
+ }
+ }
+ }
+
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef())
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx == 0)
+ continue;
+ // After assigning the new vreg we may not have any other sublanes living
+ // in and out of the instruction anymore. We need to add new dead and
+ // undef flags in these cases.
+ if (!MO.isUndef()) {
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ if (!subRangeLiveAt(LI, Pos))
+ MO.setIsUndef();
+ }
+ if (!MO.isDead()) {
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()).getDeadSlot();
+ if (!subRangeLiveAt(LI, Pos))
+ MO.setIsDead();
+ }
+ }
+
+ if (I == 0)
+ LI.clear();
+ LIS->constructMainRangeFromSubranges(LI);
+ // A def of a subregister may be a use of other register lanes. Replacing
+ // such a def with a def of a different register will eliminate the use,
+ // and may cause the recorded live range to be larger than the actual
+ // liveness in the program IR.
+ LIS->shrinkToUses(&LI);
+ }
+}
+
+bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
+ // Skip renaming if liveness of subregister is not tracked.
+ MRI = &MF.getRegInfo();
+ if (!MRI->subRegLivenessEnabled())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in "
+ << MF.getName() << '\n');
+
+ LIS = &getAnalysis<LiveIntervals>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+  // Iterate over all vregs. Note that we only query getNumVirtRegs() once:
+  // the newly created vregs end up with higher numbers but do not need to be
+  // visited as there can't be any further splitting.
+ bool Changed = false;
+ for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (!LIS->hasInterval(Reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.hasSubRanges())
+ continue;
+
+ Changed |= renameComponents(LI);
+ }
+
+ return Changed;
+}
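findComponents() above numbers the connected components of all subranges globally and then merges the components touched by the same operand using IntEqClasses. A minimal sketch of that union-find API in isolation; the component count and the join are illustrative values, not taken from this change.

#include "llvm/ADT/IntEqClasses.h"

// Components 0..3 as produced by ConnectedVNInfoEqClasses; the join mirrors an
// operand whose lane mask overlaps the subranges of components 0 and 2.
unsigned countClasses() {
  llvm::IntEqClasses Classes;
  Classes.grow(4);            // four components, each in its own class
  Classes.join(0, 2);         // merge classes touched by the same operand
  Classes.compress();         // renumber classes densely as 0..N-1
  unsigned ClassOfThree = Classes[3]; // dense class index of component 3
  (void)ClassOfThree;
  return Classes.getNumClasses();     // 3 classes remain: {0,2}, {1}, {3}
}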
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
new file mode 100644
index 000000000000..57cd1fcffb61
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -0,0 +1,251 @@
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "replace-with-veclib"
+
+STATISTIC(NumCallsReplaced,
+ "Number of calls to intrinsics that have been replaced.");
+
+STATISTIC(NumTLIFuncDeclAdded,
+ "Number of vector library function declarations added.");
+
+STATISTIC(NumFuncUsedAdded,
+ "Number of functions added to `llvm.compiler.used`");
+
+static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
+ Module *M = CI.getModule();
+
+ Function *OldFunc = CI.getCalledFunction();
+
+ // Check if the vector library function is already declared in this module,
+ // otherwise insert it.
+ Function *TLIFunc = M->getFunction(TLIName);
+ if (!TLIFunc) {
+ TLIFunc = Function::Create(OldFunc->getFunctionType(),
+ Function::ExternalLinkage, TLIName, *M);
+ TLIFunc->copyAttributesFrom(OldFunc);
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+ << TLIName << "` of type `" << *(TLIFunc->getType())
+ << "` to module.\n");
+
+ ++NumTLIFuncDeclAdded;
+
+ // Add the freshly created function to llvm.compiler.used,
+    // similar to how it is done in InjectTLIMappings.
+ appendToCompilerUsed(*M, {TLIFunc});
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+ << "` to `@llvm.compiler.used`.\n");
+ ++NumFuncUsedAdded;
+ }
+
+ // Replace the call to the vector intrinsic with a call
+ // to the corresponding function from the vector library.
+ IRBuilder<> IRBuilder(&CI);
+ SmallVector<Value *> Args(CI.args());
+ // Preserve the operand bundles.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI.getOperandBundlesAsDefs(OpBundles);
+ CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
+ assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
+ "Expecting function types to be identical");
+ CI.replaceAllUsesWith(Replacement);
+ if (isa<FPMathOperator>(Replacement)) {
+ // Preserve fast math flags for FP math.
+ Replacement->copyFastMathFlags(&CI);
+ }
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+ << OldFunc->getName() << "` with call to `" << TLIName
+ << "`.\n");
+ ++NumCallsReplaced;
+ return true;
+}
+
+static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+ CallInst &CI) {
+ if (!CI.getCalledFunction()) {
+ return false;
+ }
+
+ auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
+ if (IntrinsicID == Intrinsic::not_intrinsic) {
+ // Replacement is only performed for intrinsic functions
+ return false;
+ }
+
+ // Convert vector arguments to scalar type and check that
+ // all vector operands have identical vector width.
+ ElementCount VF = ElementCount::getFixed(0);
+ SmallVector<Type *> ScalarTypes;
+ for (auto Arg : enumerate(CI.args())) {
+ auto *ArgType = Arg.value()->getType();
+ // Vector calls to intrinsics can still have
+ // scalar operands for specific arguments.
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
+ ScalarTypes.push_back(ArgType);
+ } else {
+ // The argument in this place should be a vector if
+ // this is a call to a vector intrinsic.
+ auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
+ if (!VectorArgTy) {
+ // The argument is not a vector, do not perform
+ // the replacement.
+ return false;
+ }
+ ElementCount NumElements = VectorArgTy->getElementCount();
+ if (NumElements.isScalable()) {
+ // The current implementation does not support
+ // scalable vectors.
+ return false;
+ }
+ if (VF.isNonZero() && VF != NumElements) {
+        // The arguments differ in vector width.
+ return false;
+ } else {
+ VF = NumElements;
+ }
+ ScalarTypes.push_back(VectorArgTy->getElementType());
+ }
+ }
+
+ // Try to reconstruct the name for the scalar version of this
+ // intrinsic using the intrinsic ID and the argument types
+ // converted to scalar above.
+ std::string ScalarName;
+ if (Intrinsic::isOverloaded(IntrinsicID)) {
+ ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
+ } else {
+ ScalarName = Intrinsic::getName(IntrinsicID).str();
+ }
+
+ if (!TLI.isFunctionVectorizable(ScalarName)) {
+ // The TargetLibraryInfo does not contain a vectorized version of
+ // the scalar function.
+ return false;
+ }
+
+ // Try to find the mapping for the scalar version of this intrinsic
+ // and the exact vector width of the call operands in the
+ // TargetLibraryInfo.
+ const std::string TLIName =
+ std::string(TLI.getVectorizedFunction(ScalarName, VF));
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+ << ScalarName << "` and vector width " << VF << ".\n");
+
+ if (!TLIName.empty()) {
+ // Found the correct mapping in the TargetLibraryInfo,
+ // replace the call to the intrinsic with a call to
+ // the vector library function.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+ << "`.\n");
+ return replaceWithTLIFunction(CI, TLIName);
+ }
+
+ return false;
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+ bool Changed = false;
+ SmallVector<CallInst *> ReplacedCalls;
+ for (auto &I : instructions(F)) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (replaceWithCallToVeclib(TLI, *CI)) {
+ ReplacedCalls.push_back(CI);
+ Changed = true;
+ }
+ }
+ }
+ // Erase the calls to the intrinsics that have been replaced
+ // with calls to the vector library.
+ for (auto *CI : ReplacedCalls) {
+ CI->eraseFromParent();
+ }
+ return Changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses ReplaceWithVeclib::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto Changed = runImpl(TLI, F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<TargetLibraryAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<LoopAccessAnalysis>();
+ PA.preserve<DemandedBitsAnalysis>();
+ PA.preserve<OptimizationRemarkEmitterAnalysis>();
+ return PA;
+ } else {
+ // The pass did not replace any calls, hence it preserves all analyses.
+ return PreservedAnalyses::all();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return runImpl(TLI, F);
+}
+
+void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char ReplaceWithVeclibLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+
+FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
+ return new ReplaceWithVeclibLegacy();
+}
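A minimal sketch of driving the new-pass-manager ReplaceWithVeclib pass defined above, assuming the stock PassBuilder analysis registration; the helper name is illustrative. Unless the TargetLibraryInfo seen by the pass carries vector-library mappings, run() simply returns PreservedAnalyses::all().

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/CodeGen/ReplaceWithVeclib.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

void runReplaceWithVeclib(llvm::Function &F) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  // Standard new-PM boilerplate: register analyses and cross-register proxies.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::ReplaceWithVeclib());
  FPM.run(F, FAM);
}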
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
new file mode 100644
index 000000000000..11bdf3bb2ba8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -0,0 +1,97 @@
+//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a pass that will conditionally reset a machine
+/// function as if it was just created. This is used to provide a fallback
+/// mechanism when GlobalISel fails, thus the condition for the reset to
+/// happen is that the MachineFunction has the FailedISel property.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "reset-machine-function"
+
+STATISTIC(NumFunctionsReset, "Number of functions reset");
+STATISTIC(NumFunctionsVisited, "Number of functions visited");
+
+namespace {
+ class ResetMachineFunction : public MachineFunctionPass {
+ /// Tells whether or not this pass should emit a fallback
+ /// diagnostic when it resets a function.
+ bool EmitFallbackDiag;
+ /// Whether we should abort immediately instead of resetting the function.
+ bool AbortOnFailedISel;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ResetMachineFunction(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false)
+ : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag),
+ AbortOnFailedISel(AbortOnFailedISel) {}
+
+ StringRef getPassName() const override { return "ResetMachineFunction"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<StackProtector>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ ++NumFunctionsVisited;
+ // No matter what happened, whether we successfully selected the function
+ // or not, nothing is going to use the vreg types after us. Make sure they
+ // disappear.
+ auto ClearVRegTypesOnReturn =
+ make_scope_exit([&MF]() { MF.getRegInfo().clearVirtRegTypes(); });
+
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel)) {
+ if (AbortOnFailedISel)
+ report_fatal_error("Instruction selection failed");
+ LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n');
+ ++NumFunctionsReset;
+ MF.reset();
+ MF.initTargetMachineFunctionInfo(MF.getSubtarget());
+
+ const LLVMTargetMachine &TM = MF.getTarget();
+ // MRI callback for target specific initializations.
+ TM.registerMachineRegisterInfoCallback(MF);
+
+ if (EmitFallbackDiag) {
+ const Function &F = MF.getFunction();
+ DiagnosticInfoISelFallback DiagFallback(F);
+ F.getContext().diagnose(DiagFallback);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ };
+} // end anonymous namespace
+
+char ResetMachineFunction::ID = 0;
+INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
+ "Reset machine function if ISel failed", false, false)
+
+MachineFunctionPass *
+llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false) {
+ return new ResetMachineFunction(EmitFallbackDiag, AbortOnFailedISel);
+}
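createResetMachineFunctionPass() above is the only way to construct the pass, so a pipeline that lets GlobalISel fall back inserts it after instruction selection. A minimal sketch under that assumption; the helper name and placement are illustrative, not taken from this change.

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

// Hypothetical helper; a real pipeline builds this inside its pass config.
void addGlobalISelFallback(llvm::legacy::PassManagerBase &PM,
                           bool EmitFallbackDiag, bool AbortOnFailedISel) {
  // Resets any MachineFunction that carries the FailedISel property so a
  // later selector (e.g. SelectionDAG) can retry from a clean slate.
  PM.add(llvm::createResetMachineFunctionPass(EmitFallbackDiag,
                                              AbortOnFailedISel));
}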
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
new file mode 100644
index 000000000000..bcad7a3f24da
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -0,0 +1,939 @@
+//===- SafeStack.cpp - Safe Stack Insertion -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass splits the stack into the safe stack (kept as-is for LLVM backend)
+// and the unsafe stack (explicitly allocated and managed through the runtime
+// support library).
+//
+// http://clang.llvm.org/docs/SafeStack.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackLayout.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/StackLifetime.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safe-stack"
+
+namespace llvm {
+
+STATISTIC(NumFunctions, "Total number of functions");
+STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack");
+STATISTIC(NumUnsafeStackRestorePointsFunctions,
+ "Number of functions that use setjmp or exceptions");
+
+STATISTIC(NumAllocas, "Total number of allocas");
+STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
+STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
+STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
+
+} // namespace llvm
+
+/// Use __safestack_pointer_address even if the platform has a faster way of
+/// accessing the safe stack pointer.
+static cl::opt<bool>
+ SafeStackUsePointerAddress("safestack-use-pointer-address",
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> ClColoring("safe-stack-coloring",
+ cl::desc("enable safe stack coloring"),
+ cl::Hidden, cl::init(true));
+
+namespace {
+
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
+class SafeStack {
+ Function &F;
+ const TargetLoweringBase &TL;
+ const DataLayout &DL;
+ DomTreeUpdater *DTU;
+ ScalarEvolution &SE;
+
+ Type *StackPtrTy;
+ Type *IntPtrTy;
+ Type *Int32Ty;
+ Type *Int8Ty;
+
+ Value *UnsafeStackPtr = nullptr;
+
+ /// Unsafe stack alignment. Each stack frame must ensure that the stack is
+ /// aligned to this value. We need to re-align the unsafe stack if the
+ /// alignment of any object on the stack exceeds this value.
+ ///
+ /// 16 seems like a reasonable upper bound on the alignment of objects that we
+ /// might expect to appear on the stack on most common targets.
+ static constexpr Align StackAlignment = Align::Constant<16>();
+
+ /// Return the value of the stack canary.
+ Value *getStackGuard(IRBuilder<> &IRB, Function &F);
+
+ /// Load stack guard from the frame and check if it has changed.
+ void checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
+ AllocaInst *StackGuardSlot, Value *StackGuard);
+
+ /// Find all static allocas, dynamic allocas, return instructions and
+ /// stack restore points (exception unwind blocks and setjmp calls) in the
+ /// given function and append them to the respective vectors.
+ void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
+ SmallVectorImpl<Instruction *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints);
+
+ /// Calculate the allocation size of a given alloca. Returns 0 if the
+ /// size can not be statically determined.
+ uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
+ /// Allocate space for all static allocas in \p StaticAllocas,
+ /// replace allocas with pointers into the unsafe stack.
+ ///
+ /// \returns A pointer to the top of the unsafe stack after all unsafe static
+ /// allocas are allocated.
+ Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
+ ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments,
+ Instruction *BasePointer,
+ AllocaInst *StackGuardSlot);
+
+ /// Generate code to restore the stack after all stack restore points
+ /// in \p StackRestorePoints.
+ ///
+ /// \returns A local variable in which to maintain the dynamic top of the
+ /// unsafe stack if needed.
+ AllocaInst *
+ createStackRestorePoints(IRBuilder<> &IRB, Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop);
+
+ /// Replace all allocas in \p DynamicAllocas with code to allocate
+ /// space dynamically on the unsafe stack and store the dynamic unsafe stack
+ /// top to \p DynamicTop if non-null.
+ void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr,
+ AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas);
+
+ bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+ bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr, uint64_t AllocaSize);
+ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+ uint64_t AllocaSize);
+
+ bool ShouldInlinePointerAddress(CallInst &CI);
+ void TryInlinePointerAddress();
+
+public:
+ SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
+ DomTreeUpdater *DTU, ScalarEvolution &SE)
+ : F(F), TL(TL), DL(DL), DTU(DTU), SE(SE),
+ StackPtrTy(Type::getInt8PtrTy(F.getContext())),
+ IntPtrTy(DL.getIntPtrType(F.getContext())),
+ Int32Ty(Type::getInt32Ty(F.getContext())),
+ Int8Ty(Type::getInt8Ty(F.getContext())) {}
+
+ // Run the transformation on the associated function.
+ // Returns whether the function was changed.
+ bool run();
+};
+
+constexpr Align SafeStack::StackAlignment;
+
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
+ const Value *AllocaPtr, uint64_t AllocaSize) {
+ const SCEV *AddrExpr = SE.getSCEV(Addr);
+ const auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(AddrExpr));
+ if (!Base || Base->getValue() != AllocaPtr) {
+ LLVM_DEBUG(
+ dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << "SCEV " << *AddrExpr << " not directly based on alloca\n");
+ return false;
+ }
+
+ const SCEV *Expr = SE.removePointerBase(AddrExpr);
+ uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE.getUnsignedRange(Expr);
+ ConstantRange SizeRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ ConstantRange AllocaRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+ bool Safe = AllocaRange.contains(AccessRange);
+
+ LLVM_DEBUG(
+ dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << " Access " << *Addr << "\n"
+ << " SCEV " << *Expr
+ << " U: " << SE.getUnsignedRange(Expr)
+ << ", S: " << SE.getSignedRange(Expr) << "\n"
+ << " Range " << AccessRange << "\n"
+ << " AllocaRange " << AllocaRange << "\n"
+ << " " << (Safe ? "safe" : "unsafe") << "\n");
+
+ return Safe;
+}
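IsAccessSafe() above accepts an access only when the unsigned SCEV range of its offset, widened by the access size, stays inside [0, AllocaSize). A source-level illustration of the resulting classification, assuming the enclosing function carries the safestack attribute; unknownIndex() is a hypothetical stand-in for any value whose range ScalarEvolution cannot bound.

// unknownIndex() is a hypothetical external function; its result has an
// unbounded unsigned range as far as ScalarEvolution is concerned.
extern unsigned unknownIndex();

void example() {
  char A[16];
  A[10] = 0;        // access range [10, 11) lies inside [0, 16): safe, so A
                    // may stay on the regular (safe) stack
  char B[16];
  unsigned I = unknownIndex();
  B[I] = 0;         // range of I is unknown: conservatively unsafe, so B is
                    // moved to the unsafe stack
}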
+
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr,
+ uint64_t AllocaSize) {
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return true;
+ } else {
+ if (MI->getRawDest() != U)
+ return true;
+ }
+
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len) return false;
+ return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation must be put on the safe
+/// stack or not. The function analyzes all uses of AI and checks whether it is
+/// only accessed in a memory safe way (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+ // Go through all uses of this alloca and check whether all accesses to the
+ // allocated object are statically known to be memory safe and, hence, the
+ // object can be placed on the safe stack.
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AllocaPtr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr,
+ AllocaSize))
+ return false;
+ break;
+
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ LLVM_DEBUG(dbgs()
+ << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n store of address: " << *I << "\n");
+ return false;
+ }
+
+ if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getOperand(0)->getType()),
+ AllocaPtr, AllocaSize))
+ return false;
+ break;
+
+ case Instruction::Ret:
+ // Information leak.
+ return false;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ const CallBase &CS = *cast<CallBase>(I);
+
+ if (I->isLifetimeStartOrEnd())
+ continue;
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+ LLVM_DEBUG(dbgs()
+ << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe memintrinsic: " << *I << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ // LLVM 'nocapture' attribute is only set for arguments whose address
+ // is not stored, passed around, or used in any other non-trivial way.
+ // We assume that passing a pointer to an object as a 'nocapture
+ // readnone' argument is safe.
+ // FIXME: a more precise solution would require an interprocedural
+ // analysis here, which would look at all uses of an argument inside
+ // the function being called.
+ auto B = CS.arg_begin(), E = CS.arg_end();
+ for (const auto *A = B; A != E; ++A)
+ if (A->get() == V)
+ if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+ CS.doesNotAccessMemory()))) {
+ LLVM_DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe call: " << *I << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ // All uses of the alloca are safe, we can place it on the safe stack.
+ return true;
+}
+
+Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
+ Value *StackGuardVar = TL.getIRStackGuard(IRB);
+ Module *M = F.getParent();
+
+ if (!StackGuardVar) {
+ TL.insertSSPDeclarations(*M);
+ return IRB.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard));
+ }
+
+ return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard");
+}
+
+void SafeStack::findInsts(Function &F,
+ SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
+ SmallVectorImpl<Instruction *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints) {
+ for (Instruction &I : instructions(&F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ ++NumAllocas;
+
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (IsSafeStackAlloca(AI, Size))
+ continue;
+
+ if (AI->isStaticAlloca()) {
+ ++NumUnsafeStaticAllocas;
+ StaticAllocas.push_back(AI);
+ } else {
+ ++NumUnsafeDynamicAllocas;
+ DynamicAllocas.push_back(AI);
+ }
+ } else if (auto RI = dyn_cast<ReturnInst>(&I)) {
+ if (CallInst *CI = I.getParent()->getTerminatingMustTailCall())
+ Returns.push_back(CI);
+ else
+ Returns.push_back(RI);
+ } else if (auto CI = dyn_cast<CallInst>(&I)) {
+ // setjmps require stack restore.
+ if (CI->getCalledFunction() && CI->canReturnTwice())
+ StackRestorePoints.push_back(CI);
+ } else if (auto LP = dyn_cast<LandingPadInst>(&I)) {
+ // Exception landing pads require stack restore.
+ StackRestorePoints.push_back(LP);
+ } else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::gcroot)
+ report_fatal_error(
+ "gcroot intrinsic not compatible with safestack attribute");
+ }
+ }
+ for (Argument &Arg : F.args()) {
+ if (!Arg.hasByValAttr())
+ continue;
+ uint64_t Size = DL.getTypeStoreSize(Arg.getParamByValType());
+ if (IsSafeStackAlloca(&Arg, Size))
+ continue;
+
+ ++NumUnsafeByValArguments;
+ ByValArguments.push_back(&Arg);
+ }
+}
+
+AllocaInst *
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop) {
+ assert(StaticTop && "The stack top isn't set.");
+
+ if (StackRestorePoints.empty())
+ return nullptr;
+
+ // We need the current value of the shadow stack pointer to restore
+ // after longjmp or exception catching.
+
+ // FIXME: On some platforms this could be handled by the longjmp/exception
+ // runtime itself.
+
+ AllocaInst *DynamicTop = nullptr;
+ if (NeedDynamicTop) {
+    // If we also have dynamic allocas, the stack pointer value changes
+ // throughout the function. For now we store it in an alloca.
+ DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr,
+ "unsafe_stack_dynamic_ptr");
+ IRB.CreateStore(StaticTop, DynamicTop);
+ }
+
+ // Restore current stack pointer after longjmp/exception catch.
+ for (Instruction *I : StackRestorePoints) {
+ ++NumUnsafeStackRestorePoints;
+
+ IRB.SetInsertPoint(I->getNextNode());
+ Value *CurrentTop =
+ DynamicTop ? IRB.CreateLoad(StackPtrTy, DynamicTop) : StaticTop;
+ IRB.CreateStore(CurrentTop, UnsafeStackPtr);
+ }
+
+ return DynamicTop;
+}
+
+void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
+ AllocaInst *StackGuardSlot, Value *StackGuard) {
+ Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot);
+ Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
+
+ auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb = BranchProbabilityInfo::getBranchProbStackProtector(false);
+ MDNode *Weights = MDBuilder(F.getContext())
+ .createBranchWeights(SuccessProb.getNumerator(),
+ FailureProb.getNumerator());
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(Cmp, &RI, /* Unreachable */ true, Weights, DTU);
+ IRBuilder<> IRBFail(CheckTerm);
+ // FIXME: respect -fsanitize-trap / -ftrap-function here?
+ FunctionCallee StackChkFail =
+ F.getParent()->getOrInsertFunction("__stack_chk_fail", IRB.getVoidTy());
+ IRBFail.CreateCall(StackChkFail, {});
+}
+
+/// We explicitly compute and set the unsafe stack layout for all unsafe
+/// static alloca instructions. We save the unsafe "base pointer" in the
+/// prologue into a local variable and restore it in the epilogue.
+Value *SafeStack::moveStaticAllocasToUnsafeStack(
+ IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments, Instruction *BasePointer,
+ AllocaInst *StackGuardSlot) {
+ if (StaticAllocas.empty() && ByValArguments.empty())
+ return BasePointer;
+
+ DIBuilder DIB(*F.getParent());
+
+ StackLifetime SSC(F, StaticAllocas, StackLifetime::LivenessType::May);
+ static const StackLifetime::LiveRange NoColoringRange(1, true);
+ if (ClColoring)
+ SSC.run();
+
+ for (const auto *I : SSC.getMarkers()) {
+ auto *Op = dyn_cast<Instruction>(I->getOperand(1));
+ const_cast<IntrinsicInst *>(I)->eraseFromParent();
+ // Remove the operand bitcast, too, if it has no more uses left.
+ if (Op && Op->use_empty())
+ Op->eraseFromParent();
+ }
+
+ // Unsafe stack always grows down.
+ StackLayout SSL(StackAlignment);
+ if (StackGuardSlot) {
+ Type *Ty = StackGuardSlot->getAllocatedType();
+ Align Align = std::max(DL.getPrefTypeAlign(Ty), StackGuardSlot->getAlign());
+ SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
+ Align, SSC.getFullLiveRange());
+ }
+
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getParamByValType();
+ uint64_t Size = DL.getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ Align Align = DL.getPrefTypeAlign(Ty);
+ if (auto A = Arg->getParamAlign())
+ Align = std::max(Align, *A);
+ SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
+ }
+
+ for (AllocaInst *AI : StaticAllocas) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ Align Align = std::max(DL.getPrefTypeAlign(Ty), AI->getAlign());
+
+ SSL.addObject(AI, Size, Align,
+ ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
+ }
+
+ SSL.computeLayout();
+ Align FrameAlignment = SSL.getFrameAlignment();
+
+ // FIXME: tell SSL that we start at a less-than-MaxAlignment aligned location
+ // (AlignmentSkew).
+ if (FrameAlignment > StackAlignment) {
+ // Re-align the base pointer according to the max requested alignment.
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+ BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
+ IRB.CreateAnd(
+ IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+ ConstantInt::get(IntPtrTy, ~(FrameAlignment.value() - 1))),
+ StackPtrTy));
+ }
+
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ if (StackGuardSlot) {
+ unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
+ Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *NewAI =
+ IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
+
+ // Replace alloc with the new location.
+ StackGuardSlot->replaceAllUsesWith(NewAI);
+ StackGuardSlot->eraseFromParent();
+ }
+
+ for (Argument *Arg : ByValArguments) {
+ unsigned Offset = SSL.getObjectOffset(Arg);
+ MaybeAlign Align(SSL.getObjectAlignment(Arg));
+ Type *Ty = Arg->getParamByValType();
+
+ uint64_t Size = DL.getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
+ Arg->getName() + ".unsafe-byval");
+
+ // Replace alloc with the new location.
+ replaceDbgDeclare(Arg, BasePointer, DIB, DIExpression::ApplyOffset,
+ -Offset);
+ Arg->replaceAllUsesWith(NewArg);
+ IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
+ IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size);
+ }
+
+ // Allocate space for every unsafe static AllocaInst on the unsafe stack.
+ for (AllocaInst *AI : StaticAllocas) {
+ IRB.SetInsertPoint(AI);
+ unsigned Offset = SSL.getObjectOffset(AI);
+
+ replaceDbgDeclare(AI, BasePointer, DIB, DIExpression::ApplyOffset, -Offset);
+ replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
+
+ // Replace uses of the alloca with the new location.
+ // Insert address calculation close to each use to work around PR27844.
+ std::string Name = std::string(AI->getName()) + ".unsafe";
+ while (!AI->use_empty()) {
+ Use &U = *AI->use_begin();
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Instruction *InsertBefore;
+ if (auto *PHI = dyn_cast<PHINode>(User))
+ InsertBefore = PHI->getIncomingBlock(U)->getTerminator();
+ else
+ InsertBefore = User;
+
+ IRBuilder<> IRBUser(InsertBefore);
+ Value *Off = IRBUser.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
+
+ if (auto *PHI = dyn_cast<PHINode>(User))
+ // PHI nodes may have multiple incoming edges from the same BB; all of
+ // them must be updated at once with the same incoming value.
+ PHI->setIncomingValueForBlock(PHI->getIncomingBlock(U), Replacement);
+ else
+ U.set(Replacement);
+ }
+
+ AI->eraseFromParent();
+ }
+
+ // Re-align BasePointer so that our callees would see it aligned as
+ // expected.
+ // FIXME: no need to update BasePointer in leaf functions.
+ unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
+
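+ // Record the rounded-up unsafe frame size on the function as !annotation
+ // metadata of the form !{!"unsafe-stack-size", i32 FrameSize}.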
+ MDBuilder MDB(F.getContext());
+ SmallVector<Metadata *, 2> Data;
+ Data.push_back(MDB.createString("unsafe-stack-size"));
+ Data.push_back(MDB.createConstant(ConstantInt::get(Int32Ty, FrameSize)));
+ MDNode *MD = MDTuple::get(F.getContext(), Data);
+ F.setMetadata(LLVMContext::MD_annotation, MD);
+
+ // Update shadow stack pointer in the function epilogue.
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ Value *StaticTop =
+ IRB.CreateGEP(Int8Ty, BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+ "unsafe_stack_static_top");
+ IRB.CreateStore(StaticTop, UnsafeStackPtr);
+ return StaticTop;
+}
+
+void SafeStack::moveDynamicAllocasToUnsafeStack(
+ Function &F, Value *UnsafeStackPtr, AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas) {
+ DIBuilder DIB(*F.getParent());
+
+ for (AllocaInst *AI : DynamicAllocas) {
+ IRBuilder<> IRB(AI);
+
+ // Compute the new SP value (after AI).
+ Value *ArraySize = AI->getArraySize();
+ if (ArraySize->getType() != IntPtrTy)
+ ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
+
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
+
+ Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(StackPtrTy, UnsafeStackPtr),
+ IntPtrTy);
+ SP = IRB.CreateSub(SP, Size);
+
+ // Align the SP value to satisfy the AllocaInst, type and stack alignments.
+ auto Align = std::max(std::max(DL.getPrefTypeAlign(Ty), AI->getAlign()),
+ StackAlignment);
+
+ Value *NewTop = IRB.CreateIntToPtr(
+ IRB.CreateAnd(SP,
+ ConstantInt::get(IntPtrTy, ~uint64_t(Align.value() - 1))),
+ StackPtrTy);
+
+ // Save the stack pointer.
+ IRB.CreateStore(NewTop, UnsafeStackPtr);
+ if (DynamicTop)
+ IRB.CreateStore(NewTop, DynamicTop);
+
+ Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType());
+ if (AI->hasName() && isa<Instruction>(NewAI))
+ NewAI->takeName(AI);
+
+ replaceDbgDeclare(AI, NewAI, DIB, DIExpression::ApplyOffset, 0);
+ AI->replaceAllUsesWith(NewAI);
+ AI->eraseFromParent();
+ }
+
+ if (!DynamicAllocas.empty()) {
+ // Now go through the instructions again, replacing stacksave/stackrestore.
+ for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::stacksave) {
+ IRBuilder<> IRB(II);
+ Instruction *LI = IRB.CreateLoad(StackPtrTy, UnsafeStackPtr);
+ LI->takeName(II);
+ II->replaceAllUsesWith(LI);
+ II->eraseFromParent();
+ } else if (II->getIntrinsicID() == Intrinsic::stackrestore) {
+ IRBuilder<> IRB(II);
+ Instruction *SI = IRB.CreateStore(II->getArgOperand(0), UnsafeStackPtr);
+ SI->takeName(II);
+ assert(II->use_empty());
+ II->eraseFromParent();
+ }
+ }
+ }
+}
+
+bool SafeStack::ShouldInlinePointerAddress(CallInst &CI) {
+ Function *Callee = CI.getCalledFunction();
+ if (CI.hasFnAttr(Attribute::AlwaysInline) &&
+ isInlineViable(*Callee).isSuccess())
+ return true;
+ if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
+ CI.isNoInline())
+ return false;
+ return true;
+}
+
+void SafeStack::TryInlinePointerAddress() {
+ auto *CI = dyn_cast<CallInst>(UnsafeStackPtr);
+ if (!CI)
+ return;
+
+ if (F.hasOptNone())
+ return;
+
+ Function *Callee = CI->getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ return;
+
+ if (!ShouldInlinePointerAddress(*CI))
+ return;
+
+ InlineFunctionInfo IFI;
+ InlineFunction(*CI, IFI);
+}
+
+bool SafeStack::run() {
+ assert(F.hasFnAttribute(Attribute::SafeStack) &&
+ "Can't run SafeStack on a function without the attribute");
+ assert(!F.isDeclaration() && "Can't run SafeStack on a function declaration");
+
+ ++NumFunctions;
+
+ SmallVector<AllocaInst *, 16> StaticAllocas;
+ SmallVector<AllocaInst *, 4> DynamicAllocas;
+ SmallVector<Argument *, 4> ByValArguments;
+ SmallVector<Instruction *, 4> Returns;
+
+ // Collect all points where the stack gets unwound and needs to be restored.
+ // This is only necessary because the runtime (setjmp and unwind code) is
+ // not aware of the unsafe stack and won't unwind/restore it properly.
+ // To work around this problem without changing the runtime, we insert
+ // instrumentation to restore the unsafe stack pointer when necessary.
+ SmallVector<Instruction *, 4> StackRestorePoints;
+
+ // Find all static and dynamic alloca instructions that must be moved to the
+ // unsafe stack, all return instructions and stack restore points.
+ findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns,
+ StackRestorePoints);
+
+ if (StaticAllocas.empty() && DynamicAllocas.empty() &&
+ ByValArguments.empty() && StackRestorePoints.empty())
+ return false; // Nothing to do in this function.
+
+ if (!StaticAllocas.empty() || !DynamicAllocas.empty() ||
+ !ByValArguments.empty())
+ ++NumUnsafeStackFunctions; // This function has the unsafe stack.
+
+ if (!StackRestorePoints.empty())
+ ++NumUnsafeStackRestorePointsFunctions;
+
+ IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ // Calls must always have a debug location, or else inlining breaks. So
+ // we explicitly set an artificial debug location here.
+ if (DISubprogram *SP = F.getSubprogram())
+ IRB.SetCurrentDebugLocation(
+ DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP));
+ if (SafeStackUsePointerAddress) {
+ FunctionCallee Fn = F.getParent()->getOrInsertFunction(
+ "__safestack_pointer_address", StackPtrTy->getPointerTo(0));
+ UnsafeStackPtr = IRB.CreateCall(Fn);
+ } else {
+ UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
+ }
+
+ // Load the current stack pointer (we'll also use it as a base pointer).
+ // FIXME: use a dedicated register for it ?
+ Instruction *BasePointer =
+ IRB.CreateLoad(StackPtrTy, UnsafeStackPtr, false, "unsafe_stack_ptr");
+ assert(BasePointer->getType() == StackPtrTy);
+
+ AllocaInst *StackGuardSlot = nullptr;
+ // FIXME: implement weaker forms of stack protector.
+ if (F.hasFnAttribute(Attribute::StackProtect) ||
+ F.hasFnAttribute(Attribute::StackProtectStrong) ||
+ F.hasFnAttribute(Attribute::StackProtectReq)) {
+ Value *StackGuard = getStackGuard(IRB, F);
+ StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr);
+ IRB.CreateStore(StackGuard, StackGuardSlot);
+
+ for (Instruction *RI : Returns) {
+ IRBuilder<> IRBRet(RI);
+ checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard);
+ }
+ }
+
+ // The top of the unsafe stack after all unsafe static allocas are
+ // allocated.
+ Value *StaticTop = moveStaticAllocasToUnsafeStack(
+ IRB, F, StaticAllocas, ByValArguments, BasePointer, StackGuardSlot);
+
+ // Safe stack object that stores the current unsafe stack top. It is updated
+ // as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
+ // This is only needed if we need to restore stack pointer after longjmp
+ // or exceptions, and we have dynamic allocations.
+ // FIXME: a better alternative might be to store the unsafe stack pointer
+ // before setjmp / invoke instructions.
+ AllocaInst *DynamicTop = createStackRestorePoints(
+ IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
+
+ // Handle dynamic allocas.
+ moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop,
+ DynamicAllocas);
+
+ // Restore the unsafe stack pointer before each return.
+ for (Instruction *RI : Returns) {
+ IRB.SetInsertPoint(RI);
+ IRB.CreateStore(BasePointer, UnsafeStackPtr);
+ }
+
+ TryInlinePointerAddress();
+
+ LLVM_DEBUG(dbgs() << "[SafeStack] safestack applied\n");
+ return true;
+}
+
+class SafeStackLegacyPass : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SafeStackLegacyPass() : FunctionPass(ID) {
+ initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ LLVM_DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ LLVM_DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return false;
+ }
+
+ if (F.isDeclaration()) {
+ LLVM_DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return false;
+ }
+
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (!TL)
+ report_fatal_error("TargetLowering instance is required");
+
+ auto *DL = &F.getParent()->getDataLayout();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ // Compute DT and LI only for functions that have the attribute.
+ // This is only useful because the legacy pass manager doesn't let us
+ // compute analyses lazily.
+
+ DominatorTree *DT;
+ bool ShouldPreserveDominatorTree;
+ std::optional<DominatorTree> LazilyComputedDomTree;
+
+ // Do we already have a DominatorTree available from the previous pass?
+ // Note that we should *NOT* require it, to avoid the case where we end up
+ // not needing it, but the legacy PM would have computed it for us anyways.
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DT = &DTWP->getDomTree();
+ ShouldPreserveDominatorTree = true;
+ } else {
+ // Otherwise, we need to compute it.
+ LazilyComputedDomTree.emplace(F);
+ DT = &*LazilyComputedDomTree;
+ ShouldPreserveDominatorTree = false;
+ }
+
+ // Likewise, lazily compute loop info.
+ LoopInfo LI(*DT);
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ ScalarEvolution SE(F, TLI, ACT, *DT, LI);
+
+ return SafeStack(F, *TL, *DL, ShouldPreserveDominatorTree ? &DTU : nullptr,
+ SE)
+ .run();
+ }
+};
+
+} // end anonymous namespace
+
+char SafeStackLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE,
+ "Safe Stack instrumentation pass", false, false)
+
+FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
new file mode 100644
index 000000000000..f821145f4b63
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -0,0 +1,152 @@
+//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackLayout.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestacklayout"
+
+static cl::opt<bool> ClLayout("safe-stack-layout",
+ cl::desc("enable safe stack layout"), cl::Hidden,
+ cl::init(true));
+
+LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
+ OS << "Stack regions:\n";
+ for (unsigned i = 0; i < Regions.size(); ++i) {
+ OS << " " << i << ": [" << Regions[i].Start << ", " << Regions[i].End
+ << "), range " << Regions[i].Range << "\n";
+ }
+ OS << "Stack objects:\n";
+ for (auto &IT : ObjectOffsets) {
+ OS << " at " << IT.getSecond() << ": " << *IT.getFirst() << "\n";
+ }
+}
+
+void StackLayout::addObject(const Value *V, unsigned Size, Align Alignment,
+ const StackLifetime::LiveRange &Range) {
+ StackObjects.push_back({V, Size, Alignment, Range});
+ ObjectAlignments[V] = Alignment;
+ MaxAlignment = std::max(MaxAlignment, Alignment);
+}
+
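+// AdjustStackOffset returns the start offset of an object of the given Size
+// such that its end (Start + Size) is aligned to Alignment. The unsafe stack
+// grows down and objects are addressed as (frame base - end offset), so
+// aligning the end offset is what makes the object's runtime address aligned,
+// assuming the frame base itself is aligned at least that much.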
+static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
+ Align Alignment) {
+ return alignTo(Offset + Size, Alignment) - Size;
+}
+
+void StackLayout::layoutObject(StackObject &Obj) {
+ if (!ClLayout) {
+ // If layout is disabled, just grab the next aligned address.
+ // This effectively disables stack coloring as well.
+ unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End;
+ unsigned Start = AdjustStackOffset(LastRegionEnd, Obj.Size, Obj.Alignment);
+ unsigned End = Start + Obj.Size;
+ Regions.emplace_back(Start, End, Obj.Range);
+ ObjectOffsets[Obj.Handle] = End;
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align "
+ << Obj.Alignment.value() << ", range " << Obj.Range
+ << "\n");
+ assert(Obj.Alignment <= MaxAlignment);
+ unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);
+ unsigned End = Start + Obj.Size;
+ LLVM_DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n");
+ for (const StackRegion &R : Regions) {
+ LLVM_DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End
+ << ", range " << R.Range << "\n");
+ assert(End >= R.Start);
+ if (Start >= R.End) {
+ LLVM_DEBUG(dbgs() << " Does not intersect, skip.\n");
+ continue;
+ }
+ if (Obj.Range.overlaps(R.Range)) {
+ // Find the next appropriate location.
+ Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment);
+ End = Start + Obj.Size;
+ LLVM_DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. "
+ << End << "\n");
+ continue;
+ }
+ if (End <= R.End) {
+ LLVM_DEBUG(dbgs() << " Reusing region(s).\n");
+ break;
+ }
+ }
+
+ unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End;
+ if (End > LastRegionEnd) {
+ // Insert a new region at the end. Maybe two.
+ if (Start > LastRegionEnd) {
+ LLVM_DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. "
+ << Start << "\n");
+ Regions.emplace_back(LastRegionEnd, Start, StackLifetime::LiveRange(0));
+ LastRegionEnd = Start;
+ }
+ LLVM_DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. "
+ << End << ", range " << Obj.Range << "\n");
+ Regions.emplace_back(LastRegionEnd, End, Obj.Range);
+ LastRegionEnd = End;
+ }
+
+ // Split starting and ending regions if necessary.
+ for (unsigned i = 0; i < Regions.size(); ++i) {
+ StackRegion &R = Regions[i];
+ if (Start > R.Start && Start < R.End) {
+ StackRegion R0 = R;
+ R.Start = R0.End = Start;
+ Regions.insert(&R, R0);
+ continue;
+ }
+ if (End > R.Start && End < R.End) {
+ StackRegion R0 = R;
+ R0.End = R.Start = End;
+ Regions.insert(&R, R0);
+ break;
+ }
+ }
+
+ // Update live ranges for all affected regions.
+ for (StackRegion &R : Regions) {
+ if (Start < R.End && End > R.Start)
+ R.Range.join(Obj.Range);
+ if (End <= R.End)
+ break;
+ }
+
+ ObjectOffsets[Obj.Handle] = End;
+}
+
+void StackLayout::computeLayout() {
+ // Simple greedy algorithm.
+ // If this is replaced with something smarter, it must preserve the property
+ // that the first object is always at the offset 0 in the stack frame (for
+ // StackProtectorSlot), or handle stack protector in some other way.
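+ //
+ // Illustrative example (hypothetical sizes): given a 16-byte object A and an
+ // 8-byte object B whose live ranges do not overlap, layoutObject() lets B
+ // reuse A's region, so the frame stays 16 bytes instead of 24.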
+
+ // Sort objects by size (largest first) to reduce fragmentation.
+ if (StackObjects.size() > 2)
+ llvm::stable_sort(drop_begin(StackObjects),
+ [](const StackObject &a, const StackObject &b) {
+ return a.Size > b.Size;
+ });
+
+ for (auto &Obj : StackObjects)
+ layoutObject(Obj);
+
+ LLVM_DEBUG(print(dbgs()));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
new file mode 100644
index 000000000000..6126c7a67854
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
@@ -0,0 +1,84 @@
+//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
+#define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/StackLifetime.h"
+
+namespace llvm {
+
+class raw_ostream;
+class Value;
+
+namespace safestack {
+
+/// Compute the layout of an unsafe stack frame.
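+///
+/// Typical use (as in SafeStack.cpp above): call addObject() once per unsafe
+/// object, then computeLayout(), then query getObjectOffset(),
+/// getObjectAlignment() and getFrameSize() to materialize the frame.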
+class StackLayout {
+ Align MaxAlignment;
+
+ struct StackRegion {
+ unsigned Start;
+ unsigned End;
+ StackLifetime::LiveRange Range;
+
+ StackRegion(unsigned Start, unsigned End,
+ const StackLifetime::LiveRange &Range)
+ : Start(Start), End(End), Range(Range) {}
+ };
+
+ /// The list of current stack regions, sorted by StackRegion::Start.
+ SmallVector<StackRegion, 16> Regions;
+
+ struct StackObject {
+ const Value *Handle;
+ unsigned Size;
+ Align Alignment;
+ StackLifetime::LiveRange Range;
+ };
+
+ SmallVector<StackObject, 8> StackObjects;
+
+ DenseMap<const Value *, unsigned> ObjectOffsets;
+ DenseMap<const Value *, Align> ObjectAlignments;
+
+ void layoutObject(StackObject &Obj);
+
+public:
+ StackLayout(Align StackAlignment) : MaxAlignment(StackAlignment) {}
+
+ /// Add an object to the stack frame. Value pointer is opaque and used as a
+ /// handle to retrieve the object's offset in the frame later.
+ void addObject(const Value *V, unsigned Size, Align Alignment,
+ const StackLifetime::LiveRange &Range);
+
+ /// Run the layout computation for all previously added objects.
+ void computeLayout();
+
+ /// Returns the offset to the object start in the stack frame.
+ unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
+
+ /// Returns the alignment of the object.
+ Align getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+
+ /// Returns the size of the entire frame.
+ unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
+
+ /// Returns the alignment of the frame.
+ Align getFrameAlignment() { return MaxAlignment; }
+
+ void print(raw_ostream &OS);
+};
+
+} // end namespace safestack
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
new file mode 100644
index 000000000000..cc29bdce1210
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -0,0 +1,87 @@
+//===- SanitizerBinaryMetadata.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine-level part of SanitizerBinaryMetadata: it
+// appends the size of a covered function's stack arguments to the function's
+// !pcsections metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+
+using namespace llvm;
+
+namespace {
+class MachineSanitizerBinaryMetadata : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineSanitizerBinaryMetadata();
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+} // namespace
+
+INITIALIZE_PASS(MachineSanitizerBinaryMetadata, "machine-sanmd",
+ "Machine Sanitizer Binary Metadata", false, false)
+
+char MachineSanitizerBinaryMetadata::ID = 0;
+char &llvm::MachineSanitizerBinaryMetadataID =
+ MachineSanitizerBinaryMetadata::ID;
+
+MachineSanitizerBinaryMetadata::MachineSanitizerBinaryMetadata()
+ : MachineFunctionPass(ID) {
+ initializeMachineSanitizerBinaryMetadataPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
+ MDNode *MD = MF.getFunction().getMetadata(LLVMContext::MD_pcsections);
+ if (!MD)
+ return false;
+ const auto &Section = *cast<MDString>(MD->getOperand(0));
+ if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
+ return false;
+ auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
+ // Assume it currently only has features.
+ assert(AuxMDs.getNumOperands() == 1);
+ Constant *Features =
+ cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue();
+ if (!Features->getUniqueInteger()[kSanitizerBinaryMetadataUARBit])
+ return false;
+ // Calculate size of stack args for the function.
+ int64_t Size = 0;
+ uint64_t Align = 0;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (int i = -1; i >= (int)-MFI.getNumFixedObjects(); --i) {
+ Size = std::max(Size, MFI.getObjectOffset(i) + MFI.getObjectSize(i));
+ Align = std::max(Align, MFI.getObjectAlign(i).value());
+ }
+ Size = (Size + Align - 1) & ~(Align - 1);
+ if (!Size)
+ return false;
+ // Non-zero size, update metadata.
+ auto &F = MF.getFunction();
+ IRBuilder<> IRB(F.getContext());
+ MDBuilder MDB(F.getContext());
+ // Keep the features and append size of stack args to the metadata.
+ APInt NewFeatures = Features->getUniqueInteger();
+ NewFeatures.setBit(kSanitizerBinaryMetadataUARHasSizeBit);
+ F.setMetadata(
+ LLVMContext::MD_pcsections,
+ MDB.createPCSections({{Section.getString(),
+ {IRB.getInt(NewFeatures), IRB.getInt32(Size)}}}));
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 000000000000..14ec41920e3e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,754 @@
+//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements the ScheduleDAG class, which is a base class used by
+/// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added");
+STATISTIC(NumTopoInits,
+ "Number of times the topological order has been recomputed");
+
+#ifndef NDEBUG
+static cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+void SchedulingPriorityQueue::anchor() {}
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
+ TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
+ MRI(mf.getRegInfo()) {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() = default;
+
+void ScheduleDAG::clearDAG() {
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+}
+
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return nullptr;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const {
+ switch (getKind()) {
+ case Data: dbgs() << "Data"; break;
+ case Anti: dbgs() << "Anti"; break;
+ case Output: dbgs() << "Out "; break;
+ case Order: dbgs() << "Ord "; break;
+ }
+
+ switch (getKind()) {
+ case Data:
+ dbgs() << " Latency=" << getLatency();
+ if (TRI && isAssignedRegDep())
+ dbgs() << " Reg=" << printReg(getReg(), TRI);
+ break;
+ case Anti:
+ case Output:
+ dbgs() << " Latency=" << getLatency();
+ break;
+ case Order:
+ dbgs() << " Latency=" << getLatency();
+ switch(Contents.OrdKind) {
+ case Barrier: dbgs() << " Barrier"; break;
+ case MayAliasMem:
+ case MustAliasMem: dbgs() << " Memory"; break;
+ case Artificial: dbgs() << " Artificial"; break;
+ case Weak: dbgs() << " Weak"; break;
+ case Cluster: dbgs() << " Cluster"; break;
+ }
+ break;
+ }
+}
+
+bool SUnit::addPred(const SDep &D, bool Required) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SDep &PredDep : Preds) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && PredDep.getSUnit() == D.getSUnit())
+ return false;
+ if (PredDep.overlaps(D)) {
+ // Extend the latency if needed. Equivalent to
+ // removePred(PredDep) + addPred(D).
+ if (PredDep.getLatency() < D.getLatency()) {
+ SUnit *PredSU = PredDep.getSUnit();
+ // Find the corresponding successor in N.
+ SDep ForwardD = PredDep;
+ ForwardD.setSUnit(this);
+ for (SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep == ForwardD) {
+ SuccDep.setLatency(D.getLatency());
+ break;
+ }
+ }
+ PredDep.setLatency(D.getLatency());
+ }
+ return false;
+ }
+ }
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < std::numeric_limits<unsigned>::max() &&
+ "NumPreds will overflow!");
+ assert(N->NumSuccs < std::numeric_limits<unsigned>::max() &&
+ "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
+
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D);
+ if (I == Preds.end())
+ return;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak()) {
+ assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
+ --WeakPredsLeft;
+ } else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak()) {
+ assert(WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
+ --N->WeakSuccsLeft;
+ } else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SDep &SuccDep : SU->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SDep &PredDep : SU->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// Calculates the maximal path from a node with no predecessors to this node
+/// (its depth).
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (const SDep &PredDep : Cur->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + PredDep.getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// Calculates the maximal path from this node to a node with no successors
+/// (its height).
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (const SDep &SuccDep : Cur->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + SuccDep.getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
+ ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ BestI = I;
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
+}
+
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
+ if (&SU == &EntrySU)
+ dbgs() << "EntrySU";
+ else if (&SU == &ExitSU)
+ dbgs() << "ExitSU";
+ else
+ dbgs() << "SU(" << SU.NodeNum << ")";
+}
+
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
+ dumpNode(SU);
+ SU.dumpAttributes();
+ if (SU.Preds.size() > 0) {
+ dbgs() << " Predecessors:\n";
+ for (const SDep &Dep : SU.Preds) {
+ dbgs() << " ";
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
+ }
+ }
+ if (SU.Succs.size() > 0) {
+ dbgs() << " Successors:\n";
+ for (const SDep &Dep : SU.Succs) {
+ dbgs() << " ";
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
+ }
+ }
+}
+#endif
+
+#ifndef NDEBUG
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ for (const SUnit &SUnit : SUnits) {
+ if (!SUnit.isScheduled) {
+ if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(SUnit);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnit.isScheduled &&
+ (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) >
+ unsigned(std::numeric_limits<int>::max())) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(SUnit);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnit.NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(SUnit);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnit.NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(SUnit);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ assert(!AnyNotSched);
+ return SUnits.size() - DeadNodes;
+}
+#endif
+
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ // The idea of the algorithm is taken from
+ // "Online algorithms for managing the topological order of
+ // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+ // This is the MNR algorithm, which was first introduced by
+ // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+ // "Maintaining a topological order under edge insertions".
+ //
+ // Short description of the algorithm:
+ //
+ // Topological ordering, ord, of a DAG maps each node to a topological
+ // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+ //
+ // This means that if there is a path from the node X to the node Z,
+ // then ord(X) < ord(Z).
+ //
+ // This property can be used to check for reachability of nodes:
+ // if Z is reachable from X, then an insertion of the edge Z->X would
+ // create a cycle.
+ //
+ // The algorithm first computes a topological ordering for the DAG by
+ // initializing the Index2Node and Node2Index arrays and then tries to keep
+ // the ordering up-to-date after edge insertions by reordering the DAG.
+ //
+ // On insertion of the edge X->Y, the algorithm first marks by calling DFS
+ // the nodes reachable from Y, and then shifts them using Shift to lie
+ // immediately after X in Index2Node.
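+ //
+ // Tiny worked example (hypothetical): with ord = {A:0, B:1, C:2, D:3} and a
+ // new edge D->B (assuming B has no other successors in that index range),
+ // DFS from B marks {B} and Shift renumbers to {A:0, C:1, D:2, B:3},
+ // restoring ord(D) < ord(B).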
+
+ // Cancel pending updates, mark as valid.
+ Dirty = false;
+ Updates.clear();
+
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
+ for (SUnit &SU : SUnits) {
+ int NodeNum = SU.NodeNum;
+ unsigned Degree = SU.Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU.Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(&SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
+ for (const SDep &PredDep : SU->Preds) {
+ SUnit *SU = PredDep.getSUnit();
+ if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+ NumTopoInits++;
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (SUnit &SU : SUnits) {
+ for (const SDep &PD : SU.Preds) {
+ assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
+
+void ScheduleDAGTopologicalSort::FixOrder() {
+ // Recompute from scratch after new nodes have been added.
+ if (Dirty) {
+ InitDAGTopologicalSorting();
+ return;
+ }
+
+ // Otherwise apply updates one-by-one.
+ for (auto &U : Updates)
+ AddPred(U.first, U.second);
+ Updates.clear();
+}
+
+void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
+ // Recomputing the order from scratch is likely more efficient than applying
+ // updates one-by-one for too many updates. The current cut-off is arbitrarily
+ // chosen.
+ Dirty = Dirty || Updates.size() > 10;
+
+ if (Dirty)
+ return;
+
+ Updates.emplace_back(Y, X);
+}
+
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+
+ NumNewPredsAdded++;
+}
+
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // Removing an edge never invalidates an existing topological order, so
+ // there is nothing to recompute here (no call to InitDAGTopologicalSorting).
+}
+
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
+ unsigned s = SuccDep.getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SuccDep.getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
+ const SUnit &TargetSU,
+ bool &Success) {
+ std::vector<const SUnit*> WorkList;
+ int LowerBound = Node2Index[StartSU.NodeNum];
+ int UpperBound = Node2Index[TargetSU.NodeNum];
+ bool Found = false;
+ BitVector VisitedBack;
+ std::vector<int> Nodes;
+
+ if (LowerBound > UpperBound) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.reserve(SUnits.size());
+ Visited.reset();
+
+ // Starting from StartSU, visit all successors up
+ // to UpperBound.
+ WorkList.push_back(&StartSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (const SDep &SD : llvm::reverse(SU->Succs)) {
+ const SUnit *Succ = SD.getSUnit();
+ unsigned s = Succ->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (Succ->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ Found = true;
+ continue;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ Visited.set(s);
+ WorkList.push_back(Succ);
+ }
+ }
+ } while (!WorkList.empty());
+
+ if (!Found) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.clear();
+ VisitedBack.resize(SUnits.size());
+ Found = false;
+
+ // Starting from TargetSU, visit all predecessors up
+ // to LowerBound. SUs that are visited by the two
+ // passes are added to Nodes.
+ WorkList.push_back(&TargetSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (const SDep &SD : llvm::reverse(SU->Preds)) {
+ const SUnit *Pred = SD.getSUnit();
+ unsigned s = Pred->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
+ if (Pred->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == LowerBound) {
+ Found = true;
+ continue;
+ }
+ if (!VisitedBack.test(s) && Visited.test(s)) {
+ VisitedBack.set(s);
+ WorkList.push_back(Pred);
+ Nodes.push_back(s);
+ }
+ }
+ } while (!WorkList.empty());
+
+ assert(Found && "Error in SUnit Graph!");
+ Success = true;
+ return Nodes;
+}
+
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned LI : L) {
+ Allocate(LI, i - shift);
+ i = i + 1;
+ }
+}
+
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ FixOrder();
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
+ return true;
+ for (const SDep &PredDep : TargetSU->Preds)
+ if (PredDep.isAssignedRegDep() &&
+ IsReachable(SU, PredDep.getSUnit()))
+ return true;
+ return false;
+}
+
+void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
+ assert(SU->NodeNum == Index2Node.size() && "Node must be added at the end");
+ assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors");
+ Node2Index.push_back(Index2Node.size());
+ Index2Node.push_back(SU->NodeNum);
+ Visited.resize(Node2Index.size());
+}
+
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ assert(TargetSU != nullptr && "Invalid target SUnit");
+ assert(SU != nullptr && "Invalid SUnit");
+ FixOrder();
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 000000000000..239b44857c28
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,1531 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This implements the ScheduleDAGInstrs class, which implements
+/// re-scheduling of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+static cl::opt<bool>
+ EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::desc("Enable use of AA during MI DAG construction"));
+
+static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
+ cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
+
+// Note: the two options below might be used in tuning compile time vs
+// output quality. Setting HugeRegion so large that it will never be
+// reached means best-effort, but may be slow.
+
+// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
+// together hold this many SUs, a reduction of maps will be done.
+static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
+ cl::init(1000), cl::desc("The limit to use while constructing the DAG "
+ "prior to scheduling, at which point a trade-off "
+ "is made to avoid excessive compile time."));
+
+static cl::opt<unsigned> ReductionSize(
+ "dag-maps-reduction-size", cl::Hidden,
+ cl::desc("A huge scheduling region will have maps reduced by this many "
+ "nodes at a time. Defaults to HugeRegion / 2."));
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static cl::opt<bool> SchedPrintCycles(
+ "sched-print-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Report top/bottom cycles when dumping SUnit instances"));
+#endif
+
+static unsigned getReductionSize() {
+ // Always reduce a huge region with half of the elements, except
+ // when user sets this number explicitly.
+ if (ReductionSize.getNumOccurrences() == 0)
+ return HugeRegion / 2;
+ return ReductionSize;
+}
+
+static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << "{ ";
+ for (const SUnit *SU : L) {
+ dbgs() << "SU(" << SU->NodeNum << ")";
+ if (SU != L.back())
+ dbgs() << ", ";
+ }
+ dbgs() << "}\n";
+#endif
+}
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo *mli,
+ bool RemoveKillFlags)
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
+ RemoveKillFlags(RemoveKillFlags),
+ UnknownValue(UndefValue::get(
+ Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
+ DbgValues.clear();
+
+ const TargetSubtargetInfo &ST = mf.getSubtarget();
+ SchedModel.init(&ST);
+}
+
+/// If this machine instr has memory reference information and it can be
+/// tracked to a normal reference to a known object, record the Value(s)
+/// for that object in \p Objects and return true. Returns false if the
+/// memory location is unknown or may alias anything.
+static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo &MFI,
+ UnderlyingObjectsVector &Objects,
+ const DataLayout &DL) {
+ auto AllMMOsOkay = [&]() {
+ for (const MachineMemOperand *MMO : MI->memoperands()) {
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic())
+ return false;
+
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
+ // Functions that contain tail calls don't have unique PseudoSourceValue
+ // objects. Two PseudoSourceValues might refer to the same or
+ // overlapping locations. The client code calling this function assumes
+ // this is not the case. So return a conservative answer of no known
+ // object.
+ if (MFI.hasTailCall())
+ return false;
+
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(&MFI))
+ return false;
+
+ bool MayAlias = PSV->mayAlias(&MFI);
+ Objects.emplace_back(PSV, MayAlias);
+ } else if (const Value *V = MMO->getValue()) {
+ SmallVector<Value *, 4> Objs;
+ if (!getUnderlyingObjectsForCodeGen(V, Objs))
+ return false;
+
+ for (Value *V : Objs) {
+ assert(isIdentifiedObject(V));
+ Objects.emplace_back(V, true);
+ }
+ } else
+ return false;
+ }
+ return true;
+ };
+
+ if (!AllMMOsOkay()) {
+ Objects.clear();
+ return false;
+ }
+
+ return true;
+}
+
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
+}
+
+void ScheduleDAGInstrs::finishBlock() {
+ // Subclasses should no longer refer to the old block.
+ BB = nullptr;
+}
+
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs) {
+ assert(bb == BB && "startBlock should set BB");
+ RegionBegin = begin;
+ RegionEnd = end;
+ NumRegionInstrs = regioninstrs;
+}
+
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI =
+ RegionEnd != BB->end()
+ ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin)
+ : nullptr;
+ ExitSU.setInstr(ExitMI);
+ // Add dependencies on the defs and uses of the instruction.
+ if (ExitMI) {
+ for (const MachineOperand &MO : ExitMI->all_uses()) {
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ } else if (Reg.isVirtual() && MO.readsReg()) {
+ addVRegUseDeps(&ExitSU, MO.getOperandNo());
+ }
+ }
+ }
+ if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ for (const MachineBasicBlock *Succ : BB->successors()) {
+ for (const auto &LI : Succ->liveins()) {
+ if (!Uses.contains(LI.PhysReg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
+ }
+ }
+ }
+}
+
+/// MO is an operand of SU's instruction that defines a physical register. Adds
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
+ // Only use any non-zero latency for real defs/uses, in contrast to
+ // "fake" operands added by regalloc.
+ const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
+ bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
+ !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ SUnit *UseSU = I->SU;
+ if (UseSU == SU)
+ continue;
+
+ // Adjust the dependence latency using operand def/use information,
+ // then allow the target to perform its own adjustments.
+ int UseOp = I->OpIdx;
+ MachineInstr *RegUse = nullptr;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ // Set the hasPhysRegDefs only for physreg defs that have a use within
+ // the scheduling region.
+ SU->hasPhysRegDefs = true;
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ }
+ const MCInstrDesc *UseMIDesc =
+ (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
+ bool ImplicitPseudoUse =
+ (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
+ !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
+ if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
+ Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp));
+ } else {
+ Dep.setLatency(0);
+ }
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
+ UseSU->addPred(Dep);
+ }
+ }
+}
+
+/// Adds register dependencies (data, anti, and output) from this SUnit
+/// to following instructions in the same scheduling region that depend on the
+/// physical register referenced at OperIdx.
+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ Register Reg = MO.getReg();
+ // We do not need to track any dependencies for constant registers.
+ if (MRI.isConstantPhysReg(Reg))
+ return;
+
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ SUnit *DefSU = I->SU;
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ if (Kind != SDep::Anti)
+ Dep.setLatency(
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
+ DefSU->addPred(Dep);
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ SU->hasPhysRegUses = true;
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ if (RemoveKillFlags)
+ MO.setIsKill(false);
+ } else {
+ addPhysRegDataDeps(SU, OperIdx);
+
+    // Clear previous uses and defs of this register and its subregisters.
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ if (Uses.contains(SubReg))
+ Uses.eraseAll(SubReg);
+ if (!MO.isDead())
+ Defs.eraseAll(SubReg);
+ }
+ if (MO.isDead() && SU->isCall) {
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+ Reg2SUnitsMap::iterator B = P.first;
+ Reg2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
+ }
+
+ // Defs are pushed in the order they are visited and never reordered.
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ }
+}
+
+LaneBitmask
+ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const {
+ Register Reg = MO.getReg();
+ // No point in tracking lanemasks if we don't have interesting subregisters.
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
+ if (!RC.HasDisjunctSubRegs)
+ return LaneBitmask::getAll();
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0)
+ return RC.getLaneMask();
+ return TRI->getSubRegIndexLaneMask(SubReg);
+}
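+
+// Illustrative sketch (not from the original source): lane masks make partial
+// overlap between subregister accesses explicit. Assuming hypothetical
+// subregister indices whose masks are 0x3 (low half) and 0xC (high half):
+//   LaneBitmask Lo(0x3), Hi(0xC), Full = Lo | Hi;
+//   (Lo & Hi).none();    // true: a def of the low half does not feed a use
+//                        // of the high half
+//   (Lo & Full).any();   // true: but it does feed a use of the full register
+// When the register class has no disjunct subregisters, getLaneMaskForMO
+// returns getAll() and the tracking degenerates to a single lane.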
+
+bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
+ auto RegUse = CurrentVRegUses.find(MO.getReg());
+ if (RegUse == CurrentVRegUses.end())
+ return true;
+ return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
+}
+
+/// Adds register output and data dependencies from this SUnit to instructions
+/// that occur later in the same scheduling region if they read from or write to
+/// the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ Register Reg = MO.getReg();
+
+ LaneBitmask DefLaneMask;
+ LaneBitmask KillLaneMask;
+ if (TrackLaneMasks) {
+ bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
+ DefLaneMask = getLaneMaskForMO(MO);
+ // If we have a <read-undef> flag, none of the lane values comes from an
+ // earlier instruction.
+ KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
+
+ if (MO.getSubReg() != 0 && MO.isUndef()) {
+      // There may be other subregister defs of the same register in later
+      // operands of this instruction. The lanes of those other defs will now
+      // be live after this instruction, so they should not be treated as
+      // killed by it even though they appear to be killed in this one operand.
+ for (const MachineOperand &OtherMO :
+ llvm::drop_begin(MI->operands(), OperIdx + 1))
+ if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
+ KillLaneMask &= ~getLaneMaskForMO(OtherMO);
+ }
+
+ // Clear undef flag, we'll re-add it later once we know which subregister
+ // Def is first.
+ MO.setIsUndef(false);
+ } else {
+ DefLaneMask = LaneBitmask::getAll();
+ KillLaneMask = LaneBitmask::getAll();
+ }
+
+ if (MO.isDead()) {
+ assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
+ } else {
+ // Add data dependence to all uses we found so far.
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
+ E = CurrentVRegUses.end(); I != E; /*empty*/) {
+ LaneBitmask LaneMask = I->LaneMask;
+ // Ignore uses of other lanes.
+ if ((LaneMask & KillLaneMask).none()) {
+ ++I;
+ continue;
+ }
+
+ if ((LaneMask & DefLaneMask).any()) {
+ SUnit *UseSU = I->SU;
+ MachineInstr *Use = UseSU->getInstr();
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
+ I->OperandIndex));
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep);
+ UseSU->addPred(Dep);
+ }
+
+ LaneMask &= ~KillLaneMask;
+ // If we found a Def for all lanes of this use, remove it from the list.
+ if (LaneMask.any()) {
+ I->LaneMask = LaneMask;
+ ++I;
+ } else
+ I = CurrentVRegUses.erase(I);
+ }
+ }
+
+ // Shortcut: Singly defined vregs do not have output/anti dependencies.
+ if (MRI.hasOneDef(Reg))
+ return;
+
+ // Add output dependence to the next nearest defs of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ LaneBitmask LaneMask = DefLaneMask;
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for other lanes.
+ if ((V2SU.LaneMask & LaneMask).none())
+ continue;
+ // Add an output dependence.
+ SUnit *DefSU = V2SU.SU;
+ // Ignore additional defs of the same lanes in one instruction. This can
+ // happen because lanemasks are shared for targets with too many
+    // subregisters. We also use some representation tricks/hacks where we
+    // add super-register defs/uses to imply that although we only access
+    // parts of the register, we care about the full one.
+ if (DefSU == SU)
+ continue;
+ SDep Dep(SU, SDep::Output, Reg);
+ Dep.setLatency(
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ DefSU->addPred(Dep);
+
+    // Update current definition. This can get tricky if the previous def
+    // covered a bigger lanemask. We then have to shrink it and create a new
+    // VReg2SUnit for the non-overlapping part.
+ LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
+ LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
+ V2SU.SU = SU;
+ V2SU.LaneMask = OverlapMask;
+ if (NonOverlapMask.any())
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
+ }
+ // If there was no CurrentVRegDefs entry for some lanes yet, create one.
+ if (LaneMask.any())
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
+}
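+
+// Worked example (illustrative, with hypothetical lane masks): if an earlier
+// def of the vreg sits in CurrentVRegDefs with LaneMask 0xF and the current
+// instruction redefines only lanes 0x3, then OverlapMask == 0x3 and
+// NonOverlapMask == 0xC. The existing entry is retargeted to the new SU for
+// the overlapping lanes, while a fresh VReg2SUnit keeps the old SU as the
+// live def of the untouched 0xC lanes.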
+
+/// Adds a register data dependency if the instruction that defines the
+/// virtual register used at OperIdx is mapped to an SUnit. Adds a register
+/// antidependency from this SUnit to instructions that occur later in the same
+/// scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ assert(!MI->isDebugOrPseudoInstr());
+
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+ Register Reg = MO.getReg();
+
+ // Remember the use. Data dependencies will be added when we find the def.
+ LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
+ : LaneBitmask::getAll();
+ CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
+
+ // Add antidependences to the following defs of the vreg.
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for unrelated lanes.
+ LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
+ if ((PrevDefLaneMask & LaneMask).none())
+ continue;
+ if (V2SU.SU == SU)
+ continue;
+
+ V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
+ }
+}
+
+/// Returns true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(MachineInstr *MI) {
+ return MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
+}
+
+void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
+                                           unsigned Latency) {
+ if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
+ SDep Dep(SUa, SDep::MayAliasMem);
+ Dep.setLatency(Latency);
+ SUb->addPred(Dep);
+ }
+}
+
+/// Creates an SUnit for each real instruction, numbered in top-down
+/// topological order. The instruction order A < B implies that no edge exists
+/// from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
+ SUnits.reserve(NumRegionInstrs);
+
+ for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+
+ SUnit *SU = newSUnit(&MI);
+ MISUnitMap[&MI] = SU;
+
+ SU->isCall = MI.isCall();
+ SU->isCommutable = MI.isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+
+ // If this SUnit uses a reserved or unbuffered resource, mark it as such.
+ //
+ // Reserved resources block an instruction from issuing and stall the
+ // entire pipeline. These are identified by BufferSize=0.
+ //
+ // Unbuffered resources prevent execution of subsequent instructions that
+ // require the same resources. This is used for in-order execution pipelines
+ // within an out-of-order core. These are identified by BufferSize=1.
+ if (SchedModel.hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (const MCWriteProcResEntry &PRE :
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC))) {
+ switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
+ case 0:
+ SU->hasReservedResource = true;
+ break;
+ case 1:
+ SU->isUnbuffered = true;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
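+
+// For example, in a hypothetical machine model a divider declared with
+// BufferSize = 0 makes every SU writing it set hasReservedResource, an
+// in-order issue port declared with BufferSize = 1 makes its writers
+// isUnbuffered, and resources with larger buffers leave the SU untouched.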
+
+class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
+ /// Current total number of SUs in map.
+ unsigned NumNodes = 0;
+
+ /// 1 for loads, 0 for stores. (see comment in SUList)
+ unsigned TrueMemOrderLatency;
+
+public:
+ Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
+
+ /// To keep NumNodes up to date, insert() is used instead of
+ /// this operator w/ push_back().
+  ValueType &operator[](const SUList &Key) {
+    llvm_unreachable("Don't use. Use insert() instead.");
+  }
+
+ /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
+ /// reduce().
+ void inline insert(SUnit *SU, ValueType V) {
+ MapVector::operator[](V).push_back(SU);
+ NumNodes++;
+ }
+
+ /// Clears the list of SUs mapped to V.
+ void inline clearList(ValueType V) {
+ iterator Itr = find(V);
+ if (Itr != end()) {
+ assert(NumNodes >= Itr->second.size());
+ NumNodes -= Itr->second.size();
+
+ Itr->second.clear();
+ }
+ }
+
+ /// Clears map from all contents.
+ void clear() {
+ MapVector<ValueType, SUList>::clear();
+ NumNodes = 0;
+ }
+
+ unsigned inline size() const { return NumNodes; }
+
+ /// Counts the number of SUs in this map after a reduction.
+ void reComputeSize() {
+ NumNodes = 0;
+ for (auto &I : *this)
+ NumNodes += I.second.size();
+ }
+
+ unsigned inline getTrueMemOrderLatency() const {
+ return TrueMemOrderLatency;
+ }
+
+ void dump();
+};
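+
+// Minimal usage sketch (illustrative only, not from the original source; SU0,
+// SU1 and V stand for an SUnit*, another SUnit* and a ValueType that are
+// already in scope):
+//   Value2SUsMap Map(/*TrueMemOrderLatency=*/1);
+//   Map.insert(SU0, V);
+//   Map.insert(SU1, V);   // size() == 2: it counts SUs, not keys
+//   Map.clearList(V);     // size() == 0, but the key V stays in the map
+//   Map.reComputeSize();  // recount after external edits such as remove_if()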
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap) {
+ for (auto &I : Val2SUsMap)
+ addChainDependencies(SU, I.second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap,
+ ValueType V) {
+ Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
+ if (Itr != Val2SUsMap.end())
+ addChainDependencies(SU, Itr->second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
+ assert(BarrierChain != nullptr);
+
+ for (auto &[V, SUs] : map) {
+ (void)V;
+ for (auto *SU : SUs)
+ SU->addPredBarrier(BarrierChain);
+ }
+ map.clear();
+}
+
+void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
+ assert(BarrierChain != nullptr);
+
+ // Go through all lists of SUs.
+ for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
+ Value2SUsMap::iterator CurrItr = I++;
+ SUList &sus = CurrItr->second;
+ SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
+ for (; SUItr != SUEE; ++SUItr) {
+ // Stop on BarrierChain or any instruction above it.
+ if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
+ break;
+
+ (*SUItr)->addPredBarrier(BarrierChain);
+ }
+
+    // Also remove the BarrierChain from the list if present.
+ if (SUItr != SUEE && *SUItr == BarrierChain)
+ SUItr++;
+
+ // Remove all SUs that are now successors of BarrierChain.
+ if (SUItr != sus.begin())
+ sus.erase(sus.begin(), SUItr);
+ }
+
+ // Remove all entries with empty su lists.
+ map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
+ return (mapEntry.second.empty()); });
+
+ // Recompute the size of the map (NumNodes).
+ map.reComputeSize();
+}
+
+void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
+ RegPressureTracker *RPTracker,
+ PressureDiffs *PDiffs,
+ LiveIntervals *LIS,
+ bool TrackLaneMasks) {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
+ : ST.useAA();
+ AAForDep = UseAA ? AA : nullptr;
+
+ BarrierChain = nullptr;
+
+ this->TrackLaneMasks = TrackLaneMasks;
+ MISUnitMap.clear();
+ ScheduleDAG::clearDAG();
+
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
+ if (PDiffs)
+ PDiffs->init(SUnits.size());
+
+ // We build scheduling units by walking a block's instruction list
+ // from bottom to top.
+
+  // Each MI's memory operand(s) is analyzed into a list of underlying
+ // objects. The SU is then inserted in the SUList(s) mapped from the
+ // Value(s). Each Value thus gets mapped to lists of SUs depending
+ // on it, stores and loads kept separately. Two SUs are trivially
+ // non-aliasing if they both depend on only identified Values and do
+ // not share any common Value.
+ Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+
+ // Certain memory accesses are known to not alias any SU in Stores
+ // or Loads, and have therefore their own 'NonAlias'
+  // domain. E.g. spill / reload instructions never alias LLVM IR
+ // Values. It would be nice to assume that this type of memory
+ // accesses always have a proper memory operand modelling, and are
+ // therefore never unanalyzable, but this is conservatively not
+ // done.
+ Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
+
+ // Track all instructions that may raise floating-point exceptions.
+  // These do not depend on one another (or on normal loads or stores), but
+ // must not be rescheduled across global barriers. Note that we don't
+ // really need a "map" here since we don't track those MIs by value;
+ // using the same Value2SUsMap data type here is simply a matter of
+ // convenience.
+ Value2SUsMap FPExceptions;
+
+ // Remove any stale debug info; sometimes BuildSchedGraph is called again
+ // without emitting the info from the previous call.
+ DbgValues.clear();
+ FirstDbgValue = nullptr;
+
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setUniverse(TRI->getNumRegs());
+ Uses.setUniverse(TRI->getNumRegs());
+
+ assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
+ assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ CurrentVRegDefs.setUniverse(NumVirtRegs);
+ CurrentVRegUses.setUniverse(NumVirtRegs);
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ addSchedBarrierDeps();
+
+ // Walk the list of instructions, from bottom moving up.
+ MachineInstr *DbgMI = nullptr;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (DbgMI) {
+ DbgValues.emplace_back(DbgMI, &MI);
+ DbgMI = nullptr;
+ }
+
+ if (MI.isDebugValue() || MI.isDebugPHI()) {
+ DbgMI = &MI;
+ continue;
+ }
+
+ if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
+ continue;
+
+ SUnit *SU = MISUnitMap[&MI];
+ assert(SU && "No SUnit mapped to this MI");
+
+ if (RPTracker) {
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
+ }
+ if (PDiffs != nullptr)
+ PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
+
+ if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
+ RPTracker->recedeSkipDebugValues();
+ assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
+ RPTracker->recede(RegOpers);
+ }
+
+ assert(
+ (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
+ "Cannot schedule terminators or labels!");
+
+ // Add register-based dependencies (data, anti, and output).
+ // For some instructions (calls, returns, inline-asm, etc.) there can
+ // be explicit uses and implicit defs, in which case the use will appear
+ // on the operand list before the def. Do two passes over the operand
+ // list to make sure that defs are processed before any uses.
+ bool HasVRegDef = false;
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ addPhysRegDeps(SU, j);
+ } else if (Reg.isVirtual()) {
+ HasVRegDef = true;
+ addVRegDefDeps(SU, j);
+ }
+ }
+ // Now process all uses.
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ // Only look at use operands.
+ // We do not need to check for MO.readsReg() here because subsequent
+ // subregister defs will get output dependence edges and need no
+ // additional use dependencies.
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ addPhysRegDeps(SU, j);
+ } else if (Reg.isVirtual() && MO.readsReg()) {
+ addVRegUseDeps(SU, j);
+ }
+ }
+
+ // If we haven't seen any uses in this scheduling region, create a
+ // dependence edge to ExitSU to model the live-out latency. This is required
+ // for vreg defs with no in-region use, and prefetches with no vreg def.
+ //
+ // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+ // check currently relies on being called before adding chain deps.
+ if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
+ SDep Dep(SU, SDep::Artificial);
+ Dep.setLatency(SU->Latency - 1);
+ ExitSU.addPred(Dep);
+ }
+
+ // Add memory dependencies (Note: isStoreToStackSlot and
+    // isLoadFromStackSlot are not usable after stack slots are lowered to
+ // actual addresses).
+
+ // This is a barrier event that acts as a pivotal node in the DAG.
+ if (isGlobalMemoryObject(&MI)) {
+
+ // Become the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+ BarrierChain = SU;
+
+ LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+
+ // Add dependencies against everything below it and clear maps.
+ addBarrierChain(Stores);
+ addBarrierChain(Loads);
+ addBarrierChain(NonAliasStores);
+ addBarrierChain(NonAliasLoads);
+ addBarrierChain(FPExceptions);
+
+ continue;
+ }
+
+ // Instructions that may raise FP exceptions may not be moved
+ // across any global barriers.
+ if (MI.mayRaiseFPException()) {
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ FPExceptions.insert(SU, UnknownValue);
+
+ if (FPExceptions.size() >= HugeRegion) {
+ LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
+ Value2SUsMap empty;
+ reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
+ }
+ }
+
+ // If it's not a store or a variant load, we're done.
+ if (!MI.mayStore() &&
+ !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
+ continue;
+
+    // Always add a dependency edge to BarrierChain if present.
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ // Find the underlying objects for MI. The Objs vector is either
+ // empty, or filled with the Values of memory locations which this
+ // SU depends on.
+ UnderlyingObjectsVector Objs;
+ bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
+ MF.getDataLayout());
+
+ if (MI.mayStore()) {
+ if (!ObjsFound) {
+ // An unknown store depends on all stores and loads.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+ addChainDependencies(SU, Loads);
+ addChainDependencies(SU, NonAliasLoads);
+
+ // Map this store to 'UnknownValue'.
+ Stores.insert(SU, UnknownValue);
+ } else {
+ // Add precise dependencies against all previously seen memory
+ // accesses mapped to the same Value(s).
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add dependencies to previous stores and loads mapped to V.
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+ addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
+ }
+        // Update the store map after all chains have been added to avoid
+        // adding a self-loop edge if multiple underlying objects are present.
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Map this store to V.
+ (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
+ }
+        // The store may have dependencies on unanalyzable loads and
+        // stores.
+ addChainDependencies(SU, Loads, UnknownValue);
+ addChainDependencies(SU, Stores, UnknownValue);
+ }
+ } else { // SU is a load.
+ if (!ObjsFound) {
+ // An unknown load depends on all stores.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+
+ Loads.insert(SU, UnknownValue);
+ } else {
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add precise dependencies against all previously seen stores
+ // mapping to the same Value(s).
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+
+ // Map this load to V.
+ (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
+ }
+        // The load may have dependencies on unanalyzable stores.
+ addChainDependencies(SU, Stores, UnknownValue);
+ }
+ }
+
+ // Reduce maps if they grow huge.
+ if (Stores.size() + Loads.size() >= HugeRegion) {
+ LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
+ reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
+ }
+ if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
+ LLVM_DEBUG(
+ dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
+ reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
+ }
+ }
+
+ if (DbgMI)
+ FirstDbgValue = DbgMI;
+
+ Defs.clear();
+ Uses.clear();
+ CurrentVRegDefs.clear();
+ CurrentVRegUses.clear();
+
+ Topo.MarkDirty();
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
+ PSV->printCustom(OS);
+ return OS;
+}
+
+void ScheduleDAGInstrs::Value2SUsMap::dump() {
+ for (const auto &[ValType, SUs] : *this) {
+ if (isa<const Value *>(ValType)) {
+ const Value *V = cast<const Value *>(ValType);
+ if (isa<UndefValue>(V))
+ dbgs() << "Unknown";
+ else
+ V->printAsOperand(dbgs());
+ } else if (isa<const PseudoSourceValue *>(ValType))
+ dbgs() << cast<const PseudoSourceValue *>(ValType);
+ else
+ llvm_unreachable("Unknown Value type.");
+
+ dbgs() << " : ";
+ dumpSUList(SUs);
+ }
+}
+
+void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
+ Value2SUsMap &loads, unsigned N) {
+ LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
+ dbgs() << "Loading SUnits:\n"; loads.dump());
+
+ // Insert all SU's NodeNums into a vector and sort it.
+ std::vector<unsigned> NodeNums;
+ NodeNums.reserve(stores.size() + loads.size());
+ for (const auto &[V, SUs] : stores) {
+ (void)V;
+ for (const auto *SU : SUs)
+ NodeNums.push_back(SU->NodeNum);
+ }
+ for (const auto &[V, SUs] : loads) {
+ (void)V;
+ for (const auto *SU : SUs)
+ NodeNums.push_back(SU->NodeNum);
+ }
+ llvm::sort(NodeNums);
+
+  // The last N elements in NodeNums will be removed, and the SU with the
+  // lowest NodeNum among them will become the new BarrierChain, so that the
+  // SUs not yet seen get a dependency on the removed SUs.
+ assert(N <= NodeNums.size());
+ SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
+ if (BarrierChain) {
+ // The aliasing and non-aliasing maps reduce independently of each
+ // other, but share a common BarrierChain. Check if the
+ // newBarrierChain is above the former one. If it is not, it may
+ // introduce a loop to use newBarrierChain, so keep the old one.
+ if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
+ BarrierChain->addPredBarrier(newBarrierChain);
+ BarrierChain = newBarrierChain;
+ LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+    } else
+ LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+  } else
+ BarrierChain = newBarrierChain;
+
+ insertBarrierChain(stores);
+ insertBarrierChain(loads);
+
+ LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
+ dbgs() << "Loading SUnits:\n"; loads.dump());
+}
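+
+// Worked example (illustrative, assuming no BarrierChain exists yet): with the
+// sorted NodeNums {3, 5, 8, 9, 12, 14} and N == 2, newBarrierChain is SU(12),
+// the lowest of the last two. insertBarrierChain then gives SU(14) a barrier
+// predecessor edge on SU(12), drops both from the maps, and memory SUs seen
+// later in the bottom-up walk chain to SU(12) instead of to each removed node.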
+
+static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
+ MachineInstr &MI, bool addToLiveRegs) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ // Things that are available after the instruction are killed by it.
+ bool IsKill = LiveRegs.available(MRI, Reg);
+ MO.setIsKill(IsKill);
+ if (addToLiveRegs)
+ LiveRegs.addReg(Reg);
+ }
+}
+
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
+
+ LiveRegs.init(*TRI);
+ LiveRegs.addLiveOuts(MBB);
+
+ // Examine block from end to start...
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+
+    // Update liveness. Registers that are defined but not used by this
+    // instruction are now dead. Remove the register and all of its subregs,
+    // as they are completely defined.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+ if (MO.isReg()) {
+ if (!MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LiveRegs.removeReg(Reg);
+ } else if (MO.isRegMask()) {
+ LiveRegs.removeRegsInMask(MO);
+ }
+ }
+
+ // If there is a bundle header fix it up first.
+ if (!MI.isBundled()) {
+ toggleKills(MRI, LiveRegs, MI, true);
+ } else {
+ MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
+ if (MI.isBundle())
+ toggleKills(MRI, LiveRegs, MI, false);
+
+      // Some targets make the (questionable) assumption that the instructions
+ // inside the bundle are ordered and consequently only the last use of
+ // a register inside the bundle can kill it.
+ MachineBasicBlock::instr_iterator I = std::next(Bundle);
+ while (I->isBundledWithSucc())
+ ++I;
+ do {
+ if (!I->isDebugOrPseudoInstr())
+ toggleKills(MRI, LiveRegs, *I, true);
+ --I;
+ } while (I != Bundle);
+ }
+ }
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dumpNodeName(SU);
+ if (SchedPrintCycles)
+ dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
+ << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
+ dbgs() << ": ";
+ SU.getInstr()->dump();
+#endif
+}
+
+void ScheduleDAGInstrs::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss, /*IsStandalone=*/true);
+ return oss.str();
+}
+
+/// Return the basic block label. It is not necessarily unique because a block
+/// contains multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
+}
+
+bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+ return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
+bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+ SchedDFSResult &R;
+
+ /// Join DAG nodes into equivalence classes by their subtree.
+ IntEqClasses SubtreeClasses;
+ /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+ std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
+
+ struct RootData {
+ unsigned NodeID;
+ unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
+ unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
+ /// children.
+
+ RootData(unsigned id): NodeID(id),
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
+
+ unsigned getSparseSetIndex() const { return NodeID; }
+ };
+
+ SparseSet<RootData> RootSet;
+
+public:
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+ RootSet.setUniverse(R.DFSNodeData.size());
+ }
+
+  /// Returns true if this node has been visited by the DFS traversal.
+ ///
+ /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
+ /// ID. Later, SubtreeID is updated but remains valid.
+ bool isVisited(const SUnit *SU) const {
+ return R.DFSNodeData[SU->NodeNum].SubtreeID
+ != SchedDFSResult::InvalidSubtreeID;
+ }
+
+ /// Initializes this node's instruction count. We don't need to flag the node
+ /// visited until visitPostorder because the DAG cannot have cycles.
+ void visitPreorder(const SUnit *SU) {
+ R.DFSNodeData[SU->NodeNum].InstrCount =
+ SU->getInstr()->isTransient() ? 0 : 1;
+ }
+
+ /// Called once for each node after all predecessors are visited. Revisit this
+ /// node's predecessors and potentially join them now that we know the ILP of
+ /// the other predecessors.
+ void visitPostorderNode(const SUnit *SU) {
+ // Mark this node as the root of a subtree. It may be joined with its
+ // successors later.
+ R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
+ RootData RData(SU->NodeNum);
+ RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+
+ // If any predecessors are still in their own subtree, they either cannot be
+ // joined or are large enough to remain separate. If this parent node's
+ // total instruction count is not greater than a child subtree by at least
+ // the subtree limit, then try to join it now since splitting subtrees is
+ // only useful if multiple high-pressure paths are possible.
+ unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
+ for (const SDep &PredDep : SU->Preds) {
+ if (PredDep.getKind() != SDep::Data)
+ continue;
+ unsigned PredNum = PredDep.getSUnit()->NodeNum;
+ if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
+ joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
+
+ // Either link or merge the TreeData entry from the child to the parent.
+ if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+ // If the predecessor's parent is invalid, this is a tree edge and the
+ // current node is the parent.
+ if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+ RootSet[PredNum].ParentNodeID = SU->NodeNum;
+ }
+ else if (RootSet.count(PredNum)) {
+ // The predecessor is not a root, but is still in the root set. This
+ // must be the new parent that it was just joined to. Note that
+ // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+ // set to the original parent.
+ RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
+ RootSet.erase(PredNum);
+ }
+ }
+ RootSet[SU->NodeNum] = RData;
+ }
+
+  /// Called once for each tree edge after calling visitPostorderNode on
+ /// the predecessor. Increment the parent node's instruction count and
+ /// preemptively join this subtree to its parent's if it is small enough.
+ void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
+ R.DFSNodeData[Succ->NodeNum].InstrCount
+ += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
+ joinPredSubtree(PredDep, Succ);
+ }
+
+ /// Adds a connection for cross edges.
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
+ ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
+ }
+
+ /// Sets each node's subtree ID to the representative ID and record
+ /// connections between trees.
+ void finalize() {
+ SubtreeClasses.compress();
+ R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
+ assert(SubtreeClasses.getNumClasses() == RootSet.size()
+ && "number of roots should match trees");
+ for (const RootData &Root : RootSet) {
+ unsigned TreeID = SubtreeClasses[Root.NodeID];
+ if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
+ // Note that SubInstrCount may be greater than InstrCount if we joined
+ // subtrees across a cross edge. InstrCount will be attributed to the
+ // original parent, while SubInstrCount will be attributed to the joined
+ // parent.
+ }
+ R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+ R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+ LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
+ R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
+ LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
+ }
+ for (const auto &[Pred, Succ] : ConnectionPairs) {
+ unsigned PredTree = SubtreeClasses[Pred->NodeNum];
+ unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
+ if (PredTree == SuccTree)
+ continue;
+ unsigned Depth = Pred->getDepth();
+ addConnection(PredTree, SuccTree, Depth);
+ addConnection(SuccTree, PredTree, Depth);
+ }
+ }
+
+protected:
+ /// Joins the predecessor subtree with the successor that is its DFS parent.
+ /// Applies some heuristics before joining.
+ bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
+ bool CheckLimit = true) {
+ assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
+
+ // Check if the predecessor is already joined.
+ const SUnit *PredSU = PredDep.getSUnit();
+ unsigned PredNum = PredSU->NodeNum;
+ if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
+ return false;
+
+ // Four is the magic number of successors before a node is considered a
+ // pinch point.
+ unsigned NumDataSucs = 0;
+ for (const SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep.getKind() == SDep::Data) {
+ if (++NumDataSucs >= 4)
+ return false;
+ }
+ }
+ if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
+ return false;
+ R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
+ SubtreeClasses.join(Succ->NodeNum, PredNum);
+ return true;
+ }
+
+ /// Called by finalize() to record a connection between trees.
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+ if (!Depth)
+ return;
+
+ do {
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SchedDFSResult::Connection &C : Connections) {
+ if (C.TreeID == ToTree) {
+ C.Level = std::max(C.Level, Depth);
+ return;
+ }
+ }
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ FromTree = R.DFSTreeData[FromTree].ParentTreeID;
+ } while (FromTree != SchedDFSResult::InvalidSubtreeID);
+ }
+};
+
+} // end namespace llvm
+
+namespace {
+
+/// Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
+
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.emplace_back(SU, SU->Preds.begin());
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ const SDep *backtrack() {
+ DFSStack.pop_back();
+ return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
+ }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+
+} // end anonymous namespace
+
+static bool hasDataSucc(const SUnit *SU) {
+ for (const SDep &SuccDep : SU->Succs) {
+ if (SuccDep.getKind() == SDep::Data &&
+ !SuccDep.getSUnit()->isBoundaryNode())
+ return true;
+ }
+ return false;
+}
+
+/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemented");
+
+ SchedDFSImpl Impl(*this);
+ for (const SUnit &SU : SUnits) {
+ if (Impl.isVisited(&SU) || hasDataSucc(&SU))
+ continue;
+
+ SchedDAGReverseDFS DFS;
+ Impl.visitPreorder(&SU);
+ DFS.follow(&SU);
+ while (true) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SDep &PredDep = *DFS.getPred();
+ DFS.advance();
+ // Ignore non-data edges.
+ if (PredDep.getKind() != SDep::Data
+ || PredDep.getSUnit()->isBoundaryNode()) {
+ continue;
+ }
+ // An already visited edge is a cross edge, assuming an acyclic DAG.
+ if (Impl.isVisited(PredDep.getSUnit())) {
+ Impl.visitCrossEdge(PredDep, DFS.getCurr());
+ continue;
+ }
+ Impl.visitPreorder(PredDep.getSUnit());
+ DFS.follow(PredDep.getSUnit());
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ const SUnit *Child = DFS.getCurr();
+ const SDep *PredDep = DFS.backtrack();
+ Impl.visitPostorderNode(Child);
+ if (PredDep)
+ Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
+ if (DFS.isComplete())
+ break;
+ }
+ }
+ Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+ for (const Connection &C : SubtreeConnections[SubtreeID]) {
+ SubtreeConnectLevels[C.TreeID] =
+ std::max(SubtreeConnectLevels[C.TreeID], C.Level);
+ LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
+ << SubtreeConnectLevels[C.TreeID] << '\n');
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
+ OS << InstrCount << " / " << Length << " = ";
+ if (!Length)
+ OS << "BADILP";
+ else
+ OS << format("%g", ((double)InstrCount / Length));
+}
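+
+// For instance, an ILPValue with InstrCount == 6 and Length == 3 prints as
+// "6 / 3 = 2", while Length == 0 prints "6 / 0 = BADILP".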
+
+LLVM_DUMP_METHOD void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 000000000000..e7b14944acfe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,92 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return std::string(G->MF.getName());
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static std::string getNodeIdentifierLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+ OS << static_cast<const void *>(Node);
+ return R;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 000000000000..209c6d81f602
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,241 @@
+//===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE DebugType
+
+ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType)
+ : DebugType(ParentDebugType), ItinData(II), DAG(SchedDAG) {
+ (void)DebugType;
+ // Determine the maximum depth of any itinerary. This determines the depth of
+ // the scoreboard. We always make the scoreboard at least 1 cycle deep to
+ // avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ // Don't set MaxLookAhead until we find at least one nonzero stage.
+ // This way, an itinerary with no stages has MaxLookAhead==0, which
+ // completely bypasses the scoreboard hazard logic.
+ MaxLookAhead = ScoreboardDepth;
+ }
+ }
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ // If MaxLookAhead is not set above, then we are not enabled.
+ if (!isEnabled())
+ LLVM_DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ else {
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel.IssueWidth;
+ LLVM_DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+ }
+}
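+
+// Example of the depth computation above (illustrative): an itinerary whose
+// deepest stage pattern spans 5 cycles rounds ScoreboardDepth up through
+// 1 -> 2 -> 4 -> 8, so both scoreboards end up 8 entries deep and MaxLookAhead
+// becomes 8. An itinerary with no stages never enters the doubling loop,
+// leaves MaxLookAhead at 0, and thereby disables the recognizer.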
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ InstrStage::FuncUnits FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = std::numeric_limits<InstrStage::FuncUnits>::digits - 1;
+ j >= 0; j--)
+ dbgs() << ((FUs & (1ULL << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+#endif
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ InstrStage::FuncUnits freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ [[fallthrough]];
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
+ LLVM_DEBUG(DAG->dumpNode(*SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME it would be more accurate to reserve
+    // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ InstrStage::FuncUnits freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ [[fallthrough]];
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+ // reduce to a single unit
+ InstrStage::FuncUnits freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
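+      // The loop above repeatedly clears the lowest set bit, so freeUnit ends
+      // up holding only the highest set bit of the original freeUnits mask
+      // (e.g. 0b0110 reduces to 0b0100); exactly one unit is reserved below.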
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ LLVM_DEBUG(ReservedScoreboard.dump());
+ LLVM_DEBUG(RequiredScoreboard.dump());
+}
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
new file mode 100644
index 000000000000..30d959704745
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -0,0 +1,1046 @@
+//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts selects to conditional jumps when profitable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ScaledNumber.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <memory>
+#include <queue>
+#include <stack>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "select-optimize"
+
+STATISTIC(NumSelectOptAnalyzed,
+ "Number of select groups considered for conversion to branch");
+STATISTIC(NumSelectConvertedExpColdOperand,
+ "Number of select groups converted due to expensive cold operand");
+STATISTIC(NumSelectConvertedHighPred,
+ "Number of select groups converted due to high-predictability");
+STATISTIC(NumSelectUnPred,
+ "Number of select groups not converted due to unpredictability");
+STATISTIC(NumSelectColdBB,
+ "Number of select groups not converted due to cold basic block");
+STATISTIC(NumSelectConvertedLoop,
+ "Number of select groups converted due to loop-level analysis");
+STATISTIC(NumSelectsConverted, "Number of selects converted");
+
+static cl::opt<unsigned> ColdOperandThreshold(
+ "cold-operand-threshold",
+ cl::desc("Maximum frequency of path for an operand to be considered cold."),
+ cl::init(20), cl::Hidden);
+
+static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
+ "cold-operand-max-cost-multiplier",
+ cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
+ "slice of a cold operand to be considered inexpensive."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned>
+ GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
+ cl::desc("Gradient gain threshold (%)."),
+ cl::init(25), cl::Hidden);
+
+static cl::opt<unsigned>
+ GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
+ cl::desc("Minimum gain per loop (in cycles) threshold."),
+ cl::init(4), cl::Hidden);
+
+static cl::opt<unsigned> GainRelativeThreshold(
+ "select-opti-loop-relative-gain-threshold",
+ cl::desc(
+ "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
+ cl::init(8), cl::Hidden);
+
+static cl::opt<unsigned> MispredictDefaultRate(
+ "mispredict-default-rate", cl::Hidden, cl::init(25),
+ cl::desc("Default mispredict rate (initialized to 25%)."));
+
+static cl::opt<bool>
+ DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable loop-level heuristics."));
+
+namespace {
+
+class SelectOptimize : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+ const TargetSubtargetInfo *TSI = nullptr;
+ const TargetLowering *TLI = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ const LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI = nullptr;
+ OptimizationRemarkEmitter *ORE = nullptr;
+ TargetSchedModel TSchedModel;
+
+public:
+ static char ID;
+
+ SelectOptimize() : FunctionPass(ID) {
+ initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+
+private:
+ // Select groups consist of consecutive select instructions with the same
+ // condition.
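+ // An illustrative IR sketch (hypothetical, not from this pass): the two
+ // consecutive selects below share the condition %cmp and would form one
+ // select group:
+ //   %cmp = icmp slt i32 %a, %b
+ //   %x = select i1 %cmp, i32 %c, i32 %d
+ //   %y = select i1 %cmp, i32 %e, i32 %f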
+ using SelectGroup = SmallVector<SelectInst *, 2>;
+ using SelectGroups = SmallVector<SelectGroup, 2>;
+
+ using Scaled64 = ScaledNumber<uint64_t>;
+
+ struct CostInfo {
+ /// Predicated cost (with selects as conditional moves).
+ Scaled64 PredCost;
+ /// Non-predicated cost (with selects converted to branches).
+ Scaled64 NonPredCost;
+ };
+
+ // Converts select instructions of a function to conditional jumps when deemed
+ // profitable. Returns true if at least one select was converted.
+ bool optimizeSelects(Function &F);
+
+ // Heuristics for determining which select instructions can be profitably
+ // converted to branches. Separate heuristics apply to selects in inner-most
+ // loops and to the rest of the code (base heuristics for non-inner-most-loop
+ // regions).
+ void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
+ void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
+
+ // Converts to branches the select groups that were deemed
+ // profitable-to-convert.
+ void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
+
+ // Splits selects of a given basic block into select groups.
+ void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
+
+ // Determines which select groups are profitable to convert to branches
+ // (base and inner-most-loop heuristics).
+ void findProfitableSIGroupsBase(SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups);
+ void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups);
+
+ // Determines if a select group should be converted to a branch (base
+ // heuristics).
+ bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
+
+ // Returns true if there are expensive instructions in the cold value
+ // operand's (if any) dependence slice of any of the selects of the given
+ // group.
+ bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
+
+ // For a given source instruction, collect its backwards dependence slice
+ // consisting of instructions exclusively computed for producing the operands
+ // of the source instruction.
+ void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
+ Instruction *SI, bool ForSinking = false);
+
+ // Returns true if the condition of the select is highly predictable.
+ bool isSelectHighlyPredictable(const SelectInst *SI);
+
+ // Loop-level checks to determine if a non-predicated version (with branches)
+ // of the given loop is more profitable than its predicated version.
+ bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
+
+ // Computes instruction and loop-critical-path costs for both the predicated
+ // and non-predicated version of the given loop.
+ bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
+ DenseMap<const Instruction *, CostInfo> &InstCostMap,
+ CostInfo *LoopCost);
+
+ // Returns a set of all the select instructions in the given select groups.
+ SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
+
+ // Returns the latency cost of a given instruction.
+ std::optional<uint64_t> computeInstCost(const Instruction *I);
+
+ // Returns the misprediction cost of a given select when converted to branch.
+ Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
+
+ // Returns the cost of a branch when the prediction is correct.
+ Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI);
+
+ // Returns true if the target architecture supports lowering a given select.
+ bool isSelectKindSupported(SelectInst *SI);
+};
+} // namespace
+
+char SelectOptimize::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+ false)
+
+FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
+
+bool SelectOptimize::runOnFunction(Function &F) {
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ TSI = TM->getSubtargetImpl(F);
+ TLI = TSI->getTargetLowering();
+
+ // If none of the select types is supported then skip this pass.
+ // This is an optimization pass. Legality issues will be handled by
+ // instruction selection.
+ if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+ !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+ !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ if (!TTI->enableSelectOptimize())
+ return false;
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ TSchedModel.init(TSI);
+
+ // When optimizing for size, selects are preferable over branches.
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+ return false;
+
+ return optimizeSelects(F);
+}
+
+bool SelectOptimize::optimizeSelects(Function &F) {
+ // Determine which select groups are profitable to convert to branches.
+ SelectGroups ProfSIGroups;
+ // Base heuristics apply only to non-loops and outer loops.
+ optimizeSelectsBase(F, ProfSIGroups);
+ // Separate heuristics for inner-most loops.
+ optimizeSelectsInnerLoops(F, ProfSIGroups);
+
+ // Convert to branches the select groups that were deemed
+ // profitable-to-convert.
+ convertProfitableSIGroups(ProfSIGroups);
+
+ // Code is modified if at least one select group was converted.
+ return !ProfSIGroups.empty();
+}
+
+void SelectOptimize::optimizeSelectsBase(Function &F,
+ SelectGroups &ProfSIGroups) {
+ // Collect all the select groups.
+ SelectGroups SIGroups;
+ for (BasicBlock &BB : F) {
+ // Base heuristics apply only to non-loops and outer loops.
+ Loop *L = LI->getLoopFor(&BB);
+ if (L && L->isInnermost())
+ continue;
+ collectSelectGroups(BB, SIGroups);
+ }
+
+ // Determine which select groups are profitable to convert to branches.
+ findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
+}
+
+void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
+ SelectGroups &ProfSIGroups) {
+ SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
+ // Need to check size on each iteration as we accumulate child loops.
+ for (unsigned long i = 0; i < Loops.size(); ++i)
+ for (Loop *ChildL : Loops[i]->getSubLoops())
+ Loops.push_back(ChildL);
+
+ for (Loop *L : Loops) {
+ if (!L->isInnermost())
+ continue;
+
+ SelectGroups SIGroups;
+ for (BasicBlock *BB : L->getBlocks())
+ collectSelectGroups(*BB, SIGroups);
+
+ findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
+ }
+}
+
+/// If \p isTrue is true, return the true value of \p SI, otherwise return
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
+ Value *V = nullptr;
+ for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+ DefSI = dyn_cast<SelectInst>(V)) {
+ assert(DefSI->getCondition() == SI->getCondition() &&
+ "The condition of DefSI does not match with SI");
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ }
+ assert(V && "Failed to get select true/false value");
+ return V;
+}
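+// For illustration (hypothetical IR): given the group
+//   %s1 = select i1 %c, i32 %a, i32 %b
+//   %s2 = select i1 %c, i32 %s1, i32 %d
+// getTrueOrFalseValue(%s2, /*isTrue=*/true, {%s1, %s2}) looks through %s1
+// and returns %a, which becomes the PHI operand for the true block.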
+
+void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+ for (SelectGroup &ASI : ProfSIGroups) {
+ // The code transformation here is a modified version of the sinking
+ // transformation in CodeGenPrepare::optimizeSelectInst with a more
+ // aggressive strategy of which instructions to sink.
+ //
+ // TODO: eliminate the redundancy of logic transforming selects to branches
+ // by removing CodeGenPrepare::optimizeSelectInst and optimizing selects
+ // here for all cases (with and without profile information).
+
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %cmp.frozen = freeze %cmp
+ // br i1 %cmp.frozen, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // %cmp should be frozen, otherwise it may introduce undefined behavior.
+ // In addition, we may sink instructions that produce %c or %d into the
+ // destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
+ // Find all the instructions that can be soundly sunk to the true/false
+ // blocks. These are instructions that are computed solely for producing the
+ // operands of the select instructions in the group and can be sunk without
+ // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
+ // with side effects).
+ SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
+ typedef std::stack<Instruction *>::size_type StackSizeType;
+ StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
+ for (SelectInst *SI : ASI) {
+ // For each select, compute the sinkable dependence chains of the true and
+ // false operands.
+ if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
+ std::stack<Instruction *> TrueSlice;
+ getExclBackwardsSlice(TI, TrueSlice, SI, true);
+ maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
+ TrueSlices.push_back(TrueSlice);
+ }
+ if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
+ std::stack<Instruction *> FalseSlice;
+ getExclBackwardsSlice(FI, FalseSlice, SI, true);
+ maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
+ FalseSlices.push_back(FalseSlice);
+ }
+ }
+ // In the case of multiple select instructions in the same group, the order
+ // of non-dependent instructions (instructions of different dependence
+ // slices) in the true/false blocks appears to affect performance.
+ // Interleaving the slices seems, experimentally, to be the optimal approach.
+ // This interleaved scheduling allows for more ILP (with the natural downside
+ // of slightly increasing register pressure) compared to simply ordering one
+ // whole chain after another. One would expect that this ordering would
+ // not matter since the scheduling in the backend of the compiler would
+ // take care of it, but apparently the scheduler fails to deliver optimal
+ // ILP with a naive ordering here.
+ SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
+ for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
+ for (auto &S : TrueSlices) {
+ if (!S.empty()) {
+ TrueSlicesInterleaved.push_back(S.top());
+ S.pop();
+ }
+ }
+ }
+ for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
+ for (auto &S : FalseSlices) {
+ if (!S.empty()) {
+ FalseSlicesInterleaved.push_back(S.top());
+ S.pop();
+ }
+ }
+ }
+
+ // We split the block containing the select(s) into two blocks.
+ SelectInst *SI = ASI.front();
+ SelectInst *LastSI = ASI.back();
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+ // Delete the unconditional branch that was just created by the split.
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Move any debug/pseudo instructions that were in-between the select
+ // group to the newly-created end block.
+ SmallVector<Instruction *, 2> DebugPseudoINS;
+ auto DIt = SI->getIterator();
+ while (&*DIt != LastSI) {
+ if (DIt->isDebugOrPseudoInst())
+ DebugPseudoINS.push_back(&*DIt);
+ DIt++;
+ }
+ for (auto *DI : DebugPseudoINS) {
+ DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+ }
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
+ BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
+ if (!TrueSlicesInterleaved.empty()) {
+ TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ TrueBranch->setDebugLoc(LastSI->getDebugLoc());
+ for (Instruction *TrueInst : TrueSlicesInterleaved)
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (!FalseSlicesInterleaved.empty()) {
+ FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(LastSI->getDebugLoc());
+ for (Instruction *FalseInst : FalseSlicesInterleaved)
+ FalseInst->moveBefore(FalseBranch);
+ }
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
+ }
+
+ // Insert the real conditional branch based on the original condition.
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ BasicBlock *TT, *FT;
+ if (TrueBlock == nullptr) {
+ TT = EndBlock;
+ FT = FalseBlock;
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ TT = TrueBlock;
+ FT = EndBlock;
+ FalseBlock = StartBlock;
+ } else {
+ TT = TrueBlock;
+ FT = FalseBlock;
+ }
+ IRBuilder<> IB(SI);
+ auto *CondFr =
+ IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+ IB.CreateCondBr(CondFr, TT, FT, SI);
+
+ SmallPtrSet<const Instruction *, 2> INS;
+ INS.insert(ASI.begin(), ASI.end());
+ // Use a reverse iterator because a later select may use the value of an
+ // earlier select, and we need to propagate the value through the earlier
+ // select to get the PHI operand.
+ for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+ SelectInst *SI = *It;
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+ PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
+
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+ INS.erase(SI);
+ ++NumSelectsConverted;
+ }
+ }
+}
+
+static bool isSpecialSelect(SelectInst *SI) {
+ using namespace llvm::PatternMatch;
+
+ // If the select is a logical-and/logical-or then it is better treated as an
+ // and/or by the backend.
+ if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
+ m_LogicalOr(m_Value(), m_Value()))))
+ return true;
+
+ return false;
+}
+
+void SelectOptimize::collectSelectGroups(BasicBlock &BB,
+ SelectGroups &SIGroups) {
+ BasicBlock::iterator BBIt = BB.begin();
+ while (BBIt != BB.end()) {
+ Instruction *I = &*BBIt++;
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (isSpecialSelect(SI))
+ continue;
+
+ SelectGroup SIGroup;
+ SIGroup.push_back(SI);
+ while (BBIt != BB.end()) {
+ Instruction *NI = &*BBIt;
+ SelectInst *NSI = dyn_cast<SelectInst>(NI);
+ if (NSI && SI->getCondition() == NSI->getCondition()) {
+ SIGroup.push_back(NSI);
+ } else if (!NI->isDebugOrPseudoInst()) {
+ // Debug/pseudo instructions should be skipped and not prevent the
+ // formation of a select group.
+ break;
+ }
+ ++BBIt;
+ }
+
+ // If the select type is not supported, there is no point in optimizing it;
+ // instruction selection will take care of it.
+ if (!isSelectKindSupported(SI))
+ continue;
+
+ SIGroups.push_back(SIGroup);
+ }
+ }
+}
+
+void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups) {
+ for (SelectGroup &ASI : SIGroups) {
+ ++NumSelectOptAnalyzed;
+ if (isConvertToBranchProfitableBase(ASI))
+ ProfSIGroups.push_back(ASI);
+ }
+}
+
+static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
+ DiagnosticInfoOptimizationBase &Rem) {
+ LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
+ ORE->emit(Rem);
+}
+
+void SelectOptimize::findProfitableSIGroupsInnerLoops(
+ const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
+ NumSelectOptAnalyzed += SIGroups.size();
+ // For each select group in an inner-most loop,
+ // a branch is preferable to a select/conditional-move if:
+ // i) conversion to branches for all the select groups of the loop satisfies
+ // loop-level heuristics including reducing the loop's critical path by
+ // some threshold (see SelectOptimize::checkLoopHeuristics); and
+ // ii) the total cost of the select group is cheaper with a branch compared
+ // to its predicated version. The cost is in terms of latency and the cost
+ // of a select group is the cost of its most expensive select instruction
+ // (assuming infinite resources and thus fully leveraging available ILP).
+
+ DenseMap<const Instruction *, CostInfo> InstCostMap;
+ CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
+ {Scaled64::getZero(), Scaled64::getZero()}};
+ if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
+ !checkLoopHeuristics(L, LoopCost)) {
+ return;
+ }
+
+ for (SelectGroup &ASI : SIGroups) {
+ // Assuming infinite resources, the cost of a group of instructions is the
+ // cost of the most expensive instruction of the group.
+ Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
+ for (SelectInst *SI : ASI) {
+ SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
+ BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
+ }
+ if (BranchCost < SelectCost) {
+ OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
+ OR << "Profitable to convert to branch (loop analysis). BranchCost="
+ << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
+ << ". ";
+ EmitAndPrintRemark(ORE, OR);
+ ++NumSelectConvertedLoop;
+ ProfSIGroups.push_back(ASI);
+ } else {
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+ ORmiss << "Select is more profitable (loop analysis). BranchCost="
+ << BranchCost.toString()
+ << ", SelectCost=" << SelectCost.toString() << ". ";
+ EmitAndPrintRemark(ORE, ORmiss);
+ }
+ }
+}
+
+bool SelectOptimize::isConvertToBranchProfitableBase(
+ const SmallVector<SelectInst *, 2> &ASI) {
+ SelectInst *SI = ASI.front();
+ LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
+ OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
+
+ // Skip cold basic blocks. Better to optimize for size for cold blocks.
+ if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+ ++NumSelectColdBB;
+ ORmiss << "Not converted to branch because of cold basic block. ";
+ EmitAndPrintRemark(ORE, ORmiss);
+ return false;
+ }
+
+ // If unpredictable, branch form is less profitable.
+ if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
+ ++NumSelectUnPred;
+ ORmiss << "Not converted to branch because of unpredictable branch. ";
+ EmitAndPrintRemark(ORE, ORmiss);
+ return false;
+ }
+
+ // If highly predictable, branch form is more profitable, unless a
+ // predictable select is inexpensive in the target architecture.
+ if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
+ ++NumSelectConvertedHighPred;
+ OR << "Converted to branch because of highly predictable branch. ";
+ EmitAndPrintRemark(ORE, OR);
+ return true;
+ }
+
+ // Look for expensive instructions in the cold operand's (if any) dependence
+ // slice of any of the selects in the group.
+ if (hasExpensiveColdOperand(ASI)) {
+ ++NumSelectConvertedExpColdOperand;
+ OR << "Converted to branch because of expensive cold operand.";
+ EmitAndPrintRemark(ORE, OR);
+ return true;
+ }
+
+ ORmiss << "Not profitable to convert to branch (base heuristic).";
+ EmitAndPrintRemark(ORE, ORmiss);
+ return false;
+}
+
+static InstructionCost divideNearest(InstructionCost Numerator,
+ uint64_t Denominator) {
+ return (Numerator + (Denominator / 2)) / Denominator;
+}
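+// For illustration: divideNearest(7, 2) yields (7 + 1) / 2 = 4, whereas
+// plain integer division would truncate 7 / 2 to 3.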
+
+bool SelectOptimize::hasExpensiveColdOperand(
+ const SmallVector<SelectInst *, 2> &ASI) {
+ bool ColdOperand = false;
+ uint64_t TrueWeight, FalseWeight, TotalWeight;
+ if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
+ uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
+ TotalWeight = TrueWeight + FalseWeight;
+ // Is there a path with frequency < ColdOperandThreshold% (default: 20%)?
+ ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
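+ // E.g., with hypothetical weights TrueWeight=5 and FalseWeight=95 the check
+ // is 100 * 20 > 100 * 5, so the true path is considered cold by default.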
+ } else if (PSI->hasProfileSummary()) {
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+ ORmiss << "Profile data available but missing branch-weights metadata for "
+ "select instruction. ";
+ EmitAndPrintRemark(ORE, ORmiss);
+ }
+ if (!ColdOperand)
+ return false;
+ // Check if the cold path's dependence slice is expensive for any of the
+ // selects of the group.
+ for (SelectInst *SI : ASI) {
+ Instruction *ColdI = nullptr;
+ uint64_t HotWeight;
+ if (TrueWeight < FalseWeight) {
+ ColdI = dyn_cast<Instruction>(SI->getTrueValue());
+ HotWeight = FalseWeight;
+ } else {
+ ColdI = dyn_cast<Instruction>(SI->getFalseValue());
+ HotWeight = TrueWeight;
+ }
+ if (ColdI) {
+ std::stack<Instruction *> ColdSlice;
+ getExclBackwardsSlice(ColdI, ColdSlice, SI);
+ InstructionCost SliceCost = 0;
+ while (!ColdSlice.empty()) {
+ SliceCost += TTI->getInstructionCost(ColdSlice.top(),
+ TargetTransformInfo::TCK_Latency);
+ ColdSlice.pop();
+ }
+ // The colder the cold value operand of the select is, the more expensive
+ // the cmov becomes, since it computes the cold value operand every time.
+ // Thus, the colder the cold operand is, the more its cost counts.
+ // Get nearest integer cost adjusted for coldness.
+ InstructionCost AdjSliceCost =
+ divideNearest(SliceCost * HotWeight, TotalWeight);
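+ // E.g., with a hypothetical SliceCost=5, HotWeight=95 and TotalWeight=100,
+ // AdjSliceCost = divideNearest(475, 100) = 5, which is then compared
+ // against ColdOperandMaxCostMultiplier * TCC_Expensive below.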
+ if (AdjSliceCost >=
+ ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check if it is safe to move LoadI next to the SI.
+// Conservatively assume it is safe only if there is no instruction
+// modifying memory in-between the load and the select instruction.
+static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
+ // Assume loads from different basic blocks are unsafe to move.
+ if (LoadI->getParent() != SI->getParent())
+ return false;
+ auto It = LoadI->getIterator();
+ while (&*It != SI) {
+ if (It->mayWriteToMemory())
+ return false;
+ It++;
+ }
+ return true;
+}
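+// E.g., a store between LoadI and SI could write to the loaded address, so
+// sinking the load below the new branch might observe a different value;
+// any mayWriteToMemory instruction in between therefore blocks the sink.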
+
+// For a given source instruction, collect its backwards dependence slice
+// consisting of instructions exclusively computed for the purpose of producing
+// the operands of the source instruction. As an approximation
+// (sufficiently-accurate in practice), we populate this set with the
+ // instructions of the backwards dependence slice that have only one use and
+ // form a one-use chain that leads to the source instruction.
+void SelectOptimize::getExclBackwardsSlice(Instruction *I,
+ std::stack<Instruction *> &Slice,
+ Instruction *SI, bool ForSinking) {
+ SmallPtrSet<Instruction *, 2> Visited;
+ std::queue<Instruction *> Worklist;
+ Worklist.push(I);
+ while (!Worklist.empty()) {
+ Instruction *II = Worklist.front();
+ Worklist.pop();
+
+ // Avoid cycles.
+ if (!Visited.insert(II).second)
+ continue;
+
+ if (!II->hasOneUse())
+ continue;
+
+ // Cannot soundly sink instructions with side-effects.
+ // Terminator or phi instructions cannot be sunk.
+ // Avoid sinking other select instructions (should be handled separately).
+ if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
+ isa<SelectInst>(II) || isa<PHINode>(II)))
+ continue;
+
+ // Avoid sinking loads so as not to skip state-modifying instructions that
+ // may alias with the loaded address.
+ // Only allow sinking of loads within the same basic block that are
+ // conservatively proven to be safe.
+ if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
+ continue;
+
+ // Avoid considering instructions with less frequency than the source
+ // instruction (i.e., avoid colder code regions of the dependence slice).
+ if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
+ continue;
+
+ // Eligible one-use instruction added to the dependence slice.
+ Slice.push(II);
+
+ // Explore all the operands of the current instruction to expand the slice.
+ for (unsigned k = 0; k < II->getNumOperands(); ++k)
+ if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
+ Worklist.push(OpI);
+ }
+}
+
+bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TTI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
+ return false;
+}
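+// For illustration (hypothetical weights): branch_weights of 2000:1 give a
+// probability of 2000/2001 (~99.95%), which exceeds a typical predictable
+// branch threshold (commonly around 99%), so such a select is treated as
+// highly predictable.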
+
+bool SelectOptimize::checkLoopHeuristics(const Loop *L,
+ const CostInfo LoopCost[2]) {
+ // Loop-level checks to determine if a non-predicated version (with branches)
+ // of the loop is more profitable than its predicated version.
+
+ if (DisableLoopLevelHeuristics)
+ return true;
+
+ OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
+ L->getHeader()->getFirstNonPHI());
+
+ if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
+ LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
+ ORmissL << "No select conversion in the loop due to no reduction of loop's "
+ "critical path. ";
+ EmitAndPrintRemark(ORE, ORmissL);
+ return false;
+ }
+
+ Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
+ LoopCost[1].PredCost - LoopCost[1].NonPredCost};
+
+ // For conversion to branches to be profitable, it needs to reduce the loop's
+ // critical path by at least some threshold (an absolute gain of
+ // GainCycleThreshold cycles and a relative gain of 12.5% by default).
+ if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
+ Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
+ Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
+ ORmissL << "No select conversion in the loop due to small reduction of "
+ "loop's critical path. Gain="
+ << Gain[1].toString()
+ << ", RelativeGain=" << RelativeGain.toString() << "%. ";
+ EmitAndPrintRemark(ORE, ORmissL);
+ return false;
+ }
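+ // For illustration with the defaults (GainCycleThreshold=4,
+ // GainRelativeThreshold=8): a loop whose predicated critical path is a
+ // hypothetical 40 cycles needs a gain of at least max(4, 40/8) = 5 cycles
+ // in the second iteration to pass the check above.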
+
+ // If the loop's critical path involves loop-carried dependences, the gradient
+ // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
+ // This check ensures that the latency of the loop's critical path keeps
+ // decreasing at a sufficient rate beyond the two analyzed loop iterations.
+ if (Gain[1] > Gain[0]) {
+ Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
+ (LoopCost[1].PredCost - LoopCost[0].PredCost);
+ if (GradientGain < Scaled64::get(GainGradientThreshold)) {
+ ORmissL << "No select conversion in the loop due to small gradient gain. "
+ "GradientGain="
+ << GradientGain.toString() << "%. ";
+ EmitAndPrintRemark(ORE, ORmissL);
+ return false;
+ }
+ }
+ // If the gain decreases it is not profitable to convert.
+ else if (Gain[1] < Gain[0]) {
+ ORmissL
+ << "No select conversion in the loop due to negative gradient gain. ";
+ EmitAndPrintRemark(ORE, ORmissL);
+ return false;
+ }
+
+ // Non-predicated version of the loop is more profitable than its
+ // predicated version.
+ return true;
+}
+
+// Computes instruction and loop-critical-path costs for both the predicated
+// and non-predicated version of the given loop.
+// Returns false if unable to compute these costs due to invalid cost of loop
+// instruction(s).
+bool SelectOptimize::computeLoopCosts(
+ const Loop *L, const SelectGroups &SIGroups,
+ DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+ LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
+ << L->getHeader()->getName() << "\n");
+ const auto &SIset = getSIset(SIGroups);
+ // Compute instruction and loop-critical-path costs across two iterations for
+ // both predicated and non-predicated version.
+ const unsigned Iterations = 2;
+ for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
+ // Cost of the loop's critical path.
+ CostInfo &MaxCost = LoopCost[Iter];
+ for (BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // Compute the predicated and non-predicated cost of the instruction.
+ Scaled64 IPredCost = Scaled64::getZero(),
+ INonPredCost = Scaled64::getZero();
+
+ // Assume infinite resources that allow to fully exploit the available
+ // instruction-level parallelism.
+ // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
+ for (const Use &U : I.operands()) {
+ auto UI = dyn_cast<Instruction>(U.get());
+ if (!UI)
+ continue;
+ if (InstCostMap.count(UI)) {
+ IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
+ INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
+ }
+ }
+ auto ILatency = computeInstCost(&I);
+ if (!ILatency) {
+ OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
+ ORmissL << "Invalid instruction cost preventing analysis and "
+ "optimization of the inner-most loop containing this "
+ "instruction. ";
+ EmitAndPrintRemark(ORE, ORmissL);
+ return false;
+ }
+ IPredCost += Scaled64::get(*ILatency);
+ INonPredCost += Scaled64::get(*ILatency);
+
+ // For a select that can be converted to branch,
+ // compute its cost as a branch (non-predicated cost).
+ //
+ // BranchCost = PredictedPathCost + MispredictCost
+ // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
+ // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
+ if (SIset.contains(&I)) {
+ auto SI = cast<SelectInst>(&I);
+
+ Scaled64 TrueOpCost = Scaled64::getZero(),
+ FalseOpCost = Scaled64::getZero();
+ if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
+ if (InstCostMap.count(TI))
+ TrueOpCost = InstCostMap[TI].NonPredCost;
+ if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
+ if (InstCostMap.count(FI))
+ FalseOpCost = InstCostMap[FI].NonPredCost;
+ Scaled64 PredictedPathCost =
+ getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
+
+ Scaled64 CondCost = Scaled64::getZero();
+ if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
+ if (InstCostMap.count(CI))
+ CondCost = InstCostMap[CI].NonPredCost;
+ Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
+
+ INonPredCost = PredictedPathCost + MispredictCost;
+ }
+ LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
+ << INonPredCost << " for " << I << "\n");
+
+ InstCostMap[&I] = {IPredCost, INonPredCost};
+ MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
+ MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
+ << " MaxCost = " << MaxCost.PredCost << " "
+ << MaxCost.NonPredCost << "\n");
+ }
+ return true;
+}
+
+SmallPtrSet<const Instruction *, 2>
+SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+ SmallPtrSet<const Instruction *, 2> SIset;
+ for (const SelectGroup &ASI : SIGroups)
+ for (const SelectInst *SI : ASI)
+ SIset.insert(SI);
+ return SIset;
+}
+
+std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+ InstructionCost ICost =
+ TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ if (auto OC = ICost.getValue())
+ return std::optional<uint64_t>(*OC);
+ return std::nullopt;
+}
+
+ScaledNumber<uint64_t>
+SelectOptimize::getMispredictionCost(const SelectInst *SI,
+ const Scaled64 CondCost) {
+ uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
+
+ // Account for the default misprediction rate when using a branch
+ // (conservatively set to 25% by default).
+ uint64_t MispredictRate = MispredictDefaultRate;
+ // If the select condition is obviously predictable, then the misprediction
+ // rate is zero.
+ if (isSelectHighlyPredictable(SI))
+ MispredictRate = 0;
+
+ // CondCost is included to account for cases where the computation of the
+ // condition is part of a long dependence chain (potentially loop-carried)
+ // that would delay detection of a misprediction and increase its cost.
+ Scaled64 MispredictCost =
+ std::max(Scaled64::get(MispredictPenalty), CondCost) *
+ Scaled64::get(MispredictRate);
+ MispredictCost /= Scaled64::get(100);
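+ // For illustration (hypothetical values): with MispredictPenalty=14,
+ // CondCost=3 and the default 25% rate, the cost is max(14, 3) * 25 / 100 =
+ // 3.5 cycles; a highly predictable select contributes 0 instead.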
+
+ return MispredictCost;
+}
+
+// Returns the cost of a branch when the prediction is correct.
+// TrueCost * TrueProbability + FalseCost * FalseProbability.
+ScaledNumber<uint64_t>
+SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI) {
+ Scaled64 PredPathCost;
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
+ uint64_t SumWeight = TrueWeight + FalseWeight;
+ if (SumWeight != 0) {
+ PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
+ FalseCost * Scaled64::get(FalseWeight);
+ PredPathCost /= Scaled64::get(SumWeight);
+ return PredPathCost;
+ }
+ }
+ // Without branch weight metadata, we assume 75% for the one path and 25% for
+ // the other, and pick the result with the biggest cost.
+ PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
+ FalseCost * Scaled64::get(3) + TrueCost);
+ PredPathCost /= Scaled64::get(4);
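+ // E.g., with hypothetical TrueCost=8 and FalseCost=2 this picks
+ // max(8*3 + 2, 2*3 + 8) / 4 = 26 / 4 = 6.5 as the predicted path cost.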
+ return PredPathCost;
+}
+
+bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+ if (VectorCond)
+ return false;
+ TargetLowering::SelectSupportKind SelectKind;
+ if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+ return TLI->isSelectSupported(SelectKind);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 000000000000..235f0da86b90
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,27593 @@
+//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ByteProvider.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <optional>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <variant>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dagcombine"
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+STATISTIC(SlicedLoads, "Number of loads sliced");
+STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
+
+static cl::opt<bool>
+CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
+
+static cl::opt<bool>
+UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+ cl::desc("Enable DAG combiner's use of TBAA"));
+
+#ifndef NDEBUG
+static cl::opt<std::string>
+CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
+#endif
+
+/// Hidden option to stress test load slicing, i.e., when this option
+/// is enabled, load slicing bypasses most of its profitability guards.
+static cl::opt<bool>
+StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load slicing"),
+ cl::init(false));
+
+static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
+
+static cl::opt<bool>
+ EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable merging multiple stores "
+ "into a wider store"));
+
+static cl::opt<unsigned> TokenFactorInlineLimit(
+ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+ cl::desc("Limit the number of operands to inline for Token Factors"));
+
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+ "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the number of times for the same StoreNode and RootNode "
+ "to bail out in store merging dependence check"));
+
+static cl::opt<bool> EnableReduceLoadOpStoreWidth(
+ "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable reducing the width of load/op/store "
+ "sequence"));
+
+static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
+ "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable load/<replace bytes>/store with "
+ "a narrower store"));
+
+static cl::opt<bool> EnableVectorFCopySignExtendRound(
+ "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Enable merging extends and rounds into FCOPYSIGN on vector types"));
+
+namespace {
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ const SelectionDAGTargetInfo *STI;
+ CombineLevel Level = BeforeLegalizeTypes;
+ CodeGenOpt::Level OptLevel;
+ bool LegalDAG = false;
+ bool LegalOperations = false;
+ bool LegalTypes = false;
+ bool ForCodeSize;
+ bool DisableGenericCombines;
+
+ /// Worklist of all of the nodes that need to be simplified.
+ ///
+ /// This must behave as a stack -- new nodes to process are pushed onto the
+ /// back and when processing we pop off of the back.
+ ///
+ /// The worklist will not contain duplicates but may contain null entries
+ /// due to nodes being deleted from the underlying DAG.
+ SmallVector<SDNode *, 64> Worklist;
+
+ /// Mapping from an SDNode to its position on the worklist.
+ ///
+ /// This is used to find and remove nodes from the worklist (by nulling
+ /// them) when they are deleted from the underlying DAG. It relies on
+ /// stable indices of nodes within the worklist.
+ DenseMap<SDNode *, unsigned> WorklistMap;
+
+ /// This records all nodes attempted to be added to the worklist since we
+ /// considered a new worklist entry. As we do not add duplicate nodes to the
+ /// worklist, this is different from the tail of the worklist.
+ SmallSetVector<SDNode *, 32> PruningList;
+
+ /// Set of nodes which have been combined (at least once).
+ ///
+ /// This is used to allow us to reliably add any operands of a DAG node
+ /// which have not yet been combined to the worklist.
+ SmallPtrSet<SDNode *, 32> CombinedNodes;
+
+ /// Map from candidate StoreNode to the pair of RootNode and count.
+ /// The count is used to track how many times we have seen the StoreNode
+ /// with the same RootNode bail out in the dependence check. If we have seen
+ /// the bail out for the same pair many times over a limit, we won't
+ /// consider the StoreNode with the same RootNode as a store merging
+ /// candidate again.
+ DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis *AA;
+
+ /// When an instruction is simplified, add all users of the instruction to
+ /// the work lists because they might get more simplified now.
+ void AddUsersToWorklist(SDNode *N) {
+ for (SDNode *Node : N->uses())
+ AddToWorklist(Node);
+ }
+
+ /// Convenient shorthand to add a node and all of its users to the worklist.
+ void AddToWorklistWithUsers(SDNode *N) {
+ AddUsersToWorklist(N);
+ AddToWorklist(N);
+ }
+
+ // Prune potentially dangling nodes. This is called after
+ // any visit to a node, but should also be called during a visit after any
+ // failed combine which may have created a DAG node.
+ void clearAddedDanglingWorklistEntries() {
+ // Check any nodes added to the worklist to see if they are prunable.
+ while (!PruningList.empty()) {
+ auto *N = PruningList.pop_back_val();
+ if (N->use_empty())
+ recursivelyDeleteUnusedNodes(N);
+ }
+ }
+
+ SDNode *getNextWorklistEntry() {
+ // Before we do any work, remove nodes that are not in use.
+ clearAddedDanglingWorklistEntries();
+ SDNode *N = nullptr;
+ // The Worklist holds the SDNodes in order, but it may contain null
+ // entries.
+ while (!N && !Worklist.empty()) {
+ N = Worklist.pop_back_val();
+ }
+
+ if (N) {
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
+ }
+ return N;
+ }
+
+ /// Call the node-specific routine that folds each particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()),
+ STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
+ ForCodeSize = DAG.shouldOptForSize();
+ DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
+
+ MaximumLegalStoreInBits = 0;
+ // We use the minimum store size here, since that's all we can guarantee
+ // for the scalable vector types.
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
+ }
+
+ void ConsiderForPruning(SDNode *N) {
+ // Mark this for potential pruning.
+ PruningList.insert(N);
+ }
+
+ /// Add to the worklist, making sure its instance is at the back (next to be
+ /// processed).
+ void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted Node added to Worklist");
+
+ // Skip handle nodes as they can't usefully be combined and confuse the
+ // zero-use deletion strategy.
+ if (N->getOpcode() == ISD::HANDLENODE)
+ return;
+
+ if (IsCandidateForPruning)
+ ConsiderForPruning(N);
+
+ if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+ Worklist.push_back(N);
+ }
+
+ /// Remove all instances of N from the worklist.
+ void removeFromWorklist(SDNode *N) {
+ CombinedNodes.erase(N);
+ PruningList.remove(N);
+ StoreRootCountMap.erase(N);
+
+ auto It = WorklistMap.find(N);
+ if (It == WorklistMap.end())
+ return; // Not in the worklist.
+
+ // Null out the entry rather than erasing it to avoid a linear operation.
+ Worklist[It->second] = nullptr;
+ WorklistMap.erase(It);
+ }
+
+ void deleteAndRecombine(SDNode *N);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+ unsigned MaximumLegalStoreInBits;
+
+ /// Check the specified integer node value to see if it can be simplified or
+ /// if things it uses can be simplified by bit propagation.
+ /// If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ APInt DemandedBits = APInt::getAllOnes(BitWidth);
+ return SimplifyDemandedBits(Op, DemandedBits);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ KnownBits Known;
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+ }
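+ // For illustration: a caller that knows only the low 8 bits of Op are
+ // observed (say, Op feeds a truncating store to i8) can pass a mask of
+ // 0xFF here, which may let TLI narrow or fold the nodes computing Op's
+ // upper bits.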
+
+ /// Check the specified vector node value to see if it can be simplified or
+ /// if things it uses can be simplified as it only uses some of the
+ /// elements. If so, return true.
+ bool SimplifyDemandedVectorElts(SDValue Op) {
+ // TODO: For now just pretend it cannot be simplified.
+ if (Op.getValueType().isScalableVector())
+ return false;
+
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
+ return SimplifyDemandedVectorElts(Op, DemandedElts);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ bool AssumeSingleUse = false);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
+ bool AssumeSingleUse = false);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+ SDValue SplitIndexingFromLoad(LoadSDNode *LD);
+ bool SliceUpLoad(SDNode *N);
+
+ // Looks up the chain to find a unique (unaliased) store feeding the passed
+ // load. If no such store is found, returns a nullptr.
+ // Note: This will look past a CALLSEQ_START if the load is chained to it so
+ // that it can find stack stores for byval params.
+ StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
+ // Scalars have size 0 to distinguish from singleton vectors.
+ SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
+ bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
+ bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
+
+ /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+ /// \returns EVE on success SDValue() on failure.
+ SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad);
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC);
+
+ /// Call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitADDLike(SDNode *N);
+ SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDSAT(SDNode *N);
+ SDValue visitSUBSAT(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDO(SDNode *N);
+ SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitSUBC(SDNode *N);
+ SDValue visitSUBO(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitUADDO_CARRY(SDNode *N);
+ SDValue visitSADDO_CARRY(SDNode *N);
+ SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N);
+ SDValue visitSUBE(SDNode *N);
+ SDValue visitUSUBO_CARRY(SDNode *N);
+ SDValue visitSSUBO_CARRY(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitMULFIX(SDNode *N);
+ SDValue useDivRem(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitAVG(SDNode *N);
+ SDValue visitABD(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitMULO(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitFunnelShift(SDNode *N);
+ SDValue visitSHLSAT(SDNode *N);
+ SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
+ SDValue visitBSWAP(SDNode *N);
+ SDValue visitBITREVERSE(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitVSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCCARRY(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitAssertExt(SDNode *N);
+ SDValue visitAssertAlign(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitFREEZE(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitVP_FADD(SDNode *N);
+ SDValue visitVP_FSUB(SDNode *N);
+ SDValue visitSTRICT_FADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ template <class MatchContextClass> SDValue visitFMA(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFSQRT(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitFPOW(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFREXP(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
+ SDValue visitFMinMax(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+
+ SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+ SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+ SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
+
+ bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
+
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitLIFETIME_END(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
+ SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitMLOAD(SDNode *N);
+ SDValue visitMSTORE(SDNode *N);
+ SDValue visitMGATHER(SDNode *N);
+ SDValue visitMSCATTER(SDNode *N);
+ SDValue visitVPGATHER(SDNode *N);
+ SDValue visitVPSCATTER(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitFP_TO_BF16(SDNode *N);
+ SDValue visitVECREDUCE(SDNode *N);
+ SDValue visitVPOp(SDNode *N);
+ SDValue visitGET_FPENV_MEM(SDNode *N);
+ SDValue visitSET_FPENV_MEM(SDNode *N);
+
+ template <class MatchContextClass>
+ SDValue visitFADDForFMACombine(SDNode *N);
+ template <class MatchContextClass>
+ SDValue visitFSUBForFMACombine(SDNode *N);
+ SDValue visitFMULForFMADistributiveCombine(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL,
+ SDNode *N,
+ SDValue N0,
+ SDValue N1);
+ SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags);
+ SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags);
+ SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL,
+ EVT VT, SDValue N0, SDValue N1,
+ SDNodeFlags Flags = SDNodeFlags());
+
+ SDValue visitShiftByConstant(SDNode *N);
+
+ SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldVSelectOfConstants(SDNode *N);
+ SDValue foldBinOpIntoSelect(SDNode *BO);
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC);
+ SDValue foldSignChangeInBitcast(SDNode *N);
+ SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldSelectOfBinops(SDNode *N);
+ SDValue foldSextSetcc(SDNode *N);
+ SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL);
+ SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
+ SDValue foldABSToABD(SDNode *N);
+ SDValue unfoldMaskedMerge(SDNode *N);
+ SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ const SDLoc &DL, bool foldBooleans);
+ SDValue rebuildSetCC(SDValue N);
+
+ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC, bool MatchStrict = false) const;
+ bool isOneUseSetCC(SDValue N) const;
+
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI);
+
+ SDValue CombineExtLoad(SDNode *N);
+ SDValue CombineZExtLogicopShiftLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
+ SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
+ SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildSDIVPow2(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue BuildSREMPow2(SDNode *N);
+ SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
+ SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
+ SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
+ SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+ SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal);
+ SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL);
+ SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL);
+ SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchLoadCombine(SDNode *N);
+ SDValue mergeTruncStores(StoreSDNode *N);
+ SDValue reduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue splitMergedValStore(StoreSDNode *ST);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue convertBuildVecZextToZext(SDNode *N);
+ SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecTruncToBitCast(SDNode *N);
+ SDValue reduceBuildVecToShuffle(SDNode *N);
+ SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask, SDValue VecIn1,
+ SDValue VecIn2, unsigned LeftIdx,
+ bool DidSplitVec);
+ SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
+
+ /// Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVectorImpl<SDValue> &Aliases);
+
+ /// Return true if there is any possibility that the two addresses overlap.
+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;
+
+ /// Walk up chain skipping non-aliasing memory nodes, looking for a better
+  /// chain (aliasing node).
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// Try to replace a store and any possibly adjacent stores on
+ /// consecutive chains with better chains. Return true only if St is
+ /// replaced.
+ ///
+ /// Notice that other chains may still be replaced even if the function
+ /// returns false.
+ bool findBetterNeighborChains(StoreSDNode *St);
+
+  // Helper for findBetterNeighborChains. Walk up the store chain, adding
+  // additional chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
+ };
+
+ // Classify the origin of a stored value.
+ enum class StoreSource { Unknown, Constant, Extract, Load };
+ StoreSource getStoreSource(SDValue StoreVal) {
+ switch (StoreVal.getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ return StoreSource::Constant;
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR:
+ return StoreSource::Extract;
+ case ISD::LOAD:
+ return StoreSource::Load;
+ default:
+ return StoreSource::Unknown;
+ }
+ }
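+  // For example, a store whose value operand is (extract_vector_elt v, 0) is
+  // classified as StoreSource::Extract, which steers mergeConsecutiveStores
+  // toward tryStoreMergeOfExtracts rather than the constant or load helpers.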
+
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+ SDValue ConstNode);
+
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended.
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT);
+
+ /// Helper function to calculate whether the given Load/Store can have its
+ /// width reduced to ExtVT.
+ bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
+ EVT &MemVT, unsigned ShAmt = 0);
+
+ /// Used by BackwardsPropagateMask to find suitable loads.
+ bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask, SDNode *&NodeToMask);
+ /// Attempt to propagate a given AND node back to load leaves so that they
+ /// can be combined into narrow loads.
+ bool BackwardsPropagateMask(SDNode *N);
+
+ /// Helper function for mergeConsecutiveStores which merges the component
+ /// store chains.
+ SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores);
+
+ /// Helper function for mergeConsecutiveStores which checks if all the store
+ /// nodes have the same underlying object. We can still reuse the first
+ /// store's pointer info if all the stores are from the same object.
+ bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
+
+ /// This is a helper function for mergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store introducing
+ /// bitcasts if necessary. \return True if a merged store was created.
+ bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector,
+ bool UseTrunc);
+
+ /// This is a helper function for mergeConsecutiveStores. Stores that
+ /// potentially may be merged with St are placed in StoreNodes. RootNode is
+ /// a chain predecessor to all store candidates.
+ void getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&Root);
+
+ /// Helper function for mergeConsecutiveStores. Checks if candidate stores
+ /// have indirect dependency through their operands. RootNode is the
+ /// predecessor to all stores calculated by getStoreMergeCandidates and is
+ /// used to prune the dependency check. \return True if safe to merge.
+ bool checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode);
+
+ /// This is a helper function for mergeConsecutiveStores. Given a list of
+ /// store candidates, find the first N that are consecutive in memory.
+ /// Returns 0 if there are not at least 2 consecutive stores to try merging.
+ unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
+ int64_t ElementSizeBytes) const;
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of constant values.
+ bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *Root, bool AllowVectors);
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of extracted vector elements.
+ /// When extracting multiple vector elements, try to store them in one
+ /// vector store rather than a sequence of scalar stores.
+ bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *Root);
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of loaded values.
+ bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *Root, bool AllowVectors,
+ bool IsNonTemporalStore, bool IsNonTemporalLoad);
+
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+ /// \return true if stores were merged.
+ bool mergeConsecutiveStores(StoreSDNode *St);
+
+ /// Try to transform a truncation where C is a constant:
+ /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
+ ///
+  /// \p N needs to be a truncation and its first operand an AND. Other
+  /// requirements are checked by the function (e.g. that trunc is
+  /// single-use); if any requirement is not met, an empty SDValue is returned.
+ SDValue distributeTruncateThroughAnd(SDNode *N);
+
+  /// Helper function to determine whether the target supports the operation
+  /// given by \p Opcode for type \p VT, that is, whether the operation
+  /// is legal or custom before legalizing operations, and whether it is
+  /// legal (but not custom) after legalization.
+ bool hasOperation(unsigned Opcode, EVT VT) {
+ return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
+ }
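+  // A typical (illustrative) use: a combine that wants to create an ISD::ABDU
+  // node first checks hasOperation(ISD::ABDU, VT) so the new node is only
+  // formed when the target can select it at the current stage.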
+
+ public:
+ /// Runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// Returns a type large enough to hold any valid shift amount - before type
+ /// legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
+ }
+
+ /// This method returns true if we are running before type legalization or
+ /// if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+
+ /// Convenience wrapper around TargetLowering::getSetCCResultType
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType);
+ };
+
+/// This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorklistRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ DC.removeFromWorklist(N);
+ }
+};
+
+class WorklistInserter : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistInserter(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ // FIXME: Ideally we could add N to the worklist, but this causes exponential
+ // compile time costs in large DAGs, e.g. Halide.
+ void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
+};
+
+class EmptyMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+
+public:
+ EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI) {}
+
+ bool match(SDValue OpN, unsigned Opcode) const {
+ return Opcode == OpN->getOpcode();
+ }
+
+ // Same as SelectionDAG::getNode().
+ template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
+ return DAG.getNode(std::forward<ArgT>(Args)...);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
+ }
+};
+
+class VPMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ SDValue RootMaskOp;
+ SDValue RootVectorLenOp;
+
+public:
+ VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
+ assert(Root->isVPOpcode());
+ if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
+ RootMaskOp = Root->getOperand(*RootMaskPos);
+
+ if (auto RootVLenPos =
+ ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
+ RootVectorLenOp = Root->getOperand(*RootVLenPos);
+ }
+
+  /// Return whether \p OpVal is a node that is functionally compatible with
+  /// the NodeType \p Opc.
+ bool match(SDValue OpVal, unsigned Opc) const {
+ if (!OpVal->isVPOpcode())
+ return OpVal->getOpcode() == Opc;
+
+ auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
+ !OpVal->getFlags().hasNoFPExcept());
+ if (BaseOpc != Opc)
+ return false;
+
+    // Make sure the mask of OpVal is an all-true mask or is the same as Root's.
+ unsigned VPOpcode = OpVal->getOpcode();
+ if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
+ SDValue MaskOp = OpVal.getOperand(*MaskPos);
+ if (RootMaskOp != MaskOp &&
+ !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
+ return false;
+ }
+
+    // Make sure the EVL of OpVal is the same as Root's.
+ if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
+ if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
+ return false;
+ return true;
+ }
+
+ // Specialize based on number of operands.
+ // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
+ // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
+ // DAG.getNode(Opcode, DL, VT); }
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {Operand, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
+ SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
+ }
+};
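+
+// The two match contexts above let a single templated combine serve both
+// plain and VP nodes. A rough sketch of how a visitor uses them (illustrative
+// only, with SomeOperand standing in for whatever operand the fold inspects):
+//
+//   template <class MatchContextClass>
+//   SDValue DAGCombiner::visitFMA(SDNode *N) {
+//     MatchContextClass matcher(DAG, TLI, N);
+//     if (matcher.match(SomeOperand, ISD::FNEG)) {
+//       // Build the replacement via matcher.getNode(...); with VPMatchContext
+//       // this appends the root's mask and EVL operands automatically.
+//     }
+//     ...
+//   }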
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorklist(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+ return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::deleteAndRecombine(SDNode *N) {
+ removeFromWorklist(N);
+
+ // If the operands of this node are only used by the node, they will now be
+ // dead. Make sure to re-visit them and recursively delete dead nodes.
+ for (const SDValue &Op : N->ops())
+ // For an operand generating multiple values, one of the values may
+ // become dead allowing further simplification (e.g. split index
+ // arithmetic from an indexed load).
+ if (Op->hasOneUse() || Op->getNumValues() > 1)
+ AddToWorklist(Op.getNode());
+
+ DAG.DeleteNode(N);
+}
+
+// APInts must be the same size for most operations; this helper function
+// zero-extends the shorter of the pair so that they match. An optional Offset
+// allows creating bit widths that will not overflow.
+static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
+ unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
+ LHS = LHS.zext(Bits);
+ RHS = RHS.zext(Bits);
+}
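+// For example (illustrative): given LHS = APInt(8, 0x80), RHS = APInt(16, 0x1)
+// and Offset = 1, both values are zero-extended to 17 bits, leaving one bit of
+// headroom for a subsequent operation on the pair.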
+
+// Return true if this node is a setcc, or is a select_cc
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC, bool MatchStrict) const {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+
+ if (MatchStrict &&
+ (N.getOpcode() == ISD::STRICT_FSETCC ||
+ N.getOpcode() == ISD::STRICT_FSETCCS)) {
+ LHS = N.getOperand(1);
+ RHS = N.getOperand(2);
+ CC = N.getOperand(3);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
+ !TLI.isConstFalseVal(N.getOperand(3)))
+ return false;
+
+ if (TLI.getBooleanContents(N.getValueType()) ==
+ TargetLowering::UndefinedBooleanContent)
+ return false;
+
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+}
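+// For example, on a target using zero-or-one boolean contents,
+// (select_cc lhs, rhs, 1, 0, cc) is reported as setcc-equivalent, with the
+// LHS/RHS/CC references bound to lhs, rhs and cc respectively.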
+
+/// Return true if this is a SetCC-equivalent operation with only one use.
+/// If this is true, it allows the users to invert the operation for free when
+/// it is profitable to do so.
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
+ return true;
+ return false;
+}
+
+static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
+ if (!ScalarTy.isSimple())
+ return false;
+
+ uint64_t MaskForTy = 0ULL;
+ switch (ScalarTy.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ MaskForTy = 0xFFULL;
+ break;
+ case MVT::i16:
+ MaskForTy = 0xFFFFULL;
+ break;
+ case MVT::i32:
+ MaskForTy = 0xFFFFFFFFULL;
+ break;
+ default:
+    return false;
+ }
+
+ APInt Val;
+ if (ISD::isConstantSplatVector(N, Val))
+ return Val.getLimitedValue() == MaskForTy;
+
+ return false;
+}
+
+// Determines if N is a constant integer or a splat/build vector of constant
+// integers (possibly mixed with undefs).
+// Build vector implicit truncation is not permitted.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
+ }
+ return true;
+}
+
+// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
+// undef's.
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ return isConstantOrConstantVector(V, NoOpaques) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
+}
+
+// Determine if this is an indexed load with an opaque target constant index.
+static bool canSplitIdx(LoadSDNode *LD) {
+ return MaySplitLoadIndex &&
+ (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
+}
+
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL,
+ SDNode *N,
+ SDValue N0,
+ SDValue N1) {
+ // Currently this only tries to ensure we don't undo the GEP splits done by
+ // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+ // we check if the following transformation would be problematic:
+ // (load/store (add, (add, x, offset1), offset2)) ->
+ // (load/store (add, x, offset1+offset2)).
+
+  // We also check whether the analogous transformation would be problematic:
+  // (load/store (add, (add, x, y), offset2)) ->
+  // (load/store (add, (add, x, offset2), y)).
+
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+ return false;
+
+ auto *C2 = dyn_cast<ConstantSDNode>(N1);
+ if (!C2)
+ return false;
+
+ const APInt &C2APIntVal = C2->getAPIntValue();
+ if (C2APIntVal.getSignificantBits() > 64)
+ return false;
+
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (N0.hasOneUse())
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getSignificantBits() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N->uses()) {
+ if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
+ }
+ }
+ } else {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
+ if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+ return false;
+
+ for (SDNode *Node : N->uses()) {
+ auto *LoadStore = dyn_cast<MemSDNode>(Node);
+ if (!LoadStore)
+ return false;
+
+      // Is x[offset2] a legal addressing mode? If so, then reassociating
+      // would break the address pattern.
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+}
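+// Illustration (assuming a hypothetical target whose load/store immediate
+// offsets must fit in [0, 4095]): for a load addressed by
+// (add (add x, 4000), 200), where the inner add has other users, the offset
+// 200 folds into the addressing mode but the combined offset 4200 would not,
+// so this returns true and the caller avoids the reassociation.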
+
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+ SDValue N0, SDValue N1,
+ SDNodeFlags Flags) {
+ EVT VT = N0.getValueType();
+
+ if (N0.getOpcode() != Opc)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
+ // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
+ return DAG.getNode(Opc, DL, VT, N00, OpNode);
+ return SDValue();
+ }
+ if (TLI.isReassocProfitable(DAG, N0, N1)) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ SDNodeFlags NewFlags;
+ if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
+ Flags.hasNoUnsignedWrap())
+ NewFlags.setNoUnsignedWrap(true);
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
+ }
+ }
+
+ // Check for repeated operand logic simplifications.
+ if (Opc == ISD::AND || Opc == ISD::OR) {
+ // (N00 & N01) & N00 --> N00 & N01
+ // (N00 & N01) & N01 --> N00 & N01
+ // (N00 | N01) | N00 --> N00 | N01
+ // (N00 | N01) | N01 --> N00 | N01
+ if (N1 == N00 || N1 == N01)
+ return N0;
+ }
+ if (Opc == ISD::XOR) {
+ // (N00 ^ N01) ^ N00 --> N01
+ if (N1 == N00)
+ return N01;
+ // (N00 ^ N01) ^ N01 --> N00
+ if (N1 == N01)
+ return N00;
+ }
+
+ if (TLI.isReassocProfitable(DAG, N0, N1)) {
+ if (N1 != N01) {
+ // Reassociate if (op N00, N1) already exist
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
+        // If (Op (Op N00, N1), N01) already exists, stop reassociating here
+        // to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
+ }
+ }
+
+ if (N1 != N00) {
+ // Reassociate if (op N01, N1) already exist
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
+        // If (Op (Op N01, N1), N00) already exists, stop reassociating here
+        // to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
+ }
+ }
+ }
+
+ return SDValue();
+}
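+// For example, the constant-folding case above turns (add (add x, 5), 7) into
+// (add x, 12).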
+
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+
+ // Floating-point reassociation is not allowed without loose FP math.
+ if (N0.getValueType().isFloatingPoint() ||
+ N1.getValueType().isFloatingPoint())
+ if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
+ return SDValue();
+
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
+ return Combined;
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
+ return Combined;
+ return SDValue();
+}
+
+// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
+// Note that we only expect Flags to be passed from FP operations. For integer
+// operations they need to be dropped.
+SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
+ const SDLoc &DL, EVT VT, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ N0->hasOneUse() && N1->hasOneUse() &&
+ TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
+ TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+ return DAG.getNode(RedOpc, DL, VT,
+ DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0)));
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ To[0].dump(&DAG);
+ dbgs() << " and " << NumTo - 1 << " other values\n");
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!");
+
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To);
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode())
+ AddToWorklistWithUsers(To[i].getNode());
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty())
+ deleteAndRecombine(N);
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
+
+ // Replace all uses.
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorklistWithUsers(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph.
+ recursivelyDeleteUnusedNodes(TLO.Old.getNode());
+}
+
+/// Check the specified integer node value to see if it can be simplified or if
+/// things it uses can be simplified by bit propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ bool AssumeSingleUse) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ KnownBits Known;
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
+ AssumeSingleUse))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+/// Check the specified vector node value to see if it can be simplified or
+/// if things it uses can be simplified as it only uses some of the elements.
+/// If so, return true.
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ bool AssumeSingleUse) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownUndef, KnownZero;
+ if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+ TLO, 0, AssumeSingleUse))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ SDLoc DL(Load);
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
+
+ LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
+ Trunc.dump(&DAG); dbgs() << '\n');
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
+
+ AddToWorklist(Trunc.getNode());
+ recursivelyDeleteUnusedNodes(Load);
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ SDLoc DL(Op);
+ if (ISD::isUNINDEXEDLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, DL, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ MemVT, LD->getMemOperand());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
+ return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
+ break;
+ case ISD::AssertZext:
+ if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
+ return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
+ break;
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, DL, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (!NewOp.getNode())
+ return SDValue();
+ AddToWorklist(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (!NewOp.getNode())
+ return SDValue();
+ AddToWorklist(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
+}
+
+/// Promote the specified integer binary operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ SDLoc DL(Op);
+
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
+
+ // We are always replacing N0/N1's use in N and only need additional
+ // replacements if there are additional uses.
+ // Note: We are checking uses of the *nodes* (SDNode) rather than values
+ // (SDValue) here because the node may reference multiple values
+ // (for example, the chain value of a load node).
+ Replace0 &= !N0->hasOneUse();
+ Replace1 &= (N0 != N1) && !N1->hasOneUse();
+
+ // Combine Op here so it is preserved past replacements.
+ CombineTo(Op.getNode(), RV);
+
+ // If operands have a use ordering, make sure we deal with
+ // predecessor first.
+ if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
+ std::swap(N0, N1);
+ std::swap(NN0, NN1);
+ }
+
+ if (Replace0) {
+ AddToWorklist(NN0.getNode());
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ }
+ if (Replace1) {
+ AddToWorklist(NN1.getNode());
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+ }
+ return Op;
+ }
+ return SDValue();
+}
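+// Illustration (a sketch of the x86-style case mentioned above): an i16 add of
+// a and b whose type the target marks as undesirable is rebuilt as
+//   (truncate i16 (add i32 (any_extend a), (any_extend b)))
+// and any loads among the operands are widened into extending loads instead.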
+
+/// Promote the specified integer shift operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(N0, PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(N0, PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+
+ if (!N0.getNode())
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue N1 = Op.getOperand(1);
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ if (!ISD::isUNINDEXEDLoad(Op.getNode()))
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ SDLoc DL(Op);
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ MemVT, LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
+
+ LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
+ Result.dump(&DAG); dbgs() << '\n');
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
+
+ AddToWorklist(Result.getNode());
+ recursivelyDeleteUnusedNodes(N);
+ return true;
+ }
+
+ return false;
+}
+
+/// Recursively delete a node which has no uses and any operands for
+/// which it is the only use.
+///
+/// Note that this both deletes the nodes and removes them from the worklist.
+/// It also adds any nodes that have had a user deleted to the worklist, as
+/// they may now have only one use and be subject to other combines.
+bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
+ if (!N->use_empty())
+ return false;
+
+ SmallSetVector<SDNode *, 16> Nodes;
+ Nodes.insert(N);
+ do {
+ N = Nodes.pop_back_val();
+ if (!N)
+ continue;
+
+ if (N->use_empty()) {
+ for (const SDValue &ChildN : N->op_values())
+ Nodes.insert(ChildN.getNode());
+
+ removeFromWorklist(N);
+ DAG.DeleteNode(N);
+ } else {
+ AddToWorklist(N);
+ }
+ } while (!Nodes.empty());
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables so that the various visit routines may use them.
+ Level = AtLevel;
+ LegalDAG = Level >= AfterLegalizeDAG;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
+
+ WorklistInserter AddNodes(*this);
+
+ // Add all the dag nodes to the worklist.
+ //
+  // Note: Not all nodes are added to the PruningList here. This is because
+  // the only nodes which can be deleted are those which have no uses, and all
+  // other nodes which would otherwise be added to the worklist by the first
+  // call to getNextWorklistEntry are already present in it.
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // While we have a valid worklist entry node, try to combine it.
+ while (SDNode *N = getNextWorklistEntry()) {
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (recursivelyDeleteUnusedNodes(N))
+ continue;
+
+ WorklistRemover DeadNodes(*this);
+
+ // If this combine is running after legalizing the DAG, re-legalize any
+ // nodes pulled off the worklist.
+ if (LegalDAG) {
+ SmallSetVector<SDNode *, 16> UpdatedNodes;
+ bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
+
+ for (SDNode *LN : UpdatedNodes)
+ AddToWorklistWithUsers(LN);
+
+ if (!NIsValid)
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+
+ // Add any operands of the new node which have not yet been combined to the
+ // worklist as well. Because the worklist uniques things already, this
+ // won't repeatedly process the same operand.
+ for (const SDValue &ChildN : N->op_values())
+ if (!CombinedNodes.count(ChildN.getNode()))
+ AddToWorklist(ChildN.getNode());
+
+ CombinedNodes.insert(N);
+ SDValue RV = combine(N);
+
+ if (!RV.getNode())
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
+
+ if (N->getNumValues() == RV->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ DAG.ReplaceAllUsesWith(N, &RV);
+ }
+
+ // Push the new node and any users onto the worklist. Omit this if the
+ // new node is the EntryToken (e.g. if a store managed to get optimized
+ // out), because re-visiting the EntryToken and its users will not uncover
+ // any additional opportunities, but there may be a large number of such
+ // users, potentially causing compile time explosion.
+ if (RV.getOpcode() != ISD::EntryToken)
+ AddToWorklistWithUsers(RV.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node. This will also take care of adding any
+ // operands which have lost a user to the worklist.
+ recursivelyDeleteUnusedNodes(N);
+ }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::SADDSAT:
+ case ISD::UADDSAT: return visitADDSAT(N);
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: return visitSUBSAT(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::SADDO:
+ case ISD::UADDO: return visitADDO(N);
+ case ISD::SUBC: return visitSUBC(N);
+ case ISD::SSUBO:
+ case ISD::USUBO: return visitSUBO(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
+ case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
+ case ISD::SUBE: return visitSUBE(N);
+ case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
+ case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: return visitMULFIX(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU: return visitAVG(N);
+ case ISD::ABDS:
+ case ISD::ABDU: return visitABD(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO:
+ case ISD::UMULO: return visitMULO(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::ROTR:
+ case ISD::ROTL: return visitRotate(N);
+ case ISD::FSHL:
+ case ISD::FSHR: return visitFunnelShift(N);
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: return visitSHLSAT(N);
+ case ISD::ABS: return visitABS(N);
+ case ISD::BSWAP: return visitBSWAP(N);
+ case ISD::BITREVERSE: return visitBITREVERSE(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::VSELECT: return visitVSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCCARRY: return visitSETCCCARRY(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::AssertSext:
+ case ISD::AssertZext: return visitAssertExt(N);
+ case ISD::AssertAlign: return visitAssertAlign(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FSQRT: return visitFSQRT(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::FPOW: return visitFPOW(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: return visitFMinMax(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::FFREXP: return visitFFREXP(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
+ case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MGATHER: return visitMGATHER(N);
+ case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSCATTER: return visitMSCATTER(N);
+ case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::LIFETIME_END: return visitLIFETIME_END(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
+ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
+ case ISD::FREEZE: return visitFREEZE(N);
+ case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
+ case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
+#include "llvm/IR/VPIntrinsics.def"
+ return visitVPOp(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV;
+ if (!DisableGenericCombines)
+ RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (!RV.getNode()) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If nothing happened still, try promoting the operation.
+ if (!RV.getNode()) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try to eliminate it if the commuted
+ // version is already present in the DAG.
+ if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// Given a node, return its input chain if it has one, otherwise return a null
+/// sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ // Don't simplify token factors if optnone.
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ // Don't simplify the token factor if the node itself has too many operands.
+ if (N->getNumOperands() > TokenFactorInlineLimit)
+ return SDValue();
+
+ // If the sole user is a token factor, we should make sure we have a
+ // chance to merge them together. This prevents TF chains from inhibiting
+ // optimizations.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+ AddToWorklist(*(N->use_begin()));
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+  // Iterate through token factors. The TFs list grows when new token factors
+  // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ // Limit number of nodes to inline, to avoid quadratic compile times.
+ // We have to add the outstanding Token Factors to Ops, otherwise we might
+ // drop Ops from the resulting Token Factors.
+ if (Ops.size() > TokenFactorInlineLimit) {
+ for (unsigned j = i; j < TFs.size(); j++)
+ Ops.emplace_back(TFs[j], 0);
+ // Drop unprocessed Token Factors from TFs, so we do not add them to the
+ // combiner worklist later.
+ TFs.resize(i);
+ break;
+ }
+
+ SDNode *TF = TFs[i];
+ // Check each of the operands.
+ for (const SDValue &Op : TF->op_values()) {
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ Changed = true;
+ break;
+ }
+ [[fallthrough]];
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()).second)
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Re-visit inlined Token Factors, to clean them up in case they have been
+ // removed. Skip the first Token Factor, as this is the current node.
+ for (unsigned i = 1, e = TFs.size(); i < e; i++)
+ AddToWorklist(TFs[i]);
+
+ // Remove nodes that are chained to another node in the list. Do so by
+ // walking up chains breadth-first, stopping when we've seen another operand.
+ // In general we must climb to the EntryNode, but we can exit early if we
+ // find that all remaining work is associated with just one operand, as no
+ // further pruning is possible.
+
+ // List of nodes to search through and original Ops from which they originate.
+ SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+ SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+ SmallPtrSet<SDNode *, 16> SeenChains;
+ bool DidPruneOps = false;
+
+ unsigned NumLeftToConsider = 0;
+ for (const SDValue &Op : Ops) {
+ Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+ OpWorkCount.push_back(1);
+ }
+
+ auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+ // If this is an Op, we can remove the op from the list. Re-mark any
+ // search associated with it as coming from the current OpNumber.
+ if (SeenOps.contains(Op)) {
+ Changed = true;
+ DidPruneOps = true;
+ unsigned OrigOpNumber = 0;
+ while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+ OrigOpNumber++;
+ assert((OrigOpNumber != Ops.size()) &&
+ "expected to find TokenFactor Operand");
+ // Re-mark worklist from OrigOpNumber to OpNumber
+ for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+ if (Worklist[i].second == OrigOpNumber) {
+ Worklist[i].second = OpNumber;
+ }
+ }
+ OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+ OpWorkCount[OrigOpNumber] = 0;
+ NumLeftToConsider--;
+ }
+ // Add if it's a new chain
+ if (SeenChains.insert(Op).second) {
+ OpWorkCount[OpNumber]++;
+ Worklist.push_back(std::make_pair(Op, OpNumber));
+ }
+ };
+
+ for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+ // We need to consider at least 2 Ops to prune.
+ if (NumLeftToConsider <= 1)
+ break;
+ auto CurNode = Worklist[i].first;
+ auto CurOpNumber = Worklist[i].second;
+ assert((OpWorkCount[CurOpNumber] > 0) &&
+ "Node should not appear in worklist");
+ switch (CurNode->getOpcode()) {
+ case ISD::EntryToken:
+ // Hitting EntryToken is the only way for the search to terminate without
+ // hitting another operand's search. Prevent us from marking this operand
+ // considered.
+ NumLeftToConsider++;
+ break;
+ case ISD::TokenFactor:
+ for (const SDValue &Op : CurNode->op_values())
+ AddToWorklist(i, Op.getNode(), CurOpNumber);
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+ break;
+ default:
+ if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+ AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+ break;
+ }
+ OpWorkCount[CurOpNumber]--;
+ if (OpWorkCount[CurOpNumber] == 0)
+ NumLeftToConsider--;
+ }
+
+ // If we've changed things around then replace token factor.
+ if (Changed) {
+ SDValue Result;
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ if (DidPruneOps) {
+ SmallVector<SDValue, 8> PrunedOps;
+ // Keep only the ops that were not reached while walking up another op's
+ // chain; any op that was reached is covered by that op and thus redundant.
+ for (const SDValue &Op : Ops) {
+ if (SeenChains.count(Op.getNode()) == 0)
+ PrunedOps.push_back(Op);
+ }
+ Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
+ } else {
+ Result = DAG.getTokenFactor(SDLoc(N), Ops);
+ }
+ }
+ return Result;
+ }
+ return SDValue();
+}
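+
+// A minimal sketch of the merge and prune steps above, assuming hypothetical
+// chain values A, B and C and a single-use inner token factor:
+//   TokenFactor(TokenFactor(A, B), C)  -->  TokenFactor(A, B, C)
+// If C is then reached while walking up B's chain (B already depends on C, so
+// every user of the token factor waits on C through B), C is pruned:
+//   TokenFactor(A, B, C)               -->  TokenFactor(A, B)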
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorklistRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorklist(N);
+ do {
+ // Do as a single replacement to avoid rewalking use lists.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+ DAG.ReplaceAllUsesWith(N, Ops.data());
+ } while (!N->use_empty());
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
+/// ConstantSDNode pointer else nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+ return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
+// isTruncateOf - If N is a truncate of some other value, return true and
+// record the value being truncated in Op and which of Op's bits are zero/one
+// in Known. This function computes KnownBits to avoid a duplicated call to
+// computeKnownBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ KnownBits &Known) {
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ Known = DAG.computeKnownBits(Op);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ if (isNullOrNullSplat(Op0))
+ Op = Op1;
+ else if (isNullOrNullSplat(Op1))
+ Op = Op0;
+ else
+ return false;
+
+ Known = DAG.computeKnownBits(Op);
+
+ return (Known.Zero | 1).isAllOnes();
+}
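+
+// A worked check of the SETCC case above: the (Known.Zero | 1).isAllOnes()
+// test means every bit of Op except possibly bit 0 is known zero, so
+// (setcc Op, 0, ne) yields exactly bit 0 of Op -- the same i1 value that
+// (truncate Op) would produce (Op == 0 gives 0, Op == 1 gives 1).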
+
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ unsigned AS;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else {
+ return false;
+ }
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else {
+ return false;
+ }
+
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
+}
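+
+// A small sketch of the AddrMode query above, assuming a hypothetical
+// (add BasePtr, 8) used as the address of a load:
+//   AM.HasBaseReg = true; AM.BaseOffs = 8; AM.Scale = 0;   // [reg + 8]
+// whereas (add BasePtr, Index) with a non-constant Index yields:
+//   AM.HasBaseReg = true; AM.BaseOffs = 0; AM.Scale = 1;   // [reg + reg]
+// isLegalAddressingMode() then decides whether the target can fold that mode
+// for the given memory type and address space.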
+
+/// This inverts a canonicalization in IR that replaces a variable select arm
+/// with an identity constant. Codegen improves if we re-use the variable
+/// operand rather than load a constant. This can also be converted into a
+/// masked vector operation if the target supports it.
+static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
+ bool ShouldCommuteOperands) {
+ // Match a select as operand 1. The identity constant that we are looking for
+ // is only valid as operand 1 of a non-commutative binop.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (ShouldCommuteOperands)
+ std::swap(N0, N1);
+
+ // TODO: Should this apply to scalar select too?
+ if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
+ return SDValue();
+
+ // We can't hoist all instructions because of immediate UB (not speculatable).
+ // For example div/rem by zero.
+ if (!DAG.isSafeToSpeculativelyExecuteNode(N))
+ return SDValue();
+
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ SDValue Cond = N1.getOperand(0);
+ SDValue TVal = N1.getOperand(1);
+ SDValue FVal = N1.getOperand(2);
+
+ // This transform increases uses of N0, so freeze it to be safe.
+ // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
+ unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
+ if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
+ return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
+ }
+ // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
+ if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
+ return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
+ }
+
+ return SDValue();
+}
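+
+// A worked example of the fold above for an integer add, where 0 is the
+// identity constant (a sketch with assumed operands X and Y):
+//   add X, (vselect Cond, 0, Y) --> vselect Cond, freeze(X), (add freeze(X), Y)
+// On true lanes the result is X either way; on false lanes it is X + Y either
+// way, so the constant arm never has to be materialized.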
+
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+ assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+ "Unexpected binary operator");
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto BinOpcode = BO->getOpcode();
+ EVT VT = BO->getValueType(0);
+ if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
+ if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
+ return Sel;
+
+ if (TLI.isCommutativeBinOp(BO->getOpcode()))
+ if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
+ return Sel;
+ }
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ // TODO: Handle ISD::SELECT_CC.
+ unsigned SelOpNo = 0;
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+ SelOpNo = 1;
+ Sel = BO->getOperand(1);
+
+ // Peek through trunc to shift amount type.
+ if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
+ BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
+ // This is valid when the truncated bits of x are already zero.
+ SDValue Op;
+ KnownBits Known;
+ if (isTruncateOf(DAG, Sel, Op, Known) &&
+ Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
+ Sel = Op;
+ }
+ }
+
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ SDValue CT = Sel.getOperand(1);
+ if (!isConstantOrConstantVector(CT, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(CT))
+ return SDValue();
+
+ SDValue CF = Sel.getOperand(2);
+ if (!isConstantOrConstantVector(CF, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(CF))
+ return SDValue();
+
+ // Bail out if any constants are opaque because we can't constant fold those.
+ // The exception is "and" and "or" with either 0 or -1, in which case we can
+ // propagate non-constant operands into the select. I.e.:
+ // and (select Cond, 0, -1), X --> select Cond, 0, X
+ // or X, (select Cond, -1, 0) --> select Cond, -1, X
+ bool CanFoldNonConst =
+ (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
+ (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
+
+ SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+ if (!CanFoldNonConst &&
+ !isConstantOrConstantVector(CBO, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
+ return SDValue();
+
+ SDLoc DL(Sel);
+ SDValue NewCT, NewCF;
+
+ if (CanFoldNonConst) {
+ // If CBO is an opaque constant, we can't rely on getNode to constant fold.
+ if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
+ (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
+ NewCT = CT;
+ else
+ NewCT = CBO;
+
+ if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
+ (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
+ NewCF = CF;
+ else
+ NewCF = CBO;
+ } else {
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the
+ // select. Example:
+ // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+ NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
+ : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
+ if (!NewCT)
+ return SDValue();
+
+ NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
+ : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
+ if (!NewCF)
+ return SDValue();
+ }
+
+ SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SelectOp->setFlags(BO->getFlags());
+ return SelectOp;
+}
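+
+// A worked instance of the constant-folding path above, with assumed values:
+//   add (select Cond, 10, 20), 5 --> select Cond, 15, 25
+// The binop disappears because both select arms fold with the constant
+// operand at compile time.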
+
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // Match a constant operand and a zext operand for the math instruction:
+ // add Z, C
+ // sub C, Z
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ auto *CN = dyn_cast<ConstantSDNode>(C);
+ if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ // Match the zext operand as a setcc of a boolean.
+ if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+ Z.getOperand(0).getValueType() != MVT::i1)
+ return SDValue();
+
+ // Match the compare as: setcc (X & 1), 0, eq.
+ SDValue SetCC = Z.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+ SetCC.getOperand(0).getOpcode() != ISD::AND ||
+ !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ // We are adding/subtracting a constant and an inverted low bit. Turn that
+ // into a subtract/add of the low bit with incremented/decremented constant:
+ // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+ // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+ EVT VT = C.getValueType();
+ SDLoc DL(N);
+ SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+ SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+ DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
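+
+// A quick numeric check of the add form above, with an assumed constant
+// C = 41:
+//   add (zext i1 (seteq (X & 1), 0)), 41 --> sub 42, (zext (X & 1))
+//   X & 1 == 0: 1 + 41 = 42 and 42 - 0 = 42
+//   X & 1 == 1: 0 + 41 = 41 and 42 - 1 = 41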
+
+/// Try to fold an add/sub with a constant operand, where the other operand is
+/// a shifted 'not' of the sign bit, into a shift and an add with a different
+/// constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // We need a constant operand for the add/sub, and the other operand is a
+ // logical shift right: add (srl), C or sub C, (srl).
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
+ ShiftOp.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The shift must be of a 'not' value.
+ SDValue Not = ShiftOp.getOperand(0);
+ if (!Not.hasOneUse() || !isBitwiseNot(Not))
+ return SDValue();
+
+ // The shift must be moving the sign bit to the least-significant-bit.
+ EVT VT = ShiftOp.getValueType();
+ SDValue ShAmt = ShiftOp.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
+ return SDValue();
+
+ // Eliminate the 'not' by adjusting the shift and add/sub constant:
+ // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+ // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
+ SDLoc DL(N);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(
+ IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
+ {ConstantOp, DAG.getConstant(1, DL, VT)})) {
+ SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
+ Not.getOperand(0), ShAmt);
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
+ }
+
+ return SDValue();
+}
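+
+// A numeric check of the add form above on i32, with an assumed constant
+// C = 5:
+//   add (srl (not X), 31), 5 --> add (sra X, 31), 6
+//   X = 7 : (~7 >>u 31) + 5 = 1 + 5 = 6 and (7 >>s 31) + 6 = 0 + 6 = 6
+//   X = -7: (~-7 >>u 31) + 5 = 0 + 5 = 5 and (-7 >>s 31) + 6 = -1 + 6 = 5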
+
+static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
+ unsigned Opcode = V.getOpcode();
+ if (Opcode == ISD::OR)
+ return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
+ if (Opcode == ISD::XOR)
+ return isMinSignedConstant(V.getOperand(1));
+ return false;
+}
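+
+// A short justification of why these count as ADD-like:
+// - (or x, c) equals (add x, c) when x and c share no set bits, since no
+//   carries can occur, e.g. 0b0101 | 0b1010 == 0b0101 + 0b1010 == 0b1111.
+// - (xor x, MIN_SIGNED) equals (add x, MIN_SIGNED) because only the top bit
+//   is affected and its carry out is discarded; for i8, x ^ 0x80 equals
+//   (x + 0x80) mod 256.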
+
+static bool
+areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
+ return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
+ (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
+}
+
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold (add x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ // fold (add c1, c2) -> c1+c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+
+ if (areBitwiseNotOfEachother(N0, N1))
+ return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
+ SDLoc(N), VT);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (add x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ // fold ((A-c1)+c2) -> (A+(c2-c1))
+ if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+
+ // add (sext i1 X), 1 -> zext (not i1 X)
+ // We don't transform this pattern:
+ // add (zext i1 X), -1 -> sext (not i1 X)
+ // because most (?) targets generate better code for the zext form.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ isOneOrOneSplat(N1)) {
+ SDValue X = N0.getOperand(0);
+ if ((!LegalOperations ||
+ (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+ X.getScalarValueSizeInBits() == 1) {
+ SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
+ }
+ }
+
+ // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
+ // iff (or x, c0) is equivalent to (add x, c0).
+ // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
+ // iff (xor x, c0) is equivalent to (add x, c0).
+ if (isADDLike(N0, DAG)) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
+ }
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // reassociate add
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
+ if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+ return RADD;
+
+ // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
+ // equivalent to (add x, c).
+ // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
+ // equivalent to (add x, c).
+ // Do this optimization only when adding c does not introduce instructions
+ // for adding carries.
+ auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
+ if (isADDLike(N0, DAG) && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+ // If N0's type does not need to be split, or the constant is a sign mask,
+ // adding it does not introduce extra carry-handling instructions.
+ auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
+ bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
+ TyActn == TargetLoweringBase::TypePromoteInteger ||
+ isMinSignedConstant(N0.getOperand(1));
+ if (NoAddCarry)
+ return DAG.getNode(
+ ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+ N0.getOperand(1));
+ }
+ return SDValue();
+ };
+ if (SDValue Add = ReassociateAddOr(N0, N1))
+ return Add;
+ if (SDValue Add = ReassociateAddOr(N1, N0))
+ return Add;
+
+ // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
+ return SD;
+ }
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+
+ // fold ((A-B)+(C-A)) -> (C-B)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N0.getOperand(1));
+
+ // fold ((A-B)+(B-C)) -> (A-C)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N1.getOperand(1));
+
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
+ N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0->hasOneUse() && N1->hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
+ DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
+ }
+
+ // fold (add (umax X, C), -C) --> (usubsat X, C)
+ if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return (!Max && !Op) ||
+ (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
+ };
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
+ /*AllowUndefs*/ true))
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (isOneOrOneSplat(N1)) {
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
+ // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
+ if (N0.getOpcode() == ISD::ADD) {
+ SDValue A, Xor;
+
+ if (isBitwiseNot(N0.getOperand(0))) {
+ A = N0.getOperand(1);
+ Xor = N0.getOperand(0);
+ } else if (isBitwiseNot(N0.getOperand(1))) {
+ A = N0.getOperand(0);
+ Xor = N0.getOperand(1);
+ }
+
+ if (Xor)
+ return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
+ }
+
+ // Look for:
+ // add (add x, y), 1
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
+ N0.hasOneUse() &&
+ // Limit this to after legalization if the add has wrap flags
+ (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
+ !N->getFlags().hasNoSignedWrap()))) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+ }
+ }
+
+ // (x - y) + -1 -> add (xor y, -1), x
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isAllOnesOrAllOnesSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
+ // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+ const APInt &C0 = N0->getConstantOperandAPInt(0);
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
+ return DAG.getVScale(DL, VT, C0 + C1);
+ }
+
+ // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::VSCALE &&
+ N1.getOpcode() == ISD::VSCALE) {
+ const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ const APInt &VS1 = N1->getConstantOperandAPInt(0);
+ SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
+ }
+
+ // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ N1.getOpcode() == ISD::STEP_VECTOR) {
+ const APInt &C0 = N0->getConstantOperandAPInt(0);
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
+ APInt NewStep = C0 + C1;
+ return DAG.getStepVector(DL, VT, NewStep);
+ }
+
+ // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
+ N1.getOpcode() == ISD::STEP_VECTOR) {
+ const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ const APInt &SV1 = N1->getConstantOperandAPInt(0);
+ APInt NewStep = SV0 + SV1;
+ SDValue SV = DAG.getStepVector(DL, VT, NewStep);
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
+ }
+
+ return SDValue();
+}
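+
+// A small check of the (a+b) -> (a|b) fold above: with no common bits set
+// there are no carries, so each result bit is just the OR of the input bits,
+// e.g. (assumed values) 0b0011 + 0b0100 == 0b0111 == 0b0011 | 0b0100.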
+
+SDValue DAGCombiner::visitADDSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = Opcode == ISD::SADDSAT;
+ SDLoc DL(N);
+
+ // fold (add_sat x, undef) -> -1
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ // fold (add_sat c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (add_sat x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (add_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
+static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
+ bool ForceCarryReconstruction = false) {
+ bool Masked = false;
+
+ // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
+ while (true) {
+ if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
+ V = V.getOperand(0);
+ continue;
+ }
+
+ if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
+ if (ForceCarryReconstruction)
+ return V;
+
+ Masked = true;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
+ return V;
+
+ break;
+ }
+
+ // If this is not a carry, return.
+ if (V.getResNo() != 1)
+ return SDValue();
+
+ if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
+ V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
+ return SDValue();
+
+ EVT VT = V->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
+ return SDValue();
+
+ // If the result is masked, then no matter what kind of bool it is we can
+ // return. If it isn't, then we need to make sure the bool type is either 0 or
+ // 1 and not other values.
+ if (Masked ||
+ TLI.getBooleanContents(V.getValueType()) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ return V;
+
+ return SDValue();
+}
+
+/// Given the operands of an add/sub operation, see if the 2nd operand is a
+/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
+/// the opcode and bypass the mask operation.
+static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() == ISD::ZERO_EXTEND)
+ N1 = N1.getOperand(0);
+
+ if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N0.getValueType();
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
+ N10 = N10.getOperand(0);
+
+ if (N10.getValueType() != VT)
+ return SDValue();
+
+ if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
+ // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
+}
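+
+// A worked check of the fold above: the sign-bit test guarantees N10 is
+// either 0 or -1, so (and N10, 1) is 0 or 1 respectively, which is exactly
+// -N10. Hence:
+//   N10 == 0 : add N0, 0 == sub N0, 0
+//   N10 == -1: add N0, 1 == sub N0, -1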
+
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(LocReference);
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0,
+ DAG.getNode(ISD::SHL, DL, VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+
+ if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
+ return V;
+
+ // Look for:
+ // add (add x, 1), y
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
+ N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
+ // Limit this to after legalization if the add has wrap flags
+ (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
+ !N0->getFlags().hasNoSignedWrap()))) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+ }
+
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
+ }
+
+ // add (mul x, C), x -> mul x, C+1
+ if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
+ N0.hasOneUse()) {
+ SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
+ // rather than 'add 0/-1' (the zext should get folded).
+ // add (sext i1 Y), X --> sub X, (zext i1 Y)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ // add X, (sextinreg Y i1) -> sub X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
+ }
+ }
+
+ // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
+ if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
+ N1.getResNo() == 0)
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
+ N0, N1.getOperand(0), N1.getOperand(2));
+
+ // (add X, Carry) -> (uaddo_carry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::UADDO_CARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), N0,
+ DAG.getConstant(0, DL, VT), Carry);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ DL, MVT::Glue));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ return SDValue();
+}
+
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a boolean
+ * no matter what, use DAG.getLogicalNOT.
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool Force) {
+ if (Force && isa<ConstantSDNode>(V))
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
+
+ if (V.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
+ if (!Const)
+ return SDValue();
+
+ EVT VT = V.getValueType();
+
+ bool IsFlip = false;
+ switch(TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ IsFlip = Const->isOne();
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ IsFlip = Const->isAllOnes();
+ break;
+ case TargetLowering::UndefinedBooleanContent:
+ IsFlip = (Const->getAPIntValue() & 0x01) == 1;
+ break;
+ }
+
+ if (IsFlip)
+ return V.getOperand(0);
+ if (Force)
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = (ISD::SADDO == N->getOpcode());
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (addo x, 0) -> x + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+
+ if (!IsSigned) {
+ // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
+ }
+
+ if (SDValue Combined = visitUADDOLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitUADDOLike(N1, N0, N))
+ return Combined;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
+ // If Y + 1 cannot overflow.
+ if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
+ SDValue Y = N1.getOperand(0);
+ SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
+ if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
+ N1.getOperand(2));
+ }
+
+ // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
+ DAG.getConstant(0, SDLoc(N), VT), Carry);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (uaddo_carry x, y, false) -> (uaddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
+ if (isNullConstant(N0) && isNullConstant(N1)) {
+ EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
+ SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
+ AddToWorklist(CarryExt.getNode());
+ return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
+ DAG.getConstant(1, DL, VT)),
+ DAG.getConstant(0, DL, CarryVT));
+ }
+
+ if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
+ return Combined;
+
+ if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
+ return Combined;
+
+ // We want to avoid useless duplication.
+ // TODO: This is done automatically for binary operations. As UADDO_CARRY is
+ // not a binary operation, it is not possible to leverage that existing
+ // mechanism for it. However, if more operations require the same
+ // deduplication logic, then it may be worth generalizing it.
+ SDValue Ops[] = {N1, N0, CarryIn};
+ SDNode *CSENode =
+ DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
+ if (CSENode)
+ return SDValue(CSENode, 0);
+
+ return SDValue();
+}
+
+/**
+ * If we are facing some sort of diamond carry propagation pattern try to
+ * break it up to generate something like:
+ * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in the number of operations required,
+ * but because the carry is now linearized, other transforms can kick in and
+ * optimize the DAG.
+ *
+ * Patterns typically look something like
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (uaddo_carry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (uaddo_carry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
+ */
+static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
+ SelectionDAG &DAG, SDValue X,
+ SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
+ if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+ return SDValue();
+ if (Carry1.getOpcode() != ISD::UADDO)
+ return SDValue();
+
+ SDValue Z;
+
+ /**
+ * First look for a suitable Z. It will present itself in the form of
+ * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ */
+ if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
+ isNullConstant(Carry0.getOperand(1))) {
+ Z = Carry0.getOperand(2);
+ } else if (Carry0.getOpcode() == ISD::UADDO &&
+ isOneConstant(Carry0.getOperand(1))) {
+ EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+ Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+ } else {
+ // We couldn't find a suitable Z.
+ return SDValue();
+ }
+
+
+ auto cancelDiamond = [&](SDValue A,SDValue B) {
+ SDLoc DL(N);
+ SDValue NewY =
+ DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
+ Combiner.AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
+ DAG.getConstant(0, DL, X.getValueType()),
+ NewY.getValue(1));
+ };
+
+ /**
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (uaddo_carry *, 0, Z)
+ */
+ if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+ }
+
+ /**
+ * (uaddo_carry A, 0, Z)
+ * |
+ * Sum
+ * |
+ * (uaddo *, B)
+ */
+ if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+ }
+
+ if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+ }
+
+ return SDValue();
+}
+
+// If we are facing some sort of diamond carry/borrow in/out pattern try to
+// match patterns like:
+//
+// (uaddo A, B) CarryIn
+// | \ |
+// | \ |
+// PartialSum PartialCarryOutX /
+// | | /
+// | ____|____________/
+// | / |
+// (uaddo *, *) \________
+// | \ \
+// | \ |
+// | PartialCarryOutY |
+// | \ |
+// | \ /
+// AddCarrySum | ______/
+// | /
+// CarryOut = (or *, *)
+//
+// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
+//
+// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
+//
+// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
+// with a single path for carry/borrow out propagation.
+static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
+ SDValue N0, SDValue N1, SDNode *N) {
+ SDValue Carry0 = getAsCarry(TLI, N0);
+ if (!Carry0)
+ return SDValue();
+ SDValue Carry1 = getAsCarry(TLI, N1);
+ if (!Carry1)
+ return SDValue();
+
+ unsigned Opcode = Carry0.getOpcode();
+ if (Opcode != Carry1.getOpcode())
+ return SDValue();
+ if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
+ return SDValue();
+
+ // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
+ // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
+ if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
+ std::swap(Carry0, Carry1);
+
+ // Check if nodes are connected in expected way.
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ return SDValue();
+
+ // The carry-in value must be on the right-hand side for subtraction.
+ unsigned CarryInOperandNum =
+ Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
+ if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
+ return SDValue();
+ SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
+
+ unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
+ if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
+ return SDValue();
+
+ // Verify that the carry/borrow in is plausibly a carry/borrow bit.
+ CarryIn = getAsCarry(TLI, CarryIn, true);
+ if (!CarryIn)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Merged =
+ DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
+ Carry0.getOperand(1), CarryIn);
+
+ // Note that because we have proven that the result of the UADDO/USUBO of A
+ // and B feeds into the UADDO/USUBO that does the carry/borrow in, the first
+ // UADDO/USUBO overflowing implies that the second one cannot. For example,
+ // consider 8-bit numbers where 0xFF is the maximum value.
+ //
+ // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
+ // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
+ //
+ // This is important because it means that OR and XOR can be used to merge
+ // carry flags; and that AND can return a constant zero.
+ //
+ // TODO: match other operations that can merge flags (ADD, etc)
+ DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
+ if (N->getOpcode() == ISD::AND)
+ return DAG.getConstant(0, DL, MVT::i1);
+ return Merged.getValue(1);
+}
+
+SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
+ SDValue CarryIn, SDNode *N) {
+ // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
+ // carry.
+ if (isBitwiseNot(N0))
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+ SDLoc DL(N);
+ SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
+ }
+
+ // Iff the flag result is dead:
+ // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
+ // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+ // or the dependency between the instructions.
+ if ((N0.getOpcode() == ISD::ADD ||
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+ N0.getValue(1) != CarryIn)) &&
+ isNullConstant(N1) && !N->hasAnyUseOfValue(1))
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
+ N0.getOperand(0), N0.getOperand(1), CarryIn);
+
+ /**
+ * When one of the uaddo_carry arguments is itself a carry, we may be facing
+ * a diamond carry propagation, in which case we try to transform the DAG
+ * to ensure linear carry propagation if that is possible.
+ */
+ if (auto Y = getAsCarry(TLI, N1)) {
+ // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ return R;
+ if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ return R;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
+// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
+// clamp/truncation if necessary.
+static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
+ "Illegal truncation");
+
+ if (DstVT == SrcVT)
+ return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
+
+ // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
+ // clamping RHS.
+ APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ DstVT.getScalarSizeInBits());
+ if (!DAG.MaskedValueIsZero(LHS, UpperBits))
+ return SDValue();
+
+ SDValue SatLimit =
+ DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
+ DstVT.getScalarSizeInBits()),
+ DL, SrcVT);
+ RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
+ RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
+ LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
+ return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
+}
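+
+// A worked instance of the clamp-and-truncate path above, assuming SrcVT=i16,
+// DstVT=i8 and an LHS whose upper 8 bits are known zero:
+//   LHS = 200, RHS = 300: usubsat at i16 gives 0; the rewritten form computes
+//     umin(300, 255) = 255 and usubsat.i8(200, 255) = 0.
+//   LHS = 200, RHS = 50 : usubsat at i16 gives 150; the rewritten form
+//     computes umin(50, 255) = 50 and usubsat.i8(200, 50) = 150.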
+
+// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
+// usubsat(a,b), optionally as a truncated type.
+SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
+ if (N->getOpcode() != ISD::SUB ||
+ !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
+ return SDValue();
+
+ EVT SubVT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted
+ // to usubsat(a,b).
+ if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
+ SDValue MaxLHS = Op0.getOperand(0);
+ SDValue MaxRHS = Op0.getOperand(1);
+ if (MaxLHS == Op1)
+ return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
+ if (MaxRHS == Op1)
+ return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
+ }
+
+ if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
+ SDValue MinLHS = Op1.getOperand(0);
+ SDValue MinRHS = Op1.getOperand(1);
+ if (MinLHS == Op0)
+ return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
+ if (MinRHS == Op0)
+ return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
+ }
+
+ // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
+ if (Op1.getOpcode() == ISD::TRUNCATE &&
+ Op1.getOperand(0).getOpcode() == ISD::UMIN &&
+ Op1.getOperand(0).hasOneUse()) {
+ SDValue MinLHS = Op1.getOperand(0).getOperand(0);
+ SDValue MinRHS = Op1.getOperand(0).getOperand(1);
+ if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
+ return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
+ DAG, SDLoc(N));
+ if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
+ return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
+ DAG, SDLoc(N));
+ }
+
+ return SDValue();
+}
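+
+// A short argument for the umax/umin patterns above:
+//   umax(a,b) - b is a - b when a >= b and b - b = 0 otherwise, which matches
+//   the definition of usubsat(a,b).
+//   a - umin(a,b) is a - b when a > b and a - a = 0 otherwise, again
+//   usubsat(a,b).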
+
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check if we can before folding.
+static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector())
+ return DAG.getConstant(0, DL, VT);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return DAG.getConstant(0, DL, VT);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ auto PeekThroughFreeze = [](SDValue N) {
+ if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+ return N->getOperand(0);
+ return N;
+ };
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // fold (sub c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+
+ if (isNullOrNullSplat(N0)) {
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ // Right-shifting everything out but the sign bit followed by negation is
+ // the same as flipping arithmetic/logical shift type without the negation:
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
+ ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
+ if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
+ auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
+ if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
+ return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
+ }
+ }
+
+ // 0 - X --> 0 if the sub is NUW.
+ if (N->getFlags().hasNoUnsignedWrap())
+ return N0;
+
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
+ // N1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // N1 must be 0 because negating the minimum signed value is undefined.
+ if (N->getFlags().hasNoSignedWrap())
+ return N0;
+
+ // 0 - X --> X if X is 0 or the minimum signed value.
+ return N1;
+ }
+
+ // Convert 0 - abs(x).
+ if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
+ return Result;
+
+ // Fold neg(splat(neg(x))) -> splat(x)
+ if (VT.isVector()) {
+ SDValue N1S = DAG.getSplatValue(N1, true);
+ if (N1S && N1S.getOpcode() == ISD::SUB &&
+ isNullConstant(N1S.getOperand(0)))
+ return DAG.getSplat(VT, DL, N1S.getOperand(1));
+ }
+ }
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (isAllOnesOrAllOnesSplat(N0))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
+ // fold (A - (0-B)) -> A+B
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
+
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+
+ // fold (A+C1)-C2 -> A+(C1-C2)
+ if (N0.getOpcode() == ISD::ADD) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD) {
+ SDValue N11 = N1.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
+ }
+
+ // fold (A-C1)-C2 -> A-(C1+C2)
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold (c1-A)-c2 -> (c1-c2)-A
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ }
+
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(1));
+
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
+ // fold (A-(B-C)) -> A+(C-B)
+ if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
+ N1.getOperand(0)));
+
+ // A - (A & B) -> A & (~B)
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue A = N1.getOperand(0);
+ SDValue B = N1.getOperand(1);
+ if (A != N0)
+ std::swap(A, B);
+ if (A == N0 &&
+ (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
+ SDValue InvB =
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::AND, DL, VT, A, InvB);
+ }
+ }
+
+ // fold (X - (-Y * Z)) -> (X + (Y * Z))
+ if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
+ if (N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ if (N1.getOperand(1).getOpcode() == ISD::SUB &&
+ isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ }
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+ return V;
+
+ if (SDValue V = foldSubToUSubSat(VT, N))
+ return V;
+
+ // (x - y) - 1 -> add (xor y, -1), x
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ // Look for:
+ // sub y, (xor x, -1)
+ // And if the target does not like this form then turn into:
+ // add (add x, y), 1
+ if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+ }
+
+ // Hoist one-use addition by non-opaque constant:
+ // (x + C) - y -> (x - y) + C
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+ }
+ // y - (x + C) -> (y - x) - C
+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
+ isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+ }
+ // (x - C) - y -> (x - y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+ }
+ // (C - x) - y -> C - (x + y)
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+ }
+
+ // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
+ // rather than 'sub 0/1' (the sext should get folded).
+ // sub X, (zext i1 Y) --> add X, (sext i1 Y)
+ if (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
+ }
+
+ // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+ SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+ SDValue S0 = N1.getOperand(0);
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ }
+ }
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ DL, VT);
+ }
+
+ // sub X, (sextinreg Y i1) -> add X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
+ }
+ }
+
+ // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
+ if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
+ const APInt &IntVal = N1.getConstantOperandAPInt(0);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
+ }
+
+ // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
+ if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
+ APInt NewStep = -N1.getConstantOperandAPInt(0);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getStepVector(DL, VT, NewStep));
+ }
+
+ // Prefer an add for more folding potential and possibly better codegen:
+ // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
+ if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
+ SDValue ShAmt = N1.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (ShAmtC &&
+ ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
+ }
+ }
+
+ // As with the previous fold, prefer add for more folding potential.
+ // Subtracting SMIN/0 is the same as adding SMIN/0:
+ // N0 - (X << BW-1) --> N0 + (X << BW-1)
+ if (N1.getOpcode() == ISD::SHL) {
+ ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
+ if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ }
+
+ // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
+ if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
+ N0.getResNo() == 0 && N0.hasOneUse())
+ return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
+ N0.getOperand(0), N1, N0.getOperand(2));
+
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
+ // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
+ if (SDValue Carry = getAsCarry(TLI, N0)) {
+ SDValue X = N1;
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+ return DAG.getNode(ISD::UADDO_CARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+ Carry);
+ }
+ }
+
+ // If there's no chance of borrowing from adjacent bits, then sub is xor:
+ // sub C0, X --> xor X, C0
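+ // e.g. if C0 == 0b1010 and X is known to have set bits only within 0b1010,
+ // then 0b1010 - 0b0010 == 0b1000 == 0b1010 ^ 0b0010.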
+ if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
+ if (!C0->isOpaque()) {
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
+ if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ }
+ }
+
+ // max(a,b) - min(a,b) --> abd(a,b)
+ auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
+ if (N0.getOpcode() != Max || N1.getOpcode() != Min)
+ return SDValue();
+ if ((N0.getOperand(0) != N1.getOperand(0) ||
+ N0.getOperand(1) != N1.getOperand(1)) &&
+ (N0.getOperand(0) != N1.getOperand(1) ||
+ N0.getOperand(1) != N1.getOperand(0)))
+ return SDValue();
+ if (!hasOperation(Abd, VT))
+ return SDValue();
+ return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
+ };
+ if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
+ return R;
+ if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
+ return R;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = Opcode == ISD::SSUBSAT;
+ SDLoc DL(N);
+
+ // fold (sub_sat x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (sub_sat x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (sub_sat c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // If it cannot overflow, transform into a sub.
+ if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = (ISD::SSUBO == N->getOpcode());
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // fold (subo x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+ // fold (subo x, c) -> (addo x, -c)
+ if (IsSigned && N1C && !N1C->isMinSignedValue()) {
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+
+ // fold (subo x, 0) -> x + no borrow
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // If it cannot overflow, transform into a sub.
+ if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getConstant(0, DL, CarryVT));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (usubo_carry x, y, false) -> (usubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (ssubo_carry x, y, false) -> (ssubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Scale = N->getOperand(2);
+ EVT VT = N0.getValueType();
+
+ // fold (mulfix x, undef, scale) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+ // fold (mulfix x, 0, scale) -> 0
+ if (isNullConstant(N1))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold (mul x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (mul c1, c2) -> c1*c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+
+ bool N1IsConst = false;
+ bool N1IsOpaqueConst = false;
+ APInt ConstValue1;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
+ assert((!N1IsConst ||
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
+ } else {
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst) {
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+ }
+ }
+
+ // fold (mul x, 0) -> 0
+ if (N1IsConst && ConstValue1.isZero())
+ return N1;
+
+ // fold (mul x, 1) -> x
+ if (N1IsConst && ConstValue1.isOne())
+ return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (mul x, -1) -> 0-x
+ if (N1IsConst && ConstValue1.isAllOnes())
+ return DAG.getNegative(N0, DL, VT);
+
+ // fold (mul x, (1 << c)) -> x << c
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) &&
+ (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ }
+
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
+ unsigned Log2Val = (-ConstValue1).logBase2();
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL, ShiftVT)));
+ }
+
+ // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
+ // hi result is in use in case we hit this mid-legalization.
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
+ }
+
+ // Try to transform:
+ // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // mul x, (2^N + 1) --> add (shl x, N), x
+ // mul x, (2^N - 1) --> sub (shl x, N), x
+ // Examples: x * 33 --> (x << 5) + x
+ // x * 15 --> (x << 4) - x
+ // x * -33 --> -((x << 5) + x)
+ // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
+ // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
+ // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
+ // Examples: x * 0x8800 --> (x << 15) + (x << 11)
+ // x * 0xf800 --> (x << 16) - (x << 11)
+ // x * -0x8800 --> -((x << 15) + (x << 11))
+ // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
+ if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+ // TODO: We could handle more general decomposition of any constant by
+ // having the target set a limit on number of ops and making a
+ // callback to determine that sequence (similar to sqrt expansion).
+ unsigned MathOp = ISD::DELETED_NODE;
+ APInt MulC = ConstValue1.abs();
+ // The constant `2` should be treated as (2^0 + 1).
+ unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
+ MulC.lshrInPlace(TZeros);
+ if ((MulC - 1).isPowerOf2())
+ MathOp = ISD::ADD;
+ else if ((MulC + 1).isPowerOf2())
+ MathOp = ISD::SUB;
+
+ if (MathOp != ISD::DELETED_NODE) {
+ unsigned ShAmt =
+ MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ ShAmt += TZeros;
+ assert(ShAmt < VT.getScalarSizeInBits() &&
+ "multiply-by-constant generated out of bounds shift");
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
+ SDValue R =
+ TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(TZeros, DL, VT)))
+ : DAG.getNode(MathOp, DL, VT, Shl, N0);
+ if (ConstValue1.isNegative())
+ R = DAG.getNegative(R, DL, VT);
+ return R;
+ }
+ }
+
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh, Y;
+
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N1.getOperand(1)) &&
+ N1->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N0.getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(
+ ISD::ADD, DL, VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+
+ // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
+ ConstantSDNode *NC1 = isConstOrConstSplat(N1);
+ if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
+ return DAG.getVScale(DL, VT, C0 * C1);
+ }
+
+ // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
+ APInt MulVal;
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ APInt NewStep = C0 * MulVal;
+ return DAG.getStepVector(DL, VT, NewStep);
+ }
+
+ // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x
+ // -> and(x, mask)
+ // We can replace vectors with '0' and '1' factors with a clearing mask.
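+ // e.g. (mul <4 x i32> X, <1, 0, 1, undef>) -> (and X, <-1, 0, -1, 0>)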
+ if (VT.isFixedLengthVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallBitVector ClearMask;
+ ClearMask.reserve(NumElts);
+ auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
+ if (!V || V->isZero()) {
+ ClearMask.push_back(true);
+ return true;
+ }
+ ClearMask.push_back(false);
+ return V->isOne();
+ };
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
+ ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
+ assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
+ EVT LegalSVT = N1.getOperand(0).getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
+ SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (ClearMask[I])
+ Mask[I] = Zero;
+ return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
+ }
+ }
+
+ // reassociate mul
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+ return RMUL;
+
+ // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ EVT NodeType = Node->getValueType(0);
+ if (!NodeType.isSimple())
+ return false;
+ switch (NodeType.getSimpleVT().SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
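+ /// e.g. if both (sdiv X, Y) and (srem X, Y) are live, replace them with the
+ /// two results of a single (sdivrem X, Y) node.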
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+
+ // DivMod lib calls can still work on non-legal types if using lib-calls.
+ EVT VT = Node->getValueType(0);
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
+ return SDValue();
+
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode *User : Op0->uses()) {
+ if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
+ User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
+ return combined;
+}
+
+static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ unsigned Opc = N->getOpcode();
+ bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // X / undef -> undef
+ // X % undef -> undef
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // NOTE: This includes vectors where any divisor element is zero/undef.
+ if (DAG.isUndef(Opc, {N0, N1}))
+ return DAG.getUNDEF(VT);
+
+ // undef / X -> 0
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // 0 / X -> 0
+ // 0 % X -> 0
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ if (N0C && N0C->isZero())
+ return N0;
+
+ // X / X -> 1
+ // X % X -> 0
+ if (N0 == N1)
+ return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+ // X / 1 -> X
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
+ // it's a 1.
+ if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (sdiv X, -1) -> 0-X
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C && N1C->isAllOnes())
+ return DAG.getNegative(N0, DL, VT);
+
+ // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
+ if (N1C && N1C->isMinSignedValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
+
+ if (SDValue V = visitSDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+ // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
+ return V;
+ }
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+static bool isDivisorPowerOfTwo(SDValue Divisor) {
+ // Helper for determining whether a value is a power-2 constant scalar or a
+ // vector of such elements.
+ auto IsPowerOfTwo = [](ConstantSDNode *C) {
+ if (C->isZero() || C->isOpaque())
+ return false;
+ if (C->getAPIntValue().isPowerOf2())
+ return true;
+ if (C->getAPIntValue().isNegatedPowerOf2())
+ return true;
+ return false;
+ };
+
+ return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ // FIXME: We check for the exact bit here because the generic lowering gives
+ // better results in that case. The target-specific lowering should learn how
+ // to handle exact sdivs efficiently.
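+ // e.g. for (sdiv i32 X, 8) the generic expansion below computes
+ //   t = X + ((X >>s 31) >>u 29); result = t >>s 3
+ // with selects handling divisors of 1/-1 and negation for negative divisors.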
+ if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
+ // Target-specific implementation of sdiv x, pow2.
+ if (SDValue Res = BuildSDIVPow2(N))
+ return Res;
+
+ // Create constants that are functions of the shift amount value.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
+ SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
+ C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
+ SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
+ if (!isConstantOrConstantVector(Inexact))
+ return SDValue();
+
+ // Splat the sign bit into the register
+ SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
+ AddToWorklist(Sign.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
+ AddToWorklist(Srl.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
+ AddToWorklist(Add.getNode());
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
+ AddToWorklist(Sra.getNode());
+
+ // Special case: (sdiv X, 1) -> X
+ // Special case: (sdiv X, -1) -> 0-X
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
+ SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
+ SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
+ Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
+
+ // If dividing by a positive value, we're done. Otherwise, the result must
+ // be negated.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
+
+ // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+ SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
+ SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
+ return Res;
+ }
+
+ // If integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (udiv X, -1) -> select(X == -1, 1, 0)
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
+ }
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (SDValue V = visitUDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+ // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
+ return V;
+ }
+
+ // udiv, urem -> udivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ SDValue N10 = N1.getOperand(0);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N10)) {
+ SDValue LogBase2 = BuildLogBase2(N10, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ }
+ }
+
+ // fold (udiv x, c) -> alternate
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
+ if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
+ !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
+ // Target-specific implementation of srem x, pow2.
+ if (SDValue Res = BuildSREMPow2(N))
+ return Res;
+ }
+ return SDValue();
+}
+
+ // Handles ISD::SREM and ISD::UREM.
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
+
+ // fold (rem c1, c2) -> c1%c2
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // fold (urem X, -1) -> select(FX == -1, 0, FX)
+ // Freeze the numerator to avoid a miscompile with an undefined value.
+ if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
+ CCVT.isVector() == VT.isVector()) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
+ }
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
+ } else {
+ if (DAG.isKnownToBeAPowerOfTwo(N1)) {
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
+ // TODO: We should sink the following into isKnownToBePowerOfTwo
+ // using a OrZero parameter analogous to our handling in ValueTracking.
+ if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
+ DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
+ }
+
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
+ // speculative DIV must not cause a DIVREM conversion. We guard against this
+ // by skipping the simplification if isIntDivCheap(). When div is not cheap,
+ // combine will not return a DIVREM. Regardless, checking cheapness here
+ // makes sense since the simplification results in fatter code.
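+ // e.g. (urem X, 10) can become X - (X /u 10) * 10, where the udiv is
+ // typically expanded into a multiply-by-magic-constant sequence by BuildUDIV.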
+ if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ if (isSigned) {
+ // check if we can build faster implementation for srem
+ if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
+ return OptimizedRem;
+ }
+
+ SDValue OptimizedDiv =
+ isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
+ // If the equivalent Div node also exists, update its users.
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
+ { N0, N1 }))
+ CombineTo(DivNode, OptimizedDiv);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(OptimizedDiv.getNode());
+ AddToWorklist(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // sdiv, srem -> sdivrem (and udiv, urem -> udivrem)
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (mulhs c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (mulhs x, 0) -> 0
+ // do not return N1, because it may contain undef elements.
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
+ }
+
+ // fold (mulhs x, 0) -> 0
+ if (isNullConstant(N1))
+ return N1;
+
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
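+ // (the high half of the sign-extended product x*1 is the sign bit of x
+ // replicated, i.e. x >>s (size(x)-1))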
+ if (isOneConstant(N1))
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
+ DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
+ getShiftAmountTy(N0.getValueType())));
+
+ // fold (mulhs x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
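+ // e.g. an i16 mulhs on a target with a legal i32 multiply becomes
+ // trunc_i16(srl(mul(sext_i32(x), sext_i32(y)), 16)).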
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
+ !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (mulhu c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (mulhu x, 0) -> 0
+ // do not return N1, because it may contain undef elements.
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
+ }
+
+ // fold (mulhu x, 0) -> 0
+ if (isNullConstant(N1))
+ return N1;
+
+ // fold (mulhu x, 1) -> 0
+ if (isOneConstant(N1))
+ return DAG.getConstant(0, DL, N0.getValueType());
+
+ // fold (mulhu x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
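+ // e.g. for i32: (mulhu x, 16) -> (srl x, 28)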
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
+ !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ // Simplify the operands using demanded-bits information.
+ // We don't have demanded bits support for MULHU so this just enables constant
+ // folding based on known bits.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAVG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (avg c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (avgfloor x, 0) -> x >> 1
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ if (Opcode == ISD::AVGFLOORS)
+ return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
+ if (Opcode == ISD::AVGFLOORU)
+ return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (avg x, undef) -> x
+ if (N0.isUndef())
+ return N1;
+ if (N1.isUndef())
+ return N0;
+
+ // Fold (avg x, x) --> x
+ if (N0 == N1 && Level >= AfterLegalizeTypes)
+ return N0;
+
+ // TODO: If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitABD(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (abd c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (abds x, 0) -> abs x
+ // fold (abdu x, 0) -> x
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ if (Opcode == ISD::ABDS)
+ return DAG.getNode(ISD::ABS, DL, VT, N0);
+ if (Opcode == ISD::ABDU)
+ return N0;
+ }
+ }
+
+ // fold (abd x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (abds x, y) -> (abdu x, y) iff both args are known positive
+ if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
+ DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
+ return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
+
+ return SDValue();
+}
+
+/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
+ /// give the opcodes for the two computations that are being performed. Returns
+ /// the replacement value if a simplification was made, or an empty SDValue otherwise.
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ AddToWorklist(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ AddToWorklist(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
+
+ // If the type twice as wide is legal, transform the smul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part (result value 1).
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part (result value 0).
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
+
+ // (umul_lohi N0, 0) -> (0, 0)
+ if (isNullConstant(N1)) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, Zero, Zero);
+ }
+
+ // (umul_lohi N0, 1) -> (N0, 0)
+ if (isOneConstant(N1)) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, N0, Zero);
+ }
+
+ // If the type twice as wide is legal, transform the umul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part (result value 1).
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part (result value 0).
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = (ISD::SMULO == N->getOpcode());
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // fold operation with constant operands.
+ // TODO: Move this to FoldConstantArithmetic when it supports nodes with
+ // multiple results.
+ if (N0C && N1C) {
+ bool Overflow;
+ APInt Result =
+ IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
+ : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
+ return CombineTo(N, DAG.getConstant(Result, DL, VT),
+ DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
+ }
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (mulo x, 0) -> 0 + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // (mulo x, 2) -> (addo x, x)
+ // FIXME: This needs a freeze.
+ if (N1C && N1C->getAPIntValue() == 2 &&
+ (!IsSigned || VT.getScalarSizeInBits() > 2))
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+ N->getVTList(), N0, N0);
+
+ if (IsSigned) {
+ // A 1 bit SMULO overflows if both inputs are 1.
+ if (VT.getScalarSizeInBits() == 1) {
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
+ return CombineTo(N, And,
+ DAG.getSetCC(DL, CarryVT, And,
+ DAG.getConstant(0, DL, VT), ISD::SETNE));
+ }
+
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
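+ // e.g. for i32 smulo: if both operands fit in i16 (>= 17 sign bits each),
+ // SignBits >= 34 > 33, so the full product fits in i32 and cannot overflow.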
+ unsigned SignBits = DAG.ComputeNumSignBits(N0);
+ if (SignBits > 1)
+ SignBits += DAG.ComputeNumSignBits(N1);
+ if (SignBits > VT.getScalarSizeInBits() + 1)
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+ } else {
+ KnownBits N1Known = DAG.computeKnownBits(N1);
+ KnownBits N0Known = DAG.computeKnownBits(N0);
+ bool Overflow;
+ (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
+ if (!Overflow)
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+ }
+
+ return SDValue();
+}
+
+// Function to calculate whether the Min/Max pair of SDNodes (potentially
+// swapped around) make a signed saturate pattern, clamping to between a signed
+ // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
+// Returns the node being clamped and the bitwidth of the clamp in BW. Should
+// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
+// same as SimplifySelectCC. N0<N1 ? N2 : N3.
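+ // e.g. a pattern equivalent to smin(smax(X, -128), 127) clamps X to the signed
+ // i8 range (BW == 8, Unsigned == false), and smin(smax(X, 0), 255) clamps X to
+ // the unsigned i8 range (BW == 8, Unsigned == true); X is returned in both cases.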
+static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC, unsigned &BW,
+ bool &Unsigned, SelectionDAG &DAG) {
+ auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // The compare and select operand should be the same or the select operands
+ // should be truncated versions of the comparison.
+ if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
+ return 0;
+ // The constants need to be the same or a truncated version of each other.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return 0;
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C2 = N3C->getAPIntValue();
+ if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
+ return 0;
+ return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
+ };
+
+ // Check the initial value is a SMIN/SMAX equivalent.
+ unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
+ if (!Opcode0)
+ return SDValue();
+
+ // We may only need one range check if the fptosi can never produce
+ // the upper value.
+ if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
+ if (isNullOrNullSplat(N3)) {
+ EVT IntVT = N0.getValueType().getScalarType();
+ EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
+ if (FPVT.isSimple()) {
+ Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
+ const fltSemantics &Semantics = InputTy->getFltSemantics();
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
+ if (IntVT.getSizeInBits() >= MinBitWidth) {
+ Unsigned = true;
+ BW = PowerOf2Ceil(MinBitWidth);
+ return N0;
+ }
+ }
+ }
+ }
+
+ SDValue N00, N01, N02, N03;
+ ISD::CondCode N0CC;
+ switch (N0.getOpcode()) {
+ case ISD::SMIN:
+ case ISD::SMAX:
+ N00 = N02 = N0.getOperand(0);
+ N01 = N03 = N0.getOperand(1);
+ N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
+ break;
+ case ISD::SELECT_CC:
+ N00 = N0.getOperand(0);
+ N01 = N0.getOperand(1);
+ N02 = N0.getOperand(2);
+ N03 = N0.getOperand(3);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
+ break;
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ if (N0.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+ N00 = N0.getOperand(0).getOperand(0);
+ N01 = N0.getOperand(0).getOperand(1);
+ N02 = N0.getOperand(1);
+ N03 = N0.getOperand(2);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
+ break;
+ default:
+ return SDValue();
+ }
+
+ unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
+ if (!Opcode1 || Opcode0 == Opcode1)
+ return SDValue();
+
+ ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
+ ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
+ if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
+ return SDValue();
+
+ const APInt &MinC = MinCOp->getAPIntValue();
+ const APInt &MaxC = MaxCOp->getAPIntValue();
+ APInt MinCPlus1 = MinC + 1;
+ if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2() + 1;
+ Unsigned = false;
+ return N02;
+ }
+
+ if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2();
+ Unsigned = true;
+ return N02;
+ }
+
+ return SDValue();
+}
+
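+// Try to fold a saturating clamp of an fp_to_sint into FP_TO_SINT_SAT (or
+// FP_TO_UINT_SAT when the clamp is unsigned), e.g. for an i32 result:
+// smin(smax(fptosi(X), -128), 127) --> sext(fp_to_sint_sat X, i8)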
+static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ unsigned BW;
+ bool Unsigned;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
+ if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
+ return SDValue();
+ EVT FPVT = Fp.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
+ return SDValue();
+ SDLoc DL(Fp);
+ SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
+}
+
+static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
+ // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
+ // be truncated versions of the setcc (N0/N1).
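+ // For example: umin(fptoui(X), 255) --> zext(fp_to_uint_sat X, i8).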
+ if ((N0 != N2 &&
+ (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
+ N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
+ return SDValue();
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return SDValue();
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C3 = N3C->getAPIntValue();
+ if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
+ C1 != C3.zext(C1.getBitWidth()))
+ return SDValue();
+
+ unsigned BW = (C1 + 1).exactLogBase2();
+ EVT FPVT = N0.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
+ FPVT, NewVT))
+ return SDValue();
+
+ SDValue Sat =
+ DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
+}
+
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
+
+ // fold operation with constant operands.
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // If the operands are the same, this is a no-op.
+ if (N0 == N1)
+ return N0;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+ // Only do this if the current op isn't legal and the flipped is.
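+ // This is safe because when both sign bits are known zero, the signed and
+ // unsigned interpretations (and orderings) of the operands coincide, e.g.
+ // smax(x, y) == umax(x, y).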
+ if (!TLI.isOperationLegal(Opcode, VT) &&
+ (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+ (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+ unsigned AltOpcode;
+ switch (Opcode) {
+ case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+ case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+ case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+ case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+ default: llvm_unreachable("Unknown MINMAX opcode");
+ }
+ if (TLI.isOperationLegal(AltOpcode, VT))
+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);
+ }
+
+ if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
+ if (SDValue S = PerformMinMaxFpToSatCombine(
+ N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
+ return S;
+ if (Opcode == ISD::UMIN)
+ if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
+ return S;
+
+ // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
+ auto ReductionOpcode = [](unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SMIN:
+ return ISD::VECREDUCE_SMIN;
+ case ISD::SMAX:
+ return ISD::VECREDUCE_SMAX;
+ case ISD::UMIN:
+ return ISD::VECREDUCE_UMIN;
+ case ISD::UMAX:
+ return ISD::VECREDUCE_UMAX;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ };
+ if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
+ SDLoc(N), VT, N0, N1))
+ return SD;
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// If this is a bitwise logic instruction and both operands have the same
+/// opcode, try to sink the other opcode after the logic instruction.
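+/// For example: and (zext X), (zext Y) --> zext (and X, Y).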
+SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned LogicOpcode = N->getOpcode();
+ unsigned HandOpcode = N0.getOpcode();
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
+ assert(HandOpcode == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNumOperands() == 0)
+ return SDValue();
+
+ // FIXME: We should check number of uses of the operands to not increase
+ // the instruction count for all transforms.
+
+ // Handle size-changing casts (or sign_extend_inreg).
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N1.getOperand(0);
+ EVT XVT = X.getValueType();
+ SDLoc DL(N);
+ if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
+ (HandOpcode == ISD::SIGN_EXTEND_INREG &&
+ N0.getOperand(1) == N1.getOperand(1))) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching integer source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization. Don't ever
+ // create an unsupported vector op.
+ if ((VT.isVector() || LegalOperations) &&
+ !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
+ return SDValue();
+ // Avoid infinite looping with PromoteIntBinOp.
+ // TODO: Should we apply desirable/legal constraints to all opcodes?
+ if ((HandOpcode == ISD::ANY_EXTEND ||
+ HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ return SDValue();
+ // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ if (HandOpcode == ISD::SIGN_EXTEND_INREG)
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
+ if (HandOpcode == ISD::TRUNCATE) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization.
+ if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
+ return SDValue();
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop. Also, don't create a logic op on an illegal type.
+ if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
+ return SDValue();
+ if (!TLI.isTypeLegal(XVT))
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // For binops SHL/SRL/SRA/AND:
+ // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
+ if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
+ HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+ }
+
+ // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
+ if (HandOpcode == ISD::BSWAP) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // For funnel shifts FSHL/FSHR:
+ // logic_op (OP x, x1, s), (OP y, y1, s) -->
+ // --> OP (logic_op x, y), (logic_op, x1, y1), s
+ if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
+ N0.getOperand(2) == N1.getOperand(2)) {
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue X1 = N0.getOperand(1);
+ SDValue Y1 = N1.getOperand(1);
+ SDValue S = N0.getOperand(2);
+ SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
+ SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
+ return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
+ }
+
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization up until type legalization, before
+ // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
+ Level <= AfterLegalizeTypes) {
+ // Input types must be integer and the same.
+ if (XVT.isInteger() && XVT == Y.getValueType() &&
+ !(VT.isVector() && TLI.isTypeLegal(VT) &&
+ !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ // There are other cases where moving the shuffle after the xor/and/or
+ // is profitable even if shuffles don't perform a swizzle.
+ // If both shuffles use the same mask, and both shuffles have the same first
+ // or second operand, then it might still be profitable to move the shuffle
+ // after the xor/and/or operation.
+ if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
+ assert(X.getValueType() == Y.getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ // Check also that shuffles have only one use to avoid introducing extra
+ // instructions.
+ if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
+ !SVN0->getMask().equals(SVN1->getMask()))
+ return SDValue();
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ SDValue ShOp = N0.getOperand(1);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
+ }
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0.getOperand(0);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
+ if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
+ N1.getOperand(1));
+ return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
+SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL) {
+ SDValue LL, LR, RL, RR, N0CC, N1CC;
+ if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
+ !isSetCCEquivalent(N1, RL, RR, N1CC))
+ return SDValue();
+
+ assert(N0.getValueType() == N1.getValueType() &&
+ "Unexpected operand types for bitwise logic op");
+ assert(LL.getValueType() == LR.getValueType() &&
+ RL.getValueType() == RR.getValueType() &&
+ "Unexpected operand types for setcc");
+
+ // If we're here post-legalization or the logic op type is not i1, the logic
+ // op type must match a setcc result type. Also, all folds require new
+ // operations on the left and right operands, so those types must match.
+ EVT VT = N0.getValueType();
+ EVT OpVT = LL.getValueType();
+ if (LegalOperations || VT.getScalarType() != MVT::i1)
+ if (VT != getSetCCResultType(OpVT))
+ return SDValue();
+ if (OpVT != RL.getValueType())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
+ bool IsInteger = OpVT.isInteger();
+ if (LR == RR && CC0 == CC1 && IsInteger) {
+ bool IsZero = isNullOrNullSplat(LR);
+ bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
+
+ // All bits clear?
+ bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
+ // All sign bits clear?
+ bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
+ // Any bits set?
+ bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
+ // Any sign bits set?
+ bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
+
+ // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+ // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
+ // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
+ // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
+ if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(Or.getNode());
+ return DAG.getSetCC(DL, VT, Or, LR, CC1);
+ }
+
+ // All bits set?
+ bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
+ // All sign bits set?
+ bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
+ // Any bits clear?
+ bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
+ // Any sign bits clear?
+ bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
+
+ // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
+ // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
+ // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
+ // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
+ if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(And.getNode());
+ return DAG.getSetCC(DL, VT, And, LR, CC1);
+ }
+ }
+
+ // TODO: What is the 'or' equivalent of this fold?
+ // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
+ if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
+ IsInteger && CC0 == ISD::SETNE &&
+ ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDValue One = DAG.getConstant(1, DL, OpVT);
+ SDValue Two = DAG.getConstant(2, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
+ AddToWorklist(Add.getNode());
+ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
+ }
+
+ // Try more general transforms if the predicates match and the only user of
+ // the compares is the 'and' or 'or'.
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+ // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+ if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+ SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+ SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+ SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+ }
+
+ // Turn compare of constants whose difference is 1 bit into add+and+setcc.
+ if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
+ // Match a shared variable operand and 2 non-opaque constant operands.
+ auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
+ // The difference of the constants must be a single bit.
+ const APInt &CMax =
+ APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
+ const APInt &CMin =
+ APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
+ return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
+ };
+ if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
+ // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
+ // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
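+ // For example: and (setcc X, 12, ne), (setcc X, 8, ne) -->
+ // setcc (and (sub X, 8), ~4), 0, ne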
+ SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
+ SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
+ SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
+ SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
+ }
+ }
+ }
+
+ // Canonicalize equivalent operands to LL == RL.
+ if (LL == RR && LR == RL) {
+ CC1 = ISD::getSetCCSwappedOperands(CC1);
+ std::swap(RL, RR);
+ }
+
+ // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ if (LL == RL && LR == RR) {
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
+ : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
+ if (NewCC != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, OpVT))))
+ return DAG.getSetCC(DL, VT, LL, LR, NewCC);
+ }
+
+ return SDValue();
+}
+
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+ using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
+ assert(
+ (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+ "Invalid Op to combine SETCC with");
+
+ // TODO: Search past casts/truncates.
+ SDValue LHS = LogicOp->getOperand(0);
+ SDValue RHS = LogicOp->getOperand(1);
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
+ LogicOp, LHS.getNode(), RHS.getNode());
+
+ SDValue LHS0 = LHS->getOperand(0);
+ SDValue RHS0 = RHS->getOperand(0);
+ SDValue LHS1 = LHS->getOperand(1);
+ SDValue RHS1 = RHS->getOperand(1);
+ // TODO: We don't actually need a splat here; for vectors we just need the
+ // invariants to hold for each element.
+ auto *LHS1C = isConstOrConstSplat(LHS1);
+ auto *RHS1C = isConstOrConstSplat(RHS1);
+ ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+ EVT VT = LogicOp->getValueType(0);
+ EVT OpVT = LHS0.getValueType();
+ SDLoc DL(LogicOp);
+
+ // Check if the operands of an and/or operation are comparisons and if they
+ // compare against the same value. Replace the and/or-cmp-cmp sequence with
+ // a min/max-cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
+ // sequence will be replaced with a min-cmp sequence:
+ // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
+ // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
+ // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
+ if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+ TLI.isOperationLegal(ISD::SMIN, OpVT)) {
+ if (LHS->getOpcode() == ISD::SETCC && RHS->getOpcode() == ISD::SETCC &&
+ LHS->hasOneUse() && RHS->hasOneUse() &&
+ // The two comparisons should have either the same predicate or the
+ // predicate of one of the comparisons is the opposite of the other one.
+ (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) &&
+ // The optimization does not work for `==` or `!=` .
+ !ISD::isIntEqualitySetCC(CCL) && !ISD::isIntEqualitySetCC(CCR)) {
+ SDValue CommonValue, Operand1, Operand2;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ if (CCL == CCR) {
+ if (LHS0 == RHS0) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS1;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (LHS1 == RHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS0;
+ CC = CCL;
+ }
+ } else {
+ assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
+ if (LHS0 == RHS1) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS0;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (RHS0 == LHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS1;
+ CC = CCL;
+ }
+ }
+
+ if (CC != ISD::SETCC_INVALID) {
+ unsigned NewOpcode;
+ bool IsSigned = isSignedIntSetCC(CC);
+ if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT ||
+ CC == ISD::SETULT) &&
+ (LogicOp->getOpcode() == ISD::OR)) ||
+ ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT ||
+ CC == ISD::SETUGT) &&
+ (LogicOp->getOpcode() == ISD::AND)))
+ NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
+ else
+ NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+
+ SDValue MinMaxValue =
+ DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
+ return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
+ }
+ }
+ }
+
+ if (TargetPreference == AndOrSETCCFoldKind::None)
+ return SDValue();
+
+ if (CCL == CCR &&
+ CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+ RHS.hasOneUse()) {
+ const APInt &APLhs = LHS1C->getAPIntValue();
+ const APInt &APRhs = RHS1C->getAPIntValue();
+
+ // Use ISD::ABS if the target prefers it, or if an ISD::ABS of this operand
+ // already exists in the DAG (in which case the fold amounts to just a
+ // compare).
+ if (APLhs == (-APRhs) &&
+ ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
+ DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
+ const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq Abs(A), C)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne Abs(A), C)
+ SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
+ return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
+ DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference &
+ (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
+
+ // AndOrSETCCFoldKind::AddAnd:
+ // A == C0 | A == C1
+ // IF IsPow2(smax(C0, C1)-smin(C0, C1))
+ // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
+ // A != C0 & A != C1
+ // IF IsPow2(smax(C0, C1)-smin(C0, C1))
+ // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
+
+ // AndOrSETCCFoldKind::NotAnd:
+ // A == C0 | A == C1
+ // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
+ // -> ~A & smin(C0, C1) == 0
+ // A != C0 & A != C1
+ // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
+ // -> ~A & smin(C0, C1) != 0
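+ // For example (AddAnd): A == 8 | A == 12 --> ((A - 8) & ~4) == 0.
+ // For example (NotAnd): A == -1 | A == -5 --> (~A & -5) == 0.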
+
+ const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
+ const APInt &MinC = APIntOps::smin(APRhs, APLhs);
+ APInt Dif = MaxC - MinC;
+ if (!Dif.isZero() && Dif.isPowerOf2()) {
+ if (MaxC.isAllOnes() &&
+ (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
+ SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
+ DAG.getConstant(MinC, DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
+
+ SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
+ DAG.getConstant(-MinC, DL, OpVT));
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+ DAG.getConstant(~Dif, DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
+ // fold (and x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
+ return V;
+
+ // Canonicalize:
+ // and(x, add) -> and(add, x)
+ if (N1.getOpcode() == ISD::ADD)
+ std::swap(N0, N1);
+
+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 isn't a legal
+ // immediate for an add, but it would be legal if its top c2 bits were
+ // set, transform the ADD so the immediate doesn't need to be
+ // materialized in a register.
+ APInt ADDC = ADDI->getAPIntValue();
+ APInt SRLC = SRLI->getAPIntValue();
+ if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
+ !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLC.getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL0(N0);
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, DL0, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT) {
+ if (!AndC->getAPIntValue().isMask())
+ return false;
+
+ unsigned ActiveBits = AndC->getAPIntValue().countr_one();
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ return true;
+ }
+
+ // Do not change the width of a volatile or atomic load.
+ if (!LoadN->isSimple())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ return true;
+}
+
+bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
+ ISD::LoadExtType ExtType, EVT &MemVT,
+ unsigned ShAmt) {
+ if (!LDST)
+ return false;
+ // Only allow byte offsets.
+ if (ShAmt % 8)
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!MemVT.isRound())
+ return false;
+
+ // Don't change the width of a volatile or atomic load.
+ if (!LDST->isSimple())
+ return false;
+
+ EVT LdStMemVT = LDST->getMemoryVT();
+
+ // Bail out when changing the scalable property, since we can't be sure that
+ // we're actually narrowing here.
+ if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
+ return false;
+
+ // Verify that we are actually reducing a load width here.
+ if (LdStMemVT.bitsLT(MemVT))
+ return false;
+
+ // Ensure that this isn't going to produce an unsupported memory access.
+ if (ShAmt) {
+ assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
+ const unsigned ByteShAmt = ShAmt / 8;
+ const Align LDSTAlign = LDST->getAlign();
+ const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ LDST->getAddressSpace(), NarrowAlign,
+ LDST->getMemOperand()->getFlags()))
+ return false;
+ }
+
+ // It's not possible to generate a constant of extended or untyped type.
+ EVT PtrType = LDST->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ if (isa<LoadSDNode>(LDST)) {
+ LoadSDNode *Load = cast<LoadSDNode>(LDST);
+ // Don't transform one with multiple uses, this would require adding a new
+ // load.
+ if (!SDValue(Load, 0).hasOneUse())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
+ return false;
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (Load->getNumValues() > 2)
+ return false;
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
+ Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+ return false;
+ } else {
+ assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
+ StoreSDNode *Store = cast<StoreSDNode>(LDST);
+ // Can't write outside the original store
+ if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
+ return false;
+ }
+ return true;
+}
+
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+ SmallVectorImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask,
+ SDNode *&NodeToMask) {
+ // Recursively search for the operands, looking for loads which can be
+ // narrowed.
+ for (SDValue Op : N->op_values()) {
+ if (Op.getValueType().isVector())
+ return false;
+
+ // Some constants may need fixing up later if they are too large.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
+ (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
+ NodesWithConsts.insert(N);
+ continue;
+ }
+
+ if (!Op.hasOneUse())
+ return false;
+
+ switch(Op.getOpcode()) {
+ case ISD::LOAD: {
+ auto *Load = cast<LoadSDNode>(Op);
+ EVT ExtVT;
+ if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
+ isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
+
+ // ZEXTLOAD is already small enough.
+ if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+ ExtVT.bitsGE(Load->getMemoryVT()))
+ continue;
+
+ // Use LE so that equal-sized loads are also converted to zextloads.
+ if (ExtVT.bitsLE(Load->getMemoryVT()))
+ Loads.push_back(Load);
+
+ continue;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ case ISD::AssertZext: {
+ unsigned ActiveBits = Mask->getAPIntValue().countr_one();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT VT = Op.getOpcode() == ISD::AssertZext ?
+ cast<VTSDNode>(Op.getOperand(1))->getVT() :
+ Op.getOperand(0).getValueType();
+
+ // We can accept extending nodes if the mask is wider than or equal in
+ // width to the original type.
+ if (ExtVT.bitsGE(VT))
+ continue;
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
+ NodeToMask))
+ return false;
+ continue;
+ }
+
+ // Allow one node which will be masked along with any loads found.
+ if (NodeToMask)
+ return false;
+
+ // Also ensure that the node to be masked only produces one data result.
+ NodeToMask = Op.getNode();
+ if (NodeToMask->getNumValues() > 1) {
+ bool HasValue = false;
+ for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
+ MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
+ if (VT != MVT::Glue && VT != MVT::Other) {
+ if (HasValue) {
+ NodeToMask = nullptr;
+ return false;
+ }
+ HasValue = true;
+ }
+ }
+ assert(HasValue && "Node to be masked has no data result?");
+ }
+ }
+ return true;
+}
+
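+// Attempt to eliminate an 'and' with a constant low-bit mask by propagating
+// the mask up through a tree of or/xor/and nodes to the loads feeding it, so
+// that those loads can be narrowed. For example, (and (or (load i32 X),
+// (load i32 Y)), 255) can become an 'or' of two i8 zextloads with no 'and'.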
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
+ auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Mask)
+ return false;
+
+ if (!Mask->getAPIntValue().isMask())
+ return false;
+
+ // No need to do anything if the and directly uses a load.
+ if (isa<LoadSDNode>(N->getOperand(0)))
+ return false;
+
+ SmallVector<LoadSDNode*, 8> Loads;
+ SmallPtrSet<SDNode*, 2> NodesWithConsts;
+ SDNode *FixupNode = nullptr;
+ if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
+ if (Loads.size() == 0)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
+ SDValue MaskOp = N->getOperand(1);
+
+ // If it exists, fixup the single node we allow in the tree that needs
+ // masking.
+ if (FixupNode) {
+ LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+ FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
+ if (And.getOpcode() == ISD::AND)
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
+ }
+
+ // Narrow any constants that need it.
+ for (auto *LogicN : NodesWithConsts) {
+ SDValue Op0 = LogicN->getOperand(0);
+ SDValue Op1 = LogicN->getOperand(1);
+
+ if (isa<ConstantSDNode>(Op0))
+ std::swap(Op0, Op1);
+
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+ Op1, MaskOp);
+
+ DAG.UpdateNodeOperands(LogicN, Op0, And);
+ }
+
+ // Create narrow loads.
+ for (auto *Load : Loads) {
+ LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
+ SDValue(Load, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+ if (And.getOpcode() == ISD::AND)
+ And = SDValue(
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
+ SDValue NewLoad = reduceLoadWidth(And.getNode());
+ assert(NewLoad &&
+ "Shouldn't be masking the load if it can't be narrowed");
+ CombineTo(Load, NewLoad, NewLoad.getValue(1));
+ }
+ DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+ return true;
+ }
+ return false;
+}
+
+// Unfold
+// x & (-1 'logical shift' y)
+// To
+// (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
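+// For example: x & (-1 << y) --> (x >> y) << y, which clears the low y bits
+// of x without materializing the all-ones mask.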
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Do we actually prefer shifts over mask?
+ if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
+ return SDValue();
+
+ // Try to match (-1 '[outer] logical shift' y)
+ unsigned OuterShift;
+ unsigned InnerShift; // The opposite direction to the OuterShift.
+ SDValue Y; // Shift amount.
+ auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+ if (!M.hasOneUse())
+ return false;
+ OuterShift = M->getOpcode();
+ if (OuterShift == ISD::SHL)
+ InnerShift = ISD::SRL;
+ else if (OuterShift == ISD::SRL)
+ InnerShift = ISD::SHL;
+ else
+ return false;
+ if (!isAllOnesConstant(M->getOperand(0)))
+ return false;
+ Y = M->getOperand(1);
+ return true;
+ };
+
+ SDValue X;
+ if (matchMask(N1))
+ X = N0;
+ else if (matchMask(N0))
+ X = N1;
+ else
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // tmp = x 'opposite logical shift' y
+ SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+ // ret = tmp 'logical shift' y
+ SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+ return T1;
+}
+
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+ assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+ // Look through an optional extension.
+ SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
+ if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
+ And0 = And0.getOperand(0);
+ if (!isOneConstant(And1) || !And0.hasOneUse())
+ return SDValue();
+
+ SDValue Src = And0;
+
+ // Attempt to find a 'not' op.
+ // TODO: Should we favor test+set even without the 'not' op?
+ bool FoundNot = false;
+ if (isBitwiseNot(Src)) {
+ FoundNot = true;
+ Src = Src.getOperand(0);
+
+ // Look through an optional truncation. The source operand may not be the
+ // same type as the original 'and', but that is ok because we are masking
+ // off everything but the low bit.
+ if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
+ Src = Src.getOperand(0);
+ }
+
+ // Match a shift-right by constant.
+ if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
+ return SDValue();
+
+ // This is probably not worthwhile without a supported type.
+ EVT SrcVT = Src.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(SrcVT))
+ return SDValue();
+
+ // We might have looked through casts that make this transform invalid.
+ unsigned BitWidth = SrcVT.getScalarSizeInBits();
+ SDValue ShiftAmt = Src.getOperand(1);
+ auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
+ if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
+ return SDValue();
+
+ // Set source to shift source.
+ Src = Src.getOperand(0);
+
+ // Try again to find a 'not' op.
+ // TODO: Should we favor test+set even with two 'not' ops?
+ if (!FoundNot) {
+ if (!isBitwiseNot(Src))
+ return SDValue();
+ Src = Src.getOperand(0);
+ }
+
+ if (!TLI.hasBitTest(Src, ShiftAmt))
+ return SDValue();
+
+ // Turn this into a bit-test pattern using mask op + setcc:
+ // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+ // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
+ SDLoc DL(And);
+ SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
+ EVT CCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ SDValue Mask = DAG.getConstant(
+ APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
+ SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+ return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
+}
+
+/// For targets that support usubsat, match a bit-hack form of that operation
+/// that ends in 'and' and convert it.
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // Canonicalize SRA as operand 1.
+ if (N0.getOpcode() == ISD::SRA)
+ std::swap(N0, N1);
+
+ // xor and add with SMIN (the sign mask) are logically equivalent here.
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||
+ !SraC || SraC->getAPIntValue() != BitWidth - 1)
+ return SDValue();
+
+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
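+ // X s>> 7 is all-ones exactly when the sign bit of X is set, so the 'and'
+ // keeps X - 128 (the xor/add clears the sign bit) only when X >= 128 as an
+ // unsigned value, which is exactly usubsat X, 128.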
+ SDLoc DL(N);
+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+}
+
+/// Given a bitwise logic operation N with a matching bitwise logic operand,
+/// fold a pattern where 2 of the source operands are identically shifted
+/// values. For example:
+/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
+static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
+ SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
+ "Expected bitwise logic operation");
+
+ if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
+ return SDValue();
+
+ // Match another bitwise logic op and a shift.
+ unsigned ShiftOpcode = ShiftOp.getOpcode();
+ if (LogicOp.getOpcode() != LogicOpcode ||
+ !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
+ ShiftOpcode == ISD::SRA))
+ return SDValue();
+
+ // Match another shift op inside the first logic operand. Handle both commuted
+ // possibilities.
+ // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+ // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+ SDValue X1 = ShiftOp.getOperand(0);
+ SDValue Y = ShiftOp.getOperand(1);
+ SDValue X0, Z;
+ if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
+ LogicOp.getOperand(0).getOperand(1) == Y) {
+ X0 = LogicOp.getOperand(0).getOperand(0);
+ Z = LogicOp.getOperand(1);
+ } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
+ LogicOp.getOperand(1).getOperand(1) == Y) {
+ X0 = LogicOp.getOperand(1).getOperand(0);
+ Z = LogicOp.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
+ SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
+}
+
+/// Given a tree of logic operations with shape like
+/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
+/// try to match and fold shift operations with the same shift amount.
+/// For example:
+/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
+/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
+static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
+ SDValue RightHand, SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
+ "Expected bitwise logic operation");
+ if (LeftHand.getOpcode() != LogicOpcode ||
+ RightHand.getOpcode() != LogicOpcode)
+ return SDValue();
+ if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
+ return SDValue();
+
+ // Try to match one of following patterns:
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
+ // Note that foldLogicOfShifts will handle commuted versions of the left hand
+ // itself.
+ SDValue CombinedShifts, W;
+ SDValue R0 = RightHand.getOperand(0);
+ SDValue R1 = RightHand.getOperand(1);
+ if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
+ W = R1;
+ else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
+ W = R0;
+ else
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // x & x --> x
+ if (N0 == N1)
+ return N0;
+
+ // fold (and c1, c2) -> c1&c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+
+ if (areBitwiseNotOfEachother(N0, N1))
+ return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
+ VT);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ // do not return N1, because an undef node may exist in N1
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
+ SDLoc(N), N1.getValueType());
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
+ return N0;
+
+ // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
+ auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
+ ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
+ N1.hasOneUse()) {
+ EVT LoadVT = MLoad->getMemoryVT();
+ EVT ExtVT = VT;
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
+ // For this AND to be a zero extension of the masked load, the elements
+ // of the BuildVec must mask the bottom bits of the extended element
+ // type.
+ uint64_t ElementSize =
+ LoadVT.getVectorElementType().getScalarSizeInBits();
+ if (Splat->getAPIntValue().isMask(ElementSize)) {
+ auto NewLoad = DAG.getMaskedLoad(
+ ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+ LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+ ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ bool LoadHasOtherUsers = !N0.hasOneUse();
+ CombineTo(N, NewLoad);
+ if (LoadHasOtherUsers)
+ CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+
+ // fold (and x, -1) -> x
+ if (isAllOnesConstant(N1))
+ return N0;
+
+ // if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // reassociate and
+ if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ return RAND;
+
+ // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
+ VT, N0, N1))
+ return SD;
+
+ // fold (and (or x, C), D) -> D if (C & D) == D
+ auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
+ };
+ if (N0.getOpcode() == ISD::OR &&
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+ return N1;
+
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ EVT SrcVT = N0Op0.getValueType();
+ unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(SrcBitWidth);
+
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (DAG.MaskedValueIsZero(N0Op0, Mask))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
+
+ // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
+ if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
+ TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
+ TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
+ TLI.isNarrowingProfitable(VT, SrcVT)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+ DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
+ DAG.getZExtOrTrunc(N1, DL, SrcVT)));
+ }
+ }
+
+ // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
+ if (ISD::isExtOpcode(N0.getOpcode())) {
+ unsigned ExtOpc = N0.getOpcode();
+ SDValue N0Op0 = N0.getOperand(0);
+ if (N0Op0.getOpcode() == ISD::AND &&
+ (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
+ N0->hasOneUse() && N0Op0->hasOneUse()) {
+ SDLoc DL(N);
+ SDValue NewMask =
+ DAG.getNode(ISD::AND, DL, VT, N1,
+ DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
+ NewMask);
+ }
+ }
+
+ // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // The 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD &&
+ N0.getOperand(0).getResNo() == 0) ||
+ (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zeroth operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getZero(1);
+ if (const ConstantSDNode *C = isConstOrConstSplat(
+ N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (EltBitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+ SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if ((SplatBitSize % EltBitWidth) == 0) {
+ Constant = APInt::getAllOnes(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
+ }
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getValueType(0),
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnes()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), SDLoc(Load),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
+ if (VT.isVector())
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
+
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
+ return Combined;
+
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
+ ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
+ SDValue Ext = N0.getOperand(0);
+ EVT ExtVT = Ext->getValueType(0);
+ SDValue Extendee = Ext->getOperand(0);
+
+ unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
+ if (N1C->getAPIntValue().isMask(ScalarWidth) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
+ // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
+ // => (extract_subvector (iN_zeroext v))
+ SDValue ZeroExtExtendee =
+ DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
+ N0.getOperand(1));
+ }
+ }
+
+ // fold (and (masked_gather x)) -> (zext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ EVT MemVT = GN0->getMemoryVT();
+ EVT ScalarVT = MemVT.getScalarType();
+
+ if (SDValue(GN0, 0).hasOneUse() &&
+ isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ZExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+
+ CombineTo(N, ZExtLoad);
+ AddToWorklist(ZExtLoad.getNode());
+ // Avoid recheck of N.
+ return SDValue(N, 0);
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
+ if (SDValue Res = reduceLoadWidth(N))
+ return Res;
+
+ if (LegalTypes) {
+ // Attempt to propagate the AND back up to the leaves which, if they're
+ // loads, can be combined to narrow loads and the AND node can be removed.
+ // Perform after legalization so that extend nodes will already be
+ // combined into the loads.
+ if (BackwardsPropagateMask(N))
+ return SDValue(N, 0);
+ }
+
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
+
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+ if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+ return R;
+
+ // Masking the negated extension of a boolean is just the zero-extended
+ // boolean:
+ // and (sub 0, zext(bool X)), 1 --> zext(bool X)
+ // and (sub 0, sext(bool X)), 1 --> zext(bool X)
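+  // (zext(bool X) is 0 or 1, so its negation is 0 or -1, and masking with 1
+  // gives back 0 or 1. The sext case negates 0 or -1 to 0 or 1 directly.)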
+ //
+ // Note: the SimplifyDemandedBits fold below can make an information-losing
+ // transform, and then we have no way to find this better fold.
+ if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
+ if (isNullOrNullSplat(N0.getOperand(0))) {
+ SDValue SubRHS = N0.getOperand(1);
+ if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return SubRHS;
+ if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+ }
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ (ISD::isEXTLoad(N0.getNode()) ||
+ (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned ExtBitSize = N1.getScalarValueSizeInBits();
+ unsigned MemBitSize = MemVT.getScalarSizeInBits();
+ APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
+ if (DAG.MaskedValueIsZero(N1, ExtBits) &&
+ ((!LegalOperations && LN0->isSimple()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+ if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
+ return BSwap;
+ }
+
+ if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+ return Shifts;
+
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
+
+ // Recognize the following pattern:
+ //
+ // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
+ //
+ // where bitmask is a mask that clears the upper bits of AndVT. The
+ // number of bits in bitmask must be a power of two.
+ auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
+ if (LHS->getOpcode() != ISD::SIGN_EXTEND)
+ return false;
+
+ auto *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C)
+ return false;
+
+ if (!C->getAPIntValue().isMask(
+ LHS.getOperand(0).getValueType().getFixedSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
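+  // e.g. (and (sign_extend i8 X to i32), 0xFF) --> (zero_extend i8 X to i32)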
+ if (IsAndZeroExtMask(N0, N1))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+
+ if (hasOperation(ISD::USUBSAT, VT))
+ if (SDValue V = foldAndToUsubsat(N, DAG))
+ return V;
+
+  // Postpone until legalization has completed to avoid interference with bswap
+  // folding.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
+ return SDValue();
+}
+
+/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ // Also handle 0xffff since the LHS is guaranteed to have zeros there.
+ // This is needed for X86.
+ if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
+ N01C->getZExtValue() != 0xFFFF))
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ // Also allow 0xFFFF since the bits will be shifted out. This is needed
+ // for X86.
+ if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
+ N101C->getZExtValue() != 0xFFFF))
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword gets set to zero since the SRL
+ // 16 will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (OpSizeInBits > 16) {
+ // If the left-shift isn't masked out then the only way this is a bswap is
+ // if all bits beyond the low 8 are 0. In that case the entire pattern
+ // reduces to a left shift anyway: leave it for other parts of the combiner.
+ if (DemandHighBits && !LookPassAnd0)
+ return SDValue();
+
+ // However, if the right shift isn't masked out then it might be because
+ // it's not needed. See if we can spot that too. If the high bits aren't
+ // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
+ // upper bits to be zero.
+ if (!LookPassAnd1) {
+ unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
+ if (!DAG.MaskedValueIsZero(N10,
+ APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
+ return SDValue();
+ }
+ }
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
+ return Res;
+}
+
+/// Return true if the specified node is an element that makes up a 32-bit
+/// packed halfword byteswap.
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (!N->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ SDValue N0 = N.getOperand(0);
+ unsigned Opc0 = N0.getOpcode();
+ if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = nullptr;
+ // SHL or SRL: look upstream for AND mask operand
+ if (Opc == ISD::AND)
+ N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ else if (Opc0 == ISD::AND)
+ N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned MaskByteOffset;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: MaskByteOffset = 0; break;
+ case 0xFF00: MaskByteOffset = 1; break;
+ case 0xFFFF:
+ // In case demanded bits didn't clear the bits that will be shifted out.
+ // This is needed for X86.
+ if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
+ MaskByteOffset = 1;
+ break;
+ }
+ return false;
+ case 0xFF0000: MaskByteOffset = 2; break;
+ case 0xFF000000: MaskByteOffset = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ if (Opc == ISD::AND) {
+ if (MaskByteOffset == 0 || MaskByteOffset == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (Opc0 != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (Opc0 != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (MaskByteOffset != 0 && MaskByteOffset != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (MaskByteOffset != 1 && MaskByteOffset != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[MaskByteOffset])
+ return false;
+
+ Parts[MaskByteOffset] = N0.getOperand(0).getNode();
+ return true;
+}
+
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (N.getOpcode() == ISD::OR)
+ return isBSwapHWordElement(N.getOperand(0), Parts) &&
+ isBSwapHWordElement(N.getOperand(1), Parts);
+
+ if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+ ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+ if (!C || C->getAPIntValue() != 16)
+ return false;
+ Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+ return true;
+ }
+
+ return false;
+}
+
+// Match this pattern:
+// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
+// And rewrite this to:
+// (rotr (bswap A), 16)
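+// For example, with A = 0xAABBCCDD:
+//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff) == 0xBBAADDCC,
+//   and bswap(A) == 0xDDCCBBAA, rotated right by 16 -> 0xBBAADDCC.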
+static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
+ SelectionDAG &DAG, SDNode *N, SDValue N0,
+ SDValue N1, EVT VT, EVT ShiftAmountTy) {
+ assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
+ "MatchBSwapHWordOrAndAnd: expecting i32");
+ if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+ return SDValue();
+ // TODO: this is too restrictive; lifting this restriction requires more tests
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+ ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
+ ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
+ if (!Mask0 || !Mask1)
+ return SDValue();
+ if (Mask0->getAPIntValue() != 0xff00ff00 ||
+ Mask1->getAPIntValue() != 0x00ff00ff)
+ return SDValue();
+ SDValue Shift0 = N0.getOperand(0);
+ SDValue Shift1 = N1.getOperand(0);
+ if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
+ return SDValue();
+ ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
+ ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
+ if (!ShiftAmt0 || !ShiftAmt1)
+ return SDValue();
+ if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
+ return SDValue();
+ if (Shift0.getOperand(0) != Shift1.getOperand(0))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
+ SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+}
+
+/// Match a 32-bit packed halfword bswap. That is
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+/// => (rotl (bswap x), 16)
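+/// For example, x = 0x11223344 gives 0x22114433:
+/// bswap(x) == 0x44332211, rotated left by 16 -> 0x22114433.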
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
+ return SDValue();
+
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
+ getShiftAmountTy(VT)))
+ return BSwap;
+
+ // Try again with commuted operands.
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
+ getShiftAmountTy(VT)))
+ return BSwap;
+
+ // Look for either
+ // (or (bswaphpair), (bswaphpair))
+ // (or (or (bswaphpair), (and)), (and))
+ // (or (or (and), (bswaphpair)), (and))
+ SDNode *Parts[4] = {};
+
+ if (isBSwapHWordPair(N0, Parts)) {
+ // (or (or (and), (and)), (or (and), (and)))
+ if (!isBSwapHWordPair(N1, Parts))
+ return SDValue();
+ } else if (N0.getOpcode() == ISD::OR) {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+ !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
+ return V;
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ // Don't increase # computations.
+ (N0->hasOneUse() || N1->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
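+    // For example, with C1 = 0xFF00 and C2 = 0x00FF, the bits of X under
+    // 0x00FF and the bits of Y under 0xFF00 must be known zero; the result
+    // is then (and (or X, Y), 0xFFFF).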
+ if (const ConstantSDNode *N0O1C =
+ getAsNonOpaqueConstant(N0.getOperand(1))) {
+ if (const ConstantSDNode *N1O1C =
+ getAsNonOpaqueConstant(N1.getOperand(1))) {
+ const APInt &LHSMask = N0O1C->getAPIntValue();
+ const APInt &RHSMask = N1O1C->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
+ }
+ }
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0->hasOneUse() || N1->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
+}
+
+/// OR combines for which the commuted variant will be tried as well.
+static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
+ SDNode *N) {
+ EVT VT = N0.getValueType();
+
+ auto peekThroughResize = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
+ return V->getOperand(0);
+ return V;
+ };
+
+ SDValue N0Resized = peekThroughResize(N0);
+ if (N0Resized.getOpcode() == ISD::AND) {
+ SDValue N1Resized = peekThroughResize(N1);
+ SDValue N00 = N0Resized.getOperand(0);
+ SDValue N01 = N0Resized.getOperand(1);
+
+ // fold or (and x, y), x --> x
+ if (N00 == N1Resized || N01 == N1Resized)
+ return N1;
+
+ // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+ // TODO: Set AllowUndefs = true.
+ if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
+ /* AllowUndefs */ false)) {
+ if (peekThroughResize(NotOperand) == N1Resized)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT,
+ DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
+ }
+
+ // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+ if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
+ /* AllowUndefs */ false)) {
+ if (peekThroughResize(NotOperand) == N1Resized)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT,
+ DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::XOR) {
+ // fold or (xor x, y), x --> or x, y
+ // or (xor x, y), (x and/or y) --> or x, y
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (N00 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+ if (N01 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+
+ if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
+ }
+ }
+
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+
+ auto peekThroughZext = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND)
+ return V->getOperand(0);
+ return V;
+ };
+
+ // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+ if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
+ // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
+ if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
+ N0.getOperand(1) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // x | x --> x
+ if (N0 == N1)
+ return N0;
+
+ // fold (or c1, c2) -> c1|c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
+      // do not return N1, because an undef node may exist in N1
+ return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
+
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
+ // Do this only if the resulting type / shuffle is legal.
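+    // For example:
+    //   shuffle(A, zero, <0,-1,2,-1>) | shuffle(B, zero, <-1,1,-1,3>)
+    //     -> shuffle(A, B, <0,5,2,7>)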
+ auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
+ auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
+ if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
+ bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
+ bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
+ bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+ bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
+ // Ensure both shuffles have a zero input.
+ if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
+ assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
+ assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
+ bool CanFold = true;
+ int NumElts = VT.getVectorNumElements();
+ SmallVector<int, 4> Mask(NumElts, -1);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Determine if either index is pointing to a zero vector.
+ bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
+ bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
+
+          // If one element is zero and the other side is undef, keep undef.
+ // This also handles the case that both are undef.
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
+ continue;
+
+ // Make sure only one of the elements is zero.
+ if (M0Zero == M1Zero) {
+ CanFold = false;
+ break;
+ }
+
+ assert((M0 >= 0 || M1 >= 0) && "Undef index!");
+
+ // We have a zero and non-zero element. If the non-zero came from
+ // SV0 make the index a LHS index. If it came from SV1, make it
+ // a RHS index. We need to mod by NumElts because we don't care
+ // which operand it came from in the original shuffles.
+ Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
+ }
+
+ if (CanFold) {
+ SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
+ SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+ Mask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+ }
+ }
+ }
+
+ // fold (or x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // fold (or x, -1) -> -1
+ if (isAllOnesConstant(N1))
+ return N1;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
+ return Combined;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
+ return BSwap;
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
+ return BSwap;
+
+ // reassociate or
+ if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ return ROR;
+
+ // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
+ VT, N0, N1))
+ return SD;
+
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) != 0 or c1/c2 are undef.
+ auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
+ return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
+ };
+ if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ {N1, N0.getOperand(1)})) {
+ SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
+ AddToWorklist(IOR.getNode());
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
+ }
+ }
+
+ if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+ return Combined;
+ if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+ return Combined;
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
+
+ // See if this is some rotate idiom.
+ if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+ return Rot;
+
+ if (SDValue Load = MatchLoadCombine(N))
+ return Load;
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If OR can be rewritten into ADD, try combines based on ADD.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
+  // Postpone until legalization has completed to avoid interference with bswap
+  // folding.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
+ return SDValue();
+}
+
+static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
+ SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND &&
+ DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ return Op.getOperand(0);
+ }
+ return Op;
+}
+
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
+ SDValue &Mask) {
+ Op = stripConstantMask(DAG, Op, Mask);
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+ return false;
+}
+
+/// Helper function for visitOR to extract the needed side of a rotate idiom
+/// from a shl/srl/mul/udiv. This is meant to handle cases where
+/// InstCombine merged some outside op with one of the shifts from
+/// the rotate pattern.
+/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
+/// Otherwise, returns an expansion of \p ExtractFrom based on the following
+/// patterns:
+///
+/// (or (add v v) (shrl v bitwidth-1)):
+/// expands (add v v) -> (shl v 1)
+///
+/// (or (mul v c0) (shrl (mul v c1) c2)):
+/// expands (mul v c0) -> (shl (mul v c1) c3)
+///
+/// (or (udiv v c0) (shl (udiv v c1) c2)):
+/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
+///
+/// (or (shl v c0) (shrl (shl v c1) c2)):
+/// expands (shl v c0) -> (shl (shl v c1) c3)
+///
+/// (or (shrl v c0) (shl (shrl v c1) c2)):
+/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
+///
+/// Such that in all cases, c3+c2==bitwidth(op v c1).
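+///
+/// For example, on i32, (or (mul v 48), (shrl (mul v 3), 28)) satisfies
+/// 48 == 3 << 4 with 4 + 28 == 32, so (mul v 48) expands to
+/// (shl (mul v 3), 4), allowing the OR to form a rotate of (mul v 3).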
+static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
+ SDValue ExtractFrom, SDValue &Mask,
+ const SDLoc &DL) {
+ assert(OppShift && ExtractFrom && "Empty SDValue");
+ if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+ // Value and Type of the shift.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+ // (add v v) -> (shl v 1)
+ // TODO: Should this be a general DAG canonicalization?
+ if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+ ExtractFrom.getOpcode() == ISD::ADD &&
+ ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+ ExtractFrom.getOperand(0) == OppShiftLHS &&
+ OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+ DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
+ // Preconditions:
+ // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
+ //
+ // Find opcode of the needed shift to be extracted from (op0 v c0).
+ unsigned Opcode = ISD::DELETED_NODE;
+ bool IsMulOrDiv = false;
+ // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
+ // opcode or its arithmetic (mul or udiv) variant.
+ auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
+ IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
+ if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
+ return false;
+ Opcode = NeededShift;
+ return true;
+ };
+ // op0 must be either the needed shift opcode or the mul/udiv equivalent
+ // that the needed shift can be extracted from.
+ if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
+ (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
+ return SDValue();
+
+ // op0 must be the same opcode on both sides, have the same LHS argument,
+ // and produce the same value type.
+ if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
+ OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
+ ShiftedVT != ExtractFrom.getValueType())
+ return SDValue();
+
+ // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
+ ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
+ // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
+ ConstantSDNode *ExtractFromCst =
+ isConstOrConstSplat(ExtractFrom.getOperand(1));
+ // TODO: We should be able to handle non-uniform constant vectors for these values
+ // Check that we have constant values.
+ if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
+ !OppLHSCst || !OppLHSCst->getAPIntValue() ||
+ !ExtractFromCst || !ExtractFromCst->getAPIntValue())
+ return SDValue();
+
+ // Compute the shift amount we need to extract to complete the rotate.
+ const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
+ if (OppShiftCst->getAPIntValue().ugt(VTWidth))
+ return SDValue();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
+ // Normalize the bitwidth of the two mul/udiv/shift constant operands.
+ APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
+ APInt OppLHSAmt = OppLHSCst->getAPIntValue();
+ zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
+
+ // Now try extract the needed shift from the ExtractFrom op and see if the
+ // result matches up with the existing shift's LHS op.
+ if (IsMulOrDiv) {
+ // Op to extract from is a mul or udiv by a constant.
+ // Check:
+ // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
+ // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
+ const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
+ NeededShiftAmt.getZExtValue());
+ APInt ResultAmt;
+ APInt Rem;
+ APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
+ if (Rem != 0 || ResultAmt != OppLHSAmt)
+ return SDValue();
+ } else {
+ // Op to extract from is a shift by a constant.
+ // Check:
+ // c2 - (bitwidth(op0 v c0) - c1) == c0
+ if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
+ ExtractFromAmt.getBitWidth()))
+ return SDValue();
+ }
+
+ // Return the expanded shift op that should allow a rotate to be formed.
+ EVT ShiftVT = OppShift.getOperand(1).getValueType();
+ EVT ResVT = ExtractFrom.getValueType();
+ SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
+ return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
+}
+
+// Return true if we can prove that, whenever Neg and Pos are both in the
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
+// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
+//
+// (or (shift1 X, Neg), (shift2 X, Pos))
+//
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
+// to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise if matching a general funnel shift, it should be clear.
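+//
+// For example, with EltSize == 32 this accepts Neg == (sub 32, Pos) and,
+// because only the low 5 bits of the amount matter, also
+// Neg == (and (sub 0, Pos), 31).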
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
+ SelectionDAG &DAG, bool IsRotate) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ // If EltSize is a power of 2 then:
+ //
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
+ //
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
+ // for the stronger condition:
+ //
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
+ //
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
+ // we can just replace Neg with Neg' for the rest of the function.
+ //
+ // In other cases we check for the even stronger condition:
+ //
+ // Neg == EltSize - Pos [B]
+ //
+ // for all Neg and Pos. Note that the (or ...) then invokes undefined
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
+ //
+ // We could actually use [A] whenever EltSize is a power of 2, but the
+ // only extra cases that it would match are those uninteresting ones
+ // where Neg and Pos are never in range at the same time. E.g. for
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // as well as (sub 32, Pos), but:
+ //
+ // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
+ //
+ // always invokes undefined behavior for 32-bit X.
+ //
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+ // This allows us to peek through any operations that only affect Mask's
+ // un-demanded bits.
+ //
+ // NOTE: We can only do this when matching operations which won't modify the
+ // least Log2(EltSize) significant bits and not a general funnel shift.
+ unsigned MaskLoBits = 0;
+ if (IsRotate && isPowerOf2_64(EltSize)) {
+ unsigned Bits = Log2_64(EltSize);
+ unsigned NegBits = Neg.getScalarValueSizeInBits();
+ if (NegBits >= Bits) {
+ APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
+ if (SDValue Inner =
+ TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
+ Neg = Inner;
+ MaskLoBits = Bits;
+ }
+ }
+ }
+
+ // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
+ if (Neg.getOpcode() != ISD::SUB)
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
+ if (!NegC)
+ return false;
+ SDValue NegOp1 = Neg.getOperand(1);
+
+ // On the RHS of [A], if Pos is the result of operation on Pos' that won't
+ // affect Mask's demanded bits, just replace Pos with Pos'. These operations
+ // are redundant for the purpose of the equality.
+ if (MaskLoBits) {
+ unsigned PosBits = Pos.getScalarValueSizeInBits();
+ if (PosBits >= MaskLoBits) {
+ APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
+ if (SDValue Inner =
+ TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
+ Pos = Inner;
+ }
+ }
+ }
+
+ // The condition we need is now:
+ //
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
+ //
+ // If NegOp1 == Pos then we need:
+ //
+ // EltSize & Mask == NegC & Mask
+ //
+ // (because "x & Mask" is a truncation and distributes through subtraction).
+ //
+ // We also need to account for a potential truncation of NegOp1 if the amount
+ // has already been legalized to a shift amount type.
+ APInt Width;
+ if ((Pos == NegOp1) ||
+ (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
+ Width = NegC->getAPIntValue();
+
+ // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
+ // Then the condition we want to prove becomes:
+ //
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
+ //
+ // which, again because "x & Mask" is a truncation, becomes:
+ //
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
+ return false;
+
+ // Now we just need to check that EltSize & Mask == Width & Mask.
+ if (MaskLoBits)
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
+ return Width.getLoBits(MaskLoBits) == 0;
+ return Width == EltSize;
+}
+
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL) {
+ // fold (or (shl x, (*ext y)),
+ // (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y) or (rotr x, (sub 32, y))
+ //
+ // fold (or (shl x, (*ext (sub 32, y))),
+ // (srl x, (*ext y))) ->
+ // (rotr x, y) or (rotl x, (sub 32, y))
+ EVT VT = Shifted.getValueType();
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+ /*IsRotate*/ true)) {
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
+ HasPos ? Pos : Neg);
+ }
+
+ return SDValue();
+}
+
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+// TODO: Merge with MatchRotatePosNeg.
+SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL) {
+ EVT VT = N0.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // fold (or (shl x0, (*ext y)),
+ // (srl x1, (*ext (sub 32, y)))) ->
+ // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
+ //
+ // fold (or (shl x0, (*ext (sub 32, y))),
+ // (srl x1, (*ext y))) ->
+ // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
+ if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
+ HasPos ? Pos : Neg);
+ }
+
+ // Matching the shift+xor cases, we can't easily use the xor'd shift amount
+  // so for now just use the PosOpcode case if it's legal.
+ // TODO: When can we use the NegOpcode case?
+ if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
+ auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
+ if (Op.getOpcode() != BinOpc)
+ return false;
+ ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
+ return Cst && (Cst->getAPIntValue() == Imm);
+ };
+
+ // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
+ // -> (fshl x0, x1, y)
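+    // (For i32, (x1 >> 1) >> (y ^ 31) equals x1 >> (32 - y) for y in [1,31],
+    // and is 0 for y == 0, matching fshl's result of x0 for a zero amount.)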
+ if (IsBinOpImm(N1, ISD::SRL, 1) &&
+ IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
+ InnerPos == InnerNeg.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
+ return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
+ }
+
+ // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
+ // -> (fshr x0, x1, y)
+ if (IsBinOpImm(N0, ISD::SHL, 1) &&
+ IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
+ InnerNeg == InnerPos.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
+ return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
+ }
+
+ // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
+ // -> (fshr x0, x1, y)
+ // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
+ IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
+ InnerNeg == InnerPos.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
+ return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
+ }
+ }
+
+ return SDValue();
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
+// with different shifted sources.
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+ EVT VT = LHS.getValueType();
+
+ // The target must have at least one rotate/funnel flavor.
+ // We still try to match rotate by constant pre-legalization.
+ // TODO: Support pre-legalization funnel-shift by constant.
+ bool HasROTL = hasOperation(ISD::ROTL, VT);
+ bool HasROTR = hasOperation(ISD::ROTR, VT);
+ bool HasFSHL = hasOperation(ISD::FSHL, VT);
+ bool HasFSHR = hasOperation(ISD::FSHR, VT);
+
+ // If the type is going to be promoted and the target has enabled custom
+ // lowering for rotate, allow matching rotate by non-constants. Only allow
+ // this for scalar types.
+ if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
+ TargetLowering::TypePromoteInteger) {
+ HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
+ HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
+ }
+
+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
+ // Check for truncated rotate.
+ if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
+ assert(LHS.getValueType() == RHS.getValueType());
+ if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
+ }
+ }
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
+
+ // If neither side matched a rotate half, bail
+ if (!LHSShift && !RHSShift)
+ return SDValue();
+
+ // InstCombine may have combined a constant shl, srl, mul, or udiv with one
+ // side of the rotate, so try to handle that here. In all cases we need to
+ // pass the matched shift from the opposite side to compute the opcode and
+ // needed shift amount to extract. We still want to do this if both sides
+ // matched a rotate half because one half may be a potential overshift that
+  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
+ // single one).
+
+ // Have LHS side of the rotate, try to extract the needed shift from the RHS.
+ if (LHSShift)
+ if (SDValue NewRHSShift =
+ extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
+ RHSShift = NewRHSShift;
+ // Have RHS side of the rotate, try to extract the needed shift from the LHS.
+ if (RHSShift)
+ if (SDValue NewLHSShift =
+ extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
+ LHSShift = NewLHSShift;
+
+ // If a side is still missing, nothing else we can do.
+ if (!RHSShift || !LHSShift)
+ return SDValue();
+
+ // At this point we've matched or extracted a shift op on each side.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return SDValue(); // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ // Something has gone wrong - we've lost the shl/srl pair - bail.
+ if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftArg = RHSShift.getOperand(0);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
+ };
+
+ auto ApplyMasks = [&](SDValue Res) {
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue Mask = AllOnes;
+
+ if (LHSMask.getNode()) {
+ SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
+ }
+ if (RHSMask.getNode()) {
+ SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
+ }
+
+ Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
+ }
+
+ return Res;
+ };
+
+ // TODO: Support pre-legalization funnel-shift by constant.
+ bool IsRotate = LHSShiftArg == RHSShiftArg;
+ if (!IsRotate && !(HasFSHL || HasFSHR)) {
+ if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ // Look for a disguised rotate by constant.
+ // The common shifted operand X may be hidden inside another 'or'.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
+ if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
+ return false;
+ if (CommonOp == Or.getOperand(0)) {
+ X = CommonOp;
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (CommonOp == Or.getOperand(1)) {
+ X = CommonOp;
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ SDValue Res;
+ if (matchOr(LHSShiftArg, RHSShiftArg)) {
+ // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
+ Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
+ } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
+ // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
+ Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
+ } else {
+ return SDValue();
+ }
+
+ return ApplyMasks(Res);
+ }
+
+ return SDValue(); // Requires funnel shift support.
+ }
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
+ // iff C1+C2 == EltSizeInBits
+ if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ SDValue Res;
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
+
+ return ApplyMasks(Res);
+ }
+
+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
+ // shift.
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return SDValue();
+
+ // If the shift amount is sign/zext/any-extended just peel it off.
+ SDValue LExtOp0 = LHSShiftAmt;
+ SDValue RExtOp0 = RHSShiftAmt;
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ LExtOp0 = LHSShiftAmt.getOperand(0);
+ RExtOp0 = RHSShiftAmt.getOperand(0);
+ }
+
+ if (IsRotate && (HasROTL || HasROTR)) {
+ SDValue TryL =
+ MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
+ RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
+ if (TryL)
+ return TryL;
+
+ SDValue TryR =
+ MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
+ LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
+ if (TryR)
+ return TryR;
+ }
+
+ SDValue TryL =
+ MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
+ if (TryL)
+ return TryL;
+
+ SDValue TryR =
+ MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
+ if (TryR)
+ return TryR;
+
+ return SDValue();
+}
+
+/// Recursively traverses the expression calculating the origin of the requested
+/// byte of the given value. Returns std::nullopt if the provider can't be
+/// calculated.
+///
+/// For all the values except the root of the expression, we verify that the
+/// value has exactly one use and if not then return std::nullopt. This way if
+/// the origin of the byte is returned it's guaranteed that the values which
+/// contribute to the byte are not used outside of this expression.
+///
+/// However, there is a special case when dealing with vector loads -- we allow
+/// more than one use if the load is a vector type. Since the values that
+/// contribute to the byte ultimately come from the ExtractVectorElements of the
+/// Load, we don't care if the Load has uses other than ExtractVectorElements,
+/// because those operations are independent from the pattern to be combined.
+/// For vector loads, we simply care that the ByteProviders are adjacent
+/// positions of the same vector, and their index matches the byte that is being
+/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
+/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
+/// byte position we are trying to provide for the LoadCombine. If these do
+/// not match, then we cannot combine the vector loads. \p Index tracks the
+/// byte position we are trying to provide for and is matched against the
+/// shl and load size. The \p Index algorithm ensures the requested byte is
+/// provided for by the pattern, and that the pattern does not over-provide
+/// bytes.
+///
+///
+/// The supported LoadCombine pattern for vector loads is as follows
+/// or
+/// / \
+/// or shl
+/// / \ |
+/// or shl zext
+/// / \ | |
+/// shl zext zext EVE*
+/// | | | |
+/// zext EVE* EVE* LOAD
+/// | | |
+/// EVE* LOAD LOAD
+/// |
+/// LOAD
+///
+/// *ExtractVectorElement
+using SDByteProvider = ByteProvider<SDNode *>;
+
+static const std::optional<SDByteProvider>
+calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
+ std::optional<uint64_t> VectorIndex,
+ unsigned StartingIndex = 0) {
+
+  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
+ if (Depth == 10)
+ return std::nullopt;
+
+ // Only allow multiple uses if the instruction is a vector load (in which
+ // case we will use the load for every ExtractVectorElement)
+ if (Depth && !Op.hasOneUse() &&
+ (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
+ return std::nullopt;
+
+ // Fail to combine if we have encountered anything but a LOAD after handling
+ // an ExtractVectorElement.
+ if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
+ return std::nullopt;
+
+ unsigned BitWidth = Op.getValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return std::nullopt;
+ unsigned ByteWidth = BitWidth / 8;
+ assert(Index < ByteWidth && "invalid index requested");
+ (void) ByteWidth;
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto LHS =
+ calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
+ if (!LHS)
+ return std::nullopt;
+ auto RHS =
+ calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
+ if (!RHS)
+ return std::nullopt;
+
+ if (LHS->isConstantZero())
+ return RHS;
+ if (RHS->isConstantZero())
+ return LHS;
+ return std::nullopt;
+ }
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return std::nullopt;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+
+ if (BitShift % 8 != 0)
+ return std::nullopt;
+ uint64_t ByteShift = BitShift / 8;
+
+    // If we are shifting by an amount greater than the index we are trying to
+    // provide, then do not provide anything. Otherwise, subtract the amount we
+    // shifted by from the index.
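+    // For example, for (shl x, 16), byte 2 of the result is byte 0 of x,
+    // while bytes 0 and 1 are known zero.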
+ return Index < ByteShift
+ ? SDByteProvider::getConstantZero()
+ : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+ Depth + 1, VectorIndex, Index);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? std::optional<SDByteProvider>(
+ SDByteProvider::getConstantZero())
+ : std::nullopt;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
+ StartingIndex);
+ }
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+ Depth + 1, VectorIndex, StartingIndex);
+ case ISD::EXTRACT_VECTOR_ELT: {
+ auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!OffsetOp)
+ return std::nullopt;
+
+ VectorIndex = OffsetOp->getZExtValue();
+
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+    // Check to see if the position of the element in the vector corresponds
+    // to the byte we are trying to provide for. In the case of a vector of
+    // i8, this simply means VectorIndex == StartingIndex. For non-i8 cases,
+    // the element will provide a range of bytes. For example, if we have a
+    // vector of i16s, each element provides two bytes (V[1] provides bytes 2
+    // and 3).
+ if (*VectorIndex * NarrowByteWidth > StartingIndex)
+ return std::nullopt;
+ if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
+ return std::nullopt;
+
+ return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
+ VectorIndex, StartingIndex);
+ }
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ if (!L->isSimple() || L->isIndexed())
+ return std::nullopt;
+
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+    // If the width of the load does not reach the byte we are trying to
+    // provide for and it is not a ZEXTLOAD, then the load does not provide
+    // for the byte in question.
+ if (Index >= NarrowByteWidth)
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? std::optional<SDByteProvider>(
+ SDByteProvider::getConstantZero())
+ : std::nullopt;
+
+ unsigned BPVectorIndex = VectorIndex.value_or(0U);
+ return SDByteProvider::getSrc(L, Index, BPVectorIndex);
+ }
+ }
+
+ return std::nullopt;
+}
+
+static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
+ return i;
+}
+
+static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
+ return BW - i - 1;
+}
+
+// Check if the byte offsets we are looking at match either a big- or
+// little-endian value load. Return true for big endian, false for little
+// endian, and std::nullopt if the match failed.
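+// For example, byte offsets {0, 1, 2, 3} relative to FirstOffset describe a
+// little-endian load, while {3, 2, 1, 0} describe a big-endian one.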
+static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
+ int64_t FirstOffset) {
+ // The endian can be decided only when it is 2 bytes at least.
+ unsigned Width = ByteOffsets.size();
+ if (Width < 2)
+ return std::nullopt;
+
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < Width; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
+ if (!BigEndian && !LittleEndian)
+ return std::nullopt;
+ }
+
+ assert((BigEndian != LittleEndian) && "It should be either big endian or "
+ "little endian");
+ return BigEndian;
+}
+
+static SDValue stripTruncAndExt(SDValue Value) {
+ switch (Value.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ return stripTruncAndExt(Value.getOperand(0));
+ }
+ return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
+ // The matching looks for "store (trunc x)" patterns that appear early but are
+ // likely to be replaced by truncating store nodes during combining.
+ // TODO: If there is evidence that running this later would help, this
+ // limitation could be removed. Legality checks may need to be added
+ // for the created store and optional bswap/rotate.
+ if (LegalOperations || OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
+ EVT MemVT = N->getMemoryVT();
+ if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
+ !N->isSimple() || N->isIndexed())
+ return SDValue();
+
+ // Collect all of the stores in the chain, up to the maximum store width (i64).
+ SDValue Chain = N->getChain();
+ SmallVector<StoreSDNode *, 8> Stores = {N};
+ unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
+ unsigned MaxWideNumBits = 64;
+ unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
+ while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
+ // All stores must be the same size to ensure that we are writing all of the
+ // bytes in the wide value.
+ // This store should have exactly one use as a chain operand for another
+ // store in the merging set. If there are other chain uses, then the
+ // transform may not be safe because order of loads/stores outside of this
+ // set may not be preserved.
+ // TODO: We could allow multiple sizes by tracking each stored byte.
+ if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
+ Store->isIndexed() || !Store->hasOneUse())
+ return SDValue();
+ Stores.push_back(Store);
+ Chain = Store->getChain();
+ if (MaxStores < Stores.size())
+ return SDValue();
+ }
+ // There is no reason to continue if we do not have at least a pair of stores.
+ if (Stores.size() < 2)
+ return SDValue();
+
+ // Handle simple types only.
+ LLVMContext &Context = *DAG.getContext();
+ unsigned NumStores = Stores.size();
+ unsigned WideNumBits = NumStores * NarrowNumBits;
+ EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
+ if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
+ return SDValue();
+
+ // Check if all bytes of the source value that we are looking at are stored
+ // to the same base address. Collect offsets from Base address into OffsetMap.
+ SDValue SourceValue;
+ SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
+ int64_t FirstOffset = INT64_MAX;
+ StoreSDNode *FirstStore = nullptr;
+ std::optional<BaseIndexOffset> Base;
+ for (auto *Store : Stores) {
+ // All the stores store different parts of the CombinedValue. A truncate is
+ // required to get the partial value.
+ SDValue Trunc = Store->getValue();
+ if (Trunc.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+ // Other than the first/last part, a shift operation is required to get the
+ // offset.
+ int64_t Offset = 0;
+ SDValue WideVal = Trunc.getOperand(0);
+ if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
+ isa<ConstantSDNode>(WideVal.getOperand(1))) {
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = srl y, ShiftAmtC
+ // i8 z = trunc x
+ // store z, ...
+ uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
+ if (ShiftAmtC % NarrowNumBits != 0)
+ return SDValue();
+
+ Offset = ShiftAmtC / NarrowNumBits;
+ WideVal = WideVal.getOperand(0);
+ }
+
+ // Stores must share the same source value with different offsets.
+ // Truncates and extends should be stripped to get the single source value.
+ if (!SourceValue)
+ SourceValue = WideVal;
+ else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
+ return SDValue();
+ else if (SourceValue.getValueType() != WideVT) {
+ if (WideVal.getValueType() == WideVT ||
+ WideVal.getScalarValueSizeInBits() >
+ SourceValue.getScalarValueSizeInBits())
+ SourceValue = WideVal;
+ // Give up if the source value type is smaller than the store size.
+ if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
+ return SDValue();
+ }
+
+ // Stores must share the same base address.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Remember the first store.
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstStore = Store;
+ FirstOffset = ByteOffsetFromBase;
+ }
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
+ return SDValue();
+ OffsetMap[Offset] = ByteOffsetFromBase;
+ }
+
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ assert(FirstStore && "First store must be set");
+
+ // Check that a store of the wide type is both allowed and fast on the target
+ const DataLayout &Layout = DAG.getDataLayout();
+ unsigned Fast = 0;
+ bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
+ *FirstStore->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStores; ++i)
+ if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
+ if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(Layout.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
+ NeedRotate = true;
+ else
+ return SDValue();
+ }
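+ // For instance (illustrative): four i8 stores written in reverse order need
+ // a BSWAP of the i32 source value, while a pair of i16 stores written in
+ // reverse order only needs the i32 source rotated by 16 bits.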
+
+ SDLoc DL(N);
+ if (WideVT != SourceValue.getValueType()) {
+ assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
+ "Unexpected store value to merge");
+ SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
+ }
+
+ // Before legalize we can introduce illegal bswaps/rotates which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedBswap) {
+ SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
+ } else if (NeedRotate) {
+ assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
+ SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
+ SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
+ }
+
+ SDValue NewStore =
+ DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlign());
+
+ // Rely on other DAG combine rules to remove the other individual stores.
+ DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+ return NewStore;
+}
+
+/// Match a pattern where a wide type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors. Fold it into a single load or a load
+/// and a BSWAP if the targets supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads) its direct users are put into the worklist,
+/// but the root node of the pattern which triggers the load combine is not
+/// necessarily a direct user of the changed node. For example, once the address
+/// of the t28 load is reassociated, load combine won't be triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix visitLoad can check if the load can be a part of a load
+/// combine pattern and add corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&](SDByteProvider P) {
+ assert(P.hasSrc() && "Must be a memory byte provider");
+ auto *Load = cast<LoadSDNode>(P.Src.value());
+
+ unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
+
+ assert(LoadBitWidth % 8 == 0 &&
+ "can only analyze providers for individual bytes not bit");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
+ : littleEndianByteAt(LoadByteWidth, P.DestOffset);
+ };
+
+ std::optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallPtrSet<LoadSDNode *, 8> Loads;
+ std::optional<SDByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+ // Check if all the bytes of the OR we are looking at are loaded from the same
+ // base address. Collect byte offsets from the Base address in ByteOffsets.
+ SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
+ unsigned ZeroExtendedBytes = 0;
+ for (int i = ByteWidth - 1; i >= 0; --i) {
+ auto P =
+ calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
+ /*StartingIndex*/ i);
+ if (!P)
+ return SDValue();
+
+ if (P->isConstantZero()) {
+ // It's OK for the N most significant bytes to be 0; we can just
+ // zero-extend the load.
+ if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
+ return SDValue();
+ continue;
+ }
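+ // For example (illustrative): for an i32 OR where only a[0] | (a[1] << 8) is
+ // present, the two most significant bytes are constant zero and the pattern
+ // is matched as a zero-extending i16 load.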
+ assert(P->hasSrc() && "provenance should either be memory or zero");
+ auto *L = cast<LoadSDNode>(P->Src.value());
+
+ // All loads must share the same chain
+ SDValue LChain = L->getChain();
+ if (!Chain)
+ Chain = LChain;
+ else if (Chain != LChain)
+ return SDValue();
+
+ // Loads must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
+ int64_t ByteOffsetFromBase = 0;
+
+ // For vector loads, the expected load combine pattern will have an
+ // ExtractElement for each index in the vector. While each of these
+ // ExtractElements will be accessing the same base address as determined
+ // by the load instruction, the actual bytes they interact with will differ
+ // due to different ExtractElement indices. To accurately determine the
+ // byte position of an ExtractElement, we offset the base load ptr with
+ // the index multiplied by the byte size of each element in the vector.
+ if (L->getMemoryVT().isVector()) {
+ unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
+ if (LoadWidthInBit % 8 != 0)
+ return SDValue();
+ unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
+ Ptr.addToOffset(ByteOffsetFromVector);
+ }
+
+ if (!Base)
+ Base = Ptr;
+
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Calculate the offset of the current byte from the base address
+ ByteOffsetFromBase += MemoryByteOffset(*P);
+ ByteOffsets[i] = ByteOffsetFromBase;
+
+ // Remember the first byte load
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstByteProvider = P;
+ FirstOffset = ByteOffsetFromBase;
+ }
+
+ Loads.insert(L);
+ }
+
+ assert(!Loads.empty() && "All the bytes of the value must be loaded from "
+ "memory, so there must be at least one load which produces the value");
+ assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+
+ bool NeedsZext = ZeroExtendedBytes > 0;
+
+ EVT MemVT =
+ EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
+
+ if (!MemVT.isSimple())
+ return SDValue();
+
+ // Before legalization we can introduce illegal loads that are too wide and
+ // will later be split into legal-sized loads. This enables us to combine an
+ // i64-load-by-i8 pattern into a couple of i32 loads on 32-bit targets.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ MemVT))
+ return SDValue();
+
+ // Check if the bytes of the OR we are looking at match either a big- or a
+ // little-endian value load.
+ std::optional<bool> IsBigEndian = isBigEndian(
+ ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
+ if (!IsBigEndian)
+ return SDValue();
+
+ assert(FirstByteProvider && "must be set");
+
+ // Ensure that the first byte is loaded from the zero offset of the first
+ // load, so the combined value can be loaded from the first load's address.
+ if (MemoryByteOffset(*FirstByteProvider) != 0)
+ return SDValue();
+ auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
+
+ // The node we are looking at matches the pattern; check if we can replace it
+ // with a single (possibly zero-extended) load and a bswap + shift if
+ // needed.
+
+ // If the load needs byte swap check if the target supports it
+ bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // load and byte shuffling instead of several loads and byte shuffling.
+ // We do not introduce illegal bswaps when zero-extending as this tends to
+ // introduce too many arithmetic instructions.
+ if (NeedsBswap && (LegalOperations || NeedsZext) &&
+ !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // If we need to bswap and zero extend, we have to insert a shift. Check that
+ // it is legal.
+ if (NeedsBswap && NeedsZext && LegalOperations &&
+ !TLI.isOperationLegal(ISD::SHL, VT))
+ return SDValue();
+
+ // Check that a load of the wide type is both allowed and fast on the target
+ unsigned Fast = 0;
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ *FirstLoad->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ SDValue NewLoad =
+ DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
+ Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
+
+ // Transfer chain users from old loads to the new load.
+ for (LoadSDNode *L : Loads)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+
+ if (!NeedsBswap)
+ return NewLoad;
+
+ SDValue ShiftedLoad =
+ NeedsZext
+ ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+ DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
+ SDLoc(N), LegalOperations))
+ : NewLoad;
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
+}
+
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold masked merge, with canonical pattern of:
+// | A | |B|
+// ((x ^ y) & m) ^ y
+// | D |
+// Into:
+// (x & m) | (y & ~m)
+// If y is a constant, m is not a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// ~(~x & m) & (m | y)
+// If x is a constant, m is a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// (x | ~m) & ~(~m & ~y)
+// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
+// the very least that breaks andnpd / andnps patterns, and because those
+// patterns are simplified in IR and shouldn't be created in the DAG
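+// A small illustrative example with 4-bit values: for x = 0b1100, y = 0b1010,
+// m = 0b0110, both ((x ^ y) & m) ^ y and (x & m) | (y & ~m) pick bits 2..1
+// from x and bits 3 and 0 from y, yielding 0b1100.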
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+ assert(N->getOpcode() == ISD::XOR);
+
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // There are 3 commutable operators in the pattern,
+ // so we have to deal with 8 possible variants of the basic pattern.
+ SDValue X, Y, M;
+ auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
+ if (And.getOpcode() != ISD::AND || !And.hasOneUse())
+ return false;
+ SDValue Xor = And.getOperand(XorIdx);
+ if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
+ return false;
+ SDValue Xor0 = Xor.getOperand(0);
+ SDValue Xor1 = Xor.getOperand(1);
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesOrAllOnesSplat(Xor1))
+ return false;
+ if (Other == Xor0)
+ std::swap(Xor0, Xor1);
+ if (Other != Xor1)
+ return false;
+ X = Xor0;
+ Y = Xor1;
+ M = And.getOperand(XorIdx ? 0 : 1);
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
+ !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
+ return SDValue();
+
+ // Don't do anything if the mask is constant. This should not be reachable.
+ // InstCombine should have already unfolded this pattern, and DAGCombiner
+ // probably shouldn't produce it either.
+ if (isa<ConstantSDNode>(M.getNode()))
+ return SDValue();
+
+ // We can transform if the target has AndNot
+ if (!TLI.hasAndNot(M))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // If Y is a constant, check that 'andn' works with immediates, unless M is
+ // a bitwise not that would already allow ANDN to be used.
+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
+ SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
+ SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
+ return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
+ }
+
+ // If X is a constant and M is a bitwise not, check that 'andn' works with
+ // immediates.
+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotM = M.getOperand(0);
+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
+ SDValue NotY = DAG.getNOT(DL, Y, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
+ }
+
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
+ SDValue NotM = DAG.getNOT(DL, M, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
+
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.isUndef() && N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (xor x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ // fold (xor c1, c2) -> c1^c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (xor x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // reassociate xor
+ if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+ return RXOR;
+
+ // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
+ return SD;
+
+ // fold (a^b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
+ // look for 'add-like' folds:
+ // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ isMinSignedConstant(N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
+ // fold !(x cc y) -> (x !cc y)
+ unsigned N0Opcode = N0.getOpcode();
+ SDValue LHS, RHS, CC;
+ if (TLI.isConstTrueVal(N1) &&
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ LHS.getValueType());
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
+ switch (N0Opcode) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ if (N0.hasOneUse()) {
+ // FIXME Can we handle multiple uses? Could we token factor the chain
+ // results from the new/old setcc?
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
+ N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
+ CombineTo(N, SetCC);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
+ recursivelyDeleteUnusedNodes(N0.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ SDLoc DL0(N0);
+ V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
+ DAG.getConstant(1, DL0, V.getValueType()));
+ AddToWorklist(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (isAllOnesConstant(N1) && N0.hasOneUse() &&
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
+ }
+ }
+
+ // fold (not (neg x)) -> (add X, -1)
+ // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+ // Y is a constant or the subtract has a single use.
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ }
+
+ // fold (not (add X, -1)) -> (neg X)
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
+ return DAG.getNegative(N0.getOperand(0), DL, VT);
+ }
+
+ // fold (xor (and x, y), y) -> (and (not x), y)
+ if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
+ SDValue X = N0.getOperand(0);
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
+ AddToWorklist(NotX.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
+ }
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
+ SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
+ if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
+ SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
+ SDValue S0 = S.getOperand(0);
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
+ if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
+ return DAG.getNode(ISD::ABS, DL, VT, S0);
+ }
+ }
+
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There exists no value for x which would allow the result to contain zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
+ N0.getOperand(1));
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0Opcode == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
+
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+ if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+ return R;
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
+ // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
+ if (SDValue MM = unfoldMaskedMerge(N))
+ return MM;
+
+ // Simplify the expression using non-local knowledge.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
+ return Combined;
+
+ return SDValue();
+}
+
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
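+/// As an illustrative example: shl (xor (shl X, 2), Y), 3 becomes
+/// xor (shl X, 5), (shl Y, 3), so the two new shifts can execute independently.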
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+ // Match a one-use bitwise logic op.
+ SDValue LogicOp = Shift->getOperand(0);
+ if (!LogicOp.hasOneUse())
+ return SDValue();
+
+ unsigned LogicOpcode = LogicOp.getOpcode();
+ if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+ LogicOpcode != ISD::XOR)
+ return SDValue();
+
+ // Find a matching one-use shift by constant.
+ unsigned ShiftOpcode = Shift->getOpcode();
+ SDValue C1 = Shift->getOperand(1);
+ ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+ assert(C1Node && "Expected a shift with constant operand");
+ const APInt &C1Val = C1Node->getAPIntValue();
+ auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+ const APInt *&ShiftAmtVal) {
+ if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+ return false;
+
+ ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+ if (!ShiftCNode)
+ return false;
+
+ // Capture the shifted operand and shift amount value.
+ ShiftOp = V.getOperand(0);
+ ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+ // Shift amount types do not have to match their operand type, so check that
+ // the constants are the same width.
+ if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+ return false;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ SDValue X, Y;
+ const APInt *C0Val;
+ if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+ Y = LogicOp.getOperand(1);
+ else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+ Y = LogicOp.getOperand(0);
+ else
+ return SDValue();
+
+ // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ SDLoc DL(Shift);
+ EVT VT = Shift->getValueType(0);
+ EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+ SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+ SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+ SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
+/// Handle transforms common to the three shifts, when the shift amount is a
+/// constant.
+/// We are looking for: (shift being one of shl/sra/srl)
+/// shift (binop X, C0), C1
+/// And want to transform into:
+/// binop (shift X, C1), (shift C0, C1)
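+/// For example (illustrative): shl (and X, 0xFF), 8 --> and (shl X, 8), 0xFF00.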
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+ assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
+ // Do not turn a 'not' into a regular xor.
+ if (isBitwiseNot(N->getOperand(0)))
+ return SDValue();
+
+ // The inner binop must be one-use, since we want to replace it.
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+ return SDValue();
+
+ // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
+ switch (LHS.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ break;
+ }
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant
+ // or is copy/select. Enable this in other cases when we figure out that it's
+ // exactly profitable.
+ SDValue BinOpLHSVal = LHS.getOperand(0);
+ bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+ BinOpLHSVal.getOpcode() == ISD::SRA ||
+ BinOpLHSVal.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+ bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+ if (!IsShiftByConstant && !IsCopyOrSelect)
+ return SDValue();
+
+ if (IsCopyOrSelect && N->hasOneUse())
+ return SDValue();
+
+ // Attempt to fold the constants, shifting the binop RHS by the shift amount.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (SDValue NewRHS = DAG.FoldConstantArithmetic(
+ N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ EVT TruncVT = N->getValueType(0);
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+ TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+ if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
+ SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
+ AddToWorklist(Trunc00.getNode());
+ AddToWorklist(Trunc01.getNode());
+ return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+ SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ unsigned Bitsize = VT.getScalarSizeInBits();
+
+ // fold (rot x, 0) -> x
+ if (isNullOrNullSplat(N1))
+ return N0;
+
+ // fold (rot x, c) -> x iff (c % BitSize) == 0
+ if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
+ APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
+ if (DAG.MaskedValueIsZero(N1, ModuloMask))
+ return N0;
+ }
+
+ // fold (rot x, c) -> (rot x, c % BitSize)
+ bool OutOfRange = false;
+ auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
+ OutOfRange |= C->getAPIntValue().uge(Bitsize);
+ return true;
+ };
+ if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
+ EVT AmtVT = N1.getValueType();
+ SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
+ if (SDValue Amt =
+ DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
+ }
+
+ // rot i16 X, 8 --> bswap X
+ auto *RotAmtC = isConstOrConstSplat(N1);
+ if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
+ VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
+ return DAG.getNode(ISD::BSWAP, dl, VT, N0);
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
+ }
+
+ unsigned NextOp = N0.getOpcode();
+
+ // fold (rot* (rot* x, c2), c1)
+ // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
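+ // For example (illustrative, i8): rotl (rotr x, 3), 5 -> rotl x, 2, since
+ // ((5 % 8) - (3 % 8) + 8) % 8 == 2.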
+ if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
+ SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
+ if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
+ EVT ShiftVT = C1->getValueType(0);
+ bool SameSide = (N->getOpcode() == NextOp);
+ unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N1, BitsizeC});
+ SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N0.getOperand(1), BitsizeC});
+ if (Norm1 && Norm2)
+ if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+ CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+ CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
+ {CombinedShift, BitsizeC});
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ return FoldedVOp;
+
+ BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+ // If setcc produces an all-ones true value then:
+ // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
+ if (N1CV && N1CV->isConstant()) {
+ if (N0.getOpcode() == ISD::AND) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ }
+ }
+ }
+ }
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N0.getOpcode() == ISD::SHL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
+ }
+ }
+
+ // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
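+ // For example (illustrative): shl (zext i16 (shl X, 8) to i32), 20 becomes
+ // shl (zext X to i32), 28, which is valid because the outer shift amount (20)
+ // is at least the 16 bits added by the extension.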
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+ EVT InnerVT = N0Op0.getValueType();
+ uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
+
+ auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
+ SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
+ Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
+ }
+ }
+
+ // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
+ // Only fold this if the inner zext has no other uses to avoid increasing
+ // the total number of instructions.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+ auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2);
+ return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+ NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+
+ SDLoc DL(N);
+
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
+ if (N0->getFlags().hasExact()) {
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ }
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does,
+ // folding this will increase the total number of instructions.
+ if (N0.getOpcode() == ISD::SRL &&
+ (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ }
+ }
+
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true)) {
+ SDLoc DL(N);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
+ SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
+ }
+
+ // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ // Variant of version done on multiply, except mul by a power of 2 is turned
+ // into a shift.
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+ N0->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ AddToWorklist(Shl0.getNode());
+ AddToWorklist(Shl1.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ }
+
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
+ if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Shl =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
+ }
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N))
+ return NewSHL;
+
+ // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
+ if (N0.getOpcode() == ISD::VSCALE && N1C) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = N1C->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 << C1);
+ }
+
+ // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
+ APInt ShlVal;
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ if (ShlVal.ult(C0.getBitWidth())) {
+ APInt NewStep = C0 << ShlVal;
+ return DAG.getStepVector(SDLoc(N), VT, NewStep);
+ }
+ }
+
+ return SDValue();
+}
+
+// Transform a right shift of a multiply into a multiply-high.
+// Examples:
+// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
+// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
+static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+
+ // Check the shift amount. Proceed with the transformation if the shift
+ // amount is constant.
+ ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
+ if (!ShiftAmtSrc)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // The operation feeding into the shift must be a multiply.
+ SDValue ShiftOperand = N->getOperand(0);
+ if (ShiftOperand.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // Both operands must be equivalent extend nodes.
+ SDValue LeftOp = ShiftOperand.getOperand(0);
+ SDValue RightOp = ShiftOperand.getOperand(1);
+
+ bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
+ bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
+
+ if (!IsSignExt && !IsZeroExt)
+ return SDValue();
+
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+
+ // return true if U may use the lower bits of its operands
+ auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
+ if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
+ return true;
+ }
+ ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
+ if (!UShiftAmtSrc) {
+ return true;
+ }
+ unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
+ return UShiftAmt < NarrowVTSize;
+ };
+
+ // If the lower part of the MUL is also used and MUL_LOHI is supported, do
+ // not introduce the MULH in favor of MUL_LOHI.
+ unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!ShiftOperand.hasOneUse() &&
+ TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
+ llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
+ return SDValue();
+ }
+
+ SDValue MulhRightOp;
+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
+ unsigned ActiveBits = IsSignExt
+ ? Constant->getAPIntValue().getSignificantBits()
+ : Constant->getAPIntValue().getActiveBits();
+ if (ActiveBits > NarrowVTSize)
+ return SDValue();
+ MulhRightOp = DAG.getConstant(
+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
+ NarrowVT);
+ } else {
+ if (LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+ MulhRightOp = RightOp.getOperand(0);
+ }
+
+ EVT WideVT = LeftOp.getValueType();
+ // Proceed with the transformation if the wide types match.
+ assert((WideVT == RightOp.getValueType()) &&
+ "Cannot have a multiply node with two different operand types.");
+
+ // Proceed with the transformation if the wide type is twice as large
+ // as the narrow type.
+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
+ return SDValue();
+
+ // Check the shift amount with the narrow type size.
+ // Proceed with the transformation if the shift amount is the width
+ // of the narrow type.
+ unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
+ if (ShiftAmt != NarrowVTSize)
+ return SDValue();
+
+ // If the operation feeding into the MUL is a sign extend (sext),
+ // we use mulhs. Otherwise, zero extends (zext) use mulhu.
+ unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+
+ // Combine to mulh if mulh is legal/custom for the narrow type on the target;
+ // for a vector type, we can instead transform to an acceptable type and rely
+ // on legalization to split/combine the result.
+ if (NarrowVT.isVector()) {
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
+ if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
+ !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
+ return SDValue();
+ } else {
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ return SDValue();
+ }
+
+ SDValue Result =
+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
+ bool IsSigned = N->getOpcode() == ISD::SRA;
+ return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
+}
+
+// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
+// This helper accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
+static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
+ SDValue OldLHS = N0.getOperand(0);
+ SDValue OldRHS = N0.getOperand(1);
+
+ // If both operands are bswap/bitreverse, we can ignore their other uses;
+ // otherwise we need to ensure logic_op and bswap/bitreverse(x) have one use.
+ if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
+ return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+ OldRHS.getOperand(0));
+ }
+
+ if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
+ SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
+ return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+ NewBitReorder);
+ }
+
+ if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
+ SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
+ return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
+ OldRHS.getOperand(0));
+ }
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // fold (sra c1, c2) -> c1 >>s c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ // fold (sra 0, x) -> 0
+ // fold (sra -1, x) -> -1
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
+ return N0;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ return FoldedVOp;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
+ // supports sext_inreg.
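+ // For example (illustrative, i32): sra (shl X, 24), 24 becomes
+ // sign_extend_inreg X, i8.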
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
+ VT.getVectorElementCount());
+ if (!LegalOperations ||
+ TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
+ TargetLowering::Legal)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ // Even if we can't convert to sext_inreg, we might be able to remove
+ // this shift pair if the input is already sign extended.
+ if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
+ return N0.getOperand(0);
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ // clamp (add c1, c2) to max shift.
+ if (N0.getOpcode() == ISD::SRA) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ EVT ShiftSVT = ShiftVT.getScalarType();
+ SmallVector<SDValue, 16> ShiftValues;
+
+ auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ APInt Sum = c1 + c2;
+ unsigned ShiftSum =
+ Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
+ ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
+ return true;
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
+ SDValue ShiftValue;
+ if (N1.getOpcode() == ISD::BUILD_VECTOR)
+ ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
+ else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(ShiftValues.size() == 1 &&
+ "Expected matchBinaryPredicate to return one element for "
+ "SPLAT_VECTORs");
+ ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
+ } else
+ ShiftValue = ShiftValues[0];
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target, sext(shl) is likely to result in
+ // better code.
+ if (N0.getOpcode() == ISD::SHL && N1C) {
+ // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+ // Determine the residual right-shift amount.
+ int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the type we truncate to is legal, sign_extend is legal on
+ // that type, and the truncate to that type is both legal and free, perform
+ // the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+ // sra (add (shl X, N1C), AddC), N1C -->
+ // sext (add (trunc X to (width - N1C)), AddC')
+ // sra (sub AddC, (shl X, N1C)), N1C -->
+ // sext (sub AddC', (trunc X to (width - N1C)))
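+ // For example, on i64 with a free i64 -> i32 truncate:
+ //   sra (add (shl X, 32), AddC), 32
+ //     --> sext (add (trunc X to i32), trunc (AddC u>> 32) to i32)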
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+ N0.hasOneUse()) {
+ bool IsAdd = N0.getOpcode() == ISD::ADD;
+ SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+ if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+ Shl.hasOneUse()) {
+ // TODO: AddC does not need to be a splat.
+ if (ConstantSDNode *AddC =
+ isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+ // Determine what the truncate's type would be and ask the target if
+ // that is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but
+ // that restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC =
+ DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+ TruncVT.getScalarSizeInBits()),
+ DL, TruncVT);
+ SDValue Add;
+ if (IsAdd)
+ Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ else
+ Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
+ // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
+ // if c1 is equal to the number of bits the trunc removes
+ // TODO - support non-uniform vector shift amounts.
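+ // For example: (sra (trunc (sra x:i64, 32) to i32), 5)
+ //   -> (trunc (sra x, 37) to i32),
+ // since the i64 -> i32 truncate removes exactly 32 bits.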
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ EVT LargeVT = N0Op0.getValueType();
+ unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
+ if (LargeShift->getAPIntValue() == TruncBits) {
+ SDLoc DL(N);
+ EVT LargeShiftVT = getShiftAmountTy(LargeVT);
+ SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
+ Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
+ DAG.getConstant(TruncBits, DL, LargeShiftVT));
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
+ }
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N))
+ return NewSRA;
+
+ // Try to transform this shift into a multiply-high if
+ // it matches the appropriate pattern detected in combineShiftToMULH.
+ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ return MULH;
+
+ // Attempt to convert a sra of a load into a narrower sign-extending load.
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
+ return NarrowLoad;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ return FoldedVOp;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // if (srl x, c) is known to be zero, return 0
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C &&
+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N0.getOpcode() == ISD::SRL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
+ }
+ }
+
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue InnerShift = N0.getOperand(0);
+ // TODO - support non-uniform vector shift amounts.
+ if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
+ uint64_t c1 = N001C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = InnerShift.getValueType();
+ EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
+ // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N);
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, DL, VT);
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
+ }
+ // In the more general case, we can clear the high bits after the shift:
+ // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
+ if (N0.hasOneUse() && InnerShift.hasOneUse() &&
+ c1 + c2 < InnerShiftSize) {
+ SDLoc DL(N);
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
+ OpSizeInBits - c2),
+ DL, InnerShiftVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
+ }
+ }
+ }
+
+ // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
+ // (and (srl x, (sub c2, c1)), MASK)
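+ // For example, on i32: (srl (shl x, 12), 4) -> (and (shl x, 8), 0x0FFFFF00),
+ // where the mask keeps exactly the bits that survive both shifts.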
+ if (N0.getOpcode() == ISD::SHL &&
+ (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ }
+
+ // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+ // TODO - support non-uniform vector shift amounts.
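+ // For example: (srl (any_extend x:i16 to i32), 4)
+ //   -> (and (any_extend (srl x, 4)), 0x0FFFFFFF)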
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getAPIntValue().uge(BitSize))
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
+ AddToWorklist(SmallShift.getNode());
+ APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
+ // of two bitwidth. The "5" represents (log2 (bitwidth x)).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ isPowerOf2_32(OpSizeInBits) &&
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
+ KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~Known.Zero;
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if (UnknownBits.isPowerOf2()) {
+ // Okay, we know that only the single bit specified by UnknownBits could be
+ // set on input to the CTLZ node. If this bit is set, the SRL will return 0;
+ // if it is clear, it returns 1. Change the CTLZ/SRL pair to an SRL/XOR pair,
+ // which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countr_zero();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
+ AddToWorklist(Op.getNode());
+ }
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N))
+ return NewSRL;
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of the SRL is optimized into an AND, the
+ // SRL itself may not be optimized further. Look for it and add the BRCOND
+ // into the worklist.
+ //
+ // This also tends to happen for binary operations when SimplifyDemandedBits
+ // is involved.
+ //
+ // FIXME: This is unnecessary if we process the DAG in topological order,
+ // which we plan to do. This workaround can be removed once the DAG is
+ // processed in topological order.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+
+ // Look past the truncate.
+ if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
+ Use = *Use->use_begin();
+
+ if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
+ Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
+ AddToWorklist(Use);
+ }
+
+ // Try to transform this shift into a multiply-high if
+ // it matches the appropriate pattern detected in combineShiftToMULH.
+ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ return MULH;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ bool IsFSHL = N->getOpcode() == ISD::FSHL;
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (fshl N0, N1, 0) -> N0
+ // fold (fshr N0, N1, 0) -> N1
+ if (isPowerOf2_32(BitWidth))
+ if (DAG.MaskedValueIsZero(
+ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+ return IsFSHL ? N0 : N1;
+
+ auto IsUndefOrZero = [](SDValue V) {
+ return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+ };
+
+ // TODO - support non-uniform vector shift amounts.
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ if (Cst->getAPIntValue().uge(BitWidth)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
+ }
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
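+ // For example, with i32 operands and C == 8:
+ //   fshl(0, N1, 8) -> lshr(N1, 24) and fshr(N0, 0, 8) -> shl(N0, 24).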
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+
+ // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
+ // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
+ // TODO - bigendian support once we have test coverage.
+ // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
+ // TODO - permit LHS EXTLOAD if extensions are shifted out.
+ if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
+ !DAG.getDataLayout().isBigEndian()) {
+ auto *LHS = dyn_cast<LoadSDNode>(N0);
+ auto *RHS = dyn_cast<LoadSDNode>(N1);
+ if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
+ LHS->getAddressSpace() == RHS->getAddressSpace() &&
+ (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
+ ISD::isNON_EXTLoad(LHS)) {
+ if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
+ SDLoc DL(RHS);
+ uint64_t PtrOff =
+ IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
+ Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
+ unsigned Fast = 0;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ RHS->getAddressSpace(), NewAlign,
+ RHS->getMemOperand()->getFlags(), &Fast) &&
+ Fast) {
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
+ AddToWorklist(NewPtr.getNode());
+ SDValue Load = DAG.getLoad(
+ VT, DL, RHS->getChain(), NewPtr,
+ RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
+ // Replace the old load's chain with the new load's chain.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+ }
+ }
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
+ }
+
+ // fold (fshl N0, N0, N2) -> (rotl N0, N2)
+ // fold (fshr N0, N0, N2) -> (rotr N0, N2)
+ // TODO: Investigate flipping this rotate if only one is legal; if the funnel
+ // shift is legal as well, we might be better off avoiding the non-constant
+ // (BW - N2).
+ unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (N0 == N1 && hasOperation(RotOpc, VT))
+ return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+
+ // Simplify, based on bits shifted out of N0/N1.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+
+ // fold (*shlsat c1, c2) -> c1<<c2
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
+ // fold (sshlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::SSHLSAT && N1C &&
+ N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+
+ // fold (ushlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::USHLSAT && N1C &&
+ N1C->getAPIntValue().ule(
+ DAG.computeKnownBits(N0).countMinLeadingZeros()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+ }
+
+ return SDValue();
+}
+
+// Given an ABS node, detect the following patterns:
+// (ABS (SUB (EXTEND a), (EXTEND b))).
+// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
+// Generates UABD/SABD instruction.
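+// For example, assuming ABDS is legal for i8:
+//   (abs (sub (sext a:i8 to i32), (sext b:i8 to i32)))
+//     -> (zext (abds a, b) to i32)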
+SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+ EVT SrcVT = N->getValueType(0);
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::ABS)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue AbsOp1 = N->getOperand(0);
+ SDValue Op0, Op1;
+ SDLoc DL(N);
+
+ if (AbsOp1.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ Op0 = AbsOp1.getOperand(0);
+ Op1 = AbsOp1.getOperand(1);
+
+ unsigned Opc0 = Op0.getOpcode();
+ // Check if the operands of the sub are (zero|sign)-extended.
+ if (Opc0 != Op1.getOpcode() ||
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
+ // fold (abs (sub nsw x, y)) -> abds(x, y)
+ if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
+ TLI.preferABDSToABSWithNSW(VT)) {
+ SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
+ return SDValue();
+ }
+
+ EVT VT1 = Op0.getOperand(0).getValueType();
+ EVT VT2 = Op1.getOperand(0).getValueType();
+ unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
+
+ // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
+ // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
+ // NOTE: Extensions must be equivalent.
+ if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
+ ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
+
+ // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
+ // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
+ if (hasOperation(ABDOpcode, VT)) {
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
+ return C;
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+
+ if (SDValue ABD = foldABSToABD(N))
+ return ABD;
+
+ // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
+ // iff zero_extend/truncate are free.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
+ TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
+ hasOperation(ISD::ABS, ExtVT)) {
+ SDLoc DL(N);
+ return DAG.getNode(
+ ISD::ZERO_EXTEND, DL, VT,
+ DAG.getNode(ISD::ABS, DL, ExtVT,
+ DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBSWAP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (bswap c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BSWAP, DL, VT, N0);
+ // fold (bswap (bswap x)) -> x
+ if (N0.getOpcode() == ISD::BSWAP)
+ return N0.getOperand(0);
+
+ // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
+ // isn't supported, it will be expanded to bswap followed by a manual reversal
+ // of bits in each byte. By placing bswaps before bitreverse, we can remove
+ // the two bswaps if the bitreverse gets expanded.
+ if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
+ }
+
+ // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
+ // iff c >= bw/2 (i.e. the lower half of (shl x, c) is known zero)
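+ // For example, on i64 (assuming BSWAP is available on i32 and the truncate
+ // is free):
+ //   (bswap (shl x, 48)) -> (zext (bswap (trunc (shl x, 16) to i32)) to i64)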
+ unsigned BW = VT.getScalarSizeInBits();
+ if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
+ auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
+ if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+ ShAmt->getZExtValue() >= (BW / 2) &&
+ (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
+ TLI.isTruncateFree(VT, HalfVT) &&
+ (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
+ SDValue Res = N0.getOperand(0);
+ if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
+ Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
+ Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+ }
+
+ // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+ // inverse-shift-of-bswap:
+ // bswap (X u<< C) --> (bswap X) u>> C
+ // bswap (X u>> C) --> (bswap X) u<< C
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ N0.hasOneUse()) {
+ auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+ ShAmt->getZExtValue() % 8 == 0) {
+ SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+ unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
+ return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
+ }
+ }
+
+ if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (bitreverse c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
+ // fold (bitreverse (bitreverse x)) -> x
+ if (N0.getOpcode() == ISD::BITREVERSE)
+ return N0.getOperand(0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+// FIXME: This should be checking for no signed zeros on individual operands, as
+// well as no nans.
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+ SDValue RHS,
+ const TargetLowering &TLI) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = LHS.getValueType();
+
+ return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ TLI.isProfitableToCombineMinNumMaxNum(VT) &&
+ DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+}
+
+static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ switch (CC) {
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ // Since it's known never NaN to get here already, either fminnum or
+ // fminnum_ieee is OK. Try the IEEE version first, since fminnum is expanded
+ // in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
+ unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
+ unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ default:
+ return SDValue();
+ }
+}
+
+/// Generate Min/Max node
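+/// For example, (select (fcmp olt LHS, RHS), LHS, RHS) can become
+/// (fminnum LHS, RHS) when the operands are known never NaN and the target
+/// considers the combine profitable.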
+SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True,
+ SDValue False, ISD::CondCode CC) {
+ if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
+ return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
+
+ // If we can't directly match this, try to see if we can pull an fneg out of
+ // the select.
+ SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
+ True, DAG, LegalOperations, ForCodeSize);
+ if (!NegTrue)
+ return SDValue();
+
+ HandleSDNode NegTrueHandle(NegTrue);
+
+ // Try to unfold an fneg from the select if we are comparing the negated
+ // constant.
+ //
+ // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
+ //
+ // TODO: Handle fabs
+ if (LHS == NegTrue) {
+ // Try to pull an fneg out of the compare operand (RHS) as well, so that it
+ // can match the false operand.
+ SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
+ RHS, DAG, LegalOperations, ForCodeSize);
+ if (NegRHS) {
+ HandleSDNode NegRHSHandle(NegRHS);
+ if (NegRHS == False) {
+ SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
+ False, CC, TLI, DAG);
+ if (Combined)
+ return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue C1 = N->getOperand(1);
+ SDValue C2 = N->getOperand(2);
+ if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue();
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR.
+ SDValue X = Cond.getOperand(0);
+ SDValue CondC = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+ }
+ return SDValue();
+}
+
+static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
+ const TargetLowering &TLI) {
+ if (!TLI.convertSelectOfConstantsToMath(VT))
+ return false;
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
+ return true;
+ if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return true;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
+ return true;
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
+ return true;
+
+ return false;
+}
+
+SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+ SDLoc DL(N);
+
+ if (!VT.isInteger())
+ return SDValue();
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N1);
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C1 || !C2)
+ return SDValue();
+
+ if (CondVT != MVT::i1 || LegalOperations) {
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+ // We can't do this reliably if integer-based booleans have different
+ // contents from floating-point-based booleans. This is because we can't tell
+ // whether we have an integer-based boolean or a floating-point-based boolean
+ // unless we can find the SETCC that produced it and inspect its operands.
+ // This is fairly easy if Cond is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isZero() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+ }
+
+ // Only do this before legalization to avoid conflicting with target-specific
+ // transforms in the other direction (create a select from a zext/sext). There
+ // is also a target-independent combine here in DAGCombiner in the other
+ // direction for (select Cond, -1, 0) when the condition is not i1.
+ assert(CondVT == MVT::i1 && !LegalOperations);
+
+ // select Cond, 1, 0 --> zext (Cond)
+ if (C1->isOne() && C2->isZero())
+ return DAG.getZExtOrTrunc(Cond, DL, VT);
+
+ // select Cond, -1, 0 --> sext (Cond)
+ if (C1->isAllOnes() && C2->isZero())
+ return DAG.getSExtOrTrunc(Cond, DL, VT);
+
+ // select Cond, 0, 1 --> zext (!Cond)
+ if (C1->isZero() && C2->isOne()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
+ return NotCond;
+ }
+
+ // select Cond, 0, -1 --> sext (!Cond)
+ if (C1->isZero() && C2->isAllOnes()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
+ return NotCond;
+ }
+
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
+ if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
+ return SDValue();
+
+ // For any constants that differ by 1, we can transform the select into
+ // an extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
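+ // For example: select Cond, 5, 4 --> add (zext Cond), 4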
+ if (C1Val - 1 == C2Val) {
+ Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (C1Val + 1 == C2Val) {
+ Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
+ Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC =
+ DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+ }
+
+ // select Cond, -1, C --> or (sext Cond), C
+ if (C1->isAllOnes()) {
+ Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
+ }
+
+ // select Cond, C, -1 --> or (sext (not Cond)), C
+ if (C2->isAllOnes()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
+ return SDValue();
+}
+
+static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
+ "Expected a (v)select");
+ SDValue Cond = N->getOperand(0);
+ SDValue T = N->getOperand(1), F = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
+ return SDValue();
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
+ SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
+ SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
+ }
+
+ return SDValue();
+}
+
+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Cond0 = N0.getOperand(0);
+ SDValue Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (VT != Cond0.getValueType())
+ return SDValue();
+
+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
+ // compare is inverted from that pattern ("Cond0 s> -1").
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
+ ; // This is the pattern we are looking for.
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
+ std::swap(N1, N2);
+ else
+ return SDValue();
+
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ if (isNullOrNullSplat(N2)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ }
+
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ if (isAllOnesOrAllOnesSplat(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ }
+
+ // If we have to invert the sign bit mask, only do that transform if the
+ // target has a bitwise 'and not' instruction (the invert is free).
+ // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ SDValue Not = DAG.getNOT(DL, Sra, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ }
+
+ // TODO: There's another pattern in this family, but it may require
+ // implementing hasOrNot() to check for profitability:
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+ SDLoc DL(N);
+ SDNodeFlags Flags = N->getFlags();
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ return V;
+
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
+ }
+
+ if (SDValue V = foldSelectOfConstants(N))
+ return V;
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ if (VT0 == MVT::i1) {
+ // The code in this block deals with the following 2 equivalences:
+ // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+ // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+ // The target can specify its preferred form with the
+ // shouldNormalizeToSelectSequence() callback. However, we always transform
+ // to the right if we find that the inner select already exists in the DAG,
+ // and we always transform to the left side if we know that we can further
+ // optimize the combination of the conditions.
+ bool normalizeToSequence =
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
+ InnerSelect, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
+ // Create the actual and node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+ N2, Flags);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
+ N2, Flags);
+ }
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
+ // Create the actual or node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ N2_2, Flags);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
+ N2_2, Flags);
+ }
+ }
+ }
+
+ // Fold selects based on a setcc into other things, such as min/max/abs.
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ // select (fcmp lt x, y), x, y -> fminnum x, y
+ // select (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care what happens if either operand is a NaN.
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
+ return FMinMax;
+
+ // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+ // This is conservatively limited to pre-legal-operations to give targets
+ // a chance to reverse the transform if they want to do that. Also, it is
+ // unlikely that the pattern would be formed late, so it's probably not
+ // worth going through the other checks.
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+ CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+ N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+ auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+ auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+ if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+ // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+ // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+ //
+ // The IR equivalent of this transform would have this form:
+ // %a = add %x, C
+ // %c = icmp ugt %x, ~C
+ // %r = select %c, -1, %a
+ // =>
+ // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
+ // %u0 = extractvalue %u, 0
+ // %u1 = extractvalue %u, 1
+ // %r = select %u1, -1, %u0
+ SDVTList VTs = DAG.getVTList(VT, VT0);
+ SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+ return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+ }
+ }
+
+ if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
+ (!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+ // Any flags available in a select/setcc fold will be on the setcc as they
+ // migrated from fcmp
+ Flags = N0->getFlags();
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+ N2, N0.getOperand(2));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
+
+ if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
+ return NewSel;
+ }
+
+ if (!VT.isVector())
+ if (SDValue BinOp = foldSelectOfBinops(N))
+ return BinOp;
+
+ return SDValue();
+}
+
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
+ // binary ones here.
+ if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
+ return SDValue();
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Skip BV elements until we find one that's not an UNDEF.
+ // After we find a non-UNDEF element, keep looping until we get to half the
+ // length of the BV and see if all the non-undef nodes are the same.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->isUndef())
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->isUndef())
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, DL, VT,
+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
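+// If the gather/scatter index is an unscaled ADD of a splatted scalar and a
+// vector, fold the splatted value into the (scalar) base pointer so the
+// remaining index is simpler. For example, with a null base pointer:
+//   BasePtr = null, Index = (add (splat_vector X), Y)
+//     --> BasePtr = X, Index = Y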
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (Index.getOpcode() != ISD::ADD)
+ return false;
+
+ // Only perform the transformation when existing operands can be reused.
+ if (IndexIsScaled)
+ return false;
+
+ if (!isNullConstant(BasePtr) && !Index.hasOneUse())
+ return false;
+
+ EVT VT = BasePtr.getValueType();
+ if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
+ SplatVal && SplatVal.getValueType() == VT) {
+ if (isNullConstant(BasePtr))
+ BasePtr = SplatVal;
+ else
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = Index.getOperand(1);
+ return true;
+ }
+ if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
+ SplatVal && SplatVal.getValueType() == VT) {
+ if (isNullConstant(BasePtr))
+ BasePtr = SplatVal;
+ else
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = Index.getOperand(0);
+ return true;
+ }
+ return false;
+}
+
+// Fold sext/zext of index into index type.
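+// For example, a gather whose index is (zero_extend X) can instead use X
+// directly with an unsigned index type, when the target reports that the
+// extend is not needed.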
+bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // It's always safe to look through zero extends.
+ if (Index.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ IndexType = ISD::UNSIGNED_SCALED;
+ Index = Op;
+ return true;
+ }
+ if (ISD::isIndexTypeSigned(IndexType)) {
+ IndexType = ISD::UNSIGNED_SCALED;
+ return true;
+ }
+ }
+
+ // It's only safe to look through sign extends when Index is signed.
+ if (Index.getOpcode() == ISD::SIGN_EXTEND &&
+ ISD::isIndexTypeSigned(IndexType)) {
+ SDValue Op = Index.getOperand(0);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
+ VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue VL = MSC->getVectorLength();
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ SDLoc DL(N);
+
+ // Zap scatters with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType);
+ }
+
+ if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ SDLoc DL(N);
+
+ // Zap scatters with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
+ }
+
+ if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ SDValue Chain = MST->getChain();
+ SDValue Value = MST->getValue();
+ SDValue Ptr = MST->getBasePtr();
+ SDLoc DL(N);
+
+ // Zap masked stores with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ // Remove a masked store if base pointers and masks are equal.
+ if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
+ if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
+ MST1->isSimple() && MST1->getBasePtr() == Ptr &&
+ !MST->getBasePtr().isUndef() &&
+ ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
+ MST1->getMemoryVT().getStoreSize()) ||
+ ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
+ TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
+ MST->getMemoryVT().getStoreSize())) {
+ CombineTo(MST1, MST1->getChain());
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // If this is a masked store with an all-ones mask, we can use an unmasked
+ // store.
+ // FIXME: Can we do this for indexed, compressing, or truncating stores?
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
+ !MST->isCompressingStore() && !MST->isTruncatingStore())
+ return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
+ MST->getBasePtr(), MST->getPointerInfo(),
+ MST->getOriginalAlign(), MachineMemOperand::MOStore,
+ MST->getAAInfo());
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ if (MST->isTruncatingStore() && MST->isUnindexed() &&
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ MST->getMemoryVT().getScalarSizeInBits());
+
+ // See if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (N is deleted). SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit the
+ // Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // If this is a TRUNC followed by a masked store, fold this into a masked
+ // truncating store. We can do this even if this is already a masked
+ // truncstore.
+ // TODO: Try combining to a masked compress store if possible.
+ if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
+ MST->isUnindexed() && !MST->isCompressingStore() &&
+ TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
+ MST->getMemoryVT(), LegalOperations)) {
+ auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
+ Value.getOperand(0).getValueType());
+ return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ MST->getOffset(), Mask, MST->getMemoryVT(),
+ MST->getMemOperand(), MST->getAddressingMode(),
+ /*IsTruncating=*/true);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
+ VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue VL = MGT->getVectorLength();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
+ SDLoc DL(N);
+
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getGatherVP(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType);
+ }
+
+ if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
+ SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getGatherVP(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMGATHER(SDNode *N) {
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue BasePtr = MGT->getBasePtr();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
+ SDLoc DL(N);
+
+ // Zap gathers with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, PassThru, MGT->getChain());
+
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
+ }
+
+ if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+ MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ SDValue Mask = MLD->getMask();
+ SDLoc DL(N);
+
+ // Zap masked loads with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+ // If this is a masked load with an all ones mask, we can use an unmasked load.
+ // FIXME: Can we do this for indexed, expanding, or extending loads?
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
+ !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDValue NewLd = DAG.getLoad(
+ N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
+ MLD->getPointerInfo(), MLD->getOriginalAlign(),
+ MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// A vector select of 2 constant vectors can be simplified to math/logic to
+/// avoid a variable select instruction and possibly avoid constant loads.
+SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
+ !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
+ return SDValue();
+
+ // Check if we can use the condition value to increment/decrement a single
+ // constant value. This simplifies a select to an add and removes a constant
+ // load/materialization from the general case.
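+ // For example (illustrative constants): vselect Cond, <4, 7>, <3, 6> has
+ // C1 == C2 + 1 in every lane, so it can later become add (zext Cond), <3, 6>.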
+ bool AllAddOne = true;
+ bool AllSubOne = true;
+ unsigned Elts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != Elts; ++i) {
+ SDValue N1Elt = N1.getOperand(i);
+ SDValue N2Elt = N2.getOperand(i);
+ if (N1Elt.isUndef() || N2Elt.isUndef())
+ continue;
+ if (N1Elt.getValueType() != N2Elt.getValueType())
+ continue;
+
+ const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
+ const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
+ if (C1 != C2 + 1)
+ AllAddOne = false;
+ if (C1 != C2 - 1)
+ AllSubOne = false;
+ }
+
+ // Further simplifications for the extra-special cases where the constants are
+ // all 0 or all -1 should be implemented as folds of these patterns.
+ SDLoc DL(N);
+ if (AllAddOne || AllSubOne) {
+ // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
+ // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
+ auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
+ }
+
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
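+ // e.g. (illustrative): vselect Cond, splat(8), splat(0)
+ //        --> shl (zext Cond), splat(3)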
+ APInt Pow2C;
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
+ // The general case for select-of-constants:
+ // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
+ // ...but that only makes sense if a vselect is slower than 2 logic ops, so
+ // leave that to a machine-specific pass.
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ return V;
+
+ // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
+ return DAG.getSelect(DL, VT, F, N2, N1);
+
+ // Canonicalize integer abs.
+ // vselect (setg[te] X, 0), X, -X ->
+ // vselect (setgt X, -1), X, -X ->
+ // vselect (setl[te] X, 0), -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
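+ // Worked example (illustrative, i32): for X = -5, Y = sra(X, 31) = -1, so
+ // add(X, Y) = -6 and xor(-6, -1) = 5 = |X|; for X >= 0, Y = 0 and the
+ // expression reduces to X.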
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ bool isAbs = false;
+ bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+ N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+ else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+ N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+ if (isAbs) {
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ return DAG.getNode(ISD::ABS, DL, VT, LHS);
+
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DL, getShiftAmountTy(VT)));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+ }
+
+ // vselect x, y (fcmp lt x, y) -> fminnum x, y
+ // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
+ return FMinMax;
+ }
+
+ if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
+ if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
+
+ // If this select has a condition (setcc) with narrower operands than the
+ // select, try to widen the compare to match the select width.
+ // TODO: This should be extended to handle any constant.
+ // TODO: This could be extended to handle non-loading patterns, but that
+ // requires thorough testing to avoid regressions.
+ if (isNullOrNullSplat(RHS)) {
+ EVT NarrowVT = LHS.getValueType();
+ EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
+ EVT SetCCVT = getSetCCResultType(LHS.getValueType());
+ unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
+ unsigned WideWidth = WideVT.getScalarSizeInBits();
+ bool IsSigned = isSignedIntSetCC(CC);
+ auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
+ SetCCWidth != 1 && SetCCWidth < WideWidth &&
+ TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
+ // Both compare operands can be widened for free. The LHS can use an
+ // extended load, and the RHS is a constant:
+ // vselect (ext (setcc load(X), C)), N1, N2 -->
+ // vselect (setcc extload(X), C'), N1, N2
+ auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
+ SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
+ EVT WideSetCCVT = getSetCCResultType(WideVT);
+ SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
+ return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
+ }
+ }
+
+ // Match VSELECTs with absolute difference patterns.
+ // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
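+ // (ABDS/ABDU compute the signed/unsigned absolute difference |a - b|, so
+ // both select arms collapse into a single node when the target supports it.)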
+ if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
+ N1.getOperand(0) == N2.getOperand(1) &&
+ N1.getOperand(1) == N2.getOperand(0)) {
+ bool IsSigned = isSignedIntSetCC(CC);
+ unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
+ if (hasOperation(ABDOpc, VT)) {
+ switch (CC) {
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (hasOperation(ISD::UADDSAT, VT)) {
+ // Check if one of the arms of the VSELECT is a vector with all bits set.
+ // If it's on the left side, invert the predicate to simplify the logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getOpcode() == ISD::ADD) {
+ SDValue CondLHS = LHS, CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (SatCC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ SatCC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> uaddsat x, y
+ // x+y >= x ? x+y : ~0 --> uaddsat x, y
+ if (SatCC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
+ (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
+ OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x >= ~C ? x+C : ~0 --> uaddsat x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (SatCC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Match VSELECTs into sub with unsigned saturation.
+ if (hasOperation(ISD::USUBSAT, VT)) {
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on
+ // the left side, invert the predicate to simplify the logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
+ Other = N1;
+ }
+
+ // zext(x) >= y ? trunc(zext(x) - y) : 0
+ // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
+ // zext(x) > y ? trunc(zext(x) - y) : 0
+ // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
+ if (Other && Other.getOpcode() == ISD::TRUNCATE &&
+ Other.getOperand(0).getOpcode() == ISD::SUB &&
+ (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
+ SDValue OpLHS = Other.getOperand(0).getOperand(0);
+ SDValue OpRHS = Other.getOperand(0).getOperand(1);
+ if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
+ if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
+ DAG, DL))
+ return R;
+ }
+
+ if (Other && Other.getNumOperands() == 2) {
+ SDValue CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ if (OpLHS == LHS) {
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> usubsat x, y
+ // x > y ? x-y : 0 --> usubsat x, y
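+ // e.g. (illustrative, i8 lanes): x = 3, y = 10 gives 3 >= 10 -> false, so
+ // the select yields 0, which matches usubsat(3, 10) = 0.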
+ if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
+ Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+
+ if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
+ OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
+ if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
+ CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > C-1 ? x+-C : 0 --> usubsat x, C
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return (!Op && !Cond) ||
+ (Op && Cond &&
+ Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
+ };
+ if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+ /*AllowUndefs*/ true)) {
+ OpRHS = DAG.getNegative(OpRHS, DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use computeKnownBits to
+ // determine whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> usubsat x, C
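+ // e.g. (illustrative, i8 lanes): with C = 0x80, clearing the sign bit of a
+ // negative x is the same as subtracting 128, while a non-negative x yields
+ // 0, which is exactly usubsat(x, 0x80).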
+ APInt SplatValue;
+ if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
+ ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
+ ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
+ SplatValue.isSignMask()) {
+ // Note that we have to rebuild the RHS constant here to
+ // ensure we don't rely on particular values of undef lanes.
+ OpRHS = DAG.getConstant(SplatValue, DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // Fold (vselect all_ones, N1, N2) -> N1
+ if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
+ return N1;
+ // Fold (vselect all_zeros, N1, N2) -> N2
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
+ return N2;
+
+ // The ConvertSelectToConcatVector function assumes that both of the above
+ // checks for (vselect (build_vector all{ones,zeros}) ...) have already been
+ // made and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
+ return CV;
+ }
+
+ if (SDValue V = foldVSelectOfConstants(N))
+ return V;
+
+ if (hasOperation(ISD::SRA, VT))
+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
+ return V;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // select_cc bool, 0, x, y, seteq -> select bool, y, x
+ if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
+ isNullConstant(N1))
+ return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
+
+ // Determine if the condition we're dealing with is constant
+ if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
+ CC, SDLoc(N), false)) {
+ AddToWorklist(SCC.getNode());
+
+ // cond always true -> true val
+ // cond always false -> false val
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
+ return SCCC->isZero() ? N3 : N2;
+
+ // When the condition is UNDEF, just return the first operand. This is
+ // consistent with DAG creation: no setcc node is created in this case.
+ if (SCC->isUndef())
+ return N2;
+
+ // Fold to a simpler select_cc
+ if (SCC.getOpcode() == ISD::SETCC) {
+ SDValue SelectOp = DAG.getNode(
+ ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SelectOp->setFlags(SCC->getFlags());
+ return SelectOp;
+ }
+ }
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ // setcc is very commonly used as an argument to brcond. This pattern
+ // also lends itself to numerous combines and, as a result, it is desirable
+ // to keep the argument to a brcond as a setcc for as long as possible.
+ bool PreferSetCC =
+ N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
+
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+
+ SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
+ SDLoc(N), !PreferSetCC);
+
+ if (!Combined)
+ return SDValue();
+
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
+
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
+
+ if (NewSetCC)
+ return NewSetCC;
+ }
+
+ return Combined;
+}
+
+SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (isNullConstant(Carry))
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Check whether N satisfies:
+///   N has a single use.
+///   N is a load.
+///   The load is compatible with ExtOpcode, meaning that if the load has an
+///   explicit zero/sign extension, ExtOpcode must have the same extension;
+///   otherwise any ExtOpcode is accepted.
+static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
+ if (!N.hasOneUse())
+ return false;
+
+ if (!isa<LoadSDNode>(N))
+ return false;
+
+ LoadSDNode *Load = cast<LoadSDNode>(N);
+ ISD::LoadExtType LoadExt = Load->getExtensionType();
+ if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
+ return true;
+
+ // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
+ // extension.
+ if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
+ (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
+ return false;
+
+ return true;
+}
+
+/// Fold
+/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
+/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
+/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG,
+ CombineLevel Level) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND) &&
+ "Expected EXTEND dag node in input!");
+
+ if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
+ !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
+ return SDValue();
+
+ auto ExtLoadOpcode = ISD::EXTLOAD;
+ if (Opcode == ISD::SIGN_EXTEND)
+ ExtLoadOpcode = ISD::SEXTLOAD;
+ else if (Opcode == ISD::ZERO_EXTEND)
+ ExtLoadOpcode = ISD::ZEXTLOAD;
+
+ // An illegal VSELECT may fail instruction selection if it is created after
+ // legalization (DAG Combine2), so conservatively check the OperationAction.
+ LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
+ LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
+ if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
+ !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
+ (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
+ TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
+ return SDValue();
+
+ SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
+ SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
+ return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
+ "Expected EXTEND dag node in input!");
+
+ // fold (sext c1) -> c1
+ // fold (zext c1) -> c1
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(Opcode, DL, VT, N0);
+
+ // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+ // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ if (N0->getOpcode() == ISD::SELECT) {
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+ (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+ // For any_extend, choose sign extension of the constants to allow a
+ // possible further transform to sign_extend_inreg, i.e.:
+ //
+ // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+ // t2: i64 = any_extend t1
+ // -->
+ // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+ // -->
+ // t4: i64 = sign_extend_inreg t3
+ unsigned FoldOpc = Opcode;
+ if (FoldOpc == ISD::ANY_EXTEND)
+ FoldOpc = ISD::SIGN_EXTEND;
+ return DAG.getSelect(DL, VT, N0->getOperand(0),
+ DAG.getNode(FoldOpc, DL, VT, Op1),
+ DAG.getNode(FoldOpc, DL, VT, Op2));
+ }
+ }
+
+ // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
+ EVT SVT = VT.getScalarType();
+ if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+ return SDValue();
+
+ // We can fold this node into a build_vector.
+ unsigned VTBits = SVT.getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getUNDEF(SVT));
+ else
+ Elts.push_back(DAG.getConstant(0, DL, SVT));
+ continue;
+ }
+
+ SDLoc DL(Op);
+ // Get the constant value and, if needed, truncate it to the size of the type.
+ // Nodes like build_vector might have constants wider than the scalar type.
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
+ else
+ Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
+ }
+
+ return DAG.getBuildVector(VT, DL, Elts);
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVectorImpl<SDNode *> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
+ for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
+ ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ SDLoc DL(ExtLoad);
+ for (SDNode *SetCC : SetCCs) {
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == OrigLoad)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+ }
+}
+
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) &&
+ "Unexpected node type (not an extend)!");
+
+ // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+ // For example, on a target with legal v4i32, but illegal v8i32, turn:
+ // (v8i32 (sext (v8i16 (load x))))
+ // into:
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))
+ // Where uses of the original load, i.e.:
+ // (v8i16 (load x))
+ // are replaced with:
+ // (v8i16 (truncate
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))))
+ //
+ // This combine is only applicable to illegal, but splittable, vectors.
+ // All legal types, and illegal non-vector types, are handled elsewhere.
+ // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+ //
+ if (N0->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
+ !N0.hasOneUse() || !LN0->isSimple() ||
+ !DstVT.isVector() || !DstVT.isPow2VectorType() ||
+ !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
+ return SDValue();
+
+ ISD::LoadExtType ExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+
+ // Try to split the vector types to get down to legal types.
+ EVT SplitSrcVT = SrcVT;
+ EVT SplitDstVT = DstVT;
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ SplitSrcVT.getVectorNumElements() > 1) {
+ SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+ SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+ }
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ return SDValue();
+
+ assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
+
+ SDLoc DL(N);
+ const unsigned NumSplits =
+ DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+ const unsigned Stride = SplitSrcVT.getStoreSize();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+
+ SDValue BasePtr = LN0->getBasePtr();
+ for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
+ const unsigned Offset = Idx * Stride;
+ const Align Align = commonAlignment(LN0->getAlign(), Offset);
+
+ SDValue SplitLoad = DAG.getExtLoad(
+ ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
+
+ Loads.push_back(SplitLoad.getValue(0));
+ Chains.push_back(SplitLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+ // Simplify TF.
+ AddToWorklist(NewChain.getNode());
+
+ CombineTo(N, NewValue);
+
+ // Replace uses of the original load (before extension)
+ // with a truncate of the concatenated sextloaded vectors.
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
+ CombineTo(N0.getNode(), Trunc, NewChain);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+ assert(N->getOpcode() == ISD::ZERO_EXTEND);
+ EVT VT = N->getValueType(0);
+ EVT OrigVT = N->getOperand(0).getValueType();
+ if (TLI.isZExtFree(OrigVT, VT))
+ return SDValue();
+
+ // and/or/xor
+ SDValue N0 = N->getOperand(0);
+ if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
+ N0.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+ return SDValue();
+
+ // shl/shr
+ SDValue N1 = N0->getOperand(0);
+ if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+ N1.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+ return SDValue();
+
+ // load
+ if (!isa<LoadSDNode>(N1.getOperand(0)))
+ return SDValue();
+ LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
+ EVT MemVT = Load->getMemoryVT();
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+ return SDValue();
+
+ // If the shift op is SHL, the logic op must be AND, otherwise the result
+ // will be wrong.
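+ // (Rationale: a wide SHL keeps bits that the original narrow SHL would have
+ // shifted out; only an AND with the zero-extended constant clears those
+ // extra high bits again, so OR/XOR would change the result.)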
+ if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI))
+ return SDValue();
+
+ // Actually do the transformation.
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getMemoryVT(), Load->getMemOperand());
+
+ SDLoc DL1(N1);
+ SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+ N1.getOperand(1));
+
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
+ SDLoc DL0(N0);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+ DAG.getConstant(Mask, DL0, VT));
+
+ ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+ CombineTo(N, And);
+ if (SDValue(Load, 0).hasOneUse()) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+ Load->getValueType(0), ExtLoad);
+ CombineTo(Load, Trunc, ExtLoad.getValue(1));
+ }
+
+ // N0 is dead at this point.
+ recursivelyDeleteUnusedNodes(N0.getNode());
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+/// If we're narrowing or widening the result of a vector select and the final
+/// size is the same size as a setcc (compare) feeding the select, then try to
+/// apply the cast operation to the select's operands because matching vector
+/// sizes for a select condition and other operands should be more efficient.
+SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
+ unsigned CastOpcode = Cast->getOpcode();
+ assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
+ CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
+ CastOpcode == ISD::FP_ROUND) &&
+ "Unexpected opcode for vector select narrowing/widening");
+
+ // We only do this transform before legal ops because the pattern may be
+ // obfuscated by target-specific operations after legalization. Do not create
+ // an illegal select op, however, because that may be difficult to lower.
+ EVT VT = Cast->getValueType(0);
+ if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ SDValue VSel = Cast->getOperand(0);
+ if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
+ VSel.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Does the setcc have the same vector size as the casted select?
+ SDValue SetCC = VSel.getOperand(0);
+ EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
+ if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
+ SDValue A = VSel.getOperand(1);
+ SDValue B = VSel.getOperand(2);
+ SDValue CastA, CastB;
+ SDLoc DL(Cast);
+ if (CastOpcode == ISD::FP_ROUND) {
+ // FP_ROUND (fptrunc) has an extra flag operand to pass along.
+ CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
+ CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
+ } else {
+ CastA = DAG.getNode(CastOpcode, DL, VT, A);
+ CastB = DAG.getNode(CastOpcode, DL, VT, B);
+ }
+ return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
+}
+
+// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N,
+ SDValue N0, ISD::LoadExtType ExtLoadType) {
+ SDNode *N0Node = N0.getNode();
+ bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
+ : ISD::isZEXTLoad(N0Node);
+ if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
+ !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((LegalOperations || !LN0->isSimple() ||
+ VT.isVector()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
+ return SDValue();
+
+ SDValue ExtLoad =
+ DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
+ Combiner.CombineTo(N, ExtLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ if (LN0->use_empty())
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// Only generate vector extloads when 1) they're legal, and 2) they are
+// deemed desirable by the target.
+static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ // TODO: The isFixedLengthVector() check should be removed, with any negative
+ // effects on code generation being addressed through the target's
+ // implementation of isVectorLoadExtDesirable().
+ if (!ISD::isNON_EXTLoad(N0.getNode()) ||
+ !ISD::isUNINDEXEDLoad(N0.getNode()) ||
+ ((LegalOperations || VT.isFixedLengthVector() ||
+ !cast<LoadSDNode>(N0)->isSimple()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
+ return {};
+
+ bool DoXform = true;
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
+ if (!DoXform)
+ return {};
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ Combiner.CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
+ } else {
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
+ Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT VT,
+ SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
+ if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
+ return SDValue();
+
+ if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SDLoc dl(Ld);
+ SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
+ ExtLoadType, Ld->isExpandingLoad());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
+ return NewLoad;
+}
+
+static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
+ bool LegalOperations) {
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
+
+ SDValue SetCC = N->getOperand(0);
+ if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
+ !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDValue X = SetCC.getOperand(0);
+ SDValue Ones = SetCC.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT XVT = X.getValueType();
+ // setge X, C is canonicalized to setgt, so we do not need to match that
+ // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
+ // not require the 'not' op.
+ if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
+ // Invert and smear/shift the sign bit:
+ // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
+ // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
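+ // Worked example (illustrative, i8): X = 5 gives setgt(5, -1) = true, and
+ // sra(not 5, 7) = sra(0xFA, 7) = -1 = sext(true); X = -3 gives false, and
+ // sra(not -3, 7) = sra(2, 7) = 0.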
+ SDLoc DL(N);
+ unsigned ShCt = VT.getSizeInBits() - 1;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
+ auto ShiftOpcode =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT N00VT = N00.getValueType();
+ SDLoc DL(N);
+
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
+
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // the same size as the compared operands. Try to optimize sext(setcc())
+ // if this is the case.
+ if (VT.isVector() && !LegalOperations &&
+ TLI.getBooleanContents(N00VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ EVT SVT = getSetCCResultType(N00VT);
+
+ // If we already have the desired type, don't change it.
+ if (SVT != N0.getValueType()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ }
+ }
+
+ // Try to eliminate the sext of a setcc by zexting the compare operands.
+ if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
+ !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
+ bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
+ unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ // We have an unsupported narrow vector compare op that would be legal
+ // if extended to the destination type. See if the compare operands
+ // can be freely extended to the destination type.
+ auto IsFreeToExtend = [&](SDValue V) {
+ if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
+ return true;
+ // Match a simple, non-extended load that can be converted to a
+ // legal {z/s}ext-load.
+ // TODO: Allow widening of an existing {z/s}ext-load?
+ if (!(ISD::isNON_EXTLoad(V.getNode()) &&
+ ISD::isUNINDEXEDLoad(V.getNode()) &&
+ cast<LoadSDNode>(V)->isSimple() &&
+ TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
+ return false;
+
+ // Non-chain users of this value must either be the setcc in this
+ // sequence or extends that can be folded into the new {z/s}ext-load.
+ for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // Skip uses of the chain and the setcc.
+ SDNode *User = *UI;
+ if (UI.getUse().getResNo() != 0 || User == N0.getNode())
+ continue;
+ // Extra users must have exactly the same cast we are about to create.
+ // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
+ // is enhanced similarly.
+ if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
+ return false;
+ }
+ return true;
+ };
+
+ if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
+ SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
+ SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
+ return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
+ }
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
+ // Here, T can be 1 or -1, depending on the type of the setcc and
+ // getBooleanContents().
+ unsigned SetCCWidth = N0.getScalarValueSizeInBits();
+
+ // To determine the "true" side of the select, we need to know the high bit
+ // of the value returned by the setcc if it evaluates to true.
+ // If the type of the setcc is i1, then the true case of the select is just
+ // sext(i1 1), that is, -1.
+ // If the type of the setcc is larger (say, i8) then the value of the high
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
+ // of the appropriate width.
+ SDValue ExtTrueVal = (SetCCWidth == 1)
+ ? DAG.getAllOnesConstant(DL, VT)
+ : DAG.getBoolConstant(true, DL, VT, N00VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
+ return SCC;
+
+ if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+ // TODO: We should not do this transform at all without a target hook
+ // because a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
+ return FoldedVOp;
+
+ // sext(undef) = 0 because the top bit will all be the same.
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
+
+ // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
+
+ // fold (sext (sext_inreg x)) -> (sext (trunc x))
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N00 = N0.getOperand(0);
+ EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
+ if (N00.getOpcode() == ISD::TRUNCATE &&
+ (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
+ SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getScalarValueSizeInBits();
+ unsigned MidBits = N0.getScalarValueSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, the trunc/sext pair is a no-op and Op can be used directly.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // Try to simplify (sext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::SEXTLOAD, ISD::SIGN_EXTEND))
+ return foldedExt;
+
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
+ ISD::SIGN_EXTEND))
+ return foldedExt;
+
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
+ // Try to simplify (sext (sextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
+ return foldedExt;
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
+ SmallVector<SDNode*, 4> SetCCs;
+ bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
+ APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
+ CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
+ if (SDValue V = foldSextSetcc(N))
+ return V;
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ // Eliminate this sign extend by doing a negation in the destination type:
+ // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isNullOrNullSplat(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
+ return DAG.getNegative(Zext, DL, VT);
+ }
+ // Eliminate this sign extend by doing a decrement in the destination type:
+ // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
+ // fold sext (not i1 X) -> add (zext i1 X), -1
+ // TODO: This could be extended to handle bool vectors.
+ if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
+ (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegal(ISD::ADD, VT)))) {
+ // If we can eliminate the 'not', the sext form should be better
+ if (SDValue NewXor = visitXOR(N0.getNode())) {
+ // Returning N0 is a form of in-visit replacement that may have
+ // invalidated N0.
+ if (NewXor.getNode() == N0.getNode()) {
+ // Return SDValue here as the xor should have already been replaced in
+ // this sext.
+ return SDValue();
+ }
+
+ // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
+ }
+
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
+ return Res;
+
+ return SDValue();
+}
+
+/// Given an extending node with a pop-count operand, if the target does not
+/// support a pop-count in the narrow source type but does support it in the
+/// destination type, widen the pop-count to the destination type.
+static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
+ assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
+ Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
+
+ SDValue CtPop = Extend->getOperand(0);
+ if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
+ return SDValue();
+
+ EVT VT = Extend->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
+ !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
+ return SDValue();
+
+ // zext (ctpop X) --> ctpop (zext X)
+ SDLoc DL(Extend);
+ SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
+}
+
+// If we have (zext (abs X)) where X is a type that will be promoted by type
+// legalization, convert to (abs (sext X)). But don't extend past a legal type.
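+// Illustrative sketch (assuming i8 is promoted to i32 on the target):
+//   (zext i32 (abs i8 X)) --> (abs i32 (sext i8 X to i32))
+// with a final zext/trunc if the destination type differs from i32.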
+static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
+ assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
+
+ EVT VT = Extend->getValueType(0);
+ if (VT.isVector())
+ return SDValue();
+
+ SDValue Abs = Extend->getOperand(0);
+ if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
+ return SDValue();
+
+ EVT AbsVT = Abs.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
+ TargetLowering::TypePromoteInteger)
+ return SDValue();
+
+ EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
+
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
+ SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
+ return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
+ return FoldedVOp;
+
+ // zext(undef) = 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+
+ // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ SDValue Op;
+ KnownBits Known;
+ if (isTruncateOf(DAG, N0, Op, Known)) {
+ APInt TruncatedBits =
+ (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
+ APInt(Op.getScalarValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getScalarValueSizeInBits(),
+ N0.getScalarValueSizeInBits(),
+ std::min(Op.getScalarValueSizeInBits(),
+ VT.getScalarSizeInBits()));
+ if (TruncatedBits.isSubsetOf(Known.Zero))
+ return DAG.getZExtOrTrunc(Op, DL, VT);
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+ // Try to mask before the extension to avoid having to generate a larger mask,
+ // possibly over several sub-vectors.
+ if (SrcVT.bitsLT(VT) && VT.isVector()) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
+ AddToWorklist(Op.getNode());
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
+ // Transfer the debug info; the new node is equivalent to N0.
+ DAG.transferDbgValues(N0, ZExtOrTrunc);
+ return ZExtOrTrunc;
+ }
+ }
+
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
+ AddToWorklist(Op.getNode());
+ SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
+ // We may safely transfer the debug info describing the truncate node over
+ // to the equivalent and operation.
+ DAG.transferDbgValues(N0, And);
+ return And;
+ }
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
+ }
+
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
+
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
+ ISD::ZERO_EXTEND))
+ return foldedExt;
+
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT;
+ if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
+ DoXform = false;
+ }
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
+ CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+ // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+ if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+ return ZExtLoad;
+
+ // Try to simplify (zext (zextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
+ return foldedExt;
+
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
+
+ // Only do this before legalize for now.
+ if (!LegalOperations && VT.isVector() &&
+ N0.getValueType().getVectorElementType() == MVT::i1) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+ // We know that the # elements of the result is the same as the #
+ // elements of the compare (and the # elements of the compare result for
+ // that matter). Check to see that they are the same size. If so, we know
+ // that the element size of the extended result matches the element size
+ // of the compare operands.
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
+ // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
+ SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
+ }
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/any extend followed by zext_in_reg.
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
+ N0.getValueType());
+ }
+
+ // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
+ EVT N0VT = N0.getValueType();
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getBoolConstant(true, DL, N0VT, N00VT),
+ DAG.getBoolConstant(false, DL, N0VT, N00VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
+ }
+
+ // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ !TLI.isZExtFree(N0, VT)) {
+ SDValue ShVal = N0.getOperand(0);
+ SDValue ShAmt = N0.getOperand(1);
+ if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
+ if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SHL) {
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ // TODO: Add MaskedValueIsZero check.
+ unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
+ ShVal.getOperand(0).getValueSizeInBits();
+ if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
+ return SDValue();
+ }
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
+ }
+ }
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
+ if (SDValue V = widenAbs(N, DAG))
+ return V;
+
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
+ return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // aext(undef) = undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
+ // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDLoc DL(N);
+ SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
+ assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
+ return DAG.getNode(ISD::AND, DL, VT, X, Y);
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction, so attempt to fold to zext instead.
+ if (VT.isVector()) {
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
+ } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ bool DoXform = true;
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform =
+ ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ N0.getValueType(), LN0->getMemOperand());
+ ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = N0.hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
+ } else {
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ ISD::LoadExtType ExtType = LN0->getExtensionType();
+ EVT MemVT = LN0->getMemoryVT();
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
+
+ // For vectors:
+ // aext(setcc) -> vsetcc
+ // aext(setcc) -> truncate(vsetcc)
+ // aext(setcc) -> aext(vsetcc)
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+ // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the extended result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N00VT.getSizeInBits())
+ return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/any extend
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
+ }
+
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
+ return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAssertExt(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT AssertVT = cast<VTSDNode>(N1)->getVT();
+
+ // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
+ if (N0.getOpcode() == Opcode &&
+ AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ return N0;
+
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == Opcode) {
+ // We have an assert, truncate, assert sandwich. Make one stronger assert
+ // by asserting on the smallest asserted type to the larger source type.
+ // This eliminates the later assert:
+ // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
+ // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+ SDLoc DL(N);
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
+ SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), MinAssertVTVal);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+
+ // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
+ // than X, just move the AssertZext in front of the truncate and drop the
+ // AssertSext.
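+ // For example:
+ //   (AssertZext (trunc (AssertSext X, i32) to i16), i8)
+ //     --> (trunc (AssertZext X, i8) to i16)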
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ Opcode == ISD::AssertZext) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ if (AssertVT.bitsLT(BigA_AssertVT)) {
+ SDLoc DL(N);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
+ SDLoc DL(N);
+
+ Align AL = cast<AssertAlignSDNode>(N)->getAlign();
+ SDValue N0 = N->getOperand(0);
+
+ // Fold (assertalign (assertalign x, AL0), AL1) ->
+ // (assertalign x, max(AL0, AL1))
+ if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
+ return DAG.getAssertAlign(DL, N0.getOperand(0),
+ std::max(AL, AAN->getAlign()));
+
+ // In rare cases there are trivial arithmetic ops in the source operands. Sink
+ // this assert down to the source operands so that those arithmetic ops can be
+ // exposed to DAG combining.
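+ // For example, when AL == 16 and the other operand is a multiple of 16:
+ //   (assertalign (add X, 32), 16) --> (add (assertalign X, 16), 32)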
+ switch (N0.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ unsigned AlignShift = Log2(AL);
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+ unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
+ unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+ if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
+ if (LHSAlignShift < AlignShift)
+ LHS = DAG.getAssertAlign(DL, LHS, AL);
+ if (RHSAlignShift < AlignShift)
+ RHS = DAG.getAssertAlign(DL, RHS, AL);
+ return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+/// If the result of a load is shifted/masked/truncated to an effectively
+/// narrower type, try to transform the load to a narrower type and/or
+/// use an extending load.
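+ /// For example, on a little-endian target,
+ ///   (i32 (truncate (srl (i64 (load p)), 32)))
+ /// can become an i32 load from p+4, assuming the narrowed access is legal
+ /// and the original load is simple.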
+SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // The ShAmt variable is used to indicate that we've consumed a right
+ // shift, i.e. we want to narrow the width of the load by not loading the
+ // ShAmt least significant bits.
+ unsigned ShAmt = 0;
+ // A special case is when the least significant bits from the load are masked
+ // away, but using an AND rather than a right shift. HasShiftedOffset is used
+ // to indicate that the narrowed load should be left-shifted ShAmt bits to get
+ // the result.
+ bool HasShiftedOffset = false;
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // extending to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
+ // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
+ // value, or it may be shifting a higher subword, half or byte into the
+ // lowest bits.
+
+ // Only handle shift with constant shift amount, and the shiftee must be a
+ // load.
+ auto *LN = dyn_cast<LoadSDNode>(N0);
+ auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!N1C || !LN)
+ return SDValue();
+ // If the shift amount is larger than the memory type then we're not
+ // accessing any of the loaded bytes.
+ ShAmt = N1C->getZExtValue();
+ uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
+ if (MemoryWidth <= ShAmt)
+ return SDValue();
+ // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
+ ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
+ // If original load is a SEXTLOAD then we can't simply replace it by a
+ // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
+ // followed by a ZEXT, but that is not handled at the moment). Similarly if
+ // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
+ if ((LN->getExtensionType() == ISD::SEXTLOAD ||
+ LN->getExtensionType() == ISD::ZEXTLOAD) &&
+ LN->getExtensionType() != ExtType)
+ return SDValue();
+ } else if (Opc == ISD::AND) {
+ // An AND with a constant mask is the same as a truncate + zero-extend.
+ auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!AndC)
+ return SDValue();
+
+ const APInt &Mask = AndC->getAPIntValue();
+ unsigned ActiveBits = 0;
+ if (Mask.isMask()) {
+ ActiveBits = Mask.countr_one();
+ } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
+ HasShiftedOffset = true;
+ } else {
+ return SDValue();
+ }
+
+ ExtType = ISD::ZEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ }
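+ // For example, (and (load i32 p), 0xffff) can be narrowed to a zextload of
+ // i16, while (and (load i32 p), 0xff00) sets ShAmt = 8 and HasShiftedOffset,
+ // so the narrowed i8 zextload is shifted back into position afterwards.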
+
+ // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
+ // a right shift. Here we redo some of those checks, to possibly adjust the
+ // ExtVT even further based on "a masking AND". We could also end up here for
+ // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
+ // need to be done here as well.
+ if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
+ SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
+ // Bail out when the SRL has more than one use. This is done for historical
+ // (undocumented) reasons. Maybe the intent was to guard the AND-masking
+ // check below? And maybe the transform is unprofitable when the SRL has
+ // multiple uses and we get here with Opc!=ISD::SRL?
+ // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
+ if (!SRL.hasOneUse())
+ return SDValue();
+
+ // Only handle shift with constant shift amount, and the shiftee must be a
+ // load.
+ auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
+ auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
+ if (!SRL1C || !LN)
+ return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ ShAmt = SRL1C->getZExtValue();
+ uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
+ if (ShAmt >= MemoryWidth)
+ return SDValue();
+
+ // Because a SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (LN->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
+ // Avoid reading outside the memory accessed by the original load (could
+ // happen if we only adjust the load base pointer by ShAmt). Instead we
+ // try to narrow the load even further. The typical scenario here is:
+ // (i64 (truncate (i96 (srl (load x), 64)))) ->
+ // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
+ if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
+ // Don't replace sextload by zextload.
+ if (ExtType == ISD::SEXTLOAD)
+ return SDValue();
+ // Narrow the load.
+ ExtType = ISD::ZEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
+ }
+
+ // If the SRL is only used by a masking AND, we may be able to adjust
+ // the ExtVT to make the AND redundant.
+ SDNode *Mask = *(SRL->use_begin());
+ if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Mask->getOperand(1))) {
+ const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
+ if (ShiftMask.isMask()) {
+ EVT MaskedVT =
+ EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
+ // If the mask is smaller, recompute the type.
+ if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
+ TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
+ ExtVT = MaskedVT;
+ }
+ }
+
+ N0 = SRL.getOperand(0);
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right), we
+ // can fold a truncate through the shift. The typical scenario is that N
+ // points at a TRUNCATE here so the attempted fold is:
+ // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
+ // ShLeftAmt will indicate how much a narrowed load should be shifted left.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it.
+ if (!isa<LoadSDNode>(N0))
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ // Reducing the width of a volatile load is illegal. For atomics, we may be
+ // able to reduce the width provided we never widen again. (see D66309)
+ if (!LN0->isSimple() ||
+ !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
+ return SDValue();
+
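+ // On big-endian targets the narrow value lives at the opposite end of the
+ // wide loaded value, so the byte offset must be computed from the most
+ // significant end rather than taken directly from ShAmt.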
+ auto AdjustBigEndianShift = [&](unsigned ShAmt) {
+ unsigned LVTStoreBits =
+ LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
+ return LVTStoreBits - EVTStoreBits - ShAmt;
+ };
+
+ // We need to adjust the pointer to the load by ShAmt bits in order to load
+ // the correct bytes.
+ unsigned PtrAdjustmentInBits =
+ DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
+
+ uint64_t PtrOff = PtrAdjustmentInBits / 8;
+ Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
+ SDLoc DL(LN0);
+ // The original load itself didn't wrap, so an offset within it doesn't.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
+ TypeSize::Fixed(PtrOff), DL, Flags);
+ AddToWorklist(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ else
+ Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
+ NewAlign, LN0->getMemOperand()->getFlags(),
+ LN0->getAAInfo());
+
+ // Replace the old load's chain with the new load's chain.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ if (ShLeftAmt >= VT.getScalarSizeInBits())
+ Result = DAG.getConstant(0, DL, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
+ }
+
+ if (HasShiftedOffset) {
+ // We're using a shifted mask, so the load now has an offset. This means
+ // the data has been loaded into lower bytes of the register than it would
+ // have been otherwise, so we need to shl the loaded data back into the
+ // correct position.
+ SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
+
+ // sext_in_reg(undef) = 0 because the top bits will all be the same.
+ if (N0.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // fold (sext_in_reg c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
+ N1);
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ if ((N00Bits <= ExtVTBits ||
+ DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
+ }
+
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
+ if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
+ SDValue N00 = N0.getOperand(0);
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ unsigned DstElts = N0.getValueType().getVectorMinNumElements();
+ unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
+ bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
+ APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
+ if ((N00Bits == ExtVTBits ||
+ (!IsZext && (N00Bits < ExtVTBits ||
+ DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
+ }
+
+ // fold (sext_in_reg (zext x)) -> (sext x)
+ // iff we are extending the source sign bit.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getScalarValueSizeInBits() == ExtVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ // If sextload is not supported by target, we can only do the combine when
+ // load has one use. Doing otherwise can block folding the extload with other
+ // extends that the target does support.
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
+ N0.hasOneUse()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), ExtVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), ExtVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
+ // ignore it if the masked load is already sign extended
+ if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
+ if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
+ Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
+ SDValue ExtMaskedLoad = DAG.getMaskedLoad(
+ VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
+ Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
+ Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
+ CombineTo(N, ExtMaskedLoad);
+ CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ if (SDValue(GN0, 0).hasOneUse() &&
+ ExtVT == GN0->getMemoryVT() &&
+ TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
+
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
+ }
+
+ // Fold (iM_signext_inreg
+ // (extract_subvector (zext|anyext|sext iN_v to _) _)
+ // from iN)
+ // -> (extract_subvector (signext iN_v to iM))
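+ // For example:
+ //   (sext_in_reg (extract_subvector (zext X:v8i8 to v8i32), 0) from i8)
+ //     --> (extract_subvector (sext X:v8i8 to v8i32), 0)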
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
+ ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
+ SDValue InnerExt = N0.getOperand(0);
+ EVT InnerExtVT = InnerExt->getValueType(0);
+ SDValue Extendee = InnerExt->getOperand(0);
+
+ if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
+ SDValue SignExtExtendee =
+ DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
+ N0.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue
+foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG,
+ bool LegalOperations) {
+ unsigned InregOpcode = N->getOpcode();
+ unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
+
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
+ Src.getValueType().getVectorElementType(),
+ VT.getVectorElementCount());
+
+ assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
+ "Expected EXTEND_VECTOR_INREG dag node in input!");
+
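+ // The overall fold is, for example:
+ //   (v4i32 zero_extend_vector_inreg (v8i16 concat_vectors X:v4i16, Y))
+ //     --> (v4i32 zero_extend X:v4i16)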
+ // Profitability check: our operand must be a one-use CONCAT_VECTORS.
+ // FIXME: one-use check may be overly restrictive
+ if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+
+ // Profitability check: we must be extending exactly one of its operands.
+ // FIXME: this is probably overly restrictive.
+ Src = Src.getOperand(0);
+ if (Src.getValueType() != SrcVT)
+ return SDValue();
+
+ if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
+ return SDValue();
+
+ return DAG.getNode(Opcode, SDLoc(N), VT, Src);
+}
+
+SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef()) {
+ // aext_vector_inreg(undef) = undef because the top bits are undefined.
+ // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
+ return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
+ ? DAG.getUNDEF(VT)
+ : DAG.getConstant(0, SDLoc(N), VT);
+ }
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
+ LegalOperations))
+ return R;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+
+ // noop truncate
+ if (SrcVT == VT)
+ return N0;
+
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (truncate c1) -> c1
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
+ return C;
+
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ // if the source is smaller than the dest, we still need an extend.
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ // if the source is larger than the dest, then we just need the truncate.
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // Try to narrow a truncate-of-sext_in_reg to the destination type:
+ // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
+ if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ N0.hasOneUse()) {
+ SDValue X = N0.getOperand(0);
+ SDValue ExtVal = N0.getOperand(1);
+ EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
+ if (ExtVT.bitsLT(VT)) {
+ SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
+ }
+ }
+
+ // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+ return SDValue();
+
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ auto EltCnt = VecTy.getVectorElementCount();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+ auto NewEltCnt = EltCnt * SizeRatio;
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
+ DAG.getBitcast(NVT, N0.getOperand(0)),
+ DAG.getVectorIdxConstant(Index, DL));
+ }
+ }
+
+ // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+ if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+ TLI.isTruncateFree(SrcVT, VT)) {
+ SDLoc SL(N0);
+ SDValue Cond = N0.getOperand(0);
+ SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ }
+ }
+
+ // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
+ if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+ TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
+ SDValue Amt = N0.getOperand(1);
+ KnownBits Known = DAG.computeKnownBits(Amt);
+ unsigned Size = VT.getScalarSizeInBits();
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ if (AmtVT != Amt.getValueType()) {
+ Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ AddToWorklist(Amt.getNode());
+ }
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
+ }
+ }
+
+ if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
+ return V;
+
+ if (SDValue ABD = foldABSToABD(N))
+ return ABD;
+
+ // Attempt to pre-truncate BUILD_VECTOR sources.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
+ // Avoid creating illegal types if running after type legalizer.
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 8> TruncOps;
+ for (const SDValue &Op : N0->op_values()) {
+ SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+ TruncOps.push_back(TruncOp);
+ }
+ return DAG.getBuildVector(VT, DL, TruncOps);
+ }
+
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ }
+ }
+
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ if (SDValue Reduced = reduceLoadWidth(N))
+ return Reduced;
+
+ // Handle the case where the load remains an extending load even
+ // after truncation.
+ if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
+ SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemoryVT(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
+ return NewLoad;
+ }
+ }
+ }
+
+ // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (!X.isUndef()) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+ // Stop if more than one members are non-undef.
+ if (NumDefs > 1)
+ break;
+
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorElementCount()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
+ AddToWorklist(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
+ }
+ }
+
+ // Fold truncate of a bitcast of a vector to an extract of the low vector
+ // element.
+ //
+ // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
+ if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
+ SDValue VecSrc = N0.getOperand(0);
+ EVT VecSrcVT = VecSrc.getValueType();
+ if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
+ SDLoc SL(N);
+
+ unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
+ DAG.getVectorIdxConstant(Idx, SL));
+ }
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (truncate (extract_subvector(ext x))) ->
+ // (extract_subvector x)
+ // TODO: This can be generalized to cover cases where the truncate and extract
+ // do not fully cancel each other out.
+ if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) {
+ if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
+ VT.getVectorElementType())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
+ N00.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ // Narrow a suitable binary operation with a non-opaque constant operand by
+ // moving it ahead of the truncate. This is limited to pre-legalization
+ // because targets may prefer a wider type during later combines and invert
+ // this transform.
+ switch (N0.getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ if (!LegalOperations && N0.hasOneUse() &&
+ (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ isConstantOrConstantVector(N0.getOperand(1), true))) {
+ // TODO: We already restricted this to pre-legalization, but for vectors
+ // we are extra cautious to not create an unsupported operation.
+ // Target-specific changes are likely needed to avoid regressions here.
+ if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDLoc DL(N);
+ SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ }
+ }
+ break;
+ case ISD::ADDE:
+ case ISD::UADDO_CARRY:
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // (trunc uaddo_carry(X, Y, Carry)) ->
+ // (uaddo_carry trunc(X), trunc(Y), Carry)
+ // when the carry result is not used.
+ // We only do this for uaddo_carry before operation legalization.
+ if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT)) &&
+ N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
+ SDLoc DL(N);
+ SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
+ return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
+ }
+ break;
+ case ISD::USUBSAT:
+ // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
+ // enough to know that the upper bits are zero, we must also ensure that we
+ // don't introduce an extra truncate.
+ if (!LegalOperations && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
+ VT.getScalarSizeInBits() &&
+ hasOperation(N0.getOpcode(), VT)) {
+ return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
+ DAG, SDLoc(N));
+ }
+ break;
+ }
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// build_pair (load, load) -> load
+/// if load locations are consecutive.
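+ /// For example, on a little-endian target,
+ ///   (i64 build_pair (i32 load p), (i32 load p+4))
+ /// can become a single i64 load from p, provided the two loads are
+ /// consecutive, non-extending, single-use, and the wide access is fast.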
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+
+ // A BUILD_PAIR always has the least significant part in elt 0 and the most
+ // significant part in elt 1. So when combining into one large load, we need
+ // to consider the endianness.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LD1, LD2);
+
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
+ !LD1->hasOneUse() || !LD2->hasOneUse() ||
+ LD1->getAddressSpace() != LD2->getAddressSpace())
+ return SDValue();
+
+ unsigned LD1Fast = 0;
+ EVT LD1VT = LD1->getValueType(0);
+ unsigned LD1Bytes = LD1VT.getStoreSize();
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
+
+ return SDValue();
+}
+
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
+SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // If this is not a bitcast to an FP type or if the target doesn't have
+ // IEEE754-compliant FP logic, we're done.
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+
+ if (!VT.isFloatingPoint())
+ return SDValue();
+
+ // TODO: Handle cases where the integer constant is a different scalar
+ // bitwidth to the FP.
+ if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
+ return SDValue();
+
+ unsigned FPOpcode;
+ APInt SignMask;
+ switch (N0.getOpcode()) {
+ case ISD::AND:
+ FPOpcode = ISD::FABS;
+ SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::XOR:
+ FPOpcode = ISD::FNEG;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::OR:
+ FPOpcode = ISD::FABS;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ default:
+ return SDValue();
+ }
+
+ if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
+ return SDValue();
+
+ // This needs to be the inverse of logic in foldSignChangeInBitcast.
+ // FIXME: I don't think looking for bitcast intrinsically makes sense, but
+ // removing this would require more changes.
+ auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
+ if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
+ return true;
+
+ return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+ };
+
+ // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
+ // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
+ // fneg (fabs X)
+ SDValue LogicOp0 = N0.getOperand(0);
+ ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
+ if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
+ IsBitCastOrFree(LogicOp0, VT)) {
+ SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
+ NumFPLogicOpsConv++;
+ if (N0.getOpcode() == ISD::OR)
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
+ return FPOp;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize types, unless both types are integer and the
+ // scalar type is legal. Only do this before legalize ops, since the target
+ // may be depending on the bitcast.
+ // First check to see if this is all constant.
+ // TODO: Support FP bitcasts after legalize types.
+ if (VT.isVector() &&
+ (!LegalTypes ||
+ (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+ TLI.isTypeLegal(VT.getVectorElementType()))) &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
+ cast<BuildVectorSDNode>(N0)->isConstant())
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
+ VT.getVectorElementType());
+
+ // If the input is a constant, let getNode fold it.
+ if (isIntOrFPConstant(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation will
+ // be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT))) {
+ SDValue C = DAG.getBitcast(VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getBitcast(VT, N0.getOperand(0));
+
+ // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
+ // iff the current bitwise logicop type isn't legal
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
+ !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
+ auto IsFreeBitcast = [VT](SDValue V) {
+ return (V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType() == VT) ||
+ (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
+ V->hasOneUse());
+ };
+ if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
+ DAG.getBitcast(VT, N0.getOperand(0)),
+ DAG.getBitcast(VT, N0.getOperand(1)));
+ }
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
+ SDValue Load =
+ DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), LN0->getAlign(),
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+
+ if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
+ return V;
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ //
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // This often reduces constant pool loads.
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
+ N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
+ !N0.getValueType().isVector()) {
+ SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
+ AddToWorklist(NewConv.getNode());
+
+ SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
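+ // For example, in the non-ppc_fp128 case with f64 (sign mask
+ // 0x8000000000000000), the generic fold above instantiates to:
+ //   (i64 (bitcast (fcopysign 1.0, X)))
+ //     -> (or (and (i64 (bitcast X)), 0x8000000000000000),
+ //            (and (i64 (bitcast 1.0)), 0x7FFFFFFFFFFFFFFF))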
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
+ !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
+ AddToWorklist(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
+ AddToWorklist(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
+ AddToWorklist(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
+ AddToWorklist(X.getNode());
+ }
+
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, SDLoc(X), VT,
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
+ AddToWorklist(X.getNode());
+
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
+ AddToWorklist(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
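+ // For example, an i64 build_pair of two adjacent i32 loads can become a
+ // single i64 load from the lower address, subject to the consecutiveness and
+ // legality checks performed in CombineConsecutiveLoads.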
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
+ return CombineLD;
+
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+ // If operands are a bitcast, peek through if it casts the original VT.
+ // If operands are a constant, just bitcast back to original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0).getValueType() == VT)
+ return SDValue(Op.getOperand(0));
+ if (Op.isUndef() || isAnyConstantBuildVector(Op))
+ return DAG.getBitcast(VT, Op);
+ return SDValue();
+ };
+
+ // FIXME: If either input vector is bitcast, try to convert the shuffle to
+ // the result type of this bitcast. This would eliminate at least one
+ // bitcast. See the transform in InstCombine.
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
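+ // For example, bitcasting a v2i64 shuffle with mask <1,0> to v4i32 gives
+ // MaskScale == 2 and NewMask == <2,3,0,1>.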
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+SDValue DAGCombiner::visitFREEZE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
+ return N0;
+
+ // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
+ // Try to push freeze through instructions that propagate but don't produce
+ // poison as far as possible. If the operand of the freeze satisfies three
+ // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
+ // but at most one of its operands are guaranteed non-poison (or it is a
+ // BUILD_VECTOR or similar), then push the freeze through to the operands
+ // that are not guaranteed non-poison.
+ // NOTE: we will strip poison-generating flags, so ignore them here.
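+ // For example, freeze (add x, 42) can become (add (freeze x), 42): the
+ // constant operand is guaranteed non-poison, and a plain ADD (with its
+ // poison-generating flags stripped) cannot introduce new poison.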
+ if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
+ /*ConsiderFlags*/ false) ||
+ N0->getNumValues() != 1 || !N0->hasOneUse())
+ return SDValue();
+
+ bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
+ N0.getOpcode() == ISD::BUILD_PAIR ||
+ N0.getOpcode() == ISD::CONCAT_VECTORS;
+
+ SmallSetVector<SDValue, 8> MaybePoisonOperands;
+ for (SDValue Op : N0->ops()) {
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
+ /*Depth*/ 1))
+ continue;
+ bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
+ bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
+ if (!HadMaybePoisonOperands)
+ continue;
+ if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
+ // Multiple maybe-poison ops when not allowed - bail out.
+ return SDValue();
+ }
+ }
+ // NOTE: the whole op may still not be guaranteed not to be undef or poison,
+ // because it could create undef or poison due to its poison-generating flags.
+ // So not finding any maybe-poison operands is fine.
+
+ for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
+ // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
+ if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
+ continue;
+ // First, freeze each offending operand.
+ SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
+ // Then, change all other uses of unfrozen operand to use frozen operand.
+ DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
+ if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
+ FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
+ // But, that also updated the use in the freeze we just created, thus
+ // creating a cycle in a DAG. Let's undo that by mutating the freeze.
+ DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
+ MaybePoisonOperand);
+ }
+ }
+
+ // This node has been merged with another.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ return SDValue(N, 0);
+
+ // The whole node may have been updated, so the value we were holding
+ // may no longer be valid. Re-fetch the operand we're `freeze`ing.
+ N0 = N->getOperand(0);
+
+ // Finally, recreate the node; its operands were updated to use frozen
+ // operands, so we just need to use its "original" operands.
+ SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
+ // Special-handle ISD::UNDEF: each single one of them can be its own thing.
+ for (SDValue &Op : Ops) {
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = DAG.getFreeze(Op);
+ }
+ // NOTE: this strips poison generating flags.
+ SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+ assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
+ "Can't create node that may be undef/poison!");
+ return R;
+}
+
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
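+ /// For example, on a little-endian target a v2i32 build_vector <1, 2>
+ /// bitcast to v1i64 folds to the constant 0x0000000200000001.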
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : BV->op_values()) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
+ Ops.push_back(DAG.getBitcast(DstEltVT, Op));
+ AddToWorklist(Ops.back().getNode());
+ }
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+ return DAG.getBuildVector(VT, SDLoc(BV), Ops);
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+ // Convert the input float vector to an int vector where the elements are the
+ // same size.
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is an FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ // Okay, we know the src/dst types are both integers of differing types.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+
+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
+ // BuildVectorSDNode?
+ auto *BVN = cast<BuildVectorSDNode>(BV);
+
+ // Extract the constant raw bit data.
+ BitVector UndefElements;
+ SmallVector<APInt> RawBits;
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
+ return SDValue();
+
+ SDLoc DL(BV);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
+ if (UndefElements[I])
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
+ // Returns true if floating-point contraction is allowed on the FMUL SDValue `N`.
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
+
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
+
+// Returns true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
+}
+
+/// Try to perform FMA combining on a given FADD node.
+template <class MatchContextClass>
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+ MatchContextClass matcher(DAG, TLI, N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+
+ // Floating-point multiply-add with intermediate rounding.
+ // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
+ // FIXME: Add VP_FMAD opcode.
+ bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
+ (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ bool CanReassociate =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
+ return SDValue();
+
+ // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
+ // beneficial. It does not reduce latency. It increases register pressure. It
+ // replaces an fadd with an fma which is a more complex instruction, so is
+ // likely to have a larger encoding, use more functional units, etc.
+ if (N0 == N1)
+ return SDValue();
+
+ if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ auto isFusedOp = [&](SDValue N) {
+ return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
+ };
+
+ // Is the node an FMUL and contractable either due to global flags or
+ // SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
+ if (!matcher.match(N, ISD::FMUL))
+ return false;
+ return AllowFusionGlobally || N->getFlags().hasAllowContract();
+ };
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
+ if (N0->use_size() > N1->use_size())
+ std::swap(N0, N1);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
+ N1.getOperand(1), N0);
+ }
+
+ // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
+ // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
+ // This also works with nested fma instructions:
+ // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
+ // fma A, B, (fma C, D, (fma E, F, G))
+ // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
+ // fma A, B, (fma C, D, (fma E, F, G)).
+ // This requires reassociation because it changes the order of operations.
+ if (CanReassociate) {
+ SDValue FMA, E;
+ if (isFusedOp(N0) && N0.hasOneUse()) {
+ FMA = N0;
+ E = N1;
+ } else if (isFusedOp(N1) && N1.hasOneUse()) {
+ FMA = N1;
+ E = N0;
+ }
+
+ SDValue TmpFMA = FMA;
+ while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
+ SDValue FMul = TmpFMA->getOperand(2);
+ if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
+ SDValue C = FMul.getOperand(0);
+ SDValue D = FMul.getOperand(1);
+ SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
+ // Replacing the inner FMul could cause the outer FMA to be simplified
+ // away.
+ return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
+ }
+
+ TmpFMA = TmpFMA->getOperand(2);
+ }
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
+ }
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (matcher.match(N1, ISD::FP_EXTEND)) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, X, Y,
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
+ };
+ if (isFusedOp(N0)) {
+ SDValue N02 = N0.getOperand(2);
+ if (matcher.match(N02, ISD::FP_EXTEND)) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isFusedOp(N00)) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+ }
+
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (isFusedOp(N1)) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+ }
+
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isFusedOp(N10)) {
+ SDValue N102 = N10.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FSUB node.
+template <class MatchContextClass>
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+ MatchContextClass matcher(DAG, TLI, N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+
+ // Floating-point multiply-add with intermediate rounding.
+ // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
+ // FIXME: Add VP_FMAD opcode.
+ bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
+ (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ const SDNodeFlags Flags = N->getFlags();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+
+ // If the subtraction is not contractable, do not combine.
+ if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
+ return SDValue();
+
+ if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
+
+ // Is the node an FMUL and contractable either due to global flags or
+ // SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
+ if (!matcher.match(N, ISD::FMUL))
+ return false;
+ return AllowFusionGlobally || N->getFlags().hasAllowContract();
+ };
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
+ if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
+ XY.getOperand(1),
+ matcher.getNode(ISD::FNEG, SL, VT, Z));
+ }
+ return SDValue();
+ };
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
+ if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
+ YZ.getOperand(1), X);
+ }
+ return SDValue();
+ };
+
+ // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
+ (N0->use_size() > N1->use_size())) {
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ } else {
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
+ }
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
+ matcher.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1));
+ }
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (matcher.match(N1, ISD::FP_EXTEND)) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(
+ ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
+ }
+ }
+
+ // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+ if (matcher.match(N00, ISD::FNEG)) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
+ return matcher.getNode(
+ ISD::FNEG, SL, VT,
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (matcher.match(N0, ISD::FNEG)) {
+ SDValue N00 = N0.getOperand(0);
+ if (matcher.match(N00, ISD::FP_EXTEND)) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N000.getValueType())) {
+ return matcher.getNode(
+ ISD::FNEG, SL, VT,
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ auto isReassociable = [&Options](SDNode *N) {
+ return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ };
+
+ auto isContractableAndReassociableFMUL = [&isContractableFMUL,
+ &isReassociable](SDValue N) {
+ return isContractableFMUL(N) && isReassociable(N.getNode());
+ };
+
+ auto isFusedOp = [&](SDValue N) {
+ return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
+ };
+
+ // More folding opportunities when target permits.
+ if (Aggressive && isReassociable(N)) {
+ bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y, (fma u, v, (fneg z)))
+ if (CanFuse && isFusedOp(N0) &&
+ isContractableAndReassociableFMUL(N0.getOperand(2)) &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (CanFuse && isFusedOp(N1) &&
+ isContractableAndReassociableFMUL(N1.getOperand(2)) &&
+ N1->hasOneUse() && NoSignedZero) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
+ }
+
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
+ if (isFusedOp(N0) && N0->hasOneUse()) {
+ SDValue N02 = N0.getOperand(2);
+ if (matcher.match(N02, ISD::FP_EXTEND)) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableAndReassociableFMUL(N020) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
+ }
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+ if (isFusedOp(N00)) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableAndReassociableFMUL(N002) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
+ }
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
+ N1->hasOneUse()) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (isContractableAndReassociableFMUL(N120) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
+ SDValue CvtSrc = N1.getOperand(0);
+ SDValue N100 = CvtSrc.getOperand(0);
+ SDValue N101 = CvtSrc.getOperand(1);
+ SDValue N102 = CvtSrc.getOperand(2);
+ if (isContractableAndReassociableFMUL(N102) &&
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ CvtSrc.getValueType())) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FMUL node based on the distributive
+/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
+/// subtraction instead of addition).
+SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // The transforms below are incorrect when x == 0 and y == inf, because the
+ // intermediate multiplication produces a nan.
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
+ return SDValue();
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ isContractableFMUL(Options, SDValue(N, 0)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // Floating-point multiply-add with intermediate rounding. This can result
+ // in a less precise result due to the changed rounding order.
+ bool HasFMAD = Options.UnsafeFPMath &&
+ (LegalOperations && TLI.isFMADLegal(DAG, N));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
+ // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
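+ // These follow from distributing the multiply: (x0 + 1.0) * y == x0 * y + y
+ // and (x0 - 1.0) * y == x0 * y - y.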
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
+ if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y);
+ if (C->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ }
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFADD(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFADD(N1, N0))
+ return FMA;
+
+ // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
+ // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
+ // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
+ // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
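+ // These likewise follow from distribution, e.g. (+1.0 - x1) * y == y - x1 * y
+ // == (fma (fneg x1), y, y), and (x0 - 1.0) * y == x0 * y - y.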
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
+ if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
+ if (C0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y);
+ if (C0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ }
+ if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ if (C1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y);
+ }
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFSUB(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFSUB(N1, N0))
+ return FMA;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // FADD -> FMA combines:
+ if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
+ // fold (fadd c1, c2) -> c1 + c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
+ if (N1C && N1C->isZero())
+ if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
+ return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
+
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
+
+ auto isFMulNegTwo = [](SDValue FMul) {
+ if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
+ return false;
+ auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
+ return C && C->isExactlyValue(-2.0);
+ };
+
+ // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N0)) {
+ SDValue B = N0.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
+ }
+ // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N1)) {
+ SDValue B = N1.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
+ }
+
+ // No FP constant should be created after legalization, as the Instruction
+ // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
+
+ // If nnan is enabled, fold lots of things.
+ if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ return DAG.getConstantFP(0.0, DL, VT);
+
+ // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ return DAG.getConstantFP(0.0, DL, VT);
+ }
+
+ // If 'unsafe math' or reassoc and nsz, fold lots of things.
+ // TODO: break out portions of the transformations below for which Unsafe is
+ // considered and which do not require both nsz and reassoc
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ AllowNewConst) {
+ // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
+ if (N1CFP && N0.getOpcode() == ISD::FADD &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // We can fold chains of FADD's of the same value into multiplications.
+ // This transform is not safe in general because we are reducing the number
+ // of rounding steps.
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ SDNode *CFP00 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ SDNode *CFP01 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+
+ // (fadd (fmul x, c), x) -> (fmul x, c+1)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ SDNode *CFP10 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ SDNode *CFP11 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+
+ // (fadd x, (fmul x, c)) -> (fmul x, c+1)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
+ }
+
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
+ if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N0.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::FADD) {
+ SDNode *CFP00 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul x, 3.0)
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N1,
+ DAG.getConstantFP(3.0, DL, VT));
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD) {
+ SDNode *CFP10 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(3.0, DL, VT));
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
+ if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT));
+ }
+ }
+
+ // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
+ VT, N0, N1, Flags))
+ return SD;
+ } // enable-unsafe-fp-math
+
+ // FADD -> FMA combines:
+ if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N0 = N->getOperand(1);
+ SDValue N1 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT ChainVT = N->getValueType(1);
+ SDLoc DL(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N0, NegN1});
+ }
+
+ // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N1, NegN0});
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero()) {
+ if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) {
+ return N0;
+ }
+ }
+
+ if (N0 == N1) {
+ // (fsub x, x) -> 0.0
+ if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
+ return DAG.getConstantFP(0.0f, DL, VT);
+ }
+
+ // (fsub -0.0, N1) -> -N1
+ if (N0CFP && N0CFP->isZero()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
+ // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
+ // flushed to zero, unless all users treat denorms as zero (DAZ).
+ // FIXME: This transform will change the sign of a NaN and the behavior
+ // of a signaling NaN. It is only valid when a NoNaN flag is present.
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::getIEEE()) {
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return NegN1;
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1);
+ }
+ }
+ }
+
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ N1.getOpcode() == ISD::FADD) {
+ // X - (X + Y) -> -Y
+ if (N0 == N1->getOperand(0))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
+ // X - (Y + X) -> -Y
+ if (N0 == N1->getOperand(1))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
+ }
+
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
+
+ // FSUB -> FMA combines:
+ if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+ // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ N0.getOpcode() == ISD::FMUL) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ // Avoid an infinite loop by making sure that N00 is not a constant
+ // (the inner multiply has not been constant folded yet).
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ }
+ }
+
+ // Match a special-case: we convert X * 2.0 into fadd.
+ // fmul (fadd X, X), C -> fmul X, 2.0 * C
+ if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
+ N0.getOperand(0) == N0.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
+ }
+
+ // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
+ VT, N0, N1, Flags))
+ return SD;
+ }
+
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
+
+ // -N0 * -N1 --> N0 * N1
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
+ }
+
+ // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
+ // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
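+ // Rationale: when X > 0.0 the product is -X, otherwise it is X, and both
+ // equal (fneg (fabs X)); the second form is the mirrored case yielding
+ // (fabs X). The nnan and nsz flags rule out the NaN and signed-zero corner
+ // cases.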
+ if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
+ (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
+ TLI.isOperationLegal(ISD::FABS, VT)) {
+ SDValue Select = N0, X = N1;
+ if (Select.getOpcode() != ISD::SELECT)
+ std::swap(Select, X);
+
+ SDValue Cond = Select.getOperand(0);
+ auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
+ auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
+
+ if (TrueOpnd && FalseOpnd &&
+ Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
+ isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ std::swap(TrueOpnd, FalseOpnd);
+ [[fallthrough]];
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
+ TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FABS, DL, VT, X));
+ if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FABS, DL, VT, X);
+
+ break;
+ }
+ }
+ }
+
+ // FMUL -> FMA combines:
+ if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
+ return SDValue();
+}
+
+template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ // FMA nodes have flags that propagate to the created nodes.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ MatchContextClass matcher(DAG, TLI, N);
+
+ bool CanReassociate =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+
+ // Constant fold FMA.
+ if (isa<ConstantFPSDNode>(N0) &&
+ isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2)) {
+ return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
+ }
+
+ // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ }
+
+ // FIXME: use fast math flags instead of Options.UnsafeFPMath
+ if (Options.UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
+
+ // FIXME: Support splat of constant.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
+ return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+
+ if (CanReassociate) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return matcher.getNode(
+ ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
+ }
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (matcher.match(N0, ISD::FMUL) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return matcher.getNode(
+ ISD::FMA, DL, VT, N0.getOperand(0),
+ matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
+ }
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ // FIXME: Support splat of constant.
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
+ AddToWorklist(RHSNeg.getNode());
+ return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
+ }
+
+ // fma (fneg x), K, y -> fma x, -K, y
+ if (matcher.match(N0, ISD::FNEG) &&
+ (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ (N1.hasOneUse() &&
+ !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
+ return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
+ }
+ }
+
+ // FIXME: Support splat of constant.
+ if (CanReassociate) {
+ // (fma x, c, x) -> (fmul x, (c+1))
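+ // The fused form computes x*c + x with a single rounding, whereas the fmul
+ // form rounds c+1 first, so the results may differ; that is why
+ // reassociation must be allowed for this fold (and the one below).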
+ if (N1CFP && N0 == N2) {
+ return matcher.getNode(ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT)));
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
+ return matcher.getNode(ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT)));
+ }
+ }
+
+ // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
+ // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
+ if (!TLI.isFNegFree(VT))
+ if (SDValue Neg = TLI.getCheaperNegatedExpression(
+ SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
+ return matcher.getNode(ISD::FNEG, DL, VT, Neg);
+ return SDValue();
+}
+
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is that different
+// targets may have different costs for FDIV and FMUL, so sometimes the cost
+// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+// reason is that the critical path is increased from "one FDIV" to
+// "one FDIV + one FMUL".
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ // TODO: Limit this transform based on optsize/minsize - it always creates at
+ // least 1 extra instruction. But the perf win may be substantial enough
+ // that only minsize should restrict this.
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags Flags = N->getFlags();
+ if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
+ return SDValue();
+
+ // Skip if current node is a reciprocal/fneg-reciprocal.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
+ if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
+ return SDValue();
+
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+
+ // For splat vectors, scale the number of uses by the splat factor. If we can
+ // convert the division into a scalar op, that will likely be much faster.
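+ // For example, if the target's minimum is 3 (the value is target-specific),
+ // a single FDIV user of a splatted <4 x float> divisor counts as 4 scaled
+ // uses and passes the threshold.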
+ unsigned NumElts = 1;
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && DAG.isSplatValue(N1))
+ NumElts = VT.getVectorMinNumElements();
+
+ if (!MinUses || (N1->use_size() * NumElts) < MinUses)
+ return SDValue();
+
+ // Find all FDIV users of the same divisor.
+ // Use a set because duplicates may be present in the user list.
+ SetVector<SDNode *> Users;
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
+ if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
+ U->getOperand(0) == U->getOperand(1).getOperand(0) &&
+ U->getFlags().hasAllowReassociation() &&
+ U->getFlags().hasNoSignedZeros())
+ continue;
+
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
+
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if ((Users.size() * NumElts) < MinUses)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal, Flags);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
+ return C;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (SDValue V = combineRepeatedFPDivisors(N))
+ return V;
+
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Compute the reciprocal 1.0 / c2.
+ const APFloat &N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (e.g., NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
+ }
+
+ // If this FDIV is part of a reciprocal square root, it may be folded
+ // into a target-specific square root estimate instruction.
+ if (N1.getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FP_ROUND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FMUL) {
+ // Look through an FMUL. Even though this won't remove the FDIV directly,
+ // it's still worthwhile to get rid of the FSQRT if possible.
+ SDValue Sqrt, Y;
+ if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ Sqrt = N1.getOperand(0);
+ Y = N1.getOperand(1);
+ } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+ Sqrt = N1.getOperand(1);
+ Y = N1.getOperand(0);
+ }
+ if (Sqrt.getNode()) {
+ // If the other multiply operand is known positive, pull it into the
+ // sqrt. That will eliminate the division if we convert to an estimate.
+ if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
+ N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
+ SDValue A;
+ if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
+ A = Y.getOperand(0);
+ else if (Y == Sqrt.getOperand(0))
+ A = Y;
+ if (A) {
+ // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
+ // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
+ SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
+ SDValue AAZ =
+ DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
+
+ // Estimate creation failed. Clean up speculatively created nodes.
+ recursivelyDeleteUnusedNodes(AAZ.getNode());
+ }
+ }
+
+ // We found a FSQRT, so try to make this fold:
+ // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
+ AddToWorklist(Div.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
+ }
+ }
+ }
+
+ // Fold into a reciprocal estimate and multiply instead of a real divide.
+ if (Options.NoInfsFPMath || Flags.hasNoInfs())
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
+ }
+
+ // Fold X/Sqrt(X) -> Sqrt(X)
+ if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
+ (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
+ if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
+ return N1;
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+ SDNodeFlags Flags = N->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
+ // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
+ if (!Flags.hasApproximateFuncs() ||
+ (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (TLI.isFsqrtCheap(N0, DAG))
+ return SDValue();
+
+ // FSQRT nodes have flags that propagate to the created nodes.
+ // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
+ // transform the fdiv, we may produce a sub-optimal estimate sequence
+ // because the reciprocal calculation may not have to filter out a
+ // 0.0 input.
+ return buildSqrtEstimate(N0, Flags);
+}
+
+/// copysign(x, fp_extend(y)) -> copysign(x, y)
+/// copysign(x, fp_round(y)) -> copysign(x, y)
+/// Operands to the functions are the types of X and Y, respectively.
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
+ // Always fold no-op FP casts.
+ if (XTy == YTy)
+ return true;
+
+ // Do not optimize out type conversion of f128 type yet.
+ // For some targets like x86_64, configuration is changed to keep one f128
+ // value in one SSE register, but instruction selection cannot handle
+ // FCOPYSIGN on SSE registers yet.
+ if (YTy == MVT::f128)
+ return false;
+
+ return !YTy.isVector() || EnableVectorFCopySignExtendRound;
+}
+
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() != ISD::FP_EXTEND &&
+ N1.getOpcode() != ISD::FP_ROUND)
+ return false;
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
+ return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
+ const APFloat &V = N1C->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFPOW(SDNode *N) {
+ ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
+ if (!ExponentC)
+ return SDValue();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // Try to convert x ** (1/3) into cube root.
+ // TODO: Handle the various flavors of long double.
+ // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+ // Some range near 1/3 should be fine.
+ EVT VT = N->getValueType(0);
+ if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+ (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+ // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+ // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+ // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val) (a negative number).
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf nnan afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Do not create a cbrt() libcall if the target does not have it, and do not
+ // turn a pow that has lowering support into a cbrt() libcall.
+ if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+ (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+ DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+ return SDValue();
+
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
+ }
+
+ // Try to convert x ** (1/4) and x ** (3/4) into square roots.
+ // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
+ // TODO: This could be extended (using a target hook) to handle smaller
+ // power-of-2 fractional exponents.
+ bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+ bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+ if (ExponentIs025 || ExponentIs075) {
+ // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
+ // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+ // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+
+ // We only need no signed zeros for the 0.25 case.
+ if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Don't double the number of libcalls. We are trying to inline fast code.
+ if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
+ return SDValue();
+
+ // Assume that libcalls are the smallest code.
+ // TODO: This restriction should probably be lifted for vectors.
+ if (ForCodeSize)
+ return SDValue();
+
+ // pow(X, 0.25) --> sqrt(sqrt(X))
+ SDLoc DL(N);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
+ if (ExponentIs025)
+ return SqrtSqrt;
+ // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
+ }
+
+ return SDValue();
+}
+
+static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
+ // replacing casts with a libcall. We also must be allowed to ignore -0.0
+ // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
+ // conversions would return +0.0.
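+ // For example, (double)(int)-0.5 is +0.0, whereas ftrunc(-0.5) is -0.0,
+ // so the fold is only valid when signed zeros can be ignored.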
+ // FIXME: We should be able to use node-level FMF here.
+ // TODO: If strict math, should we use FABS (+ range check for signed cast)?
+ EVT VT = N->getValueType(0);
+ if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
+ !DAG.getTarget().Options.NoSignedZerosFPMath)
+ return SDValue();
+
+ // fptosi/fptoui round towards zero, so converting from FP to integer and
+ // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
+ SDValue N0 = N->getOperand(0);
+ if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
+ hasOperation(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select (setcc x, y, cc), 1.0, 0.0)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
+ }
+
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
+ hasOperation(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
+ }
+
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
+ return SDValue();
+}
+
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits();
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
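+ // For example, every i16 value is exactly representable in f32 (24 bits of
+ // precision), so fp_to_sint(f32 (sint_to_fp i16:x)) folds back to x, while
+ // an i32 -> f32 -> i32 round trip does not fit (31 significant bits vs. 24)
+ // and is left alone.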
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ return DAG.getBitcast(VT, Src);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
+
+ // Avoid folding legal fp_rounds into non-legal ones.
+ if (!hasOperation(ISD::FP_ROUND, VT))
+ return SDValue();
+
+ // Skip this folding if it results in an fp_round from f80 to f16.
+ //
+ // f80 to f16 always generates an expensive (and as yet, unimplemented)
+ // libcall to __truncxfhf2 instead of selecting native f16 conversion
+ // instructions from f32 or f64. Moreover, the first (value-preserving)
+ // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
+ // x86.
+ if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+ return SDValue();
+
+ // If the first fp_round isn't a value preserving truncation, it might
+ // introduce a tie in the second fp_round, that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding.
+ // Also, this is a value preserving truncation iff both fp_round's are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
+ }
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ // Note: From a legality perspective, this is a two step transform. First,
+ // we duplicate the fp_round to the arguments of the copysign, then we
+ // eliminate the fp_round on Y. The second step requires an additional
+ // predicate to match the implementation above.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
+ N0.getValueType())) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
+ N0.getOperand(0), N1);
+ AddToWorklist(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ return FoldedVOp;
+
+ // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
+ // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
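+ // The extending load yields the wide value directly; any other users of the
+ // original narrow load are rewritten to use an fp_round of the extended
+ // value (see the CombineTo calls below).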
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(
+ N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+
+ // fold ftrunc (known rounded int x) -> x
+ // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
+ // likely to be generated to extract integer from a rounded floating value.
+ switch (N0.getOpcode()) {
+ default: break;
+ case ISD::FRINT:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ return N0;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFREXP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (ffrexp c1) -> ffrexp(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // Constant fold FNEG.
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+
+ if (SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
+ return NegN0;
+
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
+ // know it was called from a context with a nsz flag if the input fsub does
+ // not.
+ if (N0.getOpcode() == ISD::FSUB &&
+ (DAG.getTarget().Options.NoSignedZerosFPMath ||
+ N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
+ N0.getOperand(0));
+ }
+
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMinMax(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opc = N->getOpcode();
+ bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
+ bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // Constant fold.
+ if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // Canonicalize to constant on RHS.
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+
+ if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
+ const APFloat &AF = N1CFP->getValueAPF();
+
+ // minnum(X, nan) -> X
+ // maxnum(X, nan) -> X
+ // minimum(X, nan) -> nan
+ // maximum(X, nan) -> nan
+ if (AF.isNaN())
+ return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
+
+ // In the following folds, inf can be replaced with the largest finite
+ // float, if the ninf flag is set.
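+ // For example, with ninf set, maxnum(X, <largest finite float>) can be
+ // treated like maxnum(X, +inf) and folded to the constant operand.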
+ if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
+ // minnum(X, -inf) -> -inf
+ // maxnum(X, +inf) -> +inf
+ // minimum(X, -inf) -> -inf if nnan
+ // maximum(X, +inf) -> +inf if nnan
+ if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(1);
+
+ // minnum(X, +inf) -> X if nnan
+ // maxnum(X, -inf) -> X if nnan
+ // minimum(X, +inf) -> X
+ // maximum(X, -inf) -> X
+ if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(0);
+ }
+ }
+
+ if (SDValue SD = reassociateReduction(
+ PropagatesNaN
+ ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
+ : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
+ Opc, SDLoc(N), VT, N0, N1, Flags))
+ return SD;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
+
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
+ // nondeterministic jumps).
+ if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ N1->getOperand(0), N2);
+ }
+
+ // Variant of the previous fold where there is a SETCC in between:
+ // BRCOND(SETCC(FREEZE(X), CONST, Cond))
+ // =>
+ // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
+ // =>
+ // BRCOND(SETCC(X, CONST, Cond))
+ // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
+ // isn't equivalent to true or false.
+ // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
+ // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
+ if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
+ SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
+ ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
+ ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
+ bool Updated = false;
+
+ // Is 'X Cond C' always true or false?
+ auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
+ (Cond == ISD::SETLT && C->isMinSignedValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
+ (Cond == ISD::SETGT && C->isMaxSignedValue());
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
+ (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
+ (Cond == ISD::SETGE && C->isMinSignedValue());
+ return True || False;
+ };
+
+ if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
+ if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
+ S0 = S0->getOperand(0);
+ Updated = true;
+ }
+ }
+ if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
+ if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
+ S1 = S1->getOperand(0);
+ Updated = true;
+ }
+ }
+
+ if (Updated)
+ return DAG.getNode(
+ ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
+ }
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if (N1.hasOneUse()) {
+ // rebuildSetCC calls visitXor which may change the Chain when there is a
+ // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
+ HandleSDNode ChainHandle(Chain);
+ if (SDValue NewN1 = rebuildSetCC(N1))
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
+ ChainHandle.getValue(), NewN1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+ if (N.getOpcode() == ISD::SRL ||
+ (N.getOpcode() == ISD::TRUNCATE &&
+ (N.getOperand(0).hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::SRL))) {
+ // Look past the truncate.
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
+ SDLoc DL(N);
+ return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
+ }
+ }
+ }
+ }
+
+ // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
+ // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
+ if (N.getOpcode() == ISD::XOR) {
+ // Because we may call this on a speculatively constructed
+ // SimplifiedSetCC Node, we need to simplify this node first.
+ // Ideally this should be folded into SimplifySetCC and not
+ // here. For now, grab a handle to N so we don't lose it from
+ // replacements internal to the visit.
+ HandleSDNode XORHandle(N);
+ while (N.getOpcode() == ISD::XOR) {
+ SDValue Tmp = visitXOR(N.getNode());
+ // No simplification done.
+ if (!Tmp.getNode())
+ break;
+ // Returning N is a form of in-visit replacement that may invalidate
+ // N. Grab the value from the handle.
+ if (Tmp.getNode() == N.getNode())
+ N = XORHandle.getValue();
+ else // Node simplified. Try simplifying again.
+ N = Tmp;
+ }
+
+ if (N.getOpcode() != ISD::XOR)
+ return N;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
+ if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
+ Op0.getValueType() == MVT::i1) {
+ N = Op0;
+ Op0 = N->getOperand(0);
+ Op1 = N->getOperand(1);
+ Equal = true;
+ }
+
+ EVT SetCCVT = N.getValueType();
+ if (LegalTypes)
+ SetCCVT = getSetCCResultType(SetCCVT);
+ // Replace the uses of XOR with SETCC
+ return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), SDLoc(N),
+ false);
+ if (Simp.getNode()) AddToWorklist(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
+ bool &IsLoad, bool &IsMasked, SDValue &Ptr,
+ const TargetLowering &TLI) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ IsLoad = false;
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedLoadLegal(Dec, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedStoreLegal(Dec, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ IsLoad = false;
+ IsMasked = true;
+ } else {
+ return false;
+ }
+ return true;
+}
+
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
+ return false;
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!IsLoad) {
+ SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
+ : cast<StoreSDNode>(N)->getValue();
+
+ // Would require a copy.
+ if (Val == BasePtr)
+ return false;
+
+ // Would create a cycle.
+ if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Caches for hasPredecessorHelper.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
+ continue;
+
+ if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
+ continue;
+
+ if (Use.getUser()->getOpcode() != ISD::ADD &&
+ Use.getUser()->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use.getUser());
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ for (SDNode *Use : Ptr->uses()) {
+ if (Use == N)
+ continue;
+ if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+ // If Ptr may be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (!IsMasked) {
+ if (IsLoad)
+ Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ else
+ Result =
+ DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ } else {
+ if (IsLoad)
+ Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ else
+ Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ }
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.dump(&DAG); dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (IsLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ // We need to replace ptr0 in the following expression:
+ // x0 * offset0 + y0 * ptr0 = t0
+ // knowing that
+ // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
+ //
+ // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
+ // indexed load/store and the expression that needs to be re-written.
+ //
+ // Therefore, we have:
+ // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
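+ // For example, with a PRE_INC form t1 = ptr0 + 4 (x1 = y1 = 1, offset1 = 4)
+ // and another user t0 = ptr0 + 8 (x0 = y0 = 1, offset0 = 8), this gives
+ // t0 = (8 - 4) + t1 = t1 + 4, which no longer references ptr0.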
+
+ auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ const APInt &Offset0 = CN->getAPIntValue();
+ const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+ int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+
+ unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
+
+ APInt CNV = Offset0;
+ if (X0 < 0) CNV = -CNV;
+ if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
+ else CNV = CNV - Offset1;
+
+ SDLoc DL(OtherUses[i]);
+
+ // We can now generate the new expression.
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
+ SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
+
+ SDValue NewUse = DAG.getNode(Opcode,
+ DL,
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ deleteAndRecombine(OtherUses[i]);
+ }
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
+ deleteAndRecombine(Ptr.getNode());
+ AddToWorklist(Result.getNode());
+
+ return true;
+}
+
+static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
+ SDValue &BasePtr, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (PtrUse == N ||
+ (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
+ return false;
+
+ if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ return false;
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ for (SDNode *Use : BasePtr->uses()) {
+ if (Use == Ptr.getNode())
+ continue;
+
+ // Don't combine if there's a later user which could perform the indexed
+ // access instead.
+ if (isa<MemSDNode>(Use)) {
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue OtherPtr;
+ if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
+ IsMasked, OtherPtr, TLI)) {
+ SmallVector<const SDNode *, 2> Worklist;
+ Worklist.push_back(Use);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
+ return false;
+ }
+ }
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
+ for (SDNode *UseUse : Use->uses())
+ if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ return false;
+ }
+ }
+ return true;
+}
+
+static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
+ bool &IsMasked, SDValue &Ptr,
+ SDValue &BasePtr, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
+ IsMasked, Ptr, TLI) ||
+ Ptr->hasOneUse())
+ return nullptr;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded into the addressing mode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+ for (SDNode *Op : Ptr->uses()) {
+ // Check for #1.
+ if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
+ continue;
+
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
+ return Op;
+ }
+ return nullptr;
+}
+
+/// Try to combine a load/store with an add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation effectively folds the
+/// add/subtract into the new indexed load/store, and all of the add/subtract's
+/// uses are redirected to the new load/store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
+ Offset, AM, DAG, TLI);
+ if (!Op)
+ return false;
+
+ SDValue Result;
+ if (!IsMasked)
+ Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM)
+ : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.dump(&DAG); dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (IsLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(IsLoad ? 1 : 0));
+ deleteAndRecombine(Op);
+ return true;
+}
+
+/// Return the base-pointer arithmetic from an indexed \p LD.
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ assert(AM != ISD::UNINDEXED);
+ SDValue BP = LD->getOperand(1);
+ SDValue Inc = LD->getOperand(2);
+
+ // Some backends use TargetConstants for load offsets, but don't expect
+ // TargetConstants in general ADD nodes. We can convert these constants into
+ // regular Constants (if the constant is not opaque).
+ assert((Inc.getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(Inc)->isOpaque()) &&
+ "Cannot split out indexing using opaque target constants");
+ if (Inc.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
+ ConstInc->getValueType(0));
+ }
+
+ unsigned Opc =
+ (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+ return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
+static inline ElementCount numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
+}
+
+bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
+ EVT STType = Val.getValueType();
+ EVT STMemType = ST->getMemoryVT();
+ if (STType == STMemType)
+ return true;
+ if (isTypeLegal(STMemType))
+ return false; // fail.
+ if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
+ TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
+ Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
+ STType.isInteger() && STMemType.isInteger()) {
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
+ Val = DAG.getBitcast(STMemType, Val);
+ return true;
+ }
+ return false; // fail.
+}
+
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
+StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
+ int64_t &Offset) {
+ SDValue Chain = LD->getOperand(0);
+
+ // Look through CALLSEQ_START.
+ if (Chain.getOpcode() == ISD::CALLSEQ_START)
+ Chain = Chain->getOperand(0);
+
+ StoreSDNode *ST = nullptr;
+ SmallVector<SDValue, 8> Aliases;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ // Look for unique store within the TokenFactor.
+ for (SDValue Op : Chain->ops()) {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
+ if (!Store)
+ continue;
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ continue;
+ // Make sure the store is not aliased with any nodes in TokenFactor.
+ GatherAllAliases(Store, Chain, Aliases);
+ if (Aliases.empty() ||
+ (Aliases.size() == 1 && Aliases.front().getNode() == Store))
+ ST = Store;
+ break;
+ }
+ } else {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (Store) {
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
+ if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ ST = Store;
+ }
+ }
+
+ return ST;
+}
+
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || !LD->isSimple())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ int64_t Offset;
+
+ StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
+ // TODO: Relax this restriction for unordered atomics (see D66309)
+ if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ // There are two cases to consider here:
+ // 1. The store is fixed width and the load is scalable. In this case we
+ // don't know at compile time if the store completely envelops the load
+ // so we abandon the optimisation.
+ // 2. The store is scalable and the load is fixed width. We could
+ // potentially support a limited number of cases here, but there has been
+ // no cost-benefit analysis to prove it's worth it.
+ bool LdStScalable = LDMemType.isScalableVT();
+ if (LdStScalable != STMemType.isScalableVT())
+ return SDValue();
+
+ // If we are dealing with scalable vectors on a big endian platform the
+ // calculation of offsets below becomes trickier, since we do not know at
+ // compile time the absolute size of the vector. Until we've done more
+ // analysis on big-endian platforms it seems better to bail out for now.
+ if (LdStScalable && DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ // Normalize for endianness. After this, Offset=0 denotes that the least
+ // significant bit in the loaded value maps to the least significant bit in
+ // the stored value. With Offset=n (for n > 0) the loaded value starts at the
+ // n-th least significant byte of the stored value.
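+ // Worked example (editorial): for an i32 store feeding an i8 load with
+ // OrigOffset = 0 on a big-endian target, the normalized Offset becomes
+ // (32 - 8) / 8 - 0 = 3, i.e. the load reads the most significant byte of
+ // the stored value.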
+ int64_t OrigOffset = Offset;
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
+ (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
+ 8 -
+ Offset;
+
+ // Check that the stored value covers all bits that are loaded.
+ bool STCoversLD;
+
+ TypeSize LdMemSize = LDMemType.getSizeInBits();
+ TypeSize StMemSize = STMemType.getSizeInBits();
+ if (LdStScalable)
+ STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
+ else
+ STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
+ StMemSize.getFixedValue());
+
+ auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
+ if (LD->isIndexed()) {
+ // Cannot handle opaque target constants and we must respect the user's
+ // request not to split indexes from loads.
+ if (!canSplitIdx(LD))
+ return SDValue();
+ SDValue Idx = SplitIndexingFromLoad(LD);
+ SDValue Ops[] = {Val, Idx, Chain};
+ return CombineTo(LD, Ops, 3);
+ }
+ return CombineTo(LD, Val, Chain);
+ };
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory as copy space (potentially masked).
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+ // Simple case: Direct non-truncating forwarding
+ if (LDType.getSizeInBits() == LdMemSize)
+ return ReplaceLd(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
+ // Mask to size of LDMemType
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
+ StMemSize.getFixedValue()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return ReplaceLd(LD, Val, Chain);
+ }
+ }
+
+ // Handle some cases for big-endian that would be Offset 0 and handled for
+ // little-endian.
+ SDValue Val = ST->getValue();
+ if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
+ if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
+ !LDType.isVector() && isTypeLegal(STType) &&
+ TLI.isOperationLegal(ISD::SRL, STType)) {
+ Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
+ DAG.getConstant(Offset * 8, SDLoc(LD), STType));
+ Offset = 0;
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+ // Model necessary truncations / extensions.
+ // Truncate Value To Stored Memory Size.
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ // TODO: Support vectors? This requires extract_subvector/bitcast.
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return ReplaceLd(LD, Val, Chain);
+ } while (false);
+
+ // On failure, cleanup dead nodes we may have created.
+ if (Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ // TODO: Allow this for unordered atomics (see D66309)
+ if (LD->isSimple()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (!N->hasAnyUseOfValue(0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.dump(&DAG);
+ dbgs() << "\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddUsersToWorklist(Chain.getNode());
+ if (N->use_empty())
+ deleteAndRecombine(N);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+
+ // If this load has an opaque TargetConstant offset, then we cannot split
+ // the indexing into an add/sub directly (that TargetConstant may not be
+ // valid for a different type of node, and we cannot convert an opaque
+ // target constant into a regular constant).
+ bool CanSplitIdx = canSplitIdx(LD);
+
+ if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorklist(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
+ LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
+ if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
+ if (*Alignment > LD->getAlign() &&
+ isAligned(*Alignment, LD->getSrcValueOffset())) {
+ SDValue NewLoad = DAG.getExtLoad(
+ LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // NewLoad will always be N as we are only refining the alignment
+ assert(NewLoad.getNode() == N);
+ (void)NewLoad;
+ }
+ }
+ }
+
+ if (LD->isUnindexed()) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(LD, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+ // Replace the chain to void dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
+ BetterChain, Ptr, LD->getMemOperand());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getMemoryVT(),
+ LD->getMemOperand());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Replace uses with load result and token factor
+ return CombineTo(N, ReplLoad.getValue(0), Token);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // Try to slice up N to more direct loads if the slices are mapped to
+ // different register banks or pairing can take place.
+ if (SliceUpLoad(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+namespace {
+
+/// Helper structure used to slice a load in smaller loads.
+/// Basically a slice is obtained from the following sequence:
+/// Origin = load Ty1, Base
+/// Shift = srl Ty1 Origin, CstTy Amount
+/// Inst = trunc Shift to Ty2
+///
+/// Then, it will be rewritten into:
+/// Slice = load SliceTy, Base + SliceOffset
+/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
+///
+/// SliceTy is deduced from the number of bits that are actually used to
+/// build Inst.
+struct LoadedSlice {
+ /// Helper structure used to compute the cost of a slice.
+ struct Cost {
+ /// Are we optimizing for code size.
+ bool ForCodeSize = false;
+
+ /// Various costs.
+ unsigned Loads = 0;
+ unsigned Truncates = 0;
+ unsigned CrossRegisterBanksCopies = 0;
+ unsigned ZExts = 0;
+ unsigned Shift = 0;
+
+ explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
+
+ /// Get the cost of one isolated slice.
+ Cost(const LoadedSlice &LS, bool ForCodeSize)
+ : ForCodeSize(ForCodeSize), Loads(1) {
+ EVT TruncType = LS.Inst->getValueType(0);
+ EVT LoadedType = LS.getLoadedType();
+ if (TruncType != LoadedType &&
+ !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
+ ZExts = 1;
+ }
+
+ /// Account for slicing gain in the current cost.
+ /// Slicing provides a few gains, like removing a shift or a
+ /// truncate. This method allows growing the cost of the original
+ /// load by the gain from this slice.
+ void addSliceGain(const LoadedSlice &LS) {
+ // Each slice saves a truncate.
+ const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
+ ++Truncates;
+ // If there is a shift amount, this slice gets rid of it.
+ if (LS.Shift)
+ ++Shift;
+ // If this slice can merge a cross register bank copy, account for it.
+ if (LS.canMergeExpensiveCrossRegisterBankCopy())
+ ++CrossRegisterBanksCopies;
+ }
+
+ Cost &operator+=(const Cost &RHS) {
+ Loads += RHS.Loads;
+ Truncates += RHS.Truncates;
+ CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
+ ZExts += RHS.ZExts;
+ Shift += RHS.Shift;
+ return *this;
+ }
+
+ bool operator==(const Cost &RHS) const {
+ return Loads == RHS.Loads && Truncates == RHS.Truncates &&
+ CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
+ ZExts == RHS.ZExts && Shift == RHS.Shift;
+ }
+
+ bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Cost &RHS) const {
+ // Assume cross register banks copies are as expensive as loads.
+ // FIXME: Do we want some more target hooks?
+ unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
+ unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
+ // Unless we are optimizing for code size, consider the
+ // expensive operation first.
+ if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
+ return ExpensiveOpsLHS < ExpensiveOpsRHS;
+ return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
+ (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
+ }
+
+ bool operator>(const Cost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
+ };
+
+ // The last instruction that represents the slice. This should be a
+ // truncate instruction.
+ SDNode *Inst;
+
+ // The original load instruction.
+ LoadSDNode *Origin;
+
+ // The right shift amount in bits from the original load.
+ unsigned Shift;
+
+ // The DAG from which Origin came.
+ // This is used to get some contextual information about legal types, etc.
+ SelectionDAG *DAG;
+
+ LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+ unsigned Shift = 0, SelectionDAG *DAG = nullptr)
+ : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
+
+ /// Get the bits used in a chunk of bits \p BitWidth large.
+ /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
+ /// unused bits set to 0.
+ APInt getUsedBits() const {
+ // Reproduce the trunc(lshr) sequence:
+ // - Start from the truncated value.
+ // - Zero extend to the desired bit width.
+ // - Shift left.
+ assert(Origin && "No original load to compare against.");
+ unsigned BitWidth = Origin->getValueSizeInBits(0);
+ assert(Inst && "This slice is not bound to an instruction");
+ assert(Inst->getValueSizeInBits(0) <= BitWidth &&
+ "Extracted slice is bigger than the whole type!");
+ APInt UsedBits(Inst->getValueSizeInBits(0), 0);
+ UsedBits.setAllBits();
+ UsedBits = UsedBits.zext(BitWidth);
+ UsedBits <<= Shift;
+ return UsedBits;
+ }
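+
+ // Editorial example: for a 32-bit Origin, a 16-bit Inst and Shift = 16,
+ // getUsedBits() returns 0xFFFF0000, i.e. the upper half of the loaded
+ // value.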
+
+ /// Get the size of the slice to be loaded in bytes.
+ unsigned getLoadedSize() const {
+ unsigned SliceSize = getUsedBits().popcount();
+ assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
+ return SliceSize / 8;
+ }
+
+ /// Get the type that will be loaded for this slice.
+ /// Note: This may not be the final type for the slice.
+ EVT getLoadedType() const {
+ assert(DAG && "Missing context");
+ LLVMContext &Ctxt = *DAG->getContext();
+ return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
+ }
+
+ /// Get the alignment of the load used for this slice.
+ Align getAlign() const {
+ Align Alignment = Origin->getAlign();
+ uint64_t Offset = getOffsetFromBase();
+ if (Offset != 0)
+ Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
+ return Alignment;
+ }
+
+ /// Check if this slice can be rewritten with legal operations.
+ bool isLegal() const {
+ // An invalid slice is not legal.
+ if (!Origin || !Inst || !DAG)
+ return false;
+
+ // Offsets are for indexed load only, we do not handle that.
+ if (!Origin->getOffset().isUndef())
+ return false;
+
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+
+ // Check that the type is legal.
+ EVT SliceType = getLoadedType();
+ if (!TLI.isTypeLegal(SliceType))
+ return false;
+
+ // Check that the load is legal for this type.
+ if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
+ return false;
+
+ // Check that the offset can be computed.
+ // 1. Check its type.
+ EVT PtrType = Origin->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ // 2. Check that it fits in the immediate.
+ if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
+ return false;
+
+ // 3. Check that the computation is legal.
+ if (!TLI.isOperationLegal(ISD::ADD, PtrType))
+ return false;
+
+ // Check that the zext is legal if it needs one.
+ EVT TruncateType = Inst->getValueType(0);
+ if (TruncateType != SliceType &&
+ !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
+ return false;
+
+ return true;
+ }
+
+ /// Get the offset in bytes of this slice in the original chunk of
+ /// bits.
+ /// \pre DAG != nullptr.
+ uint64_t getOffsetFromBase() const {
+ assert(DAG && "Missing context.");
+ bool IsBigEndian = DAG->getDataLayout().isBigEndian();
+ assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
+ uint64_t Offset = Shift / 8;
+ unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
+ assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
+ "The size of the original loaded type is not a multiple of a"
+ " byte.");
+ // If Offset is bigger than TySizeInBytes, it means we are loading all
+ // zeros. This should have been optimized before in the process.
+ assert(TySizeInBytes > Offset &&
+ "Invalid shift amount for given loaded size");
+ if (IsBigEndian)
+ Offset = TySizeInBytes - Offset - getLoadedSize();
+ return Offset;
+ }
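+
+ // Editorial example: with a 64-bit Origin, Shift = 8 and a 2-byte slice,
+ // the little-endian offset is 1; on a big-endian target it becomes
+ // 8 - 1 - 2 = 5.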
+
+ /// Generate the sequence of instructions to load the slice
+ /// represented by this object and redirect the uses of this slice to
+ /// this new sequence of instructions.
+ /// \pre this->Inst && this->Origin are valid Instructions and this
+ /// object passed the legal check: LoadedSlice::isLegal returned true.
+ /// \return The last instruction of the sequence used to load the slice.
+ SDValue loadSlice() const {
+ assert(Inst && Origin && "Unable to replace a non-existing slice.");
+ const SDValue &OldBaseAddr = Origin->getBasePtr();
+ SDValue BaseAddr = OldBaseAddr;
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
+ int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
+ assert(Offset >= 0 && "Offset too big to fit in int64_t!");
+ if (Offset) {
+ // BaseAddr = BaseAddr + Offset.
+ EVT ArithType = BaseAddr.getValueType();
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
+ }
+
+ // Create the type of the loaded slice according to its size.
+ EVT SliceType = getLoadedType();
+
+ // Create the load for the slice.
+ SDValue LastInst =
+ DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
+ Origin->getMemOperand()->getFlags());
+ // If the final type is not the same as the loaded type, this means that
+ // we have to pad with zero. Create a zero extend for that.
+ EVT FinalType = Inst->getValueType(0);
+ if (SliceType != FinalType)
+ LastInst =
+ DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
+ return LastInst;
+ }
+
+ /// Check if this slice can be merged with an expensive cross register
+ /// bank copy. E.g.,
+ /// i = load i32
+ /// f = bitcast i32 i to float
+ bool canMergeExpensiveCrossRegisterBankCopy() const {
+ if (!Inst || !Inst->hasOneUse())
+ return false;
+ SDNode *Use = *Inst->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST)
+ return false;
+ assert(DAG && "Missing context");
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ EVT ResVT = Use->getValueType(0);
+ const TargetRegisterClass *ResRC =
+ TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
+ const TargetRegisterClass *ArgRC =
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+ Use->getOperand(0)->isDivergent());
+ if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // At this point, we know that we perform a cross-register-bank copy.
+ // Check if it is expensive.
+ const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
+ // Assume bitcasts are cheap, unless both register classes do not
+ // explicitly share a common sub class.
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
+ return false;
+
+ // Check if it will be merged with the load.
+ // 1. Check the alignment / fast memory access constraint.
+ unsigned IsFast = 0;
+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
+ Origin->getAddressSpace(), getAlign(),
+ Origin->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
+ return false;
+
+ // 2. Check that the load is a legal operation for that type.
+ if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // 3. Check that we do not have a zext in the way.
+ if (Inst->getValueType(0) != getLoadedType())
+ return false;
+
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+/// Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// \p UsedBits looks like 0..0 1..1 0..0.
+static bool areUsedBitsDense(const APInt &UsedBits) {
+ // If all the bits are one, this is dense!
+ if (UsedBits.isAllOnes())
+ return true;
+
+ // Get rid of the unused bits on the right.
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
+ // Get rid of the unused bits on the left.
+ if (NarrowedUsedBits.countl_zero())
+ NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
+ // Check that the chunk of bits is completely used.
+ return NarrowedUsedBits.isAllOnes();
+}
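+
+// Editorial example: 0x00FF0000 is dense (a single contiguous run of ones),
+// while 0x00FF00FF is not, because the two runs leave a hole between them.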
+
+/// Check whether or not \p First and \p Second are next to each other
+/// in memory. This means that there is no hole between the bits loaded
+/// by \p First and the bits loaded by \p Second.
+static bool areSlicesNextToEachOther(const LoadedSlice &First,
+ const LoadedSlice &Second) {
+ assert(First.Origin == Second.Origin && First.Origin &&
+ "Unable to match different memory origins.");
+ APInt UsedBits = First.getUsedBits();
+ assert((UsedBits & Second.getUsedBits()) == 0 &&
+ "Slices are not supposed to overlap.");
+ UsedBits |= Second.getUsedBits();
+ return areUsedBitsDense(UsedBits);
+}
+
+/// Adjust the \p GlobalLSCost according to the target
+/// pairing capabilities and the layout of the slices.
+/// \pre \p GlobalLSCost should account for at least as many loads as
+/// there are in the slices in \p LoadedSlices.
+static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ LoadedSlice::Cost &GlobalLSCost) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ // If there is less than 2 elements, no pairing is possible.
+ if (NumberOfSlices < 2)
+ return;
+
+ // Sort the slices so that elements that are likely to be next to each
+ // other in memory are next to each other in the list.
+ llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
+ const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
+ // First (resp. Second) is the first (resp. second) potential candidate
+ // to be placed in a paired load.
+ const LoadedSlice *First = nullptr;
+ const LoadedSlice *Second = nullptr;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
+ // Set the beginning of the pair.
+ First = Second) {
+ Second = &LoadedSlices[CurrSlice];
+
+ // If First is NULL, it means we start a new pair.
+ // Get to the next slice.
+ if (!First)
+ continue;
+
+ EVT LoadedType = First->getLoadedType();
+
+ // If the types of the slices are different, we cannot pair them.
+ if (LoadedType != Second->getLoadedType())
+ continue;
+
+ // Check if the target supplies paired loads for this type.
+ Align RequiredAlignment;
+ if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
+ // move to the next pair, this type is hopeless.
+ Second = nullptr;
+ continue;
+ }
+ // Check if we meet the alignment requirement.
+ if (First->getAlign() < RequiredAlignment)
+ continue;
+
+ // Check that both loads are next to each other in memory.
+ if (!areSlicesNextToEachOther(*First, *Second))
+ continue;
+
+ assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
+ --GlobalLSCost.Loads;
+ // Move to the next pair.
+ Second = nullptr;
+ }
+}
+
+/// Check the profitability of all involved LoadedSlices.
+/// Currently, it is considered profitable if there are exactly two
+/// involved slices (1) which are (2) next to each other in memory, and
+/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
+///
+/// Note: The order of the elements in \p LoadedSlices may be modified, but not
+/// the elements themselves.
+///
+/// FIXME: When the cost model will be mature enough, we can relax
+/// constraints (1) and (2).
+static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ const APInt &UsedBits, bool ForCodeSize) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ if (StressLoadSlicing)
+ return NumberOfSlices > 1;
+
+ // Check (1).
+ if (NumberOfSlices != 2)
+ return false;
+
+ // Check (2).
+ if (!areUsedBitsDense(UsedBits))
+ return false;
+
+ // Check (3).
+ LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
+ // The original code has one big load.
+ OrigCost.Loads = 1;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
+ const LoadedSlice &LS = LoadedSlices[CurrSlice];
+ // Accumulate the cost of all the slices.
+ LoadedSlice::Cost SliceCost(LS, ForCodeSize);
+ GlobalSlicingCost += SliceCost;
+
+ // Account as cost in the original configuration the gain obtained
+ // with the current slices.
+ OrigCost.addSliceGain(LS);
+ }
+
+ // If the target supports paired load, adjust the cost accordingly.
+ adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
+ return OrigCost > GlobalSlicingCost;
+}
+
+/// If the given load, \p LI, is used only by trunc or trunc(lshr)
+/// operations, split it into the various pieces being extracted.
+///
+/// This sort of thing is introduced by SROA.
+/// This slicing takes care not to insert overlapping loads.
+/// \pre LI is a simple load (i.e., not an atomic or volatile load).
+bool DAGCombiner::SliceUpLoad(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
+ !LD->getValueType(0).isInteger())
+ return false;
+
+ // The algorithm to split up a load of a scalable vector into individual
+ // elements currently requires knowing the length of the loaded type,
+ // so will need adjusting to work on scalable vectors.
+ if (LD->getValueType(0).isScalableVector())
+ return false;
+
+ // Keep track of already used bits to detect overlapping values.
+ // In that case, we will just abort the transformation.
+ APInt UsedBits(LD->getValueSizeInBits(0), 0);
+
+ SmallVector<LoadedSlice, 4> LoadedSlices;
+
+ // Check if this load is used as several smaller chunks of bits.
+ // Basically, look for uses in trunc or trunc(lshr) and record a new chain
+ // of computation for each trunc.
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ SDNode *User = *UI;
+ unsigned Shift = 0;
+
+ // Check if this is a trunc(lshr).
+ if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
+ isa<ConstantSDNode>(User->getOperand(1))) {
+ Shift = User->getConstantOperandVal(1);
+ User = *User->use_begin();
+ }
+
+ // At this point, User is a Truncate iff we encountered trunc or
+ // trunc(lshr).
+ if (User->getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ // The width of the type must be a power of 2 and at least 8 bits.
+ // Otherwise the load cannot be represented in LLVM IR.
+ // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
+ // the slice would not be byte-aligned. We do not support that.
+ unsigned Width = User->getValueSizeInBits(0);
+ if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
+ return false;
+
+ // Build the slice for this chain of computations.
+ LoadedSlice LS(User, LD, Shift, &DAG);
+ APInt CurrentUsedBits = LS.getUsedBits();
+
+ // Check if this slice overlaps with another.
+ if ((CurrentUsedBits & UsedBits) != 0)
+ return false;
+ // Update the bits used globally.
+ UsedBits |= CurrentUsedBits;
+
+ // Check if the new slice would be legal.
+ if (!LS.isLegal())
+ return false;
+
+ // Record the slice.
+ LoadedSlices.push_back(LS);
+ }
+
+ // Abort slicing if it does not seem to be profitable.
+ if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+ return false;
+
+ ++SlicedLoads;
+
+ // Rewrite each chain to use an independent load.
+ // By construction, each chain can be represented by a unique load.
+
+ // Prepare the argument for the new token factor for all the slices.
+ SmallVector<SDValue, 8> ArgChains;
+ for (const LoadedSlice &LS : LoadedSlices) {
+ SDValue SliceInst = LS.loadSlice();
+ CombineTo(LS.Inst, SliceInst, true);
+ if (SliceInst.getOpcode() != ISD::LOAD)
+ SliceInst = SliceInst.getOperand(0);
+ assert(SliceInst->getOpcode() == ISD::LOAD &&
+ "It takes more than a zext to get to the loaded slice!!");
+ ArgChains.push_back(SliceInst.getValue(1));
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+ ArgChains);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddToWorklist(Chain.getNode());
+ return true;
+}
+
+/// Check to see if V is (and load (ptr), imm), where the load has
+/// specific bytes cleared out. If so, return the byte size being masked out
+/// and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = llvm::countl_zero(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = llvm::countr_zero(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a contiguous run of bits. If so, we have 0*1+0*
+ if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+ // Verify that the first masked bit starts at a multiple of the mask size so
+ // that the access is aligned the same as the access width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ // For narrowing to be valid, it must be the case that the load is the
+ // memory operation immediately preceding the store.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() == ISD::TokenFactor &&
+ SDValue(LD, 1).hasOneUse()) {
+ // LD has only 1 chain use so there are no indirect dependencies.
+ if (!LD->isOperandOf(Chain.getNode()))
+ return Result;
+ } else
+ return Result; // Fail.
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
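+
+// Editorial example: for an i32 value masked with 0xFFFF00FF, the inverted
+// mask NotMask has only byte 1 set (0xFF00), so the function returns
+// (MaskedBytes, ByteShift) = (1, 1): only the second byte is being masked
+// out, and a one-byte store at byte offset 1 can replace the wider access.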
+
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
+static SDValue
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, this is not a replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization. If the source type is legal, but the store type isn't, see
+ // if we can use a truncating store.
+ MVT VT = MVT::getIntegerVT(NumBytes * 8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool UseTruncStore;
+ if (DC->isTypeLegal(VT))
+ UseTruncStore = false;
+ else if (TLI.isTypeLegal(IVal.getValueType()) &&
+ TLI.isTruncStoreLegal(IVal.getValueType(), VT))
+ UseTruncStore = true;
+ else
+ return SDValue();
+
+ // Can't do this for indexed stores.
+ if (St->isIndexed())
+ return SDValue();
+
+ // Check that the target doesn't think this is a bad idea.
+ if (St->getMemOperand() &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *St->getMemOperand()))
+ return SDValue();
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
+ DC->getShiftAmountTy(IVal.getValueType())));
+ }
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ if (DAG.getDataLayout().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ SDLoc DL(IVal);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
+ }
+
+ ++OpsNarrowed;
+ if (UseTruncStore)
+ return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ VT, St->getOriginalAlign());
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
+
+ return DAG
+ .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ St->getOriginalAlign());
+}
+
+/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
+/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
+/// narrowing the load and store if it would end up being a win for performance
+/// or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!ST->isSimple())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ !Value.hasOneUse())
+ return SDValue();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+ // load / modify / store sequence with a single (narrower) store, which makes
+ // the load dead.
+ if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return NewST;
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return NewST;
+ }
+
+ if (!EnableReduceLoadOpStoreWidth)
+ return SDValue();
+
+ if (Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+ // Find the type to narrow the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnes(BitWidth);
+ if (Imm == 0 || Imm.isAllOnes())
+ return SDValue();
+ unsigned ShAmt = Imm.countr_zero();
+ unsigned MSB = BitWidth - Imm.countl_zero() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
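+ // Editorial example: for a 32-bit OR with Imm = 0x00FF0000, ShAmt = 16,
+ // MSB = 23 and NewBW = NextPowerOf2(7) = 8, so the operation may be
+ // narrowed to an i8 access at byte offset 2 on a little-endian target.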
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ // The narrowing should be profitable, the load/store operation should be
+ // legal (or custom) and the store size should be equal to the NewVT width.
+ while (NewBW < BitWidth &&
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+ // If the changed lsb does not start at a NewBW-bit boundary,
+ // start at the previous one.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnes(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned IsFast = 0;
+ Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
+ return SDValue();
+
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
+ SDValue NewLD =
+ DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
+ SDValue NewST =
+ DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
+
+ AddToWorklist(NewPtr.getNode());
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewVal.getNode());
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// For a given floating point load / store pair, if the load value isn't used
+/// by any other operations, then consider transforming the pair to integer
+/// load / store operations if the target deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ TypeSize VTSize = VT.getSizeInBits();
+
+ // We don't know the size of scalable types at compile time so we cannot
+ // create an integer of the equivalent size.
+ if (VTSize.isScalable())
+ return SDValue();
+
+ unsigned FastLD = 0, FastST = 0;
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *LD->getMemOperand(), &FastLD) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *ST->getMemOperand(), &FastST) ||
+ !FastLD || !FastST)
+ return SDValue();
+
+ SDValue NewLD =
+ DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LD->getAlign());
+
+ SDValue NewST =
+ DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(), ST->getAlign());
+
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewST.getNode());
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
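+//
+// An editorial sketch (not from the original source) of the profitable shape:
+//   t0 = A + c1
+//   t1 = t0 * c3     <-- MulNode; folding gives (A * c3) + c1*c3
+//   t2 = A * c3      <-- an existing multiply of c3 by the same A
+// After the fold, "A * c3" is a common subexpression, so no extra multiply
+// is introduced even though t0 may have other uses.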
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+ SDValue ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, and the target thinks the folding is
+ // profitable or does not lead to worse code, this would be OK to do.
+ if (AddNode->hasOneUse() &&
+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+ // to the "Use" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
+
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores) {
+ SmallVector<SDValue, 8> Chains;
+ SmallPtrSet<const SDNode *, 8> Visited;
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ for (unsigned i = 0; i < NumStores; ++i) {
+ Visited.insert(StoreNodes[i].MemNode);
+ }
+
+ // Don't include nodes that are children or repeated nodes.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
+ Chains.push_back(StoreNodes[i].MemNode->getChain());
+ }
+
+ assert(Chains.size() > 0 && "Chain should have generated a chain");
+ return DAG.getTokenFactor(StoreDL, Chains);
+}
+
+bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
+ const Value *UnderlyingObj = nullptr;
+ for (const auto &MemOp : StoreNodes) {
+ const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
+ // A pseudo value such as a stack frame has its own frame index and size;
+ // we should not use the first store's frame index for other frames.
+ if (MMO->getPseudoValue())
+ return false;
+
+ if (!MMO->getValue())
+ return false;
+
+ const Value *Obj = getUnderlyingObject(MMO->getValue());
+
+ if (UnderlyingObj && UnderlyingObj != Obj)
+ return false;
+
+ if (!UnderlyingObj)
+ UnderlyingObj = Obj;
+ }
+
+ return true;
+}
+
+bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector, bool UseTrunc) {
+ // Make sure we have something to merge.
+ if (NumStores < 2)
+ return false;
+
+ assert((!UseTrunc || !UseVector) &&
+ "This optimization cannot emit a vector truncating store");
+
+ // The latest Node in the DAG.
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
+ unsigned SizeInBits = NumStores * ElementSizeBits;
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+ std::optional<MachineMemOperand::Flags> Flags;
+ AAMDNodes AAInfo;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ if (!Flags) {
+ Flags = St->getMemOperand()->getFlags();
+ AAInfo = St->getAAInfo();
+ continue;
+ }
+ // Skip merging if there's an inconsistent flag.
+ if (Flags != St->getMemOperand()->getFlags())
+ return false;
+ // Concatenate AA metadata.
+ AAInfo = AAInfo.concat(St->getAAInfo());
+ }
+
+ EVT StoreTy;
+ if (UseVector) {
+ unsigned Elts = NumStores * NumMemElts;
+ // Get the type for the merged vector store.
+ StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ } else
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ if (IsConstantSrc) {
+ SmallVector<SDValue, 8> BuildVector;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ SDValue Val = St->getValue();
+ // If constant is of the wrong type, convert it now.
+ if (MemVT != Val.getValueType()) {
+ Val = peekThroughBitcasts(Val);
+ // Deal with constants of wrong size.
+ if (ElementSizeBits != Val.getValueSizeInBits()) {
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ if (isa<ConstantFPSDNode>(Val)) {
+ // Not clear how to truncate FP values.
+ return false;
+ }
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Val))
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
+ }
+ // Make sure the correctly sized value has the correct type (MemVT).
+ Val = DAG.getBitcast(MemVT, Val);
+ }
+ BuildVector.push_back(Val);
+ }
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, BuildVector);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
+ // type MemVT. If the underlying value is not the correct
+ // type, but it is an extraction of an appropriate vector we
+ // can recast Val to be of the correct type. This may require
+ // converting between EXTRACT_VECTOR_ELT and
+ // EXTRACT_SUBVECTOR.
+ if ((MemVT != Val.getValueType()) &&
+ (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
+ EVT MemVTScalarTy = MemVT.getScalarType();
+ // We may need to add a bitcast here to get types to line up.
+ if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+ Val = DAG.getBitcast(MemVT, Val);
+ } else if (MemVT.isVector() &&
+ Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
+ } else {
+ unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ SDValue Vec = Val.getOperand(0);
+ SDValue Idx = Val.getOperand(1);
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
+ }
+ }
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ APInt StoreInt(SizeInBits, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
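+ // Editorial example (assuming the candidates are already sorted by offset,
+ // as the caller arranges): merging two i16 stores of 0x1111 (lower address)
+ // and 0x2222 into one i32 on a little-endian target produces the constant
+ // 0x22221111, so the lower-addressed value ends up in the low-order bits.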
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ for (unsigned i = 0; i < NumStores; ++i) {
+ unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+
+ SDValue Val = St->getValue();
+ Val = peekThroughBitcasts(Val);
+ StoreInt <<= ElementSizeBits;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF()
+ .bitcastToAPInt()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
+ // If fp truncation is necessary give up for now.
+ if (MemVT.getSizeInBits() != ElementSizeBits)
+ return false;
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
+ }
+
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
+ bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
+
+ // Make sure we use a truncating store if that is necessary for legality.
+ // When generating the new widened store, if the first store's pointer info
+ // cannot be reused, discard the pointer info except for the address space,
+ // because the widened store can no longer be represented by the original
+ // pointer info, which describes the narrower memory object.
+ SDValue NewStore;
+ if (!UseTrunc) {
+ NewStore = DAG.getStore(
+ NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ CanReusePtrInfo
+ ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
+ FirstInChain->getAlign(), *Flags, AAInfo);
+ } else { // Must be realized as a trunc store
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
+ ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
+ SDValue ExtendedStoreVal =
+ DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
+ LegalizedStoredValTy);
+ NewStore = DAG.getTruncStore(
+ NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
+ CanReusePtrInfo
+ ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
+ StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
+ AAInfo);
+ }
+
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+
+ AddToWorklist(NewChain.getNode());
+ return true;
+}
+
+void DAGCombiner::getStoreMergeCandidates(
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&RootNode) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer. We must have a base and an offset. Do not handle stores to undef
+ // base pointers.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
+ return;
+
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ StoreSource StoreSrc = getStoreSource(Val);
+ assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
+
+ // Match on loadbaseptr if relevant.
+ EVT MemVT = St->getMemoryVT();
+ BaseIndexOffset LBasePtr;
+ EVT LoadVT;
+ if (StoreSrc == StoreSource::Load) {
+ auto *Ld = cast<LoadSDNode>(Val);
+ LBasePtr = BaseIndexOffset::match(Ld, DAG);
+ LoadVT = Ld->getMemoryVT();
+ // Load and store should be the same type.
+ if (MemVT != LoadVT)
+ return;
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ return;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Ld->isSimple() || Ld->isIndexed())
+ return;
+ }
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+ int64_t &Offset) -> bool {
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Other->isSimple() || Other->isIndexed())
+ return false;
+ // Don't mix temporal stores with non-temporal stores.
+ if (St->isNonTemporal() != Other->isNonTemporal())
+ return false;
+ if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
+ return false;
+ SDValue OtherBC = peekThroughBitcasts(Other->getValue());
+ // Allow merging constants of different types as integers.
+ bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+ : Other->getMemoryVT() != MemVT;
+ switch (StoreSrc) {
+ case StoreSource::Load: {
+ if (NoTypeMatch)
+ return false;
+ // The Load's Base Ptr must also match.
+ auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
+ if (!OtherLd)
+ return false;
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
+ return false;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() || OtherLd->isIndexed())
+ return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
+ if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
+ *OtherLd))
+ return false;
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+ return false;
+ break;
+ }
+ case StoreSource::Constant:
+ if (NoTypeMatch)
+ return false;
+ if (!isIntOrFPConstant(OtherBC))
+ return false;
+ break;
+ case StoreSource::Extract:
+ // Do not merge truncated stores here.
+ if (Other->isTruncatingStore())
+ return false;
+ if (!MemVT.bitsEq(OtherBC.getValueType()))
+ return false;
+ if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+ break;
+ default:
+ llvm_unreachable("Unhandled store source for merging");
+ }
+ Ptr = BaseIndexOffset::match(Other, DAG);
+ return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
+ };
+
+ // Check whether this pair of StoreNode and RootNode has already bailed out
+ // of the dependence check more times than the limit allows.
+ auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
+ SDNode *RootNode) -> bool {
+ auto RootCount = StoreRootCountMap.find(StoreNode);
+ return RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit;
+ };
+
+ auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
+ // This must be a chain use.
+ if (UseIter.getOperandNo() != 0)
+ return;
+ if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherStore, RootNode))
+ StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
+ }
+ };
+
+ // We are looking for a root node which is an ancestor to all mergeable
+ // stores. We search up through a load, to our root and then down
+ // through all children. For instance we will find Store{1,2,3} if
+ // St is Store1, Store2, or Store3 where the root is not a load,
+ // which is always true for non-volatile ops. TODO: Expand
+ // the search to find all valid candidates through multiple layers of loads.
+ //
+ // Root
+ // |-------|-------|
+ // Load Load Store3
+ // | |
+ // Store1 Store2
+ //
+ // FIXME: We should be able to climb and
+ // descend TokenFactors to find candidates as well.
+
+ RootNode = St->getChain().getNode();
+
+ unsigned NumNodesExplored = 0;
+ const unsigned MaxSearchNodes = 1024;
+ if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ RootNode = Ldn->getChain().getNode();
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
+ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+ TryToAddCandidate(I2);
+ }
+ // Check stores that depend on the root (e.g. Store 3 in the chart above).
+ if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
+ TryToAddCandidate(I);
+ }
+ }
+ } else {
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
+ TryToAddCandidate(I);
+ }
+}
+
+// We need to check that merging these stores does not cause a loop in the
+// DAG. Any store candidate may depend on another candidate indirectly through
+// its operands. Check in parallel by searching up from operands of candidates.
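+// For example (illustrative): if one candidate's stored value is computed
+// from a load that is chained after another candidate, the single merged
+// store that replaces both would depend on that load, which in turn would
+// depend on the merged store itself.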
+bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode) {
+ // FIXME: We should be able to truncate a full search of
+ // predecessors by doing a BFS and keeping tabs on the originating
+ // stores from which worklist nodes come, in a similar way to
+ // TokenFactor simplification.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+
+ // RootNode is a predecessor to all candidates so we need not search
+ // past it. Add RootNode (peeking through TokenFactors). Do not count
+ // these towards size check.
+
+ Worklist.push_back(RootNode);
+ while (!Worklist.empty()) {
+ auto N = Worklist.pop_back_val();
+ if (!Visited.insert(N).second)
+ continue; // Already present in Visited.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (SDValue Op : N->ops())
+ Worklist.push_back(Op.getNode());
+ }
+ }
+
+ // Don't count pruning nodes towards max.
+ unsigned int Max = 1024 + Visited.size();
+ // Search Ops of store candidates.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ SDNode *N = StoreNodes[i].MemNode;
+ // Of the 4 Store Operands:
+ // * Chain (Op 0) -> We have already considered these
+ // in candidate selection, but only by following the
+ // chain dependencies. We could still have a chain
+ // dependency to a load, that has a non-chain dep to
+ // another load, that depends on a store, etc. So it is
+ // possible to have dependencies that consist of a mix
+ // of chain and non-chain deps, and we need to include
+ // chain operands in the analysis here.
+ // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
+ // but aren't necessarily from the same base node, so
+ // cycles possible (e.g. via indexed store).
+ // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
+ // non-indexed stores). Not constant on all targets (e.g. ARM)
+ // and so can participate in a cycle.
+ for (unsigned j = 0; j < N->getNumOperands(); ++j)
+ Worklist.push_back(N->getOperand(j).getNode());
+ }
+ // Search through DAG. We can stop early if we find a store node.
+ for (unsigned i = 0; i < NumStores; ++i)
+ if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
+ Max)) {
+ // If the search bails out, record the StoreNode and RootNode in the
+ // StoreRootCountMap. If we have seen the pair more times than the limit,
+ // we won't add the StoreNode into the StoreNodes set again.
+ if (Visited.size() >= Max) {
+ auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
+ if (RootCount.first == RootNode)
+ RootCount.second++;
+ else
+ RootCount = {RootNode, 1};
+ }
+ return false;
+ }
+ return true;
+}
+
+unsigned
+DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
+ int64_t ElementSizeBytes) const {
+ while (true) {
+ // Find a store past the width of the first store.
+ size_t StartIdx = 0;
+ while ((StartIdx + 1 < StoreNodes.size()) &&
+ StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
+ StoreNodes[StartIdx + 1].OffsetFromBase)
+ ++StartIdx;
+
+ // Bail if we don't have enough candidates to merge.
+ if (StartIdx + 1 >= StoreNodes.size())
+ return 0;
+
+ // Trim stores that overlapped with the first store.
+ if (StartIdx)
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive store memory address.
+ unsigned NumConsecutiveStores = 1;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ NumConsecutiveStores = i + 1;
+ }
+ if (NumConsecutiveStores > 1)
+ return NumConsecutiveStores;
+
+ // There are no consecutive stores at the start of the list.
+ // Remove the first store and try again.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ }
+}
+
+bool DAGCombiner::tryStoreMergeOfConstants(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *RootNode, bool AllowVectors) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Store the constants into memory as one consecutive store.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ Align FirstStoreAlign = FirstInChain->getAlign();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
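+ // Index of the first zero-valued element that follows a non-zero one. If
+ // a merge attempt fails below, candidates are only skipped up to this
+ // point, since a run starting at a zero may still merge into a cheap
+ // all-zero store.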
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isZero();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
+ }
+ NonZero |= !IsElementZero;
+
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ unsigned IsFast = 0;
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = false;
+ LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
+ }
+ }
+
+ // We only use vectors if the target allows it and the function is not
+ // marked with the noimplicitfloat attribute.
+ if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
+ AllowVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
+ }
+
+ bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+ bool UseTrunc = LastIntegerTrunc && !UseVector;
+
+ // Check if we found a legal integer type that creates a meaningful
+ // merge.
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
+
+ MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ /*IsConstantSrc*/ true,
+ UseVector, UseTrunc);
+
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ return MadeChange;
+}
+
+bool DAGCombiner::tryStoreMergeOfExtracts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *RootNode) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Loop over the consecutive stores, attempting to merge them.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ Align FirstStoreAlign = FirstInChain->getAlign();
+ unsigned NumStoresToMerge = 1;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ unsigned IsFast = 0;
+
+ // Break early when size is too large to be legal.
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
+
+ // Check if we found a legal vector type that creates a meaningful
+ // merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ continue;
+ }
+
+ MadeChange |= mergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
+ /*UseVector*/ true, /*UseTrunc*/ false);
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ }
+ return MadeChange;
+}
+
+bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *RootNode, bool AllowVectors,
+ bool IsNonTemporalStore,
+ bool IsNonTemporalLoad) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ LoadSDNode *Ld = cast<LoadSDNode>(Val);
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
+ // If this is not the first ptr that we check.
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
+ // The base ptr must be the same.
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
+ }
+
+ while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ Align RequiredAlignment;
+ bool NeedRotate = false;
+ if (LoadNodes.size() == 2) {
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother merging.
+ if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+ break;
+ }
+ // If the loads are reversed, see if we can rotate the halves into place.
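+ // For example (illustrative, i32 elements):
+ //   st (ld p+4), q;  st (ld p), q+4
+ // can become
+ //   st (rotl (i64 ld p), 32), q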
+ int64_t Offset0 = LoadNodes[0].OffsetFromBase;
+ int64_t Offset1 = LoadNodes[1].OffsetFromBase;
+ EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
+ if (Offset0 - Offset1 == ElementSizeBytes &&
+ (hasOperation(ISD::ROTL, PairVT) ||
+ hasOperation(ISD::ROTR, PairVT))) {
+ std::swap(LoadNodes[0], LoadNodes[1]);
+ NeedRotate = true;
+ }
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ Align FirstStoreAlign = FirstInChain->getAlign();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the store node array.
+
+ unsigned LastConsecutiveLoad = 1;
+
+ // These variables refer to a size, not an index into the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ int64_t StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue LoadChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != LoadChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ unsigned IsFastSt = 0;
+ unsigned IsFastLd = 0;
+ // Don't try vector types if we need a rotate. We may still fail the
+ // legality checks for the integer type, but we can't handle the rotate
+ // case with vectors.
+ // FIXME: We could use a shuffle in place of the rotate.
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
+
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
+ }
+ }
+
+ // Only use vector types if the vector type is larger than the integer
+ // type. If they are the same, use integers.
+ bool UseVectorTy =
+ LastLegalVectorType > LastLegalIntegerType && AllowVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to a location (index)
+ // while NumElem refers to an element count.
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
+ Align FirstLoadAlign = FirstLoad->getAlign();
+
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment of either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags LdMMOFlags =
+ isDereferenceable ? MachineMemOperand::MODereferenceable
+ : MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
+
+ MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
+ ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+
+ StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad = DAG.getLoad(
+ JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
+ SDValue StoreOp = NewLoad;
+ if (NeedRotate) {
+ unsigned LoadWidth = ElementSizeBytes * 8 * 2;
+ assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
+ "Unexpected type for rotate-able load pair");
+ SDValue RotAmt =
+ DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
+ // Target can convert to the identical ROTR if it does not have ROTL.
+ StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
+ }
+ NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
+ CanReusePtrInfo ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstStoreAS),
+ FirstStoreAlign, StMMOFlags);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+ FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), JointMemOpVT,
+ FirstLoadAlign, LdMMOFlags);
+ NewStore = DAG.getTruncStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ CanReusePtrInfo ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstStoreAS),
+ JointMemOpVT, FirstInChain->getAlign(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+ // Replace all stores with the new store. Recursively remove corresponding
+ // values if they are no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
+ MadeChange = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ return MadeChange;
+}
+
+bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+ return false;
+
+ // TODO: Extend this function to merge stores of scalable vectors.
+ // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
+ // store since we know <vscale x 16 x i8> is exactly twice as large as
+ // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
+ EVT MemVT = St->getMemoryVT();
+ if (MemVT.isScalableVT())
+ return false;
+ if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
+
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
+ return false;
+
+ // Do not bother looking at stored values that are not constants, loads, or
+ // extracted vector elements.
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
+ const StoreSource StoreSrc = getStoreSource(StoredVal);
+ if (StoreSrc == StoreSource::Unknown)
+ return false;
+
+ SmallVector<MemOpLink, 8> StoreNodes;
+ SDNode *RootNode;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes, RootNode);
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the
+ // base pointer.
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
+
+ bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
+ cast<LoadSDNode>(StoredVal)->isNonTemporal();
+
+ // Store merging attempts to merge the lowest stores first. This generally
+ // works out well: if the merge succeeds, the remaining stores are checked
+ // after the first collection of stores is merged. However, in the
+ // case that a non-mergeable store is found first, e.g., {p[-2],
+ // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
+ // mergeable cases. To prevent this, we prune such stores from the
+ // front of StoreNodes here.
+ bool MadeChange = false;
+ while (StoreNodes.size() > 1) {
+ unsigned NumConsecutiveStores =
+ getConsecutiveStores(StoreNodes, ElementSizeBytes);
+ // There are no more stores in the list to examine.
+ if (NumConsecutiveStores == 0)
+ return MadeChange;
+
+ // We have at least 2 consecutive stores. Try to merge them.
+ assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
+ switch (StoreSrc) {
+ case StoreSource::Constant:
+ MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode, AllowVectors);
+ break;
+
+ case StoreSource::Extract:
+ MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode);
+ break;
+
+ case StoreSource::Load:
+ MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode, AllowVectors,
+ IsNonTemporalStore, IsNonTemporalLoad);
+ break;
+
+ default:
+ llvm_unreachable("Unhandled store source type");
+ }
+ }
+ return MadeChange;
+}
+
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+ SDLoc SL(ST);
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+ ST->getMemOperand());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+ MVT::Other, ST->getChain(), ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(ST, Token, false);
+}
+
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+ SDValue Value = ST->getValue();
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
+ SDLoc DL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+
+ SDValue Tmp;
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::bf16:
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ return SDValue();
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
+ return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+ }
+
+ return SDValue();
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ ST->isSimple()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, DL, Tmp,
+ Ptr, ST->getMemOperand());
+ }
+
+ if (ST->isSimple() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
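+ // For example (illustrative): storing f64 1.0 (bit pattern
+ // 0x3FF0000000000000) becomes "store i32 0, Ptr" followed by
+ // "store i32 0x3FF00000, Ptr+4" on a little-endian target.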
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ St0, St1);
+ }
+
+ return SDValue();
+ }
+}
+
+// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
+//
+// If a store of a load with an element inserted into it has no other
+// uses in between the chain, then we can consider the vector store
+// dead and replace it with just the single scalar element store.
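+// For example (illustrative): storing (insert_vector_elt (load <4 x i32> p),
+// x, 2) back to p can be replaced by a single "store i32 x, p+8".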
+SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
+ SDLoc DL(ST);
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Chain = ST->getChain();
+ if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
+ return SDValue();
+
+ SDValue Elt = Value.getOperand(1);
+ SDValue Idx = Value.getOperand(2);
+
+ // If the element isn't byte sized or is implicitly truncated then we can't
+ // compute an offset.
+ EVT EltVT = Elt.getValueType();
+ if (!EltVT.isByteSized() ||
+ EltVT != Value.getOperand(0).getValueType().getVectorElementType())
+ return SDValue();
+
+ auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
+ if (!Ld || Ld->getBasePtr() != Ptr ||
+ ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
+ !ISD::isNormalStore(ST) ||
+ Ld->getAddressSpace() != ST->getAddressSpace() ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
+ return SDValue();
+
+ unsigned IsFast;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ Elt.getValueType(), ST->getAddressSpace(),
+ ST->getAlign(), ST->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+ EVT PtrVT = Ptr.getValueType();
+
+ SDValue Offset =
+ DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
+ DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
+ MachinePointerInfo PointerInfo(ST->getAddressSpace());
+
+ // If the offset is a known constant then try to recover the pointer
+ // info
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
+ NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(COffset), DL);
+ PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
+ }
+
+ return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
+ ST->getMemOperand()->getFlags());
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
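+ // For example (illustrative): (store (i32 (bitcast f32 X)), Ptr) can become
+ // (store f32 X, Ptr) when the f32 store is legal and deemed beneficial.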
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ EVT SVT = Value.getOperand(0).getValueType();
+ // If the store is volatile, we only want to change the store type if the
+ // resulting store is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (((!LegalOperations && ST->isSimple()) ||
+ TLI.isOperationLegal(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+ DAG, *ST->getMemOperand())) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getMemOperand());
+ }
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.isUndef() && ST->isUnindexed())
+ return Chain;
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
+ if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
+ if (*Alignment > ST->getAlign() &&
+ isAligned(*Alignment, ST->getSrcValueOffset())) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
+ ST->getMemoryVT(), *Alignment,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ // NewStore will always be N as we are only refining the alignment
+ assert(NewStore.getNode() == N);
+ (void)NewStore;
+ }
+ }
+ }
+
+ // Try transforming a pair floating point load / store ops to integer
+ // load / store ops.
+ if (SDValue NewST = TransformFPLoadStorePair(N))
+ return NewST;
+
+ // Try transforming several stores into STORE (BSWAP).
+ if (SDValue Store = mergeTruncStores(ST))
+ return Store;
+
+ if (ST->isUnindexed()) {
+ // Walk up chain skipping non-aliasing memory nodes, on this store and any
+ // adjacent stores.
+ if (findBetterNeighborChains(ST)) {
+ // replaceStoreChain uses CombineTo, which handled all of the worklist
+ // manipulation. Return the original node to not do anything else.
+ return SDValue(ST, 0);
+ }
+ Chain = ST->getChain();
+ }
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
+ // Convert a truncating store of an extension into a standard store.
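+ // For example (illustrative): (truncstore (zext i16 X to i32), Ptr, i16)
+ // becomes (store i16 X, Ptr) when an i16 store is legal or custom.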
+ if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
+ Value.getOpcode() == ISD::SIGN_EXTEND ||
+ Value.getOpcode() == ISD::ANY_EXTEND) &&
+ Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getMemOperand());
+
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits());
+
+ // See if we can simplify the operation with SimplifyDemandedBits, which
+ // only works if the value has a single use.
+ AddToWorklist(Value.getNode());
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (N is deleted). SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit
+ // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+
+ // Otherwise, see if we can simplify the input to this truncstore with
+ // knowledge that only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ if (SDValue Shorter =
+ TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
+
+ // If we're storing a truncated constant, see if we can simplify it.
+ // TODO: Move this to targetShrinkDemandedConstant?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
+ if (!Cst->isOpaque()) {
+ const APInt &CValue = Cst->getAPIntValue();
+ APInt NewVal = CValue & TruncDemandedBits;
+ if (NewVal != CValue) {
+ SDValue Shorter =
+ DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+ }
+ }
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
+ // TODO: Add big-endian truncate support with test coverage.
+ // TODO: Can relax for unordered atomics (see D66309)
+ SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
+ ? peekThroughTruncates(Value)
+ : Value;
+ if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && ST->isSimple() &&
+ Ld->getAddressSpace() == ST->getAddressSpace() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // Try scalarizing vector stores of loads where we only change one element
+ if (SDValue NewST = replaceStoreOfInsertLoad(ST))
+ return NewST;
+
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST->isUnindexed() && ST->isSimple() &&
+ ST1->isUnindexed() && ST1->isSimple()) {
+ if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
+ ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
+ ST->getAddressSpace() == ST1->getAddressSpace()) {
+ // If this is a store followed by a store with the same value to the
+ // same location, then the store is dead/noop.
+ return Chain;
+ }
+
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef() &&
+ ST->getAddressSpace() == ST1->getAddressSpace()) {
+ // If the smaller of the two stores has a scalable vector type and the
+ // bigger one a fixed type, we cannot allow removal of the scalable
+ // store, because we do not know its final size at compile time.
+ if (ST->getMemoryVT().isScalableVector() ||
+ ST1->getMemoryVT().isScalableVector()) {
+ if (ST1->getBasePtr() == Ptr &&
+ TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
+ ST->getMemoryVT().getStoreSize())) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+ } else {
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+ // If the preceding store writes to a subset of the current store's
+ // location and no other node is chained to that store, we can
+ // effectively drop that preceding store. Do not remove stores to undef
+ // as they may be used as data sinks.
+ if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
+ ChainBase,
+ ST1->getMemoryVT().getFixedSizeInBits())) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+ }
+ }
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND ||
+ Value.getOpcode() == ISD::TRUNCATE) &&
+ Value->hasOneUse() && ST->isUnindexed() &&
+ TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT(), LegalOperations)) {
+ return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
+ }
+
+ // Always perform this optimization before types are legal. If the target
+ // prefers, also try this after legalization to catch stores that were created
+ // by intrinsics or other nodes.
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
+ while (true) {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = mergeConsecutiveStores(ST);
+ if (!Changed) break;
+ // Return N as merge only uses CombineTo and no worklist clean
+ // up is necessary.
+ if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
+ return SDValue(N, 0);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ //
+ // Make sure to do this only after attempting to merge stores in order to
+ // avoid changing the types of some subset of stores due to visit order,
+ // preventing their merging.
+ if (isa<ConstantFPSDNode>(ST->getValue())) {
+ if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+ return NewSt;
+ }
+
+ if (SDValue NewSt = splitMergedValStore(ST))
+ return NewSt;
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
+ const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
+ if (!LifetimeEnd->hasOffset())
+ return SDValue();
+
+ const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
+ LifetimeEnd->getOffset(), false);
+
+ // We walk up the chains to find stores.
+ SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.pop_back_val();
+ if (!Chain.hasOneUse())
+ continue;
+ switch (Chain.getOpcode()) {
+ case ISD::TokenFactor:
+ for (unsigned Nops = Chain.getNumOperands(); Nops;)
+ Chains.push_back(Chain.getOperand(--Nops));
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the node.
+ if (!mayAlias(Chain.getNode(), N))
+ Chains.push_back(Chain.getOperand(0));
+ break;
+ case ISD::STORE: {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (!ST->isSimple() || ST->isIndexed())
+ continue;
+ const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
+ // The bounds of a scalable store are not known until runtime, so this
+ // store cannot be elided.
+ if (StoreSize.isScalable())
+ continue;
+ const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+ // If we store purely within object bounds just before its lifetime ends,
+ // we can remove the store.
+ if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+ StoreSize.getFixedValue() * 8)) {
+ LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+ dbgs() << "\nwithin LIFETIME_END of : ";
+ LifetimeEndBase.dump(); dbgs() << "\n");
+ CombineTo(ST, ST->getChain());
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// For the store instruction sequence below, the F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting other merged stores can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// These store patterns are commonly seen from the simple code snippet below
+/// when only std::make_pair(...) is SROA-transformed before being inlined into
+/// hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ // Can't change the number of memory accesses for a volatile store or break
+ // atomicity for an atomic one.
+ if (!ST->isSimple())
+ return SDValue();
+
+ SDValue Val = ST->getValue();
+ SDLoc DL(ST);
+
+ // Match OR operand.
+ if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
+ return SDValue();
+
+ // Match SHL operand and get Lower and Higher parts of Val.
+ SDValue Op1 = Val.getOperand(0);
+ SDValue Op2 = Val.getOperand(1);
+ SDValue Lo, Hi;
+ if (Op1.getOpcode() != ISD::SHL) {
+ std::swap(Op1, Op2);
+ if (Op1.getOpcode() != ISD::SHL)
+ return SDValue();
+ }
+ Lo = Op2;
+ Hi = Op1.getOperand(0);
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Match shift amount to HalfValBitSize.
+ unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
+ return SDValue();
+
+ // Lo and Hi are zero-extended from an integer whose size is at most
+ // HalfValBitSize (e.g. from i32 or smaller to i64).
+ if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
+ !Lo.getOperand(0).getValueType().isScalarInteger() ||
+ Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
+ Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
+ !Hi.getOperand(0).getValueType().isScalarInteger() ||
+ Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
+ return SDValue();
+
+ // Use the EVTs of the low and high parts before the bitcast as the
+ // inputs to the target query.
+ EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Lo.getOperand(0).getValueType()
+ : Lo.getValueType();
+ EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Hi.getOperand(0).getValueType()
+ : Hi.getValueType();
+ if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return SDValue();
+
+ // Start to split store.
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ // Change the sizes of Lo and Hi's value types to HalfValBitSize.
+ EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ // Lower value store.
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
+ // Higher value store.
+ SDValue St1 = DAG.getStore(
+ St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ return St1;
+}
+
+// Merge an insertion into an existing shuffle:
+// (insert_vector_elt (vector_shuffle X, Y, Mask),
+//   (extract_vector_elt X, N), InsIndex)
+// --> (vector_shuffle X, Y, NewMask)
+// and variations where shuffle operands may be CONCAT_VECTORS.
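+// For example (illustrative): with Mask = <0, 4, 2, 6>, inserting
+// (extract_vector_elt X, 3) at index 1 yields NewMask = <0, 3, 2, 6>.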
+static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &NewMask, SDValue Elt,
+ unsigned InsIndex) {
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Elt.getOperand(1)))
+ return false;
+
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
+ SDValue InsertVal0 = Elt.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
+
+ // Peek through concat_vector.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
+ }
+
+ // Make sure we went through all the elements and did not screw up index
+ // computation.
+ assert(CurrentArgOffset == ArgOffset);
+ }
+ }
+
+ // If we failed to find a match, see if we can replace an UNDEF shuffle
+ // operand.
+ if (ElementOffset == -1) {
+ if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
+ return false;
+ ElementOffset = Mask.size();
+ Y = InsertVal0;
+ }
+
+ NewMask.assign(Mask.begin(), Mask.end());
+ NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
+ assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
+ "NewMask[InsIndex] is out of bound");
+ return true;
+}
+
+// Merge an insertion into an existing shuffle:
+// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+// InsIndex)
+// --> (vector_shuffle X, Y) and variations where shuffle operands may be
+// CONCAT_VECTORS.
+SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected insert_vector_elt");
+ SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
+
+ auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
+ if (!SVN || !Vec.hasOneUse())
+ return SDValue();
+
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ SmallVector<int, 16> NewMask(Mask);
+ if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
+ SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
+ Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+
+ return SDValue();
+}
+
+// Convert a disguised subvector insertion into a shuffle:
+// insert_vector_elt V, (bitcast X from vector type), IdxC -->
+// bitcast(shuffle (bitcast V), (extended X), Mask)
+// Note: We do not use an insert_subvector node because that requires a
+// legal subvector type.
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected insert_vector_elt");
+ SDValue InsertVal = N->getOperand(1);
+
+ if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
+ !InsertVal.getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ SDValue SubVec = InsertVal.getOperand(0);
+ SDValue DestVec = N->getOperand(0);
+ EVT SubVecVT = SubVec.getValueType();
+ EVT VT = DestVec.getValueType();
+ unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ // If the source only has a single vector element, the cost of creating and
+ // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
+ if (NumSrcElts == 1)
+ return SDValue();
+ unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
+ unsigned NumMaskVals = ExtendRatio * NumSrcElts;
+
+ // Step 1: Create a shuffle mask that implements this insert operation. The
+ // vector that we are inserting into will be operand 0 of the shuffle, so
+ // those elements are just 'i'. The inserted subvector is in the first
+ // positions of operand 1 of the shuffle. Example:
+ // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
+ SmallVector<int, 16> Mask(NumMaskVals);
+ for (unsigned i = 0; i != NumMaskVals; ++i) {
+ if (i / NumSrcElts == InsIndex)
+ Mask[i] = (i % NumSrcElts) + NumMaskVals;
+ else
+ Mask[i] = i;
+ }
+
+ // Bail out if the target can not handle the shuffle we want to create.
+ EVT SubVecEltVT = SubVecVT.getVectorElementType();
+ EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
+ if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
+ return SDValue();
+
+ // Step 2: Create a wide vector from the inserted source vector by appending
+ // undefined elements. This is the same size as our destination vector.
+ SDLoc DL(N);
+ SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
+ ConcatOps[0] = SubVec;
+ SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
+
+ // Step 3: Shuffle in the padded subvector.
+ SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
+ SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
+ AddToWorklist(PaddedSubV.getNode());
+ AddToWorklist(DestVecBC.getNode());
+ AddToWorklist(Shuf.getNode());
+ return DAG.getBitcast(VT, Shuf);
+}
+
+// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
+// possible and the new load will be fast. We use more loads but fewer shuffles
+// and inserts.
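+// For example (an illustrative sketch; p is a hypothetical base pointer and
+// the types are arbitrary):
+//   insert_vector_elt (vector_shuffle<u,0,1,2> (v4i32 load [p]), undef),
+//                     (i32 load [p - 4]), 0
+//   --> v4i32 load [p - 4]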
+SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
+ EVT VT = N->getValueType(0);
+
+  // InsIndex is expected to be the first or last lane.
+ if (!VT.isFixedLengthVector() ||
+ (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
+ return SDValue();
+
+ // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
+ // depending on the InsIndex.
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
+ SDValue Scalar = N->getOperand(1);
+ if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
+ return InsIndex == P.index() || P.value() < 0 ||
+ (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
+ (InsIndex == VT.getVectorNumElements() - 1 &&
+ P.value() == (int)P.index() + 1);
+ }))
+ return SDValue();
+
+ // We optionally skip over an extend so long as both loads are extended in the
+ // same way from the same type.
+ unsigned Extend = 0;
+ if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
+ Scalar.getOpcode() == ISD::SIGN_EXTEND ||
+ Scalar.getOpcode() == ISD::ANY_EXTEND) {
+ Extend = Scalar.getOpcode();
+ Scalar = Scalar.getOperand(0);
+ }
+
+ auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
+ if (!ScalarLoad)
+ return SDValue();
+
+ SDValue Vec = Shuffle->getOperand(0);
+ if (Extend) {
+ if (Vec.getOpcode() != Extend)
+ return SDValue();
+ Vec = Vec.getOperand(0);
+ }
+ auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
+ if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
+ return SDValue();
+
+ int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
+ if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
+ !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
+ ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
+ ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
+ return SDValue();
+
+  // Check that the offset between the pointers is such that the two loads
+  // form a single contiguous load.
+ if (InsIndex == 0) {
+ if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
+ -1))
+ return SDValue();
+ } else {
+ if (!DAG.areNonVolatileConsecutiveLoads(
+ VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
+ return SDValue();
+ }
+
+ // And that the new unaligned load will be fast.
+ unsigned IsFast = 0;
+ Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ Vec.getValueType(), VecLoad->getAddressSpace(),
+ NewAlign, VecLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
+ // Calculate the new Ptr and create the new load.
+ SDLoc DL(N);
+ SDValue Ptr = ScalarLoad->getBasePtr();
+ if (InsIndex != 0)
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
+ DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
+ MachinePointerInfo PtrInfo =
+ InsIndex == 0 ? ScalarLoad->getPointerInfo()
+ : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
+
+ SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
+ ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
+ DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
+ return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ SDLoc DL(N);
+
+ EVT VT = InVec.getValueType();
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+
+  // Inserting into an out-of-bounds element is undefined.
+ if (IndexC && VT.isFixedLengthVector() &&
+ IndexC->getZExtValue() >= VT.getVectorNumElements())
+ return DAG.getUNDEF(VT);
+
+ // Remove redundant insertions:
+ // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
+ return InVec;
+
+ if (!IndexC) {
+    // If this is a variable insert into an undef vector, it might be better
+    // to splat:
+ // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
+ return DAG.getSplat(VT, DL, InVal);
+ return SDValue();
+ }
+
+ if (VT.isScalableVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // We must know which element is being inserted for folds below here.
+ unsigned Elt = IndexC->getZExtValue();
+
+ // Handle <1 x ???> vector insertion special cases.
+ if (NumElts == 1) {
+ // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVal.getOperand(0).getValueType() == VT &&
+ isNullConstant(InVal.getOperand(1)))
+ return InVal.getOperand(0);
+ }
+
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
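+  // E.g. (illustrative):
+  //   (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
+  //   -> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3)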
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt = InVec.getConstantOperandVal(2);
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
+ if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
+ return Shuf;
+
+ if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+ return Shuf;
+
+ if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
+ return Shuf;
+
+ // Attempt to convert an insert_vector_elt chain into a legal build_vector.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+    // Single-element vector - we don't need to recurse.
+ if (NumElts == 1)
+ return DAG.getBuildVector(VT, DL, {InVal});
+
+ // If we haven't already collected the element, insert into the op list.
+ EVT MaxEltVT = InVal.getValueType();
+ auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
+ unsigned Idx) {
+ if (!Ops[Idx]) {
+ Ops[Idx] = Elt;
+ if (VT.isInteger()) {
+ EVT EltVT = Elt.getValueType();
+ MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
+ }
+ }
+ };
+
+ // Ensure all the operands are the same value type, fill any missing
+ // operands with UNDEF and create the BUILD_VECTOR.
+ auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
+ assert(Ops.size() == NumElts && "Unexpected vector size");
+ for (SDValue &Op : Ops) {
+ if (Op)
+ Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
+ else
+ Op = DAG.getUNDEF(MaxEltVT);
+ }
+ return DAG.getBuildVector(VT, DL, Ops);
+ };
+
+ SmallVector<SDValue, 8> Ops(NumElts, SDValue());
+ Ops[Elt] = InVal;
+
+    // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
+ for (SDValue CurVec = InVec; CurVec;) {
+ // UNDEF - build new BUILD_VECTOR from already inserted operands.
+ if (CurVec.isUndef())
+ return CanonicalizeBuildVector(Ops);
+
+ // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
+ if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
+ for (unsigned I = 0; I != NumElts; ++I)
+ AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
+ return CanonicalizeBuildVector(Ops);
+ }
+
+ // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
+ if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
+ AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
+ return CanonicalizeBuildVector(Ops);
+ }
+
+ // INSERT_VECTOR_ELT - insert operand and continue up the chain.
+ if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
+ if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
+ if (CurIdx->getAPIntValue().ult(NumElts)) {
+ unsigned Idx = CurIdx->getZExtValue();
+ AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
+
+ // Found entire BUILD_VECTOR.
+ if (all_of(Ops, [](SDValue Op) { return !!Op; }))
+ return CanonicalizeBuildVector(Ops);
+
+ CurVec = CurVec->getOperand(0);
+ continue;
+ }
+
+ // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
+ // update the shuffle mask (and second operand if we started with unary
+ // shuffle) and create a new legal shuffle.
+ if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
+ auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
+ SDValue LHS = SVN->getOperand(0);
+ SDValue RHS = SVN->getOperand(1);
+ SmallVector<int, 16> Mask(SVN->getMask());
+ bool Merged = true;
+ for (auto I : enumerate(Ops)) {
+ SDValue &Op = I.value();
+ if (Op) {
+ SmallVector<int, 16> NewMask;
+ if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
+ Merged = false;
+ break;
+ }
+ Mask = std::move(NewMask);
+ }
+ }
+ if (Merged)
+ if (SDValue NewShuffle =
+ TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
+ return NewShuffle;
+ }
+
+ // If all insertions are zero value, try to convert to AND mask.
+ // TODO: Do this for -1 with OR mask?
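+      // E.g. (illustrative, assuming the inner insert has a single use):
+      //   (insert_vector_elt (insert_vector_elt v4i32:X, 0, 1), 0, 3)
+      //   --> (and X, (build_vector -1, 0, -1, 0))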
+ if (!LegalOperations && llvm::isNullConstant(InVal) &&
+ all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
+ count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
+ SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
+ SmallVector<SDValue, 8> Mask(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ Mask[I] = Ops[I] ? Zero : AllOnes;
+ return DAG.getNode(ISD::AND, DL, VT, CurVec,
+ DAG.getBuildVector(VT, DL, Mask));
+ }
+
+ // Failed to find a match in the chain - bail.
+ break;
+ }
+
+ // See if we can fill in the missing constant elements as zeros.
+ // TODO: Should we do this for any constant?
+ APInt DemandedZeroElts = APInt::getZero(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ DemandedZeroElts.setBit(I);
+
+ if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
+ SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
+ : DAG.getConstantFP(0, DL, MaxEltVT);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ Ops[I] = Zero;
+
+ return CanonicalizeBuildVector(Ops);
+ }
+ }
+
+ return SDValue();
+}
+
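+// Given an extract_vector_elt of a (simple) vector load, try to replace it
+// with a narrow scalar load of just the extracted element, e.g. (an
+// illustrative case; p is a hypothetical base pointer):
+//   (i32 extract_vector_elt (v4i32 load [p]), 2) --> (i32 load [p + 8])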
+SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad) {
+ assert(OriginalLoad->isSimple());
+
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltVT.isByteSized())
+ return SDValue();
+
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
+ Align Alignment = OriginalLoad->getAlign();
+ MachinePointerInfo MPI;
+ SDLoc DL(EVE);
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
+ } else {
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
+ }
+
+ unsigned IsFast = 0;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
+ SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
+ InVecVT, EltNo);
+
+ // We are replacing a vector load with a scalar load. The new load must have
+ // identical memory op ordering to the original.
+ SDValue Load;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType =
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
+ } else {
+ // The result type is narrower or the same width as the vector element
+ Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
+ Alignment, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
+ else
+ Load = DAG.getBitcast(ResultVT, Load);
+ }
+ ++OpsNarrowed;
+ return Load;
+}
+
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
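+/// For example (illustrative, assuming the target opts in via
+/// shouldScalarizeBinop):
+///   (i32 extract_vector_elt (add v4i32:X, <1,2,3,4>), 2)
+///   --> (add (i32 extract_vector_elt X, 2), 3)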
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+ bool LegalOperations) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+ Vec->getNumValues() != 1)
+ return SDValue();
+
+ // Targets may want to avoid this to prevent an expensive register transfer.
+ if (!TLI.shouldScalarizeBinop(Vec))
+ return SDValue();
+
+ // Extracting an element of a vector constant is constant-folded, so this
+ // transform is just replacing a vector op with a scalar op while moving the
+ // extract.
+ SDValue Op0 = Vec.getOperand(0);
+ SDValue Op1 = Vec.getOperand(1);
+ APInt SplatVal;
+ if (isAnyConstantBuildVector(Op0, true) ||
+ ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
+ isAnyConstantBuildVector(Op1, true) ||
+ ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
+ // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+ // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+ SDLoc DL(ExtElt);
+ EVT VT = ExtElt->getValueType(0);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+ }
+
+ return SDValue();
+}
+
+// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
+// recursively analyse all of its users and try to model them as bit
+// sequence extractions. If all of them agree on the new, narrower element
+// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
+// new element type, do so now.
+// This is mainly useful to recover from legalization that scalarized
+// the vector as wide elements; here we try to rebuild it with narrower
+// elements.
+//
+// Some more nodes could be modelled if that helps cover interesting patterns.
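+//
+// For example (an illustrative little-endian case):
+//   t = extract_vector_elt v2i64:X, 0
+//   a = trunc t to i32
+//   b = trunc (srl t, 32) to i32
+// can be rebuilt, when a and b are only used by BUILD_VECTORs, as:
+//   a = extract_vector_elt (v4i32 bitcast X), 0
+//   b = extract_vector_elt (v4i32 bitcast X), 1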
+bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
+ SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+  // Performing this optimization earlier may cause legalization cycles.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return false;
+
+ // TODO: Add support for big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return false;
+
+ SDValue VecOp = N->getOperand(0);
+ EVT VecVT = VecOp.getValueType();
+ assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
+
+ // We must start with a constant extraction index.
+ auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!IndexC)
+ return false;
+
+ assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
+         "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
+
+ // TODO: deal with the case of implicit anyext of the extraction.
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ EVT ScalarVT = N->getValueType(0);
+ if (VecVT.getScalarType() != ScalarVT)
+ return false;
+
+ // TODO: deal with the cases other than everything being integer-typed.
+ if (!ScalarVT.isScalarInteger())
+ return false;
+
+ struct Entry {
+ SDNode *Producer;
+
+ // Which bits of VecOp does it contain?
+ unsigned BitPos;
+ int NumBits;
+ // NOTE: the actual width of \p Producer may be wider than NumBits!
+
+ Entry(Entry &&) = default;
+ Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
+ : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
+
+ Entry() = delete;
+ Entry(const Entry &) = delete;
+ Entry &operator=(const Entry &) = delete;
+ Entry &operator=(Entry &&) = delete;
+ };
+ SmallVector<Entry, 32> Worklist;
+ SmallVector<Entry, 32> Leafs;
+
+ // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
+ Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
+ /*NumBits=*/VecEltBitWidth);
+
+ while (!Worklist.empty()) {
+ Entry E = Worklist.pop_back_val();
+ // Does the node not even use any of the VecOp bits?
+ if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
+ E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
+      return false; // Let's allow the other combines to clean this up first.
+ // Did we fail to model any of the users of the Producer?
+ bool ProducerIsLeaf = false;
+ // Look at each user of this Producer.
+ for (SDNode *User : E.Producer->uses()) {
+ switch (User->getOpcode()) {
+ // TODO: support ISD::BITCAST
+ // TODO: support ISD::ANY_EXTEND
+ // TODO: support ISD::ZERO_EXTEND
+ // TODO: support ISD::SIGN_EXTEND
+ case ISD::TRUNCATE:
+        // Truncation simply means we keep position, but extract fewer bits.
+ Worklist.emplace_back(User, E.BitPos,
+ /*NumBits=*/User->getValueSizeInBits(0));
+ break;
+ // TODO: support ISD::SRA
+ // TODO: support ISD::SHL
+ case ISD::SRL:
+ // We should be shifting the Producer by a constant amount.
+ if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
+ User->getOperand(0).getNode() == E.Producer && ShAmtC) {
+ // Logical right-shift means that we start extraction later,
+ // but stop it at the same position we did previously.
+ unsigned ShAmt = ShAmtC->getZExtValue();
+ Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
+ break;
+ }
+ [[fallthrough]];
+ default:
+        // We cannot model this user of the Producer, which means the current
+        // Producer will be an ISD::EXTRACT_VECTOR_ELT.
+ ProducerIsLeaf = true;
+ // Profitability check: all users that we can not model
+ // must be ISD::BUILD_VECTOR's.
+ if (User->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ break;
+ }
+ }
+ if (ProducerIsLeaf)
+ Leafs.emplace_back(std::move(E));
+ }
+
+ unsigned NewVecEltBitWidth = Leafs.front().NumBits;
+
+  // If we are still at the same element granularity, give up.
+ if (NewVecEltBitWidth == VecEltBitWidth)
+ return false;
+
+ // The vector width must be a multiple of the new element width.
+ if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
+ return false;
+
+ // All leafs must agree on the new element width.
+  // All leafs must not expect any "padding" bits on top of that width.
+  // All leafs must start extraction at a multiple of that width.
+ if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
+ return (unsigned)E.NumBits == NewVecEltBitWidth &&
+ E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
+ E.BitPos % NewVecEltBitWidth == 0;
+ }))
+ return false;
+
+ EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
+ VecVT.getSizeInBits() / NewVecEltBitWidth);
+
+ if (LegalTypes &&
+ !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
+ return false;
+
+ if (LegalOperations &&
+ !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
+ return false;
+
+ SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
+ for (const Entry &E : Leafs) {
+ SDLoc DL(E.Producer);
+ unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
+ assert(NewIndex < NewVecVT.getVectorNumElements() &&
+ "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
+ SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
+ DAG.getVectorIdxConstant(NewIndex, DL));
+ CombineTo(E.Producer, V);
+ }
+
+ return true;
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue VecOp = N->getOperand(0);
+ SDValue Index = N->getOperand(1);
+ EVT ScalarVT = N->getValueType(0);
+ EVT VecVT = VecOp.getValueType();
+ if (VecOp.isUndef())
+ return DAG.getUNDEF(ScalarVT);
+
+  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ SDLoc DL(N);
+ if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ Index == VecOp.getOperand(2)) {
+ SDValue Elt = VecOp.getOperand(1);
+ return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
+ }
+
+  // (vextract (scalar_to_vector val), 0) -> val
+ if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Only 0'th element of SCALAR_TO_VECTOR is defined.
+ if (DAG.isKnownNeverZero(Index))
+ return DAG.getUNDEF(ScalarVT);
+
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = VecOp.getOperand(0);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
+ InOp.getValueType().bitsGT(ScalarVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
+ }
+ return InOp;
+ }
+
+ // extract_vector_elt of out-of-bounds element -> UNDEF
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && VecVT.isFixedLengthVector() &&
+ IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
+ return DAG.getUNDEF(ScalarVT);
+
+ // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
+ if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
+ return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ VecOp.getOperand(0), Index));
+ }
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
+ VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
+ TLI.isTypeLegal(VecVT)) {
+ assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
+ VecVT.isFixedLengthVector()) &&
+ "BUILD_VECTOR used for scalable vectors");
+ unsigned IndexVal =
+ VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
+ SDValue Elt = VecOp.getOperand(IndexVal);
+ EVT InEltVT = Elt.getValueType();
+
+ if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
+ isNullConstant(Elt)) {
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (ScalarVT == InEltVT)
+ return Elt;
+
+      // TODO: It may be useful to truncate, if free, when the build_vector
+      // implicitly converts.
+ }
+ }
+
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
+
+ if (VecVT.isScalableVector())
+ return SDValue();
+
+ // All the code from this point onwards assumes fixed width vectors, but it's
+ // possible that some of the combinations could be made to work for scalable
+ // vectors too.
+ unsigned NumElts = VecVT.getVectorNumElements();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+
+ // TODO: These transforms should not require the 'hasOneUse' restriction, but
+ // there are regressions on multiple targets without it. We can end up with a
+ // mess of scalar and vector code if we reduce only part of the DAG to scalar.
+ if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
+ VecOp.hasOneUse()) {
+ // The vector index of the LSBs of the source depend on the endian-ness.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ExtractIndex = IndexC->getZExtValue();
+ // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+ unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
+ SDValue BCSrc = VecOp.getOperand(0);
+ if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
+ return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
+
+ if (LegalTypes && BCSrc.getValueType().isInteger() &&
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
+ // trunc i64 X to i32
+ SDValue X = BCSrc.getOperand(0);
+ assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
+ "Extract element and scalar to vector can't change element type "
+ "from FP to integer.");
+ unsigned XBitWidth = X.getValueSizeInBits();
+ BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
+
+ // An extract element return value type can be wider than its vector
+ // operand element type. In that case, the high bits are undefined, so
+ // it's possible that we may need to extend rather than truncate.
+ if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
+ assert(XBitWidth % VecEltBitWidth == 0 &&
+ "Scalar bitwidth must be a multiple of vector element bitwidth");
+ return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
+ }
+ }
+ }
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD
+  // patterns. For example, on AVX, we might extract elements from a wide
+  // vector without using extract_subvector. However, if we can find an
+  // underlying scalar value, then we can always use that.
+ if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
+ // Find the new index to extract from.
+ int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(ScalarVT);
+
+ // Select the right vector half to extract from.
+ SDValue SVInVec;
+ if (OrigElt < (int)NumElts) {
+ SVInVec = VecOp.getOperand(0);
+ } else {
+ SVInVec = VecOp.getOperand(1);
+ OrigElt -= NumElts;
+ }
+
+ if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue InOp = SVInVec.getOperand(OrigElt);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
+ }
+
+ return InOp;
+ }
+
+ // FIXME: We should handle recursing on other vector shuffles and
+ // scalar_to_vector here as well.
+
+ if (!LegalOperations ||
+ // FIXME: Should really be just isOperationLegalOrCustom.
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
+ DAG.getVectorIdxConstant(OrigElt, DL));
+ }
+ }
+
+ // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+ // simplify it based on the (valid) extraction indices.
+ if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Use->getOperand(0) == VecOp &&
+ isa<ConstantSDNode>(Use->getOperand(1));
+ })) {
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (SDNode *Use : VecOp->uses()) {
+ auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+ if (CstElt->getAPIntValue().ult(NumElts))
+ DemandedElts.setBit(CstElt->getZExtValue());
+ }
+ if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
+ if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
+ return SDValue(N, 0);
+
+ // Everything under here is trying to match an extract of a loaded value.
+  // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VecVT.getVectorElementType();
+ EVT LVT = ExtVT;
+ if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
+ return SDValue();
+
+ if (VecOp.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = VecOp.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (NumElts != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ VecOp = VecOp.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // extract (vector load $addr), i --> load $addr + i * size
+ if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
+ ISD::isNormalLoad(VecOp.getNode()) &&
+ !Index->hasPredecessor(VecOp.getNode())) {
+ auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
+ if (VecLoad && VecLoad->isSimple())
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations || !IndexC)
+ return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ int Elt = IndexC->getZExtValue();
+ LoadSDNode *LN0 = nullptr;
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ VecOp.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
+ }
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+    // Select the input vector, guarding against an out-of-range extract index.
+ int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
+ VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
+
+ if (VecOp.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ VecOp = VecOp.getOperand(0);
+ }
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
+ Index = DAG.getConstant(Elt, DL, Index.getValueType());
+ }
+ } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
+ VecVT.getVectorElementType() == ScalarVT &&
+ (!LegalTypes ||
+ TLI.isTypeLegal(
+ VecOp.getOperand(0).getValueType().getVectorElementType()))) {
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
+ // -> extract_vector_elt a, 0
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
+ // -> extract_vector_elt a, 1
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
+ // -> extract_vector_elt b, 0
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
+ // -> extract_vector_elt b, 1
+ SDLoc SL(N);
+ EVT ConcatVT = VecOp.getOperand(0).getValueType();
+ unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
+ SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
+ Index.getValueType());
+
+ SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
+ ConcatVT.getVectorElementType(),
+ ConcatOp, NewIdx);
+ return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
+ }
+
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+  if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
+
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
+}
+
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
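+// For example (illustrative, little-endian):
+//   (v4i32 build_vector (zext i16:a), (zext i16:b),
+//                       (zext i16:c), (zext i16:d))
+//   --> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))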
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.isUndef()) continue;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+      // Multiple incoming types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = VT.getScalarType();
+ bool ValidTypes =
+ SourceType != MVT::Other &&
+ llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
+ llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
+
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if (!ValidTypes)
+ return SDValue();
+
+ // If we already have a splat buildvector, then don't fold it if it means
+ // introducing zeros.
+ if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
+ return SDValue();
+
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, DL, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.isUndef()) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.isUndef())
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT) ||
+ (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
+ return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorklist(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getBitcast(VT, BV);
+}
+
+// Simplify (build_vec (trunc $1)
+// (trunc (srl $1 half-width))
+// (trunc (srl $1 (2 * half-width))))
+// to (bitcast $1)
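+// For example (illustrative, little-endian, i64 $1):
+//   (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
+//                       (trunc (srl $1, 32)), (trunc (srl $1, 48)))
+//   --> (v4i16 bitcast $1)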
+SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // Only for little endian
+ if (!DAG.getDataLayout().isLittleEndian())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT OutScalarTy = VT.getScalarType();
+ uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
+
+ // Only for power of two types to be sure that bitcast works well
+ if (!isPowerOf2_64(ScalarTypeBitsize))
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+
+ // Look through bitcasts
+ auto PeekThroughBitcast = [](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST)
+ return Op.getOperand(0);
+ return Op;
+ };
+
+ // The source value where all the parts are extracted.
+ SDValue Src;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = PeekThroughBitcast(N->getOperand(i));
+ // Ignore undef inputs.
+ if (In.isUndef()) continue;
+
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ In = PeekThroughBitcast(In.getOperand(0));
+
+ if (In.getOpcode() != ISD::SRL) {
+      // For now, only handle build_vec without shuffling; handle shifts here
+      // in the future.
+ if (i != 0)
+ return SDValue();
+
+ Src = In;
+ } else {
+ // In is SRL
+ SDValue part = PeekThroughBitcast(In.getOperand(0));
+
+ if (!Src) {
+ Src = part;
+ } else if (Src != part) {
+ // Vector parts do not stem from the same variable
+ return SDValue();
+ }
+
+ SDValue ShiftAmtVal = In.getOperand(1);
+ if (!isa<ConstantSDNode>(ShiftAmtVal))
+ return SDValue();
+
+ uint64_t ShiftAmt = In.getConstantOperandVal(1);
+
+ // The extracted value is not extracted at the right position
+ if (ShiftAmt != i * ScalarTypeBitsize)
+ return SDValue();
+ }
+ }
+
+ // Only cast if the size is the same
+ if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getBitcast(VT, Src);
+}
+
+SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx, bool DidSplitVec) {
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
+
+ EVT VT = N->getValueType(0);
+ EVT InVT1 = VecIn1.getValueType();
+ EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ShuffleNumElems = NumElems;
+
+ // If we artificially split a vector in two already, then the offsets in the
+ // operands will all be based off of VecIn1, even those in VecIn2.
+ unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
+
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVT1Size = InVT1.getFixedSizeInBits();
+ uint64_t InVT2Size = InVT2.getFixedSizeInBits();
+
+ assert(InVT2Size <= InVT1Size &&
+ "Inputs must be sorted to be in non-increasing vector size order.");
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Try to make the types match the type of the output.
+ if (InVT1 != VT || InVT2 != VT) {
+ if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
+ // If the output vector length is a multiple of both input lengths,
+ // we can concatenate them and pad the rest with undefs.
+ unsigned NumConcats = VTSize / InVT1Size;
+ assert(NumConcats >= 2 && "Concat needs at least two inputs!");
+ SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
+ ConcatOps[0] = VecIn1;
+ ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ VecIn2 = SDValue();
+ } else if (InVT1Size == VTSize * 2) {
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
+ return SDValue();
+
+ if (!VecIn2.getNode()) {
+ // If we only have one input vector, and it's twice the size of the
+ // output, split it in two.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
+ DAG.getVectorIdxConstant(NumElems, DL));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
+ // Since we now have shorter input vectors, adjust the offset of the
+ // second vector's start.
+ Vec2Offset = NumElems;
+ } else {
+ assert(InVT2Size <= InVT1Size &&
+ "Second input is not going to be larger than the first one.");
+
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
+
+ // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+ // lower it back into a BUILD_VECTOR. So if the inserted type is
+ // illegal, don't even try.
+ if (InVT1 != InVT2) {
+ if (!TLI.isTypeLegal(InVT2))
+ return SDValue();
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = NumElems * 2;
+ }
+ } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
+ SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+ ConcatOps[0] = VecIn2;
+ VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
+ !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
+ return SDValue();
+      // If the dest vector has fewer than two elements, then using a shuffle
+      // and extracting from larger regs will cost even more.
+ if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
+ return SDValue();
+ assert(InVT2Size <= InVT1Size &&
+ "Second input is not going to be larger than the first one.");
+
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
+
+ if (InVT1 != InVT2) {
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = InVT1Size / VTSize * NumElems;
+ } else {
+ // TODO: Support cases where the length mismatch isn't exactly by a
+ // factor of 2.
+ // TODO: Move this check upwards, so that if we have bad type
+ // mismatches, we don't create any DAG nodes.
+ return SDValue();
+ }
+ }
+
+ // Initialize mask to undef.
+ SmallVector<int, 8> Mask(ShuffleNumElems, -1);
+
+ // Only need to run up to the number of elements actually used, not the
+ // total number of elements in the shuffle - if we are shuffling a wider
+ // vector, the high lanes should be set to undef.
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] <= 0)
+ continue;
+
+ unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
+ if (VectorMask[i] == (int)LeftIdx) {
+ Mask[i] = ExtIndex;
+ } else if (VectorMask[i] == (int)LeftIdx + 1) {
+ Mask[i] = Vec2Offset + ExtIndex;
+ }
+ }
+
+ // The type the input vectors may have changed above.
+ InVT1 = VecIn1.getValueType();
+
+ // If we already have a VecIn2, it should have the same type as VecIn1.
+ // If we don't, get an undef/zero vector of the appropriate type.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
+ assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
+
+ SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
+ if (ShuffleNumElems > NumElems)
+ Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
+
+ return Shuffle;
+}
+
+static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
+ assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // First, determine where the build vector is not undef.
+ // TODO: We could extend this to handle zero elements as well as undefs.
+ int NumBVOps = BV->getNumOperands();
+ int ZextElt = -1;
+ for (int i = 0; i != NumBVOps; ++i) {
+ SDValue Op = BV->getOperand(i);
+ if (Op.isUndef())
+ continue;
+ if (ZextElt == -1)
+ ZextElt = i;
+ else
+ return SDValue();
+ }
+ // Bail out if there's no non-undef element.
+ if (ZextElt == -1)
+ return SDValue();
+
+ // The build vector contains some number of undef elements and exactly
+ // one other element. That other element must be a zero-extended scalar
+ // extracted from a vector at a constant index to turn this into a shuffle.
+ // Also, require that the build vector does not implicitly truncate/extend
+ // its elements.
+ // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ EVT VT = BV->getValueType(0);
+ SDValue Zext = BV->getOperand(ZextElt);
+ if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
+ Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+ Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // The zero-extend must be a multiple of the source size, and we must be
+ // building a vector of the same size as the source of the extract element.
+ SDValue Extract = Zext.getOperand(0);
+ unsigned DestSize = Zext.getValueSizeInBits();
+ unsigned SrcSize = Extract.getValueSizeInBits();
+ if (DestSize % SrcSize != 0 ||
+ Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Create a shuffle mask that will combine the extracted element with zeros
+ // and undefs.
+ int ZextRatio = DestSize / SrcSize;
+ int NumMaskElts = NumBVOps * ZextRatio;
+ SmallVector<int, 32> ShufMask(NumMaskElts, -1);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ if (i / ZextRatio == ZextElt) {
+ // The low bits of the (potentially translated) extracted element map to
+ // the source vector. The high bits map to zero. We will use a zero vector
+ // as the 2nd source operand of the shuffle, so use the 1st element of
+ // that vector (mask value is number-of-elements) for the high bits.
+ int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+ ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+ : NumMaskElts;
+ }
+
+ // Undef elements of the build vector remain undef because we initialize
+ // the shuffle mask with -1.
+ }
+
+ // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
+ // bitcast (shuffle V, ZeroVec, VectorMask)
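+  // E.g. (an illustrative little-endian case):
+  //   (v2i64 build_vector (zext (extractelt v4i32:V, 2) to i64), undef)
+  //   --> (v2i64 bitcast (vector_shuffle<2,4,u,u> V, (v4i32 zerovec)))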
+ SDLoc DL(BV);
+ EVT VecVT = Extract.getOperand(0).getValueType();
+ SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
+ ZeroVec, ShufMask, DAG);
+ if (!Shuf)
+ return SDValue();
+ return DAG.getBitcast(VT, Shuf);
+}
+
+// FIXME: promote to STLExtras.
+template <typename R, typename T>
+static auto getFirstIndexOf(R &&Range, const T &Val) {
+ auto I = find(Range, Val);
+ if (I == Range.end())
+ return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
+ return std::distance(Range.begin(), I);
+}
+
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations. If the types of the vectors we're extracting from allow it,
+// turn this into a vector_shuffle node.
+SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
+ return V;
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
+ bool UsesZeroVector = false;
+ unsigned NumElems = N->getNumOperands();
+
+ // Record, for each element of the newly built vector, which input vector
+ // that element comes from. -1 stands for undef, 0 for the zero vector,
+ // and positive values for the input vectors.
+ // VectorMask maps each element to its vector number, and VecIn maps vector
+ // numbers to their initial SDValues.
+
+ SmallVector<int, 8> VectorMask(NumElems, -1);
+ SmallVector<SDValue, 8> VecIn;
+ VecIn.push_back(SDValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.isUndef())
+ continue;
+
+ // See if we can use a blend with a zero vector.
+ // TODO: Should we generalize this to a blend with an arbitrary constant
+ // vector?
+ if (isNullConstant(Op) || isNullFPConstant(Op)) {
+ UsesZeroVector = true;
+ VectorMask[i] = 0;
+ continue;
+ }
+
+ // Not an undef or zero. If the input is something other than an
+ // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ SDValue ExtractedFromVec = Op.getOperand(0);
+
+ if (ExtractedFromVec.getValueType().isScalableVector())
+ return SDValue();
+
+ const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
+ if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
+ return SDValue();
+
+ // All inputs must have the same element type as the output.
+ if (VT.getVectorElementType() !=
+ ExtractedFromVec.getValueType().getVectorElementType())
+ return SDValue();
+
+ // Have we seen this input vector before?
+ // The vectors are expected to be tiny (usually 1 or 2 elements), so using
+ // a map back from SDValues to numbers isn't worth it.
+ int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
+ if (Idx == -1) { // A new source vector?
+ Idx = VecIn.size();
+ VecIn.push_back(ExtractedFromVec);
+ }
+
+ VectorMask[i] = Idx;
+ }
+
+ // If we didn't find at least one input vector, bail out.
+ if (VecIn.size() < 2)
+ return SDValue();
+
+  // If all the operands of the BUILD_VECTOR extract from the same vector,
+  // then split the vector efficiently based on the maximum vector access
+  // index, and adjust the VectorMask and VecIn accordingly.
+ bool DidSplitVec = false;
+ if (VecIn.size() == 2) {
+ unsigned MaxIndex = 0;
+ unsigned NearestPow2 = 0;
+ SDValue Vec = VecIn.back();
+ EVT InVT = Vec.getValueType();
+ SmallVector<unsigned, 8> IndexVec(NumElems, 0);
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ unsigned Index = N->getOperand(i).getConstantOperandVal(1);
+ IndexVec[i] = Index;
+ MaxIndex = std::max(MaxIndex, Index);
+ }
+
+ NearestPow2 = PowerOf2Ceil(MaxIndex);
+ if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
+ NumElems * 2 < NearestPow2) {
+ unsigned SplitSize = NearestPow2 / 2;
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), SplitSize);
+ if (TLI.isTypeLegal(SplitVT) &&
+ SplitSize + SplitVT.getVectorNumElements() <=
+ InVT.getVectorNumElements()) {
+ SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getVectorIdxConstant(SplitSize, DL));
+ SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ VecIn.pop_back();
+ VecIn.push_back(VecIn1);
+ VecIn.push_back(VecIn2);
+ DidSplitVec = true;
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
+ }
+ }
+ }
+ }
+
+ // Sort input vectors by decreasing vector element count,
+ // while preserving the relative order of equally-sized vectors.
+  // Note that we keep the first "implicit" zero vector as-is.
+ SmallVector<SDValue, 8> SortedVecIn(VecIn);
+ llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
+ [](const SDValue &a, const SDValue &b) {
+ return a.getValueType().getVectorNumElements() >
+ b.getValueType().getVectorNumElements();
+ });
+
+ // We now also need to rebuild the VectorMask, because it referenced element
+ // order in VecIn, and we just sorted them.
+ for (int &SourceVectorIndex : VectorMask) {
+ if (SourceVectorIndex <= 0)
+ continue;
+ unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
+ assert(Idx > 0 && Idx < SortedVecIn.size() &&
+ VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
+ SourceVectorIndex = Idx;
+ }
+
+ VecIn = std::move(SortedVecIn);
+
+ // TODO: Should this fire if some of the input vectors has illegal type (like
+ // it does now), or should we let legalization run its course first?
+
+ // Shuffle phase:
+ // Take pairs of vectors, and shuffle them so that the result has elements
+ // from these vectors in the correct places.
+ // For example, given:
+ // t10: i32 = extract_vector_elt t1, Constant:i64<0>
+ // t11: i32 = extract_vector_elt t2, Constant:i64<0>
+ // t12: i32 = extract_vector_elt t3, Constant:i64<0>
+ // t13: i32 = extract_vector_elt t1, Constant:i64<1>
+ // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
+ // We will generate:
+ // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
+ // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
+ SmallVector<SDValue, 4> Shuffles;
+ for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
+ unsigned LeftIdx = 2 * In + 1;
+ SDValue VecLeft = VecIn[LeftIdx];
+ SDValue VecRight =
+ (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
+
+ if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
+ VecRight, LeftIdx, DidSplitVec))
+ Shuffles.push_back(Shuffle);
+ else
+ return SDValue();
+ }
+
+ // If we need the zero vector as an "ingredient" in the blend tree, add it
+ // to the list of shuffles.
+ if (UsesZeroVector)
+ Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
+ : DAG.getConstantFP(0.0, DL, VT));
+
+ // If we only have one shuffle, we're done.
+ if (Shuffles.size() == 1)
+ return Shuffles[0];
+
+ // Update the vector mask to point to the post-shuffle vectors.
+ for (int &Vec : VectorMask)
+ if (Vec == 0)
+ Vec = Shuffles.size() - 1;
+ else
+ Vec = (Vec - 1) / 2;
+
+ // More than one shuffle. Generate a binary tree of blends, e.g. if from
+ // the previous step we got the set of shuffles t10, t11, t12, t13, we will
+ // generate:
+ // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
+ // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
+ // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
+ // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
+ // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
+ // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
+ // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
+
+ // Make sure the initial size of the shuffle list is even.
+ if (Shuffles.size() % 2)
+ Shuffles.push_back(DAG.getUNDEF(VT));
+
+ for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
+ if (CurSize % 2) {
+ Shuffles[CurSize] = DAG.getUNDEF(VT);
+ CurSize++;
+ }
+ for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
+ int Left = 2 * In;
+ int Right = 2 * In + 1;
+ SmallVector<int, 8> Mask(NumElems, -1);
+ SDValue L = Shuffles[Left];
+ ArrayRef<int> LMask;
+ bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ L.use_empty() && L.getOperand(1).isUndef() &&
+ L.getOperand(0).getValueType() == L.getValueType();
+ if (IsLeftShuffle) {
+ LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
+ L = L.getOperand(0);
+ }
+ SDValue R = Shuffles[Right];
+ ArrayRef<int> RMask;
+ bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ R.use_empty() && R.getOperand(1).isUndef() &&
+ R.getOperand(0).getValueType() == R.getValueType();
+ if (IsRightShuffle) {
+ RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
+ R = R.getOperand(0);
+ }
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (VectorMask[I] == Left) {
+ Mask[I] = I;
+ if (IsLeftShuffle)
+ Mask[I] = LMask[I];
+ VectorMask[I] = In;
+ } else if (VectorMask[I] == Right) {
+ Mask[I] = I + NumElems;
+ if (IsRightShuffle)
+ Mask[I] = RMask[I] + NumElems;
+ VectorMask[I] = In;
+ }
+ }
+
+ Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
+ }
+ }
+ return Shuffles[0];
+}
+
+// Try to turn a build vector of zero extends of extract vector elts into a
+// vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend?
+// TODO: Allow undef elements?
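+//
+// For example (illustrative types, subject to the legality checks below):
+//   (v4i32 build_vector (zext (extract_vector_elt v8i16 X, 4)),
+//                       (zext (extract_vector_elt v8i16 X, 5)),
+//                       (zext (extract_vector_elt v8i16 X, 6)),
+//                       (zext (extract_vector_elt v8i16 X, 7)))
+//   --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))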
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+ if (LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ bool FoundZeroExtend = false;
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> int64_t {
+ unsigned Opc = Op.getOpcode();
+ FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+ if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
+ Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+ if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+ return C->getZExtValue();
+ return -1;
+ };
+
+ // Make sure the first element matches
+ // (zext (extract_vector_elt X, C))
+ // Offset must be a constant multiple of the
+ // known-minimum vector length of the result type.
+ int64_t Offset = checkElem(Op0);
+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
+ return SDValue();
+
+ unsigned NumElems = N->getNumOperands();
+ SDValue In = Op0.getOperand(0).getOperand(0);
+ EVT InSVT = In.getValueType().getScalarType();
+ EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+ // Don't create an illegal input type after type legalization.
+ if (LegalTypes && !TLI.isTypeLegal(InVT))
+ return SDValue();
+
+ // Ensure all the elements come from the same vector and are adjacent.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ if ((Offset + i) != checkElem(N->getOperand(i)))
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
+ Op0.getOperand(0).getOperand(1));
+ return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+ VT, In);
+}
+
+// If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
+// and all other elements are constant zeros, granularize the BUILD_VECTOR's
+// element width, absorbing the ZERO_EXTEND and turning it into a constant
+// zero op. This pattern can appear during legalization.
+//
+// NOTE: This can be generalized to allow more than a single
+// non-constant-zero op, UNDEF's, and to be KnownBits-based.
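+//
+// For example (illustrative, little-endian, assuming i32 and v4i32 are legal):
+//   (v2i64 build_vector (i64 zero_extend (i32 X)), (i64 0))
+// is rebuilt with half-width elements as
+//   (v2i64 bitcast (v4i32 build_vector (trunc (zero_extend X)), 0, 0, 0))
+// where the trunc/zero_extend pair is expected to fold away in later combines.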
+SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
+ // Don't run this after legalization. Targets may have other preferences.
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // FIXME: support big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
+
+ EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+
+ if (!TLI.isTypeLegal(OpIntVT) ||
+ (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
+ return SDValue();
+
+ unsigned EltBitwidth = VT.getScalarSizeInBits();
+ // NOTE: the actual width of operands may be wider than that!
+
+ // Analyze all operands of this BUILD_VECTOR. What is the largest number of
+ // active bits they all have? We'll want to truncate them all to that width.
+ unsigned ActiveBits = 0;
+ APInt KnownZeroOps(VT.getVectorNumElements(), 0);
+ for (auto I : enumerate(N->ops())) {
+ SDValue Op = I.value();
+ // FIXME: support UNDEF elements?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ unsigned OpActiveBits =
+ Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
+ if (OpActiveBits == 0) {
+ KnownZeroOps.setBit(I.index());
+ continue;
+ }
+ // Profitability check: don't allow non-zero constant operands.
+ return SDValue();
+ }
+ // Profitability check: there must only be a single non-zero operand,
+ // and it must be the first operand of the BUILD_VECTOR.
+ if (I.index() != 0)
+ return SDValue();
+ // The operand must be a zero-extension itself.
+ // FIXME: this could be generalized to known leading zeros check.
+ if (Op.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ unsigned CurrActiveBits =
+ Op.getOperand(0).getValueSizeInBits().getFixedValue();
+ assert(!ActiveBits && "Already encountered non-constant-zero operand?");
+ ActiveBits = CurrActiveBits;
+ // We want to at least halve the element size.
+ if (2 * ActiveBits > EltBitwidth)
+ return SDValue();
+ }
+
+ // This BUILD_VECTOR must have at least one non-constant-zero operand.
+ if (ActiveBits == 0)
+ return SDValue();
+
+ // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
+ // into how many chunks can we split our element width?
+ EVT NewScalarIntVT, NewIntVT;
+ std::optional<unsigned> Factor;
+ // We can split the element into at least two chunks, but not into more
+ // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
+ // that evenly divides the element width and for which the resulting
+ // types/operations on that chunk width are legal.
+ assert(2 * ActiveBits <= EltBitwidth &&
+ "We know that half or less bits of the element are active.");
+ for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
+ if (EltBitwidth % Scale != 0)
+ continue;
+ unsigned ChunkBitwidth = EltBitwidth / Scale;
+ assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
+ NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
+ NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
+ Scale * N->getNumOperands());
+ if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
+ (LegalOperations &&
+ !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
+ continue;
+ Factor = Scale;
+ break;
+ }
+ if (!Factor)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
+
+ // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
+ SmallVector<SDValue, 16> NewOps;
+ NewOps.reserve(NewIntVT.getVectorNumElements());
+ for (auto I : enumerate(N->ops())) {
+ SDValue Op = I.value();
+ assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
+ unsigned SrcOpIdx = I.index();
+ if (KnownZeroOps[SrcOpIdx]) {
+ NewOps.append(*Factor, ZeroOp);
+ continue;
+ }
+ Op = DAG.getBitcast(OpIntVT, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
+ NewOps.emplace_back(Op);
+ NewOps.append(*Factor - 1, ZeroOp);
+ }
+ assert(NewOps.size() == NewIntVT.getVectorNumElements());
+ SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
+ NewBV = DAG.getBitcast(VT, NewBV);
+ return NewBV;
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ // If this is a splat of a bitcast from another vector, change to a
+ // concat_vector.
+ // For example:
+ // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
+ // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
+ //
+ // If X is a build_vector itself, the concat can become a larger build_vector.
+ // TODO: Maybe this is useful for non-splat too?
+ if (!LegalOperations) {
+ if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ Splat = peekThroughBitcasts(Splat);
+ EVT SrcVT = Splat.getValueType();
+ if (SrcVT.isVector()) {
+ unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), NumElts);
+ if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
+ NewVT, Ops);
+ return DAG.getBitcast(VT, Concat);
+ }
+ }
+ }
+ }
+
+ // Check if we can express the BUILD_VECTOR via a subvector extract.
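+ // For example (illustrative types):
+ // (v4i32 build_vector (extract_vector_elt v8i32 X, 4), ...,
+ // (extract_vector_elt v8i32 X, 7))
+ // --> (v4i32 extract_subvector X, 4)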
+ if (!LegalTypes && (N->getNumOperands() > 1)) {
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> uint64_t {
+ if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+ (Op0.getOperand(0) == Op.getOperand(0)))
+ if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return CNode->getZExtValue();
+ return -1;
+ };
+
+ int Offset = checkElem(Op0);
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ if (Offset + i != checkElem(N->getOperand(i))) {
+ Offset = -1;
+ break;
+ }
+ }
+
+ if ((Offset == 0) &&
+ (Op0.getOperand(0).getValueType() == N->getValueType(0)))
+ return Op0.getOperand(0);
+ if ((Offset != -1) &&
+ ((Offset % N->getValueType(0).getVectorNumElements()) ==
+ 0)) // IDX must be multiple of output size.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+
+ if (SDValue V = convertBuildVecZextToZext(N))
+ return V;
+
+ if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecTruncToBitCast(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecToShuffle(N))
+ return V;
+
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
+ // Do this late as some of the above may replace the splat.
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
+ return SDValue();
+}
+
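+// Fold a CONCAT_VECTORS whose operands are bitcasts of scalars (or undef) into
+// a single BUILD_VECTOR of those scalars, bitcast to the result type.
+// For example (illustrative, on a target where v2i32 is not a legal type):
+//   (v4i32 concat_vectors (v2i32 bitcast (i64 X)), (v2i32 bitcast (i64 Y)))
+//   --> (v4i32 bitcast (v2i64 build_vector X, Y))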
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+ // If it's neither, bail out; it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.isUndef())
+ Op = ScalarUndef;
+ else
+ Op = DAG.getBitcast(SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
+}
+
+// Attempt to merge nested concat_vectors/undefs.
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
+ EVT SubVT;
+ SDValue FirstConcat;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef())
+ continue;
+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ if (!FirstConcat) {
+ SubVT = Op.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+ FirstConcat = Op;
+ continue;
+ }
+ if (SubVT != Op.getOperand(0).getValueType())
+ return SDValue();
+ }
+ assert(FirstConcat && "Concat of all-undefs found");
+
+ SmallVector<SDValue> ConcatOps;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef()) {
+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
+ continue;
+ }
+ ConcatOps.append(Op->op_begin(), Op->op_end());
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
+}
+
+// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
+// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
+// most two distinct vectors the same size as the result, attempt to turn this
+// into a legal shuffle.
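+// For example (illustrative types, if the target accepts the mask):
+//   (v8i32 concat_vectors (v4i32 extract_subvector v8i32 X, 0),
+//                         (v4i32 extract_subvector v8i32 Y, 4))
+//   --> (v8i32 vector_shuffle<0,1,2,3,12,13,14,15> X, Y)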
+static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // We currently can't generate an appropriate shuffle for a scalable vector.
+ if (VT.isScalableVector())
+ return SDValue();
+
+ int NumElts = VT.getVectorNumElements();
+ int NumOpElts = OpVT.getVectorNumElements();
+
+ SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask;
+
+ for (SDValue Op : N->ops()) {
+ Op = peekThroughBitcasts(Op);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (Op.isUndef()) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // What vector are we extracting the subvector from and at what index?
+ SDValue ExtVec = Op.getOperand(0);
+ int ExtIdx = Op.getConstantOperandVal(1);
+
+ // We want the EVT of the original extraction to correctly scale the
+ // extraction index.
+ EVT ExtVT = ExtVec.getValueType();
+ ExtVec = peekThroughBitcasts(ExtVec);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (ExtVec.isUndef()) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ // Ensure that we are extracting a subvector from a vector the same
+ // size as the result.
+ if (ExtVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Scale the subvector index to account for any bitcast.
+ int NumExtElts = ExtVT.getVectorNumElements();
+ if (0 == (NumExtElts % NumElts))
+ ExtIdx /= (NumExtElts / NumElts);
+ else if (0 == (NumElts % NumExtElts))
+ ExtIdx *= (NumElts / NumExtElts);
+ else
+ return SDValue();
+
+ // At most we can reference 2 inputs in the final shuffle.
+ if (SV0.isUndef() || SV0 == ExtVec) {
+ SV0 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx);
+ } else if (SV1.isUndef() || SV1 == ExtVec) {
+ SV1 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx + NumElts);
+ } else {
+ return SDValue();
+ }
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask, DAG);
+}
+
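+// Fold a concatenation of identical casts into a single cast of a wider
+// concatenation of the cast sources, e.g. (illustrative types, subject to the
+// legality checks below):
+//   (v8f32 concat_vectors (sint_to_fp v4i32 A), (sint_to_fp v4i32 B))
+//   --> (v8f32 sint_to_fp (v8i32 concat_vectors A, B))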
+static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
+ unsigned CastOpcode = N->getOperand(0).getOpcode();
+ switch (CastOpcode) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ // TODO: Allow more opcodes?
+ // case ISD::BITCAST:
+ // case ISD::TRUNCATE:
+ // case ISD::ZERO_EXTEND:
+ // case ISD::SIGN_EXTEND:
+ // case ISD::FP_EXTEND:
+ break;
+ default:
+ return SDValue();
+ }
+
+ EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
+ if (!SrcVT.isVector())
+ return SDValue();
+
+ // All operands of the concat must be the same kind of cast from the same
+ // source type.
+ SmallVector<SDValue, 4> SrcOps;
+ for (SDValue Op : N->ops()) {
+ if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
+ Op.getOperand(0).getValueType() != SrcVT)
+ return SDValue();
+ SrcOps.push_back(Op.getOperand(0));
+ }
+
+ // The wider cast must be supported by the target. This is unusual because
+ // the operation support type parameter depends on the opcode. In addition,
+ // check the other type in the cast to make sure this is really legal.
+ EVT VT = N->getValueType(0);
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+ ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
+ EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ switch (CastOpcode) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
+ !TLI.isTypeLegal(VT))
+ return SDValue();
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
+ !TLI.isTypeLegal(ConcatSrcVT))
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unexpected cast opcode");
+ }
+
+ // concat (cast X), (cast Y)... -> cast (concat X, Y...)
+ SDLoc DL(N);
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
+ return DAG.getNode(CastOpcode, DL, VT, NewConcat);
+}
+
+// See if this is a simple CONCAT_VECTORS with no UNDEF operands, where one of
+// the operands is a SHUFFLE_VECTOR and all other operands are also operands
+// of that SHUFFLE_VECTOR; if so, create a wider SHUFFLE_VECTOR.
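+// For example (illustrative types, if the combined mask is legal):
+//   (v4i32 concat_vectors (v2i32 vector_shuffle<1,0> X, undef), (v2i32 X))
+//   --> (v4i32 vector_shuffle<1,0,0,1> (concat_vectors X, undef), undef)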
+static SDValue combineConcatVectorOfShuffleAndItsOperands(
+ SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
+ bool LegalOperations) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // For now, only allow simple 2-operand concatenations.
+ if (N->getNumOperands() != 2)
+ return SDValue();
+
+ // Don't create illegal types/shuffles when not allowed to.
+ if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
+ (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
+ return SDValue();
+
+ // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
+ // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
+ // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
+ // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
+ // (4) and for now, the SHUFFLE_VECTOR must be unary.
+ ShuffleVectorSDNode *SVN = nullptr;
+ for (SDValue Op : N->ops()) {
+ if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
+ CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
+ all_of(N->ops(), [CurSVN](SDValue Op) {
+ // FIXME: can we allow UNDEF operands?
+ return !Op.isUndef() &&
+ (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
+ })) {
+ SVN = CurSVN;
+ break;
+ }
+ }
+ if (!SVN)
+ return SDValue();
+
+ // We are going to pad the shuffle operands, so any index that was picking
+ // from the second operand must be adjusted.
+ SmallVector<int, 16> AdjustedMask;
+ AdjustedMask.reserve(SVN->getMask().size());
+ assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
+ append_range(AdjustedMask, SVN->getMask());
+
+ // Identity masks for the operands of the (padded) shuffle.
+ SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
+ MutableArrayRef<int> FirstShufOpIdentityMask =
+ MutableArrayRef<int>(IdentityMask)
+ .take_front(OpVT.getVectorNumElements());
+ MutableArrayRef<int> SecondShufOpIdentityMask =
+ MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
+ std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
+ std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
+ VT.getVectorNumElements());
+
+ // New combined shuffle mask.
+ SmallVector<int, 32> Mask;
+ Mask.reserve(VT.getVectorNumElements());
+ for (SDValue Op : N->ops()) {
+ assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
+ if (Op.getNode() == SVN) {
+ append_range(Mask, AdjustedMask);
+ continue;
+ }
+ if (Op == SVN->getOperand(0)) {
+ append_range(Mask, FirstShufOpIdentityMask);
+ continue;
+ }
+ if (Op == SVN->getOperand(1)) {
+ append_range(Mask, SecondShufOpIdentityMask);
+ continue;
+ }
+ llvm_unreachable("Unexpected operand!");
+ }
+
+ // Don't create illegal shuffle masks.
+ if (!TLI.isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ // Pad the shuffle operands with UNDEF.
+ SDLoc dl(N);
+ std::array<SDValue, 2> ShufOps;
+ for (auto I : zip(SVN->ops(), ShufOps)) {
+ SDValue ShufOp = std::get<0>(I);
+ SDValue &NewShufOp = std::get<1>(I);
+ if (ShufOp.isUndef())
+ NewShufOp = DAG.getUNDEF(VT);
+ else {
+ SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
+ DAG.getUNDEF(OpVT));
+ ShufOpParts[0] = ShufOp;
+ NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
+ }
+ }
+ // Finally, create the new wide shuffle.
+ return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ // Check if all of the operands are undefs.
+ EVT VT = N->getValueType(0);
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (all_of(drop_begin(N->ops()),
+ [](const SDValue &Op) { return Op.isUndef(); })) {
+ SDValue In = N->getOperand(0);
+ assert(In.getValueType().isVector() && "Must concat vectors");
+
+ // If the input is a concat_vectors, just make a larger concat by padding
+ // with smaller undefs.
+ //
+ // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
+ // here could cause an infinite loop. That legalization happens when LegalDAG
+ // is true and the input to AArch64TargetLowering::LowerCONCAT_VECTORS() is
+ // scalable.
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
+ !(LegalDAG && In.getValueType().isScalableVector())) {
+ unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+ SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+ Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
+ SDValue Scalar = peekThroughOneUseBitcasts(In);
+
+ // concat_vectors(scalar_to_vector(scalar), undef) ->
+ // scalar_to_vector(scalar)
+ if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Scalar.hasOneUse()) {
+ EVT SVT = Scalar.getValueType().getVectorElementType();
+ if (SVT == Scalar.getOperand(0).getValueType())
+ Scalar = Scalar.getOperand(0);
+ }
+
+ // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
+ if (!Scalar.getValueType().isVector()) {
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
+ EVT SclTy = Scalar.getValueType();
+
+ if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
+ return SDValue();
+
+ // Bail out if the vector size is not a multiple of the scalar size.
+ if (VT.getSizeInBits() % SclTy.getSizeInBits())
+ return SDValue();
+
+ unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+ if (VNTNumElms < 2)
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
+ if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
+ // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+ // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
+ SmallVector<SDValue, 8> Opnds;
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
+ // If the BUILD_VECTORs are built from integers, they may have different
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0).getValueType();
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ }
+
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+ if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+ return V;
+ }
+
+ if (SDValue V = combineConcatVectorOfCasts(N, DAG))
+ return V;
+
+ if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
+ N, DAG, TLI, LegalTypes, LegalOperations))
+ return V;
+
+ // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
+ // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
+ // operands and look for CONCAT operations that place the incoming vectors
+ // at the exact same location.
+ //
+ // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
+ SDValue SingleSource = SDValue();
+ unsigned PartNumElem =
+ N->getOperand(0).getValueType().getVectorMinNumElements();
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.isUndef())
+ continue;
+
+ // Check if this is the identity extract:
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // Find the single incoming vector for the extract_subvector.
+ if (SingleSource.getNode()) {
+ if (Op.getOperand(0) != SingleSource)
+ return SDValue();
+ } else {
+ SingleSource = Op.getOperand(0);
+
+ // Check that the source type is the same as the type of the result.
+ // If not, this concat may extend the vector, so we cannot
+ // optimize it away.
+ if (SingleSource.getValueType() != N->getValueType(0))
+ return SDValue();
+ }
+
+ // Check that we are reading from the identity index.
+ unsigned IdentityIndex = i * PartNumElem;
+ if (Op.getConstantOperandAPInt(1) != IdentityIndex)
+ return SDValue();
+ }
+
+ if (SingleSource.getNode())
+ return SingleSource;
+
+ return SDValue();
+}
+
+// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+// if the subvector can be sourced for free.
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == SubVT &&
+ (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+}
+
+static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
+ SelectionDAG &DAG,
+ bool LegalOperations) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = Extract->getOperand(0);
+ unsigned BinOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
+ return SDValue();
+
+ EVT VecVT = BinOp.getValueType();
+ SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
+ if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
+ return SDValue();
+
+ SDValue Index = Extract->getOperand(1);
+ EVT SubVT = Extract->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
+ return SDValue();
+
+ SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
+ SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
+
+ // TODO: We could handle the case where only 1 operand is being inserted by
+ // creating an extract of the other operand, but that requires checking
+ // number of uses and/or costs.
+ if (!Sub0 || !Sub1)
+ return SDValue();
+
+ // We are inserting both operands of the wide binop only to extract back
+ // to the narrow vector size. Eliminate all of the insert/extract:
+ // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
+ return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
+ BinOp->getFlags());
+}
+
+/// If we are extracting a subvector produced by a wide binary operator try
+/// to use a narrow binary operator and/or avoid concatenation and extraction.
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
+ bool LegalOperations) {
+ // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
+ // some of these bailouts with other transforms.
+
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
+ return V;
+
+ // The extract index must be a constant, so we can map it to a concat operand.
+ auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndexC)
+ return SDValue();
+
+ // We are looking for an optionally bitcasted wide vector binary operator
+ // feeding an extract subvector.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
+ unsigned BOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
+ return SDValue();
+
+ // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
+ // reduced to the unary fneg when it is visited, and we probably want to deal
+ // with fneg in a target-specific way.
+ if (BOpcode == ISD::FSUB) {
+ auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
+ if (C && C->getValueAPF().isNegZero())
+ return SDValue();
+ }
+
+ // The binop must be a vector type, so we can extract some fraction of it.
+ EVT WideBVT = BinOp.getValueType();
+ // The optimisations below currently assume we are dealing with fixed length
+ // vectors. It is possible to add support for scalable vectors, but at the
+ // moment we've done no analysis to prove whether they are profitable or not.
+ if (!WideBVT.isFixedLengthVector())
+ return SDValue();
+
+ EVT VT = Extract->getValueType(0);
+ unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+ assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
+ "Extract index is not a multiple of the vector length.");
+
+ // Bail out if this is not a proper multiple width extraction.
+ unsigned WideWidth = WideBVT.getSizeInBits();
+ unsigned NarrowWidth = VT.getSizeInBits();
+ if (WideWidth % NarrowWidth != 0)
+ return SDValue();
+
+ // Bail out if we are extracting a fraction of a single operation. This can
+ // occur because we potentially looked through a bitcast of the binop.
+ unsigned NarrowingRatio = WideWidth / NarrowWidth;
+ unsigned WideNumElts = WideBVT.getVectorNumElements();
+ if (WideNumElts % NarrowingRatio != 0)
+ return SDValue();
+
+ // Bail out if the target does not support a narrower version of the binop.
+ EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
+ WideNumElts / NarrowingRatio);
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ return SDValue();
+
+ // If extraction is cheap, we don't need to look at the binop operands
+ // for concat ops. The narrow binop alone makes this transform profitable.
+ // We can't just reuse the original extract index operand because we may have
+ // bitcasted.
+ unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
+ BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
+ // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
+ SDLoc DL(Extract);
+ SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
+ SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), NewExtIndex);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), NewExtIndex);
+ SDValue NarrowBinOp =
+ DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ if (NarrowingRatio != 2)
+ return SDValue();
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
+
+ // We need at least one concatenation operation of a binop operand to make
+ // this transform worthwhile. The concat must double the input vector sizes.
+ auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
+ return V.getOperand(ConcatOpNum);
+ return SDValue();
+ };
+ SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
+ SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
+
+ if (SubVecL || SubVecR) {
+ // If a binop operand was not the result of a concat, we must extract a
+ // half-sized operand for our new narrow binop:
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
+ SDLoc DL(Extract);
+ SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
+ SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), IndexC);
+
+ SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), IndexC);
+
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
+
+ return SDValue();
+}
+
+/// If we are extracting a subvector from a wide vector load, convert to a
+/// narrow load to eliminate the extraction:
+/// (extract_subvector (load wide vector)) --> (load narrow vector)
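+/// For example (illustrative, little-endian):
+///   (v2i64 extract_subvector (v4i64 load %ptr), 2)
+///   --> (v2i64 load %ptr + 16 bytes)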
+static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Add support for big-endian. The offset calculation must be adjusted.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
+ return SDValue();
+
+ // Allow targets to opt-out.
+ EVT VT = Extract->getValueType(0);
+
+ // We can only create byte sized loads.
+ if (!VT.isByteSized())
+ return SDValue();
+
+ unsigned Index = Extract->getConstantOperandVal(1);
+ unsigned NumElts = VT.getVectorMinNumElements();
+
+ // The definition of EXTRACT_SUBVECTOR states that the index must be a
+ // multiple of the minimum number of elements in the result type.
+ assert(Index % NumElts == 0 && "The extract subvector index is not a "
+ "multiple of the result's element count");
+
+ // It's fine to use TypeSize here as we know the offset will not be negative.
+ TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
+ return SDValue();
+
+ // The narrow load will be offset from the base address of the old load if
+ // we are extracting from something besides index 0 (little-endian).
+ SDLoc DL(Extract);
+
+ // TODO: Use "BaseIndexOffset" to make this more effective.
+ SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
+
+ uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO;
+ if (Offset.isScalable()) {
+ MachinePointerInfo MPI =
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
+ } else
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
+ StoreSize);
+
+ SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+ return NewLd;
+}
+
+/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
+/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
+/// EXTRACT_SUBVECTOR(Op?, ?),
+/// Mask'))
+/// iff it is legal and profitable to do so. Notably, the trimmed mask
+/// (containing only the elements that are extracted)
+/// must reference at most two subvectors.
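+///
+/// For example (illustrative types, subject to the checks below):
+///   (v4i32 extract_subvector (v8i32 vector_shuffle<8,9,2,3,u,u,u,u> A, B), 0)
+///   --> (v4i32 vector_shuffle<0,1,6,7> (extract_subvector B, 0),
+///                                      (extract_subvector A, 0))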
+static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ "Must only be called on EXTRACT_SUBVECTOR's");
+
+ SDValue N0 = N->getOperand(0);
+
+ // Only deal with non-scalable vectors.
+ EVT NarrowVT = N->getValueType(0);
+ EVT WideVT = N0.getValueType();
+ if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
+ return SDValue();
+
+ // The operand must be a shufflevector.
+ auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
+ if (!WideShuffleVector)
+ return SDValue();
+
+ // The old shuffle needs to go away.
+ if (!WideShuffleVector->hasOneUse())
+ return SDValue();
+
+ // And the narrow shufflevector that we'll form must be legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
+ return SDValue();
+
+ uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
+ int NumEltsExtracted = NarrowVT.getVectorNumElements();
+ assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
+ "Extract index is not a multiple of the output vector length.");
+
+ int WideNumElts = WideVT.getVectorNumElements();
+
+ SmallVector<int, 16> NewMask;
+ NewMask.reserve(NumEltsExtracted);
+ SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
+ DemandedSubvectors;
+
+ // Try to decode the wide mask into narrow mask from at most two subvectors.
+ for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
+ NumEltsExtracted)) {
+ assert((M >= -1) && (M < (2 * WideNumElts)) &&
+ "Out-of-bounds shuffle mask?");
+
+ if (M < 0) {
+ // Does not depend on operands, does not require adjustment.
+ NewMask.emplace_back(M);
+ continue;
+ }
+
+ // From which operand of the shuffle does this shuffle mask element pick?
+ int WideShufOpIdx = M / WideNumElts;
+ // Which element of that operand is picked?
+ int OpEltIdx = M % WideNumElts;
+
+ assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
+ "Shuffle mask vector decomposition failure.");
+
+ // And which NumEltsExtracted-sized subvector of that operand is that?
+ int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
+ // And which element within that subvector of that operand is that?
+ int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
+
+ assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
+ "Shuffle mask subvector decomposition failure.");
+
+ assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
+ WideShufOpIdx * WideNumElts) == M &&
+ "Shuffle mask full decomposition failure.");
+
+ SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
+
+ if (Op.isUndef()) {
+ // Picking from an undef operand. Let's adjust mask instead.
+ NewMask.emplace_back(-1);
+ continue;
+ }
+
+ const std::pair<SDValue, int> DemandedSubvector =
+ std::make_pair(Op, OpSubvecIdx);
+
+ if (DemandedSubvectors.insert(DemandedSubvector)) {
+ if (DemandedSubvectors.size() > 2)
+ return SDValue(); // We can't handle more than two subvectors.
+ // How many elements into the WideVT does this subvector start?
+ int Index = NumEltsExtracted * OpSubvecIdx;
+ // Bail out if the extraction isn't going to be cheap.
+ if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
+ return SDValue();
+ }
+
+ // Ok, but from which operand of the new shuffle will this element pick?
+ int NewOpIdx =
+ getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
+ assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
+
+ int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
+ NewMask.emplace_back(AdjM);
+ }
+ assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
+ assert(DemandedSubvectors.size() <= 2 &&
+ "Should have ended up demanding at most two subvectors.");
+
+ // Did we discover that the shuffle does not actually depend on operands?
+ if (DemandedSubvectors.empty())
+ return DAG.getUNDEF(NarrowVT);
+
+ // Profitability check: only deal with extractions from the first subvector
+ // unless the mask becomes an identity mask.
+ if (!ShuffleVectorInst::isIdentityMask(NewMask) ||
+ any_of(NewMask, [](int M) { return M < 0; }))
+ for (auto &DemandedSubvector : DemandedSubvectors)
+ if (DemandedSubvector.second != 0)
+ return SDValue();
+
+ // We still perform the exact same EXTRACT_SUBVECTOR, just on different
+ // operand[s]/index[es], so there is no point in checking its legality.
+
+ // Do not turn a legal shuffle into an illegal one.
+ if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
+ !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ SmallVector<SDValue, 2> NewOps;
+ for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
+ &DemandedSubvector : DemandedSubvectors) {
+ // How many elements into the WideVT does this subvector start?
+ int Index = NumEltsExtracted * DemandedSubvector.second;
+ SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
+ NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
+ DemandedSubvector.first, IndexC));
+ }
+ assert((NewOps.size() == 1 || NewOps.size() == 2) &&
+ "Should end up with either one or two ops");
+
+ // If we ended up with only one operand, pad with an undef.
+ if (NewOps.size() == 1)
+ NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
+
+ return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
+}
+
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+ uint64_t ExtIdx = N->getConstantOperandVal(1);
+
+ // Extract from UNDEF is UNDEF.
+ if (V.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
+ if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
+ return NarrowLoad;
+
+ // Combine an extract of an extract into a single extract_subvector.
+ // ext (ext X, C), 0 --> ext X, C
+ if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
+ if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+ V.getConstantOperandVal(1)) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+ V.getOperand(1));
+ }
+ }
+
+ // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
+ if (V.getOpcode() == ISD::SPLAT_VECTOR)
+ if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
+ return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
+
+ // Try to move vector bitcast after extract_subv by scaling extraction index:
+ // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+ if (V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType().isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
+ SDValue SrcOp = V.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
+ if ((SrcNumElts % DestNumElts) == 0) {
+ unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+ ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NewExtEC);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ if ((DestNumElts % SrcNumElts) == 0) {
+ unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+ if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
+ ElementCount NewExtEC =
+ NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
+ EVT ScalarVT = SrcVT.getScalarType();
+ if ((ExtIdx % DestSrcRatio) == 0) {
+ SDLoc DL(N);
+ unsigned IndexValScaled = ExtIdx / DestSrcRatio;
+ EVT NewExtVT =
+ EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ if (NewExtEC.isScalar() &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
+ SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ }
+ }
+ }
+
+ if (V.getOpcode() == ISD::CONCAT_VECTORS) {
+ unsigned ExtNumElts = NVT.getVectorMinNumElements();
+ EVT ConcatSrcVT = V.getOperand(0).getValueType();
+ assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "Concat and extract subvector do not change element type");
+ assert((ExtIdx % ExtNumElts) == 0 &&
+ "Extract index is not a multiple of the input vector length.");
+
+ unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
+ unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
+
+ // If the concatenated source types match this extract, it's a direct
+ // simplification:
+ // extract_subvec (concat V1, V2, ...), i --> Vi
+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
+ return V.getOperand(ConcatOpIdx);
+
+ // If the concatenated source vectors are a multiple length of this extract,
+ // then extract a fraction of one of those source vectors directly from a
+ // concat operand. Example:
+ // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
+ // v2i8 extract_subvec v8i8 Y, 6
+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+ ConcatSrcNumElts % ExtNumElts == 0) {
+ SDLoc DL(N);
+ unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
+ assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
+ "Trying to extract from >1 concat operand?");
+ assert(NewExtIdx % ExtNumElts == 0 &&
+ "Extract index is not a multiple of the input vector length.");
+ SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
+ V.getOperand(ConcatOpIdx), NewIndexC);
+ }
+ }
+
+ if (SDValue V =
+ foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
+ return V;
+
+ V = peekThroughBitcasts(V);
+
+ // If the input is a build vector, try to make a smaller build vector.
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
+ EVT InVT = V.getValueType();
+ unsigned ExtractSize = NVT.getSizeInBits();
+ unsigned EltSize = InVT.getScalarSizeInBits();
+ // Only do this if we won't split any elements.
+ if (ExtractSize % EltSize == 0) {
+ unsigned NumElems = ExtractSize / EltSize;
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ExtractVT =
+ NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ if ((Level < AfterLegalizeDAG ||
+ (NumElems == 1 ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
+ (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+ unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
+
+ if (NumElems == 1) {
+ SDValue Src = V->getOperand(IdxVal);
+ if (EltVT != Src.getValueType())
+ Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
+ return DAG.getBitcast(NVT, Src);
+ }
+
+ // Extract the pieces from the original build_vector.
+ SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+ V->ops().slice(IdxVal, NumElems));
+ return DAG.getBitcast(NVT, BuildVec);
+ }
+ }
+ }
+
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ // Handle only the simple case where the vector being inserted and the
+ // vector being extracted are of the same size.
+ EVT SmallVT = V.getOperand(1).getValueType();
+ if (!NVT.bitsEq(SmallVT))
+ return SDValue();
+
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ uint64_t InsIdx = V.getConstantOperandVal(2);
+ if (InsIdx * SmallVT.getScalarSizeInBits() ==
+ ExtIdx * NVT.getScalarSizeInBits()) {
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
+ return SDValue();
+
+ return DAG.getBitcast(NVT, V.getOperand(1));
+ }
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ N->getOperand(1));
+ }
+
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
+ return NarrowBOp;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
+/// followed by concatenation. Narrow vector ops may have better performance
+/// than wide ops, and this can unlock further narrowing of other vector ops.
+/// Targets can invert this transform later if it is not profitable.
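+///
+/// For example (illustrative types, if both half-masks are legal):
+///   (v8i32 vector_shuffle<0,8,1,9,2,10,3,11> (concat_vectors X, undef),
+///                                            (concat_vectors Y, undef))
+///   --> (v8i32 concat_vectors (v4i32 vector_shuffle<0,4,1,5> X, Y),
+///                             (v4i32 vector_shuffle<2,6,3,7> X, Y))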
+static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
+ if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+ N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
+ !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
+ return SDValue();
+
+ // Split the wide shuffle mask into halves. Any mask element that is accessing
+ // operand 1 is offset down to account for narrowing of the vectors.
+ ArrayRef<int> Mask = Shuf->getMask();
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> Mask0(HalfNumElts, -1);
+ SmallVector<int, 16> Mask1(HalfNumElts, -1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ // If we reference the upper (undef) subvector then the element is undef.
+ if ((Mask[i] % NumElts) >= HalfNumElts)
+ continue;
+ int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
+ if (i < HalfNumElts)
+ Mask0[i] = M;
+ else
+ Mask1[i - HalfNumElts] = M;
+ }
+
+ // Ask the target if this is a valid transform.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ HalfNumElts);
+ if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
+ !TLI.isShuffleMaskLegal(Mask1, HalfVT))
+ return SDValue();
+
+ // shuffle (concat X, undef), (concat Y, undef), Mask -->
+ // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
+ SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
+ SDLoc DL(Shuf);
+ SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
+ SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
+}
+
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into simpler shuffle then concat.
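+// For example (illustrative types):
+//   (v8i32 vector_shuffle<4,5,6,7,12,13,14,15> (concat_vectors A, B),
+//                                              (concat_vectors C, D))
+//   --> (v8i32 concat_vectors B, D)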
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
+
+ SmallVector<SDValue, 4> Ops;
+ EVT ConcatVT = N0.getOperand(0).getValueType();
+ unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+ unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+ auto IsUndefMaskElt = [](int i) { return i == -1; };
+
+ // Special case: shuffle(concat(A,B)) can be more efficiently represented
+ // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
+ // half vector elements.
+ if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
+ llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
+ IsUndefMaskElt)) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
+ N0.getOperand(1),
+ Mask.slice(0, NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
+ // Look at every vector that's inserted. We're looking for exact
+ // subvector-sized copies from a concatenated vector.
+ for (unsigned I = 0; I != NumConcats; ++I) {
+ unsigned Begin = I * NumElemsPerConcat;
+ ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
+
+ // Make sure we're dealing with a copy.
+ if (llvm::all_of(SubMask, IsUndefMaskElt)) {
+ Ops.push_back(DAG.getUNDEF(ConcatVT));
+ continue;
+ }
+
+ int OpIdx = -1;
+ for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
+ if (IsUndefMaskElt(SubMask[i]))
+ continue;
+ if ((SubMask[i] % (int)NumElemsPerConcat) != i)
+ return SDValue();
+ int EltOpIdx = SubMask[i] / NumElemsPerConcat;
+ if (0 <= OpIdx && EltOpIdx != OpIdx)
+ return SDValue();
+ OpIdx = EltOpIdx;
+ }
+ assert(0 <= OpIdx && "Unknown concat_vectors op");
+
+ if (OpIdx < (int)N0.getNumOperands())
+ Ops.push_back(N0.getOperand(OpIdx));
+ else
+ Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+}
+
+// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+//
+// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
+// a simplification in some sense, but it isn't appropriate in general: some
+// BUILD_VECTORs are substantially cheaper than others. The general case
+// of a BUILD_VECTOR requires inserting each element individually (or
+// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
+// all constants is a single constant pool load. A BUILD_VECTOR where each
+// element is identical is a splat. A BUILD_VECTOR where most of the operands
+// are undef lowers to a small number of element insertions.
+//
+// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
+// We don't fold shuffles where one side is a non-zero constant, and we don't
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
+// non-constant operands. This seems to work out reasonably well in practice.
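+//
+// A minimal sketch of the intended fold (illustrative; a..d are scalar
+// operands that pass the duplicate/constant checks below):
+//   shuffle(BUILD_VECTOR(a,b,c,d), undef, <1,0,3,2>)
+//     --> BUILD_VECTOR(b,a,d,c)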
+static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = SVN->getOperand(0);
+ SDValue N1 = SVN->getOperand(1);
+
+ if (!N0->hasOneUse())
+ return SDValue();
+
+ // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
+ // discussed above.
+ if (!N1.isUndef()) {
+ if (!N1->hasOneUse())
+ return SDValue();
+
+ bool N0AnyConst = isAnyConstantBuildVector(N0);
+ bool N1AnyConst = isAnyConstantBuildVector(N1);
+ if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
+ return SDValue();
+ if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
+ return SDValue();
+ }
+
+ // If both inputs are splats of the same value then we can safely merge this
+ // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+ bool IsSplat = false;
+ auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (BV0 && BV1)
+ if (SDValue Splat0 = BV0->getSplatValue())
+ IsSplat = (Splat0 == BV1->getSplatValue());
+
+ SmallVector<SDValue, 8> Ops;
+ SmallSet<SDValue, 16> DuplicateOps;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M < (int)NumElts ? M : M - NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ SDValue Op0 = S.getOperand(0);
+ Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
+ } else {
+ // Operand can't be combined - bail out.
+ return SDValue();
+ }
+ }
+
+ // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+ // generating a splat; semantically, this is fine, but it's likely to
+ // generate low-quality code if the target can't reconstruct an appropriate
+ // shuffle.
+ if (!Op.isUndef() && !isIntOrFPConstant(Op))
+ if (!IsSplat && !DuplicateOps.insert(Op).second)
+ return SDValue();
+
+ Ops.push_back(Op);
+ }
+
+ // BUILD_VECTOR requires all inputs to be of the same type; find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = Op.isUndef() ? DAG.getUNDEF(SVT)
+ : (TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
+ return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
+}
+
+// Match shuffles that can be converted to *_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
+// and returns the EVT to which the extension should be performed.
+// NOTE: this assumes that the src is the first operand of the shuffle.
+static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
+ unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
+ SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
+ bool LegalOperations) {
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return std::nullopt;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
+ // power-of-2 extensions as they are the most likely.
+ // FIXME: should try the Scale == NumElts case too.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ // The vector width must be a multiple of Scale.
+ if (NumElts % Scale != 0)
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+
+ if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
+ (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
+ continue;
+
+ if (Match(Scale))
+ return OutVT;
+ }
+
+ return std::nullopt;
+}
+
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+ auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
+ Mask = SVN->getMask()](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
+ SDValue N0 = SVN->getOperand(0);
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
+ Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
+ if (!OutVT)
+ return SDValue();
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
+}
+
+// Match shuffles that can be converted to zero_extend_vector_inreg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
+static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ bool LegalTypes = true;
+ EVT VT = SVN->getValueType(0);
+ assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ // TODO: add support for big-endian when we have a test case.
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
+ auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
+ for (int &Indice : Mask) {
+ if (Indice < 0)
+ continue;
+ int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
+ int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
+ Fn(Indice, OpIdx, OpEltIdx);
+ }
+ };
+
+ // Which elements of which operand does this shuffle demand?
+ std::array<APInt, 2> OpsDemandedElts;
+ for (APInt &OpDemandedElts : OpsDemandedElts)
+ OpDemandedElts = APInt::getZero(NumElts);
+ ForEachDecomposedIndice(
+ [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
+ OpsDemandedElts[OpIdx].setBit(OpEltIdx);
+ });
+
+ // Element-wise(!), which of these demanded elements are known to be zero?
+ std::array<APInt, 2> OpsKnownZeroElts;
+ for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
+ std::get<2>(I) =
+ DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
+
+ // Manifest zeroable element knowledge in the shuffle mask.
+ // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
+ // this is a local invention, but it won't leak into the DAG.
+ // FIXME: should we not manifest them, but just check when matching?
+ bool HadZeroableElts = false;
+ ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
+ int &Indice, int OpIdx, int OpEltIdx) {
+ if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
+ Indice = -2; // Zeroable element.
+ HadZeroableElts = true;
+ }
+ });
+
+ // Don't proceed unless we've refined at least one zeroable mask index.
+ // If we didn't, then we are still trying to match the same shuffle mask
+ // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
+ // and evidently failed. Proceeding will lead to endless combine loops.
+ if (!HadZeroableElts)
+ return SDValue();
+
+ // The shuffle may be more fine-grained than we want. Widen elements first.
+ // FIXME: should we do this before manifesting zeroable shuffle mask indices?
+ SmallVector<int, 16> ScaledMask;
+ getShuffleMaskWithWidestElts(Mask, ScaledMask);
+ assert(Mask.size() >= ScaledMask.size() &&
+ Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
+ int Prescale = Mask.size() / ScaledMask.size();
+
+ NumElts = ScaledMask.size();
+ EltSizeInBits *= Prescale;
+
+ EVT PrescaledVT = EVT::getVectorVT(
+ *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
+ NumElts);
+
+ if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // For example,
+ // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
+ // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
+ auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
+ assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
+ "Unexpected mask scaling factor.");
+ ArrayRef<int> Mask = ScaledMask;
+ for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
+ SrcElt != NumSrcElts; ++SrcElt) {
+ // Analyze the shuffle mask in Scale-sized chunks.
+ ArrayRef<int> MaskChunk = Mask.take_front(Scale);
+ assert(MaskChunk.size() == Scale && "Unexpected mask size.");
+ Mask = Mask.drop_front(MaskChunk.size());
+ // The first index in this chunk must be SrcElt, but not zero!
+ // FIXME: undef should be fine, but that results in a more-defined result.
+ if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
+ return false;
+ // The rest of the indices in this chunk must be zeros.
+ // FIXME: undef should be fine, but that results in a more-defined result.
+ if (!all_of(MaskChunk.drop_front(1),
+ [](int Indice) { return Indice == -2; }))
+ return false;
+ }
+ assert(Mask.empty() && "Did not process the whole mask?");
+ return true;
+ };
+
+ unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
+ for (bool Commuted : {false, true}) {
+ SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
+ if (Commuted)
+ ShuffleVectorSDNode::commuteMask(ScaledMask);
+ std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
+ Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
+ LegalOperations);
+ if (OutVT)
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
+ DAG.getBitcast(PrescaledVT, Op)));
+ }
+ return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we should
+// then be able to remove it.
+static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
+
+ unsigned Opcode = N0.getOpcode();
+ if (!ISD::isExtVecInRegOpcode(Opcode))
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+ unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+ if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+ return SDValue();
+ unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
+
+ // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+ // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+ // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+ auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // At the moment we just handle the case where we've truncated back to the
+ // same size as before the extension.
+ // TODO: handle more extension/truncation cases as they arise.
+ if (EltSizeInBits != ExtSrcSizeInBits)
+ return SDValue();
+
+ // We can remove *extend_vector_inreg only if the truncation happens at
+ // the same scale as the extension.
+ if (isTruncate(ExtScale))
+ return DAG.getBitcast(VT, N00);
+
+ return SDValue();
+}
+
+// Combine shuffles of splat-shuffles of the form:
+// shuffle (shuffle V, undef, splat-mask), undef, M
+// If splat-mask contains undef elements, we need to be careful about
+ // introducing undefs in the folded mask that are not the result of composing
+// the masks of the shuffles.
+static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if (!Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ // See if this unary non-splat shuffle actually *is* a splat shuffle in
+ // disguise, with all demanded elements being identical.
+ // FIXME: this can be done per-operand.
+ if (!Shuf->isSplat()) {
+ APInt DemandedElts(NumElts, 0);
+ for (int Idx : Shuf->getMask()) {
+ if (Idx < 0)
+ continue; // Ignore sentinel indices.
+ assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
+ DemandedElts.setBit(Idx);
+ }
+ assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
+ APInt UndefElts;
+ if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
+ // Even if all demanded elements are splat, some of them could be undef.
+ // Which lowest demanded element is *not* known-undef?
+ std::optional<unsigned> MinNonUndefIdx;
+ for (int Idx : Shuf->getMask()) {
+ if (Idx < 0 || UndefElts[Idx])
+ continue; // Ignore sentinel indices, and undef elements.
+ MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
+ }
+ if (!MinNonUndefIdx)
+ return DAG.getUNDEF(VT); // All undef - result is undef.
+ assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
+ SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
+ Shuf->getMask().end());
+ for (int &Idx : SplatMask) {
+ if (Idx < 0)
+ continue; // Passthrough sentinel indices.
+ // Otherwise, just pick the lowest demanded non-undef element.
+ // Or sentinel undef, if we know we'd pick a known-undef element.
+ Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
+ }
+ assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
+ return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
+ Shuf->getOperand(1), SplatMask);
+ }
+ }
+
+ // If the inner operand is a known splat with no undefs, just return that
+ // directly.
+ // TODO: Create DemandedElts mask from Shuf's mask.
+ // TODO: Allow undef elements and merge with the shuffle code below.
+ if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
+ return Shuf->getOperand(0);
+
+ auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Splat || !Splat->isSplat())
+ return SDValue();
+
+ ArrayRef<int> ShufMask = Shuf->getMask();
+ ArrayRef<int> SplatMask = Splat->getMask();
+ assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
+
+ // Prefer simplifying to the splat-shuffle, if possible. This is legal if
+ // every undef mask element in the splat-shuffle has a corresponding undef
+ // element in the user-shuffle's mask or if the composition of mask elements
+ // would result in undef.
+ // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
+ // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
+ // In this case it is not legal to simplify to the splat-shuffle because we
+ // may be exposing to the users of the shuffle an undef element at index 1
+ // that was not there before the combine.
+ // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
+ // In this case the composition of masks yields SplatMask, so it's ok to
+ // simplify to the splat-shuffle.
+ // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
+ // In this case the composed mask includes all undef elements of SplatMask
+ // and in addition sets element zero to undef. It is safe to simplify to
+ // the splat-shuffle.
+ auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
+ ArrayRef<int> SplatMask) {
+ for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
+ if (UserMask[i] != -1 && SplatMask[i] == -1 &&
+ SplatMask[UserMask[i]] != -1)
+ return false;
+ return true;
+ };
+ if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
+ return Shuf->getOperand(0);
+
+ // Create a new shuffle with a mask that is composed of the two shuffles'
+ // masks.
+ SmallVector<int, 32> NewMask;
+ for (int Idx : ShufMask)
+ NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
+
+ return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
+ Splat->getOperand(0), Splat->getOperand(1),
+ NewMask);
+}
+
+// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
+// the mask can be treated as a larger type.
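+//
+// For example (illustrative, little-endian, with X and Y of type v4i32):
+//   v8i16 shuffle(bitcast X, bitcast Y, <0,1,8,9,2,3,10,11>)
+//     --> bitcast(v4i32 shuffle(X, Y, <0,4,1,5>))
+// because each pair of v8i16 lanes copies a whole v4i32 lane.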
+static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ SDValue Op0 = SVN->getOperand(0);
+ SDValue Op1 = SVN->getOperand(1);
+ EVT VT = SVN->getValueType(0);
+ if (Op0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ EVT InVT = Op0.getOperand(0).getValueType();
+ if (!InVT.isVector() ||
+ (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
+ Op1.getOperand(0).getValueType() != InVT)))
+ return SDValue();
+ if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
+ (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
+ return SDValue();
+
+ int VTLanes = VT.getVectorNumElements();
+ int InLanes = InVT.getVectorNumElements();
+ if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
+ (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
+ return SDValue();
+ int Factor = VTLanes / InLanes;
+
+ // Check that each group of lanes in the mask is either undef or makes a
+ // valid mask for the wider lane type.
+ ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int> NewMask;
+ if (!widenShuffleMaskElts(Factor, Mask, NewMask))
+ return SDValue();
+
+ if (!TLI.isShuffleMaskLegal(NewMask, InVT))
+ return SDValue();
+
+ // Create the new shuffle with the new mask and bitcast it back to the
+ // original type.
+ SDLoc DL(SVN);
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
+ SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
+ return DAG.getBitcast(VT, NewShuf);
+}
+
+/// Combine shuffle of shuffle of the form:
+/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
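+///
+/// For example (illustrative):
+///   shuf (shuf X, undef, <3,1,3,0>), undef, <0,2,u,2>
+///     --> shuf X, undef, <3,3,u,3>, i.e. a splat of X[3].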
+static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
+ SelectionDAG &DAG) {
+ if (!OuterShuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
+ if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> OuterMask = OuterShuf->getMask();
+ ArrayRef<int> InnerMask = InnerShuf->getMask();
+ unsigned NumElts = OuterMask.size();
+ assert(NumElts == InnerMask.size() && "Mask length mismatch");
+ SmallVector<int, 32> CombinedMask(NumElts, -1);
+ int SplatIndex = -1;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Undef lanes remain undef.
+ int OuterMaskElt = OuterMask[i];
+ if (OuterMaskElt == -1)
+ continue;
+
+ // Peek through the shuffle masks to get the underlying source element.
+ int InnerMaskElt = InnerMask[OuterMaskElt];
+ if (InnerMaskElt == -1)
+ continue;
+
+ // Initialize the splatted element.
+ if (SplatIndex == -1)
+ SplatIndex = InnerMaskElt;
+
+ // Non-matching index - this is not a splat.
+ if (SplatIndex != InnerMaskElt)
+ return SDValue();
+
+ CombinedMask[i] = InnerMaskElt;
+ }
+ assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
+ getSplatIndex(CombinedMask) != -1) &&
+ "Expected a splat mask");
+
+ // TODO: The transform may be a win even if the mask is not legal.
+ EVT VT = OuterShuf->getValueType(0);
+ assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
+ InnerShuf->getOperand(1), CombinedMask);
+}
+
+/// If the shuffle mask is taking exactly one element from the first vector
+/// operand and passing through all other elements from the second vector
+/// operand, return the index of the mask element that is choosing an element
+/// from the first operand. Otherwise, return -1.
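+///
+/// For example (illustrative), with two 4-element operands:
+///   Mask <4,5,2,7> returns 2: lane 2 takes element 2 of operand 0 and all
+///   other lanes pass through the corresponding element of operand 1.
+///   Mask <4,1,2,7> returns -1 because two lanes come from operand 0.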
+static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
+ int MaskSize = Mask.size();
+ int EltFromOp0 = -1;
+ // TODO: This does not match if there are undef elements in the shuffle mask.
+ // Should we ignore undefs in the shuffle mask instead? The trade-off is
+ // removing an instruction (a shuffle), but losing the knowledge that some
+ // vector lanes are not needed.
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask[i] >= 0 && Mask[i] < MaskSize) {
+ // We're looking for a shuffle of exactly one element from operand 0.
+ if (EltFromOp0 != -1)
+ return -1;
+ EltFromOp0 = i;
+ } else if (Mask[i] != i + MaskSize) {
+ // Nothing from operand 1 can change lanes.
+ return -1;
+ }
+ }
+ return EltFromOp0;
+}
+
+/// If a shuffle inserts exactly one element from a source vector operand into
+/// another vector operand and we can access the specified element as a scalar,
+/// then we can eliminate the shuffle.
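+///
+/// For example (illustrative):
+///   shuffle (insertelt v1, x, 2), v2, <4,5,2,7> --> insertelt v2, x, 2
+/// because only lane 2 of the result comes from operand 0, and that lane
+/// holds the freshly inserted scalar x.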
+static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ // First, check if we are taking one element of a vector and shuffling that
+ // element into another vector.
+ ArrayRef<int> Mask = Shuf->getMask();
+ SmallVector<int, 16> CommutedMask(Mask);
+ SDValue Op0 = Shuf->getOperand(0);
+ SDValue Op1 = Shuf->getOperand(1);
+ int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
+ if (ShufOp0Index == -1) {
+ // Commute mask and check again.
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+ ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
+ if (ShufOp0Index == -1)
+ return SDValue();
+ // Commute operands to match the commuted shuffle mask.
+ std::swap(Op0, Op1);
+ Mask = CommutedMask;
+ }
+
+ // The shuffle inserts exactly one element from operand 0 into operand 1.
+ // Now see if we can access that element as a scalar via a real insert element
+ // instruction.
+ // TODO: We can try harder to locate the element as a scalar. Examples: it
+ // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+ assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
+ "Shuffle mask value must be from operand 0");
+ if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
+ if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
+ return SDValue();
+
+ // There's an existing insertelement with constant insertion index, so we
+ // don't need to check the legality/profitability of a replacement operation
+ // that differs at most in the constant value. The target should be able to
+ // lower any of those in a similar way. If not, legalization will expand this
+ // to a scalar-to-vector plus shuffle.
+ //
+ // Note that the shuffle may move the scalar from the position that the insert
+ // element used. Therefore, our new insert element occurs at the shuffle's
+ // mask index value, not the insert's index value.
+ // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
+ SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+ Op1, Op0.getOperand(1), NewInsIndex);
+}
+
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
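+///
+/// For example (illustrative):
+///   shuf (shuf0 X, Y, <5,5,7,7>), undef, <1,1,3,3> --> shuf0 X, Y, <5,5,7,7>
+/// because every lane of the outer shuffle already matches the corresponding
+/// lane of the inner shuffle's result.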
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+ // shuf (shuf0 X, Y, Mask0), undef, Mask
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> Mask = Shuf->getMask();
+ ArrayRef<int> Mask0 = Shuf0->getMask();
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+ // Is the element of the shuffle operand chosen by this shuffle the same as
+ // the element chosen by the shuffle operand itself?
+ if (Mask0[Mask[i]] != Mask0[i])
+ return SDValue();
+ }
+ // Every element of this shuffle is identical to the result of the previous
+ // shuffle, so we can replace this value.
+ return Shuf->getOperand(0);
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.isUndef() && N1.isUndef())
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+ createUnaryMask(SVN->getMask(), NumElts));
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.isUndef())
+ return DAG.getCommutedVectorShuffle(*SVN);
+
+ // Remove references to rhs if it is undef
+ if (N1.isUndef()) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
+ }
+
+ if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+ return InsElt;
+
+ // A shuffle of a single vector that is a splatted value can always be folded.
+ if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+ return V;
+
+ if (SDValue V = formSplatFromShuffles(SVN, DAG))
+ return V;
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector.
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ int SplatIndex = SVN->getSplatIndex();
+ if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
+ // splat (vector_bo L, R), Index -->
+ // splat (scalar_bo (extelt L, Index), (extelt R, Index))
+ SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+ SDLoc DL(N);
+ EVT EltVT = VT.getScalarType();
+ SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
+ SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+ SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+ SDValue NewBO =
+ DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
+ SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+ SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+ return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+ }
+
+ // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
+ // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
+ if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
+ if (Idx->getAPIntValue() == SplatIndex)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
+ // Look through a bitcast if LE and splatting lane 0, through to a
+ // scalar_to_vector or a build_vector.
+ if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+ SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+ (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+ VT.isInteger() && N00VT.isInteger()) {
+ EVT InVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+ SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+ SDLoc(N), InVT);
+ return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+ }
+ }
+ }
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+ // look through conversions that change things like v4f32 to v2f64.
+ SDNode *V = N0.getNode();
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!V->getOperand(i).isUndef()) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ SDValue Splatted = V->getOperand(SplatIndex);
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getBitcast(VT, NewBV);
+ return NewBV;
+ }
+ }
+
+ // Simplify source operands based on shuffle mask.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // This is intentionally placed after demanded elements simplification because
+ // it could eliminate knowledge of undef elements created by this shuffle.
+ if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+ return ShufOp;
+
+ // Match shuffles that can be converted to any_vector_extend_in_reg.
+ if (SDValue V =
+ combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Combine "truncate_vector_in_reg" style shuffles.
+ if (SDValue V = combineTruncationShuffle(SVN, DAG))
+ return V;
+
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ Level < AfterLegalizeVectorOps &&
+ (N1.isUndef() ||
+ (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+ if (SDValue V = partitionShuffleOfConcats(N, DAG))
+ return V;
+ }
+
+ // A shuffle of a concat of the same narrow vector can be reduced to use
+ // only low-half elements of a concat with undef:
+ // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
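+ // For example (illustrative, with X of type v2i32):
+ //   shuf (concat X, X), undef, <3,1,2,0>
+ //     --> shuf (concat X, undef), undef, <1,1,0,0>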
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
+ N0.getNumOperands() == 2 &&
+ N0.getOperand(0) == N0.getOperand(1)) {
+ int HalfNumElts = (int)NumElts / 2;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= HalfNumElts) {
+ assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
+ Idx -= HalfNumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (TLI.isShuffleMaskLegal(NewMask, VT)) {
+ SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
+ SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N0.getOperand(0), UndefVec);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
+ }
+ }
+
+ // See if we can replace a shuffle with an insert_subvector.
+ // e.g. v2i32 into v8i32:
+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+ // --> insert_subvector(lhs,rhs1,4).
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+ // Ensure RHS subvectors are legal.
+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+ EVT SubVT = RHS.getOperand(0).getValueType();
+ int NumSubVecs = RHS.getNumOperands();
+ int NumSubElts = SubVT.getVectorNumElements();
+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+ if (!TLI.isTypeLegal(SubVT))
+ return SDValue();
+
+ // Don't bother if we have a unary shuffle (matches undef + LHS elts).
+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+ return SDValue();
+
+ // Search [NumSubElts] spans for RHS sequence.
+ // TODO: Can we avoid nested loops to increase performance?
+ SmallVector<int> InsertionMask(NumElts);
+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+ // Reset mask to identity.
+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+ // Add subvector insertion.
+ std::iota(InsertionMask.begin() + SubIdx,
+ InsertionMask.begin() + SubIdx + NumSubElts,
+ NumElts + (SubVec * NumSubElts));
+
+ // See if the shuffle mask matches the reference insertion mask.
+ bool MatchingShuffle = true;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int ExpectIdx = InsertionMask[i];
+ int ActualIdx = Mask[i];
+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+ MatchingShuffle = false;
+ break;
+ }
+ }
+
+ if (MatchingShuffle)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+ RHS.getOperand(SubVec),
+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+ }
+ }
+ return SDValue();
+ };
+ ArrayRef<int> Mask = SVN->getMask();
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+ return InsertN1;
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SmallVector<int> CommuteMask(Mask);
+ ShuffleVectorSDNode::commuteMask(CommuteMask);
+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+ return InsertN0;
+ }
+ }
+
+ // If we're not performing a select/blend shuffle, see if we can convert the
+ // shuffle into an AND node, where all the out-of-lane elements are known zero.
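+ // For example (illustrative), if the demanded elements of N1 are known to
+ // be zero:
+ //   v4i32 shuffle(X, N1, <0,6,2,4>) --> and(X, <-1,0,-1,0>)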
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ bool IsInLaneMask = true;
+ ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int, 16> ClearMask(NumElts, -1);
+ APInt DemandedLHS = APInt::getZero(NumElts);
+ APInt DemandedRHS = APInt::getZero(NumElts);
+ for (int I = 0; I != (int)NumElts; ++I) {
+ int M = Mask[I];
+ if (M < 0)
+ continue;
+ ClearMask[I] = M == I ? I : (I + NumElts);
+ IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
+ if (M != I) {
+ APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
+ Demanded.setBit(M % NumElts);
+ }
+ }
+ // TODO: Should we try to mask with N1 as well?
+ if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
+ (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
+ (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
+ SDLoc DL(N);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+ EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
+ // Transform the type to a legal type so that the buildvector constant
+ // elements are not illegal. Make sure that the result is larger than the
+ // original type, in case the value is split into two (e.g. i64->i32).
+ if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
+ IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
+ if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
+ SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
+ SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
+ SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
+ for (int I = 0; I != (int)NumElts; ++I)
+ if (0 <= Mask[I])
+ AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
+
+ // See if a clear mask is legal instead of going via
+ // XformToShuffleWithZero which loses UNDEF mask elements.
+ if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
+ DAG.getConstant(0, DL, IntVT), ClearMask));
+
+ if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
+ DAG.getBuildVector(IntVT, DL, AndMask)));
+ }
+ }
+ }
+
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
+ if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
+ return Res;
+
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.isUndef() && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
+
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask;
+ SmallVector<int, 8> OuterMask;
+ narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
+ narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getBitcast(ScaleVT, SV0);
+ SV1 = DAG.getBitcast(ScaleVT, SV1);
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
+ // Match shuffles of bitcasts, so long as the mask can be treated as the
+ // larger type.
+ if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
+ // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
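+ // For example (illustrative, Commute = false):
+ //   SVN = shuffle(OtherSVN, C, <0,2,4,6>),
+ //   OtherSVN = shuffle(A, B, <0,4,1,5>)
+ //     --> SV0 = A, SV1 = C, Mask = <0,1,4,6>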
+ auto MergeInnerShuffle =
+ [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
+ ShuffleVectorSDNode *OtherSVN, SDValue N1,
+ const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask) -> bool {
+ // Don't try to fold splats; they're likely to simplify somehow, or they
+ // might be free.
+ if (OtherSVN->isSplat())
+ return false;
+
+ SV0 = SV1 = SDValue();
+ Mask.clear();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ if (Commute)
+ Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
+
+ SDValue CurrentVec;
+ if (Idx < (int)NumElts) {
+ // This shuffle index refers to the inner shuffle N0. Lookup the inner
+ // shuffle mask to identify which vector is actually referenced.
+ Idx = OtherSVN->getMaskElt(Idx);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+ CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
+ : OtherSVN->getOperand(1);
+ } else {
+ // This shuffle index references an element within N1.
+ CurrentVec = N1;
+ }
+
+ // Simple case where 'CurrentVec' is UNDEF.
+ if (CurrentVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Canonicalize the shuffle index. We don't know yet if CurrentVec
+ // will be the first or second operand of the combined shuffle.
+ Idx = Idx % NumElts;
+ if (!SV0.getNode() || SV0 == CurrentVec) {
+ // Ok. CurrentVec is the left hand side.
+ // Update the mask accordingly.
+ SV0 = CurrentVec;
+ Mask.push_back(Idx);
+ continue;
+ }
+ if (!SV1.getNode() || SV1 == CurrentVec) {
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
+ continue;
+ }
+
+ // Last chance - see if the vector is another shuffle and if it
+ // uses one of the existing candidate shuffle ops.
+ if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
+ int InnerIdx = CurrentSVN->getMaskElt(Idx);
+ if (InnerIdx < 0) {
+ Mask.push_back(-1);
+ continue;
+ }
+ SDValue InnerVec = (InnerIdx < (int)NumElts)
+ ? CurrentSVN->getOperand(0)
+ : CurrentSVN->getOperand(1);
+ if (InnerVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+ InnerIdx %= NumElts;
+ if (InnerVec == SV0) {
+ Mask.push_back(InnerIdx);
+ continue;
+ }
+ if (InnerVec == SV1) {
+ Mask.push_back(InnerIdx + NumElts);
+ continue;
+ }
+ }
+
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ return false;
+ }
+
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return true;
+
+ // Avoid introducing shuffles with illegal mask.
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ return true;
+
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ return TLI.isShuffleMaskLegal(Mask, VT);
+ };
+
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so merging below will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Canonicalize splat shuffles to the RHS to improve merging below.
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(N0)->isSplat() &&
+ !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
+ for (int i = 0; i != 2; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N->isOnlyUserOf(N->getOperand(i).getNode())) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
+ SV0, SV1, Mask)) {
+ // Check if all indices in Mask are Undef. If so, propagate Undef.
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return DAG.getUNDEF(VT);
+
+ return DAG.getVectorShuffle(VT, SDLoc(N),
+ SV0 ? SV0 : DAG.getUNDEF(VT),
+ SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
+ }
+ }
+ }
+
+ // Merge shuffles through binops if we are able to merge them with at least
+ // one other shuffle.
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
+ unsigned SrcOpcode = N0.getOpcode();
+ if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
+ (N1.isUndef() ||
+ (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
+ // Get binop source ops, or just pass on the undef.
+ SDValue Op00 = N0.getOperand(0);
+ SDValue Op01 = N0.getOperand(1);
+ SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
+ SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
+ // TODO: We might be able to relax the VT check but we don't currently
+ // have any isBinOp() that has different result/ops VTs so play safe until
+ // we have test coverage.
+ if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
+ Op01.getValueType() == VT && Op11.getValueType() == VT &&
+ (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
+ auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask, bool LeftOp,
+ bool Commute) {
+ SDValue InnerN = Commute ? N1 : N0;
+ SDValue Op0 = LeftOp ? Op00 : Op01;
+ SDValue Op1 = LeftOp ? Op10 : Op11;
+ if (Commute)
+ std::swap(Op0, Op1);
+ // Only accept the merged shuffle if we don't introduce undef elements,
+ // or the inner shuffle already contained undef elements.
+ auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
+ return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
+ MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
+ Mask) &&
+ (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
+ llvm::none_of(Mask, [](int M) { return M < 0; }));
+ };
+
+ // Ensure we don't increase the number of shuffles - we must merge a
+ // shuffle from at least one of the LHS and RHS ops.
+ bool MergedLeft = false;
+ SDValue LeftSV0, LeftSV1;
+ SmallVector<int, 4> LeftMask;
+ if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
+ CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
+ MergedLeft = true;
+ } else {
+ LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
+ LeftSV0 = Op00, LeftSV1 = Op10;
+ }
+
+ bool MergedRight = false;
+ SDValue RightSV0, RightSV1;
+ SmallVector<int, 4> RightMask;
+ if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
+ CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
+ MergedRight = true;
+ } else {
+ RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
+ RightSV0 = Op01, RightSV1 = Op11;
+ }
+
+ if (MergedLeft || MergedRight) {
+ SDLoc DL(N);
+ SDValue LHS = DAG.getVectorShuffle(
+ VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
+ LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
+ SDValue RHS = DAG.getVectorShuffle(
+ VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
+ RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
+ return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
+ }
+ }
+ }
+ }
+
+ if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+ return V;
+
+ // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
+ // Perform this really late, because it could eliminate knowledge
+ // of undef elements created by this shuffle.
+ if (Level < AfterLegalizeTypes)
+ if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
+ LegalOperations))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isFixedLengthVector())
+ return SDValue();
+
+ // Try to convert a scalar binop with an extracted vector element to a vector
+ // binop. This is intended to reduce potentially expensive register moves.
+ // TODO: Check if both operands are extracted.
+ // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
+ SDValue Scalar = N->getOperand(0);
+ unsigned Opcode = Scalar.getOpcode();
+ EVT VecEltVT = VT.getScalarType();
+ if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
+ TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
+ Scalar.getOperand(0).getValueType() == VecEltVT &&
+ Scalar.getOperand(1).getValueType() == VecEltVT &&
+ DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
+ // Match an extract element and get a shuffle mask equivalent.
+ SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
+
+ for (int i : {0, 1}) {
+ // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
+ // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
+ SDValue EE = Scalar.getOperand(i);
+ auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
+ if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ EE.getOperand(0).getValueType() == VT &&
+ isa<ConstantSDNode>(EE.getOperand(1))) {
+ // Mask = {ExtractIndex, undef, undef....}
+ ShufMask[0] = EE.getConstantOperandVal(1);
+ // Make sure the shuffle is legal if we are crossing lanes.
+ if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
+ SDLoc DL(N);
+ SDValue V[] = {EE.getOperand(0),
+ DAG.getConstant(C->getAPIntValue(), DL, VT)};
+ SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
+ return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
+ ShufMask);
+ }
+ }
+ }
+ }
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE and possible truncate.
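+ // For example (illustrative):
+ //   v4i32 scalar_to_vector(extract_vector_elt(v4i32 V, 2))
+ //     --> shuffle(V, undef, <2,u,u,u>)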
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+ !Scalar.getOperand(0).getValueType().isFixedLengthVector())
+ return SDValue();
+
+ // If we have an implicit truncate, truncate here if it is legal.
+ if (VecEltVT != Scalar.getValueType() &&
+ Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
+ SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+ }
+
+ auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
+ if (!ExtIndexC)
+ return SDValue();
+
+ SDValue SrcVec = Scalar.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned VTNumElts = VT.getVectorNumElements();
+ if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
+ // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
+ SmallVector<int, 8> Mask(SrcNumElts, -1);
+ Mask[0] = ExtIndexC->getZExtValue();
+ SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
+ SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
+ if (!LegalShuffle)
+ return SDValue();
+
+ // If the initial vector is the same size, the shuffle is the result.
+ if (VT == SrcVT)
+ return LegalShuffle;
+
+ // If not, shorten the shuffled vector.
+ if (VTNumElts != SrcNumElts) {
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), VTNumElts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
+ ZeroIdx);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ uint64_t InsIdx = N->getConstantOperandVal(2);
+
+ // If inserting an UNDEF, just return the original vector.
+ if (N1.isUndef())
+ return N0;
+
+ // If this is an insert of an extracted vector into an undef vector, we can
+ // just use the input to the extract.
+ if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
+
+ // Simplify scalar inserts into an undef vector:
+ // insert_subvector undef, (splat X), N2 -> splat X
+ if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+
+ // If we are inserting a bitcast value into an undef, with the same
+ // number of elements, just use the bitcast input of the extract.
+ // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+ if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+ N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(0).getOperand(1) == N2 &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount() &&
+ N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
+ VT.getSizeInBits()) {
+ return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+ }
+
+ // If both N0 and N1 are bitcast values on which insert_subvector
+ // would make sense, pull the bitcast through.
+ // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+ SDValue CN0 = N0.getOperand(0);
+ SDValue CN1 = N1.getOperand(0);
+ EVT CN0VT = CN0.getValueType();
+ EVT CN1VT = CN1.getValueType();
+ if (CN0VT.isVector() && CN1VT.isVector() &&
+ CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
+ CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
+ SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+ CN0.getValueType(), CN0, CN1, N2);
+ return DAG.getBitcast(VT, NewINSERT);
+ }
+ }
+
+ // Combine INSERT_SUBVECTORs where we are inserting to the same index.
+ // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
+ // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N0.getOperand(1).getValueType() == N1.getValueType() &&
+ N0.getOperand(2) == N2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+ N1, N2);
+
+ // Eliminate an intermediate insert into an undef vector:
+ // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
+ // insert_subvector undef, X, N2
+ if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
+ N1.getOperand(1), N2);
+
+ // Push subvector bitcasts to the output, adjusting the index as we go.
+ // insert_subvector(bitcast(v), bitcast(s), c1)
+ // -> bitcast(insert_subvector(v, s, c2))
+ if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+ N1.getOpcode() == ISD::BITCAST) {
+ SDValue N0Src = peekThroughBitcasts(N0);
+ SDValue N1Src = peekThroughBitcasts(N1);
+ EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+ EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+ if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+ N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+ EVT NewVT;
+ SDLoc DL(N);
+ SDValue NewIdx;
+ LLVMContext &Ctx = *DAG.getContext();
+ ElementCount NumElts = VT.getVectorElementCount();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+ unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+ NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
+ } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+ unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+ if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
+ NumElts.divideCoefficientBy(Scale));
+ NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
+ }
+ }
+ if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+ SDValue Res = DAG.getBitcast(NewVT, N0Src);
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
+
+ // Canonicalize insert_subvector dag nodes.
+ // Example:
+  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
+  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
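+  // That is, when the outer index is smaller than the inner one the two
+  // inserts are swapped, so indices end up increasing from the innermost
+  // insert outward (e.g. index 0 inside, index 4 outside).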
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType()) {
+ unsigned OtherIdx = N0.getConstantOperandVal(2);
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
+ // If the input vector is a concatenation, and the insert replaces
+ // one of the pieces, we can optimize into a single concat_vectors.
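+  // e.g. if VT = v16i32, N0 = concat_vectors(A, B, C, D) of four v4i32 values
+  // and a v4i32 N1 is inserted at index 8, the result is
+  // concat_vectors(A, B, N1, D).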
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
+ N0.getOperand(0).getValueType() == N1.getValueType() &&
+ N0.getOperand(0).getValueType().isScalableVector() ==
+ N1.getValueType().isScalableVector()) {
+ unsigned Factor = N1.getValueType().getVectorMinNumElements();
+ SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
+ Ops[InsIdx / Factor] = N1;
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
+ // Simplify source operands based on insertion.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
+ ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+ if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+ N0.getOperand(0));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_bf16 (bf16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::BF16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // VECREDUCE over 1-element vector is just an extract.
+ if (VT.getVectorElementCount().isScalar()) {
+ SDLoc dl(N);
+ SDValue Res =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getVectorIdxConstant(0, dl));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+ return Res;
+ }
+
+  // On a boolean vector an and/or reduction is the same as a umin/umax
+ // reduction. Convert them if the latter is legal while the former isn't.
+ if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+ unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+ ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
+ TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
+ DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
+ }
+
+ // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
+ // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
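+  // e.g. vecreduce_or (insert_subvector (all-zeros v8i32), (v4i32 Val), Idx)
+  // reduces to vecreduce_or(Val), since OR-ing in the known-zero lanes cannot
+  // change the result; the AND/all-ones case is symmetric.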
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Subvec = N0.getOperand(1);
+ if ((Opcode == ISD::VECREDUCE_OR &&
+ (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
+ (Opcode == ISD::VECREDUCE_AND &&
+ (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // FSUB -> FMA combines:
+ if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+
+ if (N->getOpcode() == ISD::VP_GATHER)
+ if (SDValue SD = visitVPGATHER(N))
+ return SD;
+
+ if (N->getOpcode() == ISD::VP_SCATTER)
+ if (SDValue SD = visitVPSCATTER(N))
+ return SD;
+
+ // VP operations in which all vector elements are disabled - either by
+ // determining that the mask is all false or that the EVL is 0 - can be
+ // eliminated.
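+  // e.g. a VP_ADD whose mask is a splat of false, or whose EVL operand is the
+  // constant 0, computes no lanes; it is replaced below by UNDEF (binary ops),
+  // by its chain (stores), or by its start operand (reductions).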
+ bool AreAllEltsDisabled = false;
+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+ AreAllEltsDisabled |=
+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+ // This is the only generic VP combine we support for now.
+ if (!AreAllEltsDisabled) {
+ switch (N->getOpcode()) {
+ case ISD::VP_FADD:
+ return visitVP_FADD(N);
+ case ISD::VP_FSUB:
+ return visitVP_FSUB(N);
+ case ISD::VP_FMA:
+ return visitFMA<VPMatchContext>(N);
+ }
+ return SDValue();
+ }
+
+ // Binary operations can be replaced by UNDEF.
+ if (ISD::isVPBinaryOp(N->getOpcode()))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ // VP Memory operations can be replaced by either the chain (stores) or the
+ // chain + undef (loads).
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+ if (MemSD->writeMem())
+ return MemSD->getChain();
+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+ }
+
+ // Reduction operations return the start operand when no elements are active.
+ if (ISD::isVPReduction(N->getOpcode()))
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+  // Check if the memory where the FP state is written is used only in a
+  // single load operation.
+ LoadSDNode *LdNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
+ if (LdNode && LdNode != Ld)
+ return SDValue();
+ LdNode = Ld;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
+ return SDValue();
+
+ // Check if the loaded value is used only in a store operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
+ SDUse &U = I.getUse();
+ if (U.getResNo() == 0) {
+ if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
+ if (StNode)
+ return SDValue();
+ StNode = St;
+ } else {
+ return SDValue();
+ }
+ }
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+ // Create new node GET_FPENV_MEM, which uses the store address to write FP
+ // environment.
+ SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
+ StNode->getMemOperand());
+ CombineTo(StNode, Res, false);
+ return Res;
+}
+
+SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+  // Check if the address of the FP state is used only in a single store
+  // operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *St = dyn_cast<StoreSDNode>(U)) {
+ if (StNode && StNode != St)
+ return SDValue();
+ StNode = St;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
+ return SDValue();
+
+ // Check if the stored value is loaded from some location and the loaded
+ // value is used only in the store operation.
+ SDValue StValue = StNode->getValue();
+ auto *LdNode = dyn_cast<LoadSDNode>(StValue);
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+ // Create new node SET_FPENV_MEM, which uses the load address to read FP
+ // environment.
+ SDValue Res =
+ DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
+ LdNode->getMemOperand());
+ return Res;
+}
+
+/// Returns a vector_shuffle if it is able to transform an AND to a
+/// vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = peekThroughBitcasts(N->getOperand(1));
+ SDLoc DL(N);
+
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
+
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ EVT RVT = RHS.getValueType();
+ unsigned NumElts = RHS.getNumOperands();
+
+  // Attempt to create a valid clear mask, splitting the mask into sub
+  // elements and checking to see if each is all zeros or all ones -
+  // suitable for shuffle masking.
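+  // e.g. on a little-endian target, (and (v2i64 X), <0x00000000FFFFFFFF,
+  // 0xFFFFFFFF00000000>) has no valid 64-bit clear mask, but splitting into
+  // 32-bit sub-elements yields the v4i32 shuffle mask <0, 5, 6, 3> against a
+  // zero vector (indices >= 4 pick zero lanes).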
+ auto BuildClearMask = [&](int Split) {
+ int NumSubElts = NumElts * Split;
+ int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
+ SmallVector<int, 8> Indices;
+ for (int i = 0; i != NumSubElts; ++i) {
+ int EltIdx = i / Split;
+ int SubIdx = i % Split;
+ SDValue Elt = RHS.getOperand(EltIdx);
+ // X & undef --> 0 (not undef). So this lane must be converted to choose
+ // from the zero constant vector (same as if the element had all 0-bits).
+ if (Elt.isUndef()) {
+ Indices.push_back(i + NumSubElts);
+ continue;
+ }
+
+ APInt Bits;
+ if (isa<ConstantSDNode>(Elt))
+ Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+ else if (isa<ConstantFPSDNode>(Elt))
+ Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ // Extract the sub element from the constant bit mask.
+ if (DAG.getDataLayout().isBigEndian())
+ Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
+ else
+ Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
+
+ if (Bits.isAllOnes())
+ Indices.push_back(i);
+ else if (Bits == 0)
+ Indices.push_back(i + NumSubElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
+ EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
+ if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
+ return SDValue();
+
+ SDValue Zero = DAG.getConstant(0, DL, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
+ DAG.getBitcast(ClearVT, LHS),
+ Zero, Indices));
+ };
+
+ // Determine maximum split level (byte level masking).
+ int MaxSplit = 1;
+ if (RVT.getScalarSizeInBits() % 8 == 0)
+ MaxSplit = RVT.getScalarSizeInBits() / 8;
+
+ for (int Split = 1; Split <= MaxSplit; ++Split)
+ if (RVT.getScalarSizeInBits() % Split == 0)
+ if (SDValue S = BuildClearMask(Split))
+ return S;
+
+ return SDValue();
+}
+
+/// If a vector binop is performed on splat values, it may be profitable to
+/// extract, scalarize, and insert/splat.
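+/// e.g. (add (splat_vector X), (splat_vector Y)) can become
+/// (splat_vector (add X, Y)): one scalar add plus a splat rather than a
+/// full-width vector add, when the scalar operation is legal or custom.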
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: Remove/replace the extract cost check? If the elements are available
+ // as scalars, then there may be no extract cost. Should we ask if
+ // inserting a scalar back into a vector is cheap instead?
+ int Index0, Index1;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ // Extract element from splat_vector should be free.
+ // TODO: use DAG.isSplatValue instead?
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ N1.getOpcode() == ISD::SPLAT_VECTOR;
+ if (!Src0 || !Src1 || Index0 != Index1 ||
+ Src0.getValueType().getVectorElementType() != EltVT ||
+ Src1.getValueType().getVectorElementType() != EltVT ||
+ !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
+ !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ return SDValue();
+
+ SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
+
+ // If all lanes but 1 are undefined, no need to splat the scalar result.
+ // TODO: Keep track of undefs and use that info in the general case.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
+ count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
+ count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
+ // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+ // build_vec ..undef, (bo X, Y), undef...
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
+ Ops[Index0] = ScalarBO;
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ return DAG.getSplat(VT, DL, ScalarBO);
+}
+
+/// Visit a vector cast operation, like FP_EXTEND.
+SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
+ EVT EltVT = VT.getVectorElementType();
+ unsigned Opcode = N->getOpcode();
+
+ SDValue N0 = N->getOperand(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // TODO: Promoting the operation might also be good here?
+ int Index0;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ if (Src0 &&
+ (N0.getOpcode() == ISD::SPLAT_VECTOR ||
+ TLI.isExtractVecEltCheap(VT, Index0)) &&
+ TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
+ TLI.preferScalarizeSplat(N)) {
+ EVT SrcVT = N0.getValueType();
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+ SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
+ SDValue Elt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ return SDValue();
+}
+
+/// Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode();
+ SDNodeFlags Flags = N->getFlags();
+
+ // Move unary shuffles with identical masks after a vector binop:
+ // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
+ // --> shuffle (VBinOp A, B), Undef, Mask
+ // This does not require type legality checks because we are creating the
+ // same types of operations that are in the original sequence. We do have to
+ // restrict ops like integer div that have immediate UB (eg, div-by-zero)
+ // though. This code is adapted from the identical transform in instcombine.
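+  // e.g. (add (shuffle A, undef, <1,0,3,2>), (shuffle B, undef, <1,0,3,2>))
+  // becomes (shuffle (add A, B), undef, <1,0,3,2>), saving one shuffle.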
+ if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+ auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+ if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+ LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+ (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
+ RHS.getOperand(0), Flags);
+ SDValue UndefV = LHS.getOperand(1);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+ }
+
+ // Try to sink a splat shuffle after a binop with a uniform constant.
+ // This is limited to cases where neither the shuffle nor the constant have
+ // undefined elements because that could be poison-unsafe or inhibit
+ // demanded elements analysis. It is further limited to not change a splat
+ // of an inserted scalar because that may be optimized better by
+ // load-folding or other target-specific behaviors.
+ if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
+ Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
+ Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
+ // binop (splat X), (splat C) --> splat (binop X, C)
+ SDValue X = Shuf0->getOperand(0);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
+ Shuf0->getMask());
+ }
+ if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
+ Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
+ Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
+ // binop (splat C), (splat X) --> splat (binop C, X)
+ SDValue X = Shuf1->getOperand(0);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
+ Shuf1->getMask());
+ }
+ }
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of insertion may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
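+  // e.g. for VT = v8i32 with X and Y of type v4i32 inserted at index 0, this
+  // becomes (ins (VBinOp undef, undef), (v4i32 VBinOp X, Y), 0); the narrow
+  // binop may map to a cheaper instruction, and VecC is computed explicitly
+  // because a binop of two undefs is not necessarily undef.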
+ if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+ RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+ LHS.getOperand(2) == RHS.getOperand(2) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ SDValue X = LHS.getOperand(1);
+ SDValue Y = RHS.getOperand(1);
+ SDValue Z = LHS.getOperand(2);
+ EVT NarrowVT = X.getValueType();
+ if (NarrowVT == Y.getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
+ LegalOperations)) {
+ // (binop undef, undef) may not return undef, so compute that result.
+ SDValue VecC =
+ DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
+ SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
+ }
+ }
+
+ // Make sure all but the first op are undef or constant.
+ auto ConcatWithConstantOrUndef = [](SDValue Concat) {
+ return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+ all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
+ };
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of the concat may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
+ // concat (VBinOp X, Y), VecC
+ if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ EVT NarrowVT = LHS.getOperand(0).getValueType();
+ if (NarrowVT == RHS.getOperand(0).getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ unsigned NumOperands = LHS.getNumOperands();
+ SmallVector<SDValue, 4> ConcatOps;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+  // This constant folds for operands 1 and up.
+ ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
+ RHS.getOperand(i)));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ }
+ }
+
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2) {
+ assert(N0.getOpcode() == ISD::SETCC &&
+ "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ const SDNodeFlags Flags = N0->getFlags();
+ SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4), Flags);
+ AddToWorklist(SETCC.getNode());
+ SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select. Callers of this
+/// should assume that TheSelect is deleted if this returns true. As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+ // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *Zero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (Zero && Zero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ // Be conservative for atomics for the moment
+ // TODO: This does appear to be legal for unordered atomics (see D66309)
+ !LLD->isSimple() || !RLD->isSimple() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ // We can't produce a CMOV of a TargetFrameIndex since we won't
+ // generate the address generation required.
+ LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
+ return false;
+
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+ // Always fail if LLD and RLD are not independent. TheSelect is a
+ // predecessor to all Nodes in question so we need not search past it.
+
+ Visited.insert(TheSelect);
+ Worklist.push_back(LLD);
+ Worklist.push_back(RLD);
+
+ if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
+ return false;
+
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // We cannot do this optimization if any pair of {RLD, LLD} is a
+ // predecessor to {RLD, LLD, CondNode}. As we've already compared the
+ // Loads, we only need to check if CondNode is a successor to one of the
+ // loads. We can further avoid this if there's no use of their chain
+ // value.
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ Worklist.push_back(CondNode);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
+ return false;
+
+ Addr = DAG.getSelect(SDLoc(TheSelect),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ // We cannot do this optimization if any pair of {RLD, LLD} is a
+ // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
+ // the Loads, we only need to check if CondLHS/CondRHS is a successor to
+ // one of the loads. We can further avoid this if there's no use of their
+ // chain value.
+
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+ Worklist.push_back(CondLHS);
+ Worklist.push_back(CondRHS);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ // It is safe to replace the two loads if they have different alignments,
+ // but the new load must be the minimum (most restrictive) alignment of the
+ // inputs.
+ Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
+ MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
+ if (!RLD->isInvariant())
+ MMOFlags &= ~MachineMemOperand::MOInvariant;
+ if (!RLD->isDereferenceable())
+ MMOFlags &= ~MachineMemOperand::MODereferenceable;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
+ LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
+ MMOFlags);
+ } else {
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getExtLoad(
+ LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
+ : LLD->getExtensionType(),
+ SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
+ MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
+/// bitwise 'and'.
+SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
+ SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // If this is a select where the false operand is zero and the compare is a
+ // check of the sign bit, see if we can perform the "gzip trick":
+ // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+ // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
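+  // e.g. for i32: (select_cc setlt X, 0, A, 0) becomes (and (sra X, 31), A),
+  // since (sra X, 31) is all-ones exactly when X is negative and zero
+  // otherwise.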
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (!isNullConstant(N3) || !XType.bitsGE(AType))
+ return SDValue();
+
+ // If the comparison is testing for a positive value, we have to invert
+ // the sign bit mask, so only do that transform if the target has a bitwise
+ // 'and not' instruction (the invert is free).
+ if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
+ // (X > -1) ? A : 0
+ // (X > 0) ? X : 0 <-- This is canonical signed max.
+ if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else if (CC == ISD::SETLT) {
+ // (X < 0) ? A : 0
+ // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
+ if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
+ // constant.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+ unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
+ if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+ unsigned ShCt = XType.getSizeInBits() - 1;
+ if (TLI.shouldAvoidTransformToShift(XType, ShCt))
+ return SDValue();
+
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+}
+
+// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
+SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ unsigned BinOpc = N1.getOpcode();
+ if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
+ return SDValue();
+
+ // The use checks are intentionally on SDNode because we may be dealing
+ // with opcodes that produce more than one SDValue.
+ // TODO: Do we really need to check N0 (the condition operand of the select)?
+ // But removing that clause could cause an infinite loop...
+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
+ return SDValue();
+
+ // Binops may include opcodes that return multiple values, so all values
+ // must be created/propagated from the newly created binops below.
+ SDVTList OpVTs = N1->getVTList();
+
+ // Fold select(cond, binop(x, y), binop(z, y))
+ // --> binop(select(cond, x, z), y)
+ if (N1.getOperand(1) == N2.getOperand(1)) {
+ SDValue NewSel =
+ DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
+ NewBinOp->setFlags(N1->getFlags());
+ NewBinOp->intersectFlagsWith(N2->getFlags());
+ return NewBinOp;
+ }
+
+ // Fold select(cond, binop(x, y), binop(x, z))
+ // --> binop(x, select(cond, y, z))
+ // Second op VT might be different (e.g. shift amount type)
+ if (N1.getOperand(0) == N2.getOperand(0) &&
+ VT == N1.getOperand(1).getValueType() &&
+ VT == N2.getOperand(1).getValueType()) {
+ SDValue NewSel =
+ DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
+ NewBinOp->setFlags(N1->getFlags());
+ NewBinOp->intersectFlagsWith(N2->getFlags());
+ return NewBinOp;
+ }
+
+ // TODO: Handle isCommutativeBinOp patterns as well?
+ return SDValue();
+}
+
+// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
+SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool IsFabs = N->getOpcode() == ISD::FABS;
+ bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+
+ if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+
+ // The operand to cast should be integer.
+ if (!IntVT.isInteger() || IntVT.isVector())
+ return SDValue();
+
+ // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
+ // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For vector, create a sign mask (0x80...) or its inverse (for fabs,
+ // 0x7f...) per element and splat it.
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(VT, Int);
+}
+
+/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+/// in it. This may be a win when the constant is not otherwise available
+/// because it replaces two constant pool loads with one.
+SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
+ return SDValue();
+
+ // If we are before legalize types, we want the other legalization to happen
+ // first (for example, to avoid messing with soft float).
+ auto *TV = dyn_cast<ConstantFPSDNode>(N2);
+ auto *FV = dyn_cast<ConstantFPSDNode>(N3);
+ EVT VT = N2.getValueType();
+ if (!TV || !FV || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // If a constant can be materialized without loads, this does not make sense.
+ if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
+ return SDValue();
+
+ // If both constants have multiple uses, then we won't need to do an extra
+ // load. The values are likely around in registers for other users.
+ if (!TV->hasOneUse() && !FV->hasOneUse())
+ return SDValue();
+
+ Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue()) };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = DAG.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlign(FPTy));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
+
+ // Get offsets to the 0 and 1 elements of the array, so we can select between
+ // them.
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+ SDValue Cond =
+ DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+ AddToWorklist(Cond.getNode());
+ SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
+ AddToWorklist(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
+ AddToWorklist(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(
+ DAG.getMachineFunction()), Alignment);
+}
+
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT CmpOpVT = N0.getValueType();
+ EVT CmpResVT = getSetCCResultType(CmpOpVT);
+ EVT VT = N2.getValueType();
+ auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant.
+ if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+ AddToWorklist(SCC.getNode());
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !(SCCC->isZero()) ? N2 : N3;
+ }
+ }
+
+ if (SDValue V =
+ convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
+ return V;
+
+ if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
+ return V;
+
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
+  // where y has a single bit set.
+  // Put plainly: we can turn the SELECT_CC into an AND
+ // when the condition can be materialized as an all-ones register. Any
+ // single bit-test can be materialized as an all-ones register with
+ // shift-left and shift-right-arith.
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
+ SDValue AndLHS = N0->getOperand(0);
+ auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
+ // Shift the tested bit over the sign bit.
+ const APInt &AndMask = ConstAndRHS->getAPIntValue();
+ unsigned ShCt = AndMask.getBitWidth() - 1;
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is
+ // either all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(ShCt, SDLoc(Shl),
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+ bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+ if ((Fold || Swap) &&
+ TLI.getBooleanContents(CmpOpVT) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+
+ if (Swap) {
+ CC = ISD::getSetCCInverse(CC, CmpOpVT);
+ std::swap(N2C, N3C);
+ }
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->isOne())
+ return SDValue();
+
+ SDValue Temp, SCC;
+ // zext (setcc n0, n1)
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
+ if (VT.bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ } else {
+ SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ }
+
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
+
+ if (N2C->isOne())
+ return Temp;
+
+ unsigned ShCt = N2C->getAPIntValue().logBase2();
+ if (TLI.shouldAvoidTransformToShift(VT, ShCt))
+ return SDValue();
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(ShCt, SDLoc(Temp),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+
+ // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
+ // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
+ // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue ValueOnZero = N2;
+ SDValue Count = N3;
+    // If the condition is NE instead of EQ, swap the operands.
+ if (CC == ISD::SETNE)
+ std::swap(ValueOnZero, Count);
+    // Check if the value on zero is a constant equal to the bit width of the
+    // type.
+ if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
+ if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
+ // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
+ // legal, combine to just cttz.
+ if ((Count.getOpcode() == ISD::CTTZ ||
+ Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
+ return DAG.getNode(ISD::CTTZ, DL, VT, N0);
+ // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
+ // legal, combine to just ctlz.
+ if ((Count.getOpcode() == ISD::CTLZ ||
+ Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
+ return DAG.getNode(ISD::CTLZ, DL, VT, N0);
+ }
+ }
+ }
+
+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
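+  // e.g. for i32 with C = 5: (select_cc setgt X, -1, 5, -6) becomes
+  // (xor (sra X, 31), 5): the shift yields 0 when X >= 0 (xor gives 5) and
+  // all-ones when X < 0 (xor gives ~5 = -6).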
+ if (!NotExtCompare && N1C && N2C && N3C &&
+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||
+ (N1C->isZero() && CC == ISD::SETLT)) &&
+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASR = DAG.getNode(
+ ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
+ }
+
+ if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
+ if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
+
+ return SDValue();
+}
+
+/// This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
+/// DAG expression that will generate the same value by right shifting.
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isZero())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
+/// expression that will generate the same value by multiplying by a magic
+/// number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Given an ISD::SREM node expressing a remainder by constant power of 2,
+/// return a DAG expression that will generate the same value.
+SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isZero())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
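+/// e.g. for an i32 value V = 16: ctlz(16) = 27, so (32 - 1) - 27 = 4, which
+/// is log2(16).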
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
+ EVT VT = V.getValueType();
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
+ SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
+ return LogBase2;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal, we need to find the zero of the function:
+/// F(X) = 1/X - A [which has a zero at X = 1/A]
+/// =>
+/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+/// does not require additional intermediate precision]
+/// For the last iteration, put numerator N into it to gain more precision:
+/// Result = N X_i + X_i (N - N A X_i)
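+/// e.g. for A = 4.0 and an initial estimate X_0 = 0.3:
+/// X_1 = 0.3 * (2 - 4 * 0.3) = 0.24, X_2 = 0.24 * (2 - 4 * 0.24) = 0.2496,
+/// converging quadratically toward 1/4 = 0.25.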
+SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
+ SDNodeFlags Flags) {
+ if (LegalDAG)
+ return SDValue();
+
+ // TODO: Handle extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
+ AddToWorklist(Est.getNode());
+
+ SDLoc DL(Op);
+ if (Iterations) {
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+
+ // Newton iterations: Est = Est + Est (N - Arg * Est)
+ // If this is the last iteration, also multiply by the numerator.
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue MulEst = Est;
+
+ if (i == Iterations - 1) {
+ MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
+ AddToWorklist(MulEst.getNode());
+ }
+
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT,
+ (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
+ AddToWorklist(Est.getNode());
+ }
+ } else {
+ // If no iterations are available, multiply with N.
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
+ AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
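+/// e.g. for A = 4 and an initial estimate X_0 = 0.6:
+/// X_1 = 0.6 * (1.5 - (4 * 0.36) / 2) = 0.468, X_2 is roughly 0.497,
+/// approaching 1/sqrt(4) = 0.5.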
+SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
+
+ // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ }
+
+ // If non-reciprocal square root is requested, multiply the result by Arg.
+ if (!Reciprocal)
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+
+ return Est;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
+SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
+
+ // This routine must enter the loop below to work correctly
+ // when (Reciprocal == false).
+ assert(Iterations > 0);
+
+ // Newton iterations for reciprocal square root:
+ // E = (E * -0.5) * ((A * E) * E + -3.0)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+
+ // When calculating a square root at the last iteration build:
+ // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
+ // (notice a common subexpression)
+ SDValue LHS;
+ if (Reciprocal || (i + 1) < Iterations) {
+ // RSQRT: LHS = (E * -0.5)
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ } else {
+ // SQRT: LHS = (A * E) * -0.5
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ }
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ }
+
+ return Est;
+}
+
+/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
+/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
+/// Op can be zero.
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
+ bool Reciprocal) {
+ if (LegalDAG)
+ return SDValue();
+
+ // TODO: Handle extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
+ bool UseOneConstNR = false;
+ if (SDValue Est =
+ TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
+ Reciprocal)) {
+ AddToWorklist(Est.getNode());
+
+ if (Iterations > 0)
+ Est = UseOneConstNR
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ if (!Reciprocal) {
+ SDLoc DL(Op);
+ // Try the target specific test first.
+ SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
+
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+      // possibly a denormal. Force the answer to 0.0 or the value provided by
+      // the target for those cases.
+ Est = DAG.getNode(
+ Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
+ return buildSqrtEstimateImpl(Op, Flags, true);
+}
+
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
+ return buildSqrtEstimateImpl(Op, Flags, false);
+}
+
+/// Return true if there is any possibility that the two addresses overlap.
+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
+
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ bool IsAtomic;
+ SDValue BasePtr;
+ int64_t Offset;
+ std::optional<int64_t> NumBytes;
+ MachineMemOperand *MMO;
+ };
+
+ auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+ if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+ int64_t Offset = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+ Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+ ? C->getSExtValue()
+ : (LSN->getAddressingMode() == ISD::PRE_DEC)
+ ? -1 * C->getSExtValue()
+ : 0;
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
+ return {LSN->isVolatile(),
+ LSN->isAtomic(),
+ LSN->getBasePtr(),
+ Offset /*base offset*/,
+ std::optional<int64_t>(Size),
+ LSN->getMemOperand()};
+ }
+ if (const auto *LN = cast<LifetimeSDNode>(N))
+ return {false /*isVolatile*/,
+ /*isAtomic*/ false,
+ LN->getOperand(1),
+ (LN->hasOffset()) ? LN->getOffset() : 0,
+ (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
+ : std::optional<int64_t>(),
+ (MachineMemOperand *)nullptr};
+ // Default.
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/,
+ (MachineMemOperand *)nullptr};
+ };
+
+ MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+ MUC1 = getCharacteristics(Op1);
+
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+ }
+
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
+ DAG, IsAlias))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
+ // either is not known.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias. These should really be checking the equivalent of mayWrite,
+ // but it only matters for memory nodes other than load/store.
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+
+ // If the two source values are known to have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove
+ // that they do not alias. This check is conservative for now to catch cases
+ // created by splitting vector types: it only works when the offsets are
+ // multiples of the size of the data.
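+ // For example, two 4-byte accesses from 8-byte-aligned bases at offsets 0
+ // and 4 occupy disjoint halves of any aligned 8-byte window and therefore
+ // cannot overlap.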
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
+ Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
+ auto &Size0 = MUC0.NumBytes;
+ auto &Size1 = MUC1.NumBytes;
+ if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
+ Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
+ OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+ SrcValOffset1 % *Size1 == 0) {
+ int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
+ int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
+
+ // There is no overlap between these relatively aligned accesses of
+ // similar size. Return no alias.
+ if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
+ return false;
+ }
+
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
+ ? CombinerGlobalAA
+ : DAG.getSubtarget().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
+ Size1) {
+ // Use alias analysis information.
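+ // Each access size is padded by that access's distance from the smaller of
+ // the two offsets, because the MemoryLocations below are based on the
+ // underlying IR values rather than on the offset addresses.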
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
+ if (AA->isNoAlias(
+ MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVectorImpl<SDValue> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ // TODO: relax aliasing for unordered atomics (see D66309)
+ const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Attempt to improve chain by a single step
+ auto ImproveChain = [&](SDValue &C) -> bool {
+ switch (C.getOpcode()) {
+ case ISD::EntryToken:
+ // No need to mark EntryToken.
+ C = SDValue();
+ return true;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for C.
+ // TODO: Relax aliasing for unordered atomics (see D66309)
+ bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+ cast<LSBaseSDNode>(C.getNode())->isSimple();
+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ // Alias, so stop here.
+ return false;
+ }
+
+ case ISD::CopyFromReg:
+ // Always forward past CopyFromReg.
+ C = C.getOperand(0);
+ return true;
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the memory access.
+ if (!mayAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+ };
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.pop_back_val();
+
+ // Don't bother if we've seen Chain before.
+ if (!Visited.insert(Chain.getNode()).second)
+ continue;
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we reach the depth limit.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ return;
+ }
+
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+ // likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ continue;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ continue;
+ }
+ // Everything else
+ if (ImproveChain(Chain)) {
+ // Updated Chain Found, Consider new chain if one exists.
+ if (Chain.getNode())
+ Chains.push_back(Chain);
+ ++Depth;
+ continue;
+ }
+ // No Improved Chain Possible, treat as Alias.
+ Aliases.push_back(Chain);
+ }
+}
+
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ if (OptLevel == CodeGenOpt::None)
+ return OldChain;
+
+ // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Aliases;
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getTokenFactor(SDLoc(N), Aliases);
+}
+
+// This function tries to collect a bunch of potentially interesting
+// nodes to improve the chains of, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because mergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running mergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+ // Intervals records which offsets from BaseIndex have been covered. In
+ // the common case each store writes to the address immediately after the
+ // previous one, so its interval merges with the previous interval at
+ // insertion time.
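+ // (For example, four consecutive i32 stores at offsets 0, 4, 8 and 12 all
+ // coalesce into the single interval [0, 16).)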
+
+ using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
+ IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
+
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Do not handle stores to opaque types
+ if (St->getMemoryVT().isZeroSized())
+ return false;
+
+ // BaseIndexOffset assumes that offsets are fixed-size, which
+ // is not valid for scalable vectors where the offsets are
+ // scaled by `vscale`, so bail out early.
+ if (St->getMemoryVT().isScalableVT())
+ return false;
+
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
+ std::monostate{});
+
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
+ if (Chain->getMemoryVT().isScalableVector())
+ return false;
+
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (!SDValue(Chain, 0)->hasOneUse())
+ break;
+ // TODO: Relax for unordered atomics (see D66309)
+ if (!Chain->isSimple() || Chain->isIndexed())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
+ // Check that the base pointer is the same as the original one.
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+ break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
+ break;
+ Intervals.insert(Offset, Offset + Length, std::monostate{});
+
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
+ }
+
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and ChainedStores members) starting from
+ // where the store chain ended and return a single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
+
+ TFOps.push_back(NewST);
+
+ // If we improved every element of TFOps, then we've lost the dependence on
+ // NewChain to successors of St and we need to add it back to TFOps. Do so at
+ // the beginning to keep relative order consistent with FindBetterChain.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
+ CombineTo(St, TF);
+
+ // Add TF and its operands to the worklist.
+ AddToWorklist(TF.getNode());
+ for (const SDValue &Op : TF->ops())
+ AddToWorklist(Op.getNode());
+ AddToWorklist(STChain);
+ return true;
+}
+
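+ /// Try to improve the chain of St: first attempt to parallelize a run of
+ /// disjoint chained stores starting at St, and otherwise look for a single
+ /// better (non-aliasing) chain for St itself.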
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
+
+ // Improve St's chain.
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
+}
+
+/// This is the entry point for the file.
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
+ CodeGenOpt::Level OptLevel) {
+ /// This is the main entry point to this class.
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 000000000000..f0affce7b6b8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,2382 @@
+//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "isel"
+
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
+/// Set the current block to which generated machine instructions will be
+/// appended.
+void FastISel::startNewBlock() {
+ assert(LocalValueMap.empty() &&
+ "local values should be cleared after finishing a BB");
+
+ // Instructions are appended to FuncInfo.MBB. If the basic block already
+ // contains labels or copies, use the last instruction as the last local
+ // value.
+ EmitStartPt = nullptr;
+ if (!FuncInfo.MBB->empty())
+ EmitStartPt = &FuncInfo.MBB->back();
+ LastLocalValue = EmitStartPt;
+}
+
+void FastISel::finishBasicBlock() { flushLocalValueMap(); }
+
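+ /// Lower the function's formal arguments via the target's fastLowerArguments
+ /// hook, then record the assigned registers in the ValueMap so that non-entry
+ /// blocks can refer to them.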
+bool FastISel::lowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ // Fallback to SDISel argument lowering code to deal with sret pointer
+ // parameter.
+ return false;
+
+ if (!fastLowerArguments())
+ return false;
+
+ // Enter arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end();
+ I != E; ++I) {
+ DenseMap<const Value *, Register>::iterator VI = LocalValueMap.find(&*I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[&*I] = VI->second;
+ }
+ return true;
+}
+
+/// Return the defined register if this instruction defines exactly one
+/// virtual register and uses no other virtual registers. Otherwise return 0.
+static Register findLocalRegDef(MachineInstr &MI) {
+ Register RegDef;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ if (RegDef)
+ return Register();
+ RegDef = MO.getReg();
+ } else if (MO.getReg().isVirtual()) {
+ // This is another use of a vreg. Don't delete it.
+ return Register();
+ }
+ }
+ return RegDef;
+}
+
+static bool isRegUsedByPhiNodes(Register DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
+void FastISel::flushLocalValueMap() {
+ // If FastISel bails out, it could leave local value instructions behind
+ // that aren't used for anything. Detect and erase those.
+ if (LastLocalValue != EmitStartPt) {
+ // Save the first instruction after local values, for later.
+ MachineBasicBlock::iterator FirstNonValue(LastLocalValue);
+ ++FirstNonValue;
+
+ MachineBasicBlock::reverse_iterator RE =
+ EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
+ : FuncInfo.MBB->rend();
+ MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+ for (MachineInstr &LocalMI :
+ llvm::make_early_inc_range(llvm::make_range(RI, RE))) {
+ Register DefReg = findLocalRegDef(LocalMI);
+ if (!DefReg)
+ continue;
+ if (FuncInfo.RegsWithFixups.count(DefReg))
+ continue;
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ LLVM_DEBUG(dbgs() << "removing dead local value materialization"
+ << LocalMI);
+ LocalMI.eraseFromParent();
+ }
+ }
+
+ if (FirstNonValue != FuncInfo.MBB->end()) {
+ // See if there are any local value instructions left. If so, we want to
+ // make sure the first one has a debug location; if it doesn't, use the
+ // first non-value instruction's debug location.
+
+ // If EmitStartPt is non-null, this block had copies at the top before
+ // FastISel started doing anything; it points to the last one, so the
+ // first local value instruction is the one after EmitStartPt.
+ // If EmitStartPt is null, the first local value instruction is at the
+ // top of the block.
+ MachineBasicBlock::iterator FirstLocalValue =
+ EmitStartPt ? ++MachineBasicBlock::iterator(EmitStartPt)
+ : FuncInfo.MBB->begin();
+ if (FirstLocalValue != FirstNonValue && !FirstLocalValue->getDebugLoc())
+ FirstLocalValue->setDebugLoc(FirstNonValue->getDebugLoc());
+ }
+ }
+
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
+ SavedInsertPt = FuncInfo.InsertPt;
+}
+
+Register FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return Register();
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return Register();
+ }
+
+ // Look up the value to see if we already have a register for it.
+ Register Reg = lookUpRegForValue(V);
+ if (Reg)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+Register FastISel::materializeConstant(const Value *V, MVT VT) {
+ Register Reg;
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V))
+ Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
+ else if (isa<ConstantPointerNull>(V))
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType())));
+ else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue())
+ Reg = fastMaterializeFloatZero(CF);
+ else
+ // Try to emit the constant directly.
+ Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy(DL);
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
+ bool isExact;
+ (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ Register IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
+ if (IntegerReg)
+ Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg);
+ }
+ }
+ } else if (const auto *Op = dyn_cast<Operator>(V)) {
+ if (!selectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !fastSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+ return Reg;
+}
+
+/// Helper for getRegForValue. This function is called when the value isn't
+/// already available in a register and must be materialized with new
+/// instructions.
+Register FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ Register Reg;
+ // Give the target-specific code a try first.
+ if (isa<Constant>(V))
+ Reg = fastMaterializeConstant(cast<Constant>(V));
+
+ // If target-specific code couldn't or didn't want to handle the value, then
+ // give target-independent code a try.
+ if (!Reg)
+ Reg = materializeConstant(V, VT);
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+Register FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ Register &AssignedReg = FuncInfo.ValueMap[I];
+ if (!AssignedReg)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++) {
+ FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+ FuncInfo.RegsWithFixups.insert(Reg + i);
+ }
+
+ AssignedReg = Reg;
+ }
+}
+
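+ /// Materialize a register for the GEP index Idx, sign-extending or truncating
+ /// it to pointer width as needed.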
+Register FastISel::getRegForGEPIndex(const Value *Idx) {
+ Register IdxN = getRegForValue(Idx);
+ if (!IdxN)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return Register();
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy(DL);
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
+ } else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
+ }
+ return IdxN;
+}
+
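+ /// Reset the insert point to just after the last local-value instruction, or
+ /// to the first non-PHI instruction of the block if there are no local values.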
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+}
+
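+ /// Erase the instructions in the range [I, E), moving SavedInsertPt,
+ /// EmitStartPt and LastLocalValue past the erased range if they point into it.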
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
+ "Invalid iterator!");
+ while (I != E) {
+ if (SavedInsertPt == I)
+ SavedInsertPt = E;
+ if (EmitStartPt == I)
+ EmitStartPt = E.isValid() ? &*E : nullptr;
+ if (LastLocalValue == I)
+ LastLocalValue = E.isValid() ? &*E : nullptr;
+
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
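+ /// Save the current insert point and move it into the local-value area so
+ /// that materialized values are emitted together with the other local values.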
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ SavePoint OldInsertPt = FuncInfo.InsertPt;
+ recomputeInsertPt();
+ return OldInsertPt;
+}
+
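+ /// Record the last instruction emitted in the local-value area and restore
+ /// the insert point saved by enterLocalValueArea.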
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = &*std::prev(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt;
+}
+
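+ /// Select a binary operation, preferring register+immediate forms when one
+ /// operand is a constant and applying cheap strength reductions (exact sdiv
+ /// and urem by a power of two).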
+bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 && ISD::isBitwiseLogicOp(ISDOpcode))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ Register Op1 = getRegForValue(I->getOperand(1));
+ if (!Op1)
+ return false;
+
+ Register ResultReg =
+ fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ Register Op0 = getRegForValue(I->getOperand(0));
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getSExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
+ Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Imm,
+ VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ Register Op1 = getRegForValue(I->getOperand(1));
+ if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Now we have both operands in registers. Emit the instruction.
+ Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op1);
+ if (!ResultReg)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
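+ /// Select a GEP by folding constant indices into a running byte offset and
+ /// emitting pointer adds (plus index multiplies) for the variable parts.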
+bool FastISel::selectGetElementPtr(const User *I) {
+ Register N = getRegForValue(I->getOperand(0));
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // FIXME: The code below does not handle vector GEPs. Halt "fast" selection
+ // and bail.
+ if (isa<VectorType>(I->getType()))
+ return false;
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
+ MVT VT = TLI.getPointerTy(DL);
+ for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ TotalOffs = 0;
+ }
+ }
+ } else {
+ Type *Ty = GTI.getIndexedType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ // N = N + Offset
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+ if (TotalOffs >= MaxOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ Register IdxN = getRegForGEPIndex(Idx);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, N);
+ return true;
+}
+
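+ /// Append the live-variable operands of a stackmap/patchpoint call to Ops:
+ /// constants are encoded with a ConstantOp prefix, static allocas become
+ /// frame indices, and everything else is materialized into a register.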
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+ const CallInst *CI, unsigned StartIdx) {
+ for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {
+ Value *Val = CI->getArgOperand(i);
+ // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+ if (const auto *C = dyn_cast<ConstantInt>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (isa<ConstantPointerNull>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(0));
+ } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+ // Values coming from a stack location also require a special encoding,
+ // but that is added later on by the target specific frame index
+ // elimination implementation.
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ Ops.push_back(MachineOperand::CreateFI(SI->second));
+ else
+ return false;
+ } else {
+ Register Reg = getRegForValue(Val);
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ }
+ }
+ return true;
+}
+
+bool FastISel::selectStackmap(const CallInst *I) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+ assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+ "Stackmap cannot return a value.");
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPS (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target-specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // CALLSEQ_START(0, 0...)
+ // STACKMAP(id, nbytes, ...)
+ // CALLSEQ_END(0, 0)
+ //
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Push live variables for the stack map (skipping the first two arguments
+ // <id> and <numBytes>).
+ if (!addStackMapLiveVars(Ops, I, 2))
+ return false;
+
+ // We are not adding any register mask info here, because the stackmap doesn't
+ // clobber anything.
+
+ // Add scratch registers as implicit def and early clobber.
+ CallingConv::ID CC = I->getCallingConv();
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown));
+ const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
+ Builder.addImm(0);
+
+ // Issue STACKMAP.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::STACKMAP));
+ for (auto const &MO : Ops)
+ MIB.add(MO);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
+ .addImm(0)
+ .addImm(0);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
+
+ return true;
+}
+
+/// Lower an argument list according to the target calling convention.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
+ unsigned NumArgs, const Value *Callee,
+ bool ForceRetVoidTy, CallLoweringInfo &CLI) {
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(CI, ArgI);
+ Args.push_back(Entry);
+ }
+
+ Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
+ : CI->getType();
+ CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
+ const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
+ StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
+ SmallString<32> MangledName;
+ Mangler::getNameWithPrefix(MangledName, Target, DL);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+ return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs);
+}
+
+bool FastISel::selectPatchpoint(const CallInst *I) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+ CallingConv::ID CC = I->getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !I->getType()->isVoidTy();
+ Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();
+
+ // Get the real number of arguments participating in the call <numArgs>
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
+ "Expected a constant integer.");
+ const auto *NumArgsVal =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = NumArgsVal->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // This includes all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(I->arg_size() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ CallLoweringInfo CLI;
+ CLI.setIsPatchPoint();
+ if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
+ return false;
+
+ assert(CLI.Call && "No call instruction specified.");
+
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add an explicit result reg if we use the anyreg calling convention.
+ if (IsAnyRegCC && HasDef) {
+ assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
+ CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ CLI.NumResultRegs = 1;
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
+ }
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Add the call target.
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
+ if (C->getOpcode() == Instruction::IntToPtr) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else
+ llvm_unreachable("Unsupported ConstantExpr.");
+ } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) {
+ Ops.push_back(MachineOperand::CreateGA(GV, 0));
+ } else if (isa<ConstantPointerNull>(Callee))
+ Ops.push_back(MachineOperand::CreateImm(0));
+ else
+ llvm_unreachable("Unsupported callee address.");
+
+ // Adjust <numArgs> to account for any arguments that have been passed on
+ // the stack instead.
+ unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
+ Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));
+
+ // Add the calling convention
+ Ops.push_back(MachineOperand::CreateImm((unsigned)CC));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC) {
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
+ Register Reg = getRegForValue(I->getArgOperand(i));
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ }
+ }
+
+ // Push the arguments from the call instruction.
+ for (auto Reg : CLI.OutRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+
+ // Push live variables for the stack map.
+ if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
+ return false;
+
+ // Push the register mask info.
+ Ops.push_back(MachineOperand::CreateRegMask(
+ TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
+
+ // Add scratch registers as implicit def and early clobber.
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
+
+ // Add implicit defs (return values).
+ for (auto Reg : CLI.InRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
+ /*isImp=*/true));
+
+ // Insert the patchpoint instruction before the call generated by the target.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, MIMD,
+ TII.get(TargetOpcode::PATCHPOINT));
+
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ // Delete the original call instruction.
+ CLI.Call->eraseFromParent();
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
+
+ if (CLI.NumResultRegs)
+ updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
+ return true;
+}
+
+bool FastISel::selectXRayCustomEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
+ return true; // don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*isDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*isDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ // Insert the Patchable Event Call instruction, which gets lowered properly.
+ return true;
+}
+
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
+ return true; // don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*isDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*isDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+ /*isDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ // Insert the Patchable Typed Event Call instruction, which gets lowered properly.
+ return true;
+}
+
+/// Returns an AttributeList representing the attributes applied to the return
+/// value of the given call.
+static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
+ unsigned NumArgs) {
+ MCContext &Ctx = MF->getContext();
+ SmallString<32> MangledName;
+ Mangler::getNameWithPrefix(MangledName, SymName, DL);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+ return lowerCallTo(CI, Sym, NumArgs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
+ unsigned NumArgs) {
+ FunctionType *FTy = CI->getFunctionType();
+ Type *RetTy = CI->getType();
+
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(CI, ArgI);
+ Args.push_back(Entry);
+ }
+ TLI.markLibCallAttributes(MF, CI->getCallingConv(), Args);
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), *CI, NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
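+ /// Target-independent part of call lowering: compute the return-value and
+ /// outgoing-argument flags for CLI, then let the target's fastLowerCall hook
+ /// emit the actual call sequence.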
+bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
+ // Handle the incoming return values from the call.
+ CLI.clearIns();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(
+ CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ // FIXME: sret demotion isn't supported yet - bail out.
+ if (!CanLowerReturn)
+ return false;
+
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.clearOuts();
+ for (auto &Arg : CLI.getArgs()) {
+ Type *FinalType = Arg.Ty;
+ if (Arg.IsByVal)
+ FinalType = Arg.IndirectType;
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg, DL);
+
+ ISD::ArgFlagsTy Flags;
+ if (Arg.IsZExt)
+ Flags.setZExt();
+ if (Arg.IsSExt)
+ Flags.setSExt();
+ if (Arg.IsInReg)
+ Flags.setInReg();
+ if (Arg.IsSRet)
+ Flags.setSRet();
+ if (Arg.IsSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Arg.IsSwiftAsync)
+ Flags.setSwiftAsync();
+ if (Arg.IsSwiftError)
+ Flags.setSwiftError();
+ if (Arg.IsCFGuardTarget)
+ Flags.setCFGuardTarget();
+ if (Arg.IsByVal)
+ Flags.setByVal();
+ if (Arg.IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling in
+ // the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If we
+ // port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ MaybeAlign MemAlign = Arg.Alignment;
+ if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
+ unsigned FrameSize = DL.getTypeAllocSize(Arg.IndirectType);
+
+ // For ByVal, alignment should come from FE. BE will guess if this info
+ // is not there, but there are cases it cannot get right.
+ if (!MemAlign)
+ MemAlign = Align(TLI.getByValTypeAlignment(Arg.IndirectType, DL));
+ Flags.setByValSize(FrameSize);
+ } else if (!MemAlign) {
+ MemAlign = DL.getABITypeAlign(Arg.Ty);
+ }
+ Flags.setMemAlign(*MemAlign);
+ if (Arg.IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
+ CLI.OutVals.push_back(Arg.Val);
+ CLI.OutFlags.push_back(Flags);
+ }
+
+ if (!fastLowerCall(CLI))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ assert(CLI.Call && "No call instruction specified.");
+ CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ if (CLI.NumResultRegs && CLI.CB)
+ updateValueMap(CLI.CB, CLI.ResultReg, CLI.NumResultRegs);
+
+ // Set labels for heapallocsite call.
+ if (CLI.CB)
+ if (MDNode *MD = CLI.CB->getMetadata("heapallocsite"))
+ CLI.Call->setHeapAllocMarker(*MF, MD);
+
+ return true;
+}
+
+bool FastISel::lowerCall(const CallInst *CI) {
+ FunctionType *FuncTy = CI->getFunctionType();
+ Type *RetTy = CI->getType();
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Args.reserve(CI->arg_size());
+
+ for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) {
+ Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(CI, i - CI->arg_begin());
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within fastLowerCall.
+ bool IsTailCall = CI->isTailCall();
+ if (IsTailCall && !isInTailCallPosition(*CI, TM))
+ IsTailCall = false;
+ if (IsTailCall && !CI->isMustTailCall() &&
+ MF->getFunction().getFnAttribute("disable-tail-calls").getValueAsBool())
+ IsTailCall = false;
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
+ .setTailCall(IsTailCall);
+
+ diagnoseDontCall(*CI);
+
+ return lowerCallTo(CLI);
+}
+
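+ /// Select a call instruction: trivial inline asm (with no constraints) and
+ /// intrinsic calls are handled here directly; everything else is passed to
+ /// lowerCall.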
+bool FastISel::selectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ if (Call->isConvergent())
+ ExtraInfo |= InlineAsm::Extra_IsConvergent;
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::INLINEASM));
+ MIB.addExternalSymbol(IA->getAsmString().c_str());
+ MIB.addImm(ExtraInfo);
+
+ const MDNode *SrcLoc = Call->getMetadata("srcloc");
+ if (SrcLoc)
+ MIB.addMetadata(SrcLoc);
+
+ return true;
+ }
+
+ // Handle intrinsic function calls.
+ if (const auto *II = dyn_cast<IntrinsicInst>(Call))
+ return selectIntrinsicCall(II);
+
+ return lowerCall(Call);
+}
+
+bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
+ // Neither does the sideeffect intrinsic.
+ case Intrinsic::sideeffect:
+ // Neither does the assume intrinsic; it's also OK not to codegen its operand.
+ case Intrinsic::assume:
+ // Neither does the llvm.experimental.noalias.scope.decl intrinsic
+ case Intrinsic::experimental_noalias_scope_decl:
+ return true;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
+ assert(DI->getVariable() && "Missing variable");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (!hasDebugInfo)\n");
+ return true;
+ }
+
+ if (FuncInfo.PreprocessedDbgDeclares.contains(DI))
+ return true;
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (bad/undef address)\n");
+ return true;
+ }
+
+ std::optional<MachineOperand> Op;
+ if (Register Reg = lookUpRegForValue(Address))
+ Op = MachineOperand::CreateReg(Reg, false);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Op && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
+ false);
+
+ if (Op) {
+ assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
+ "Expected inlined-at fields to agree");
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
+ // the expression, since there is no "indirect" flag in DBG_INSTR_REF.
+ SmallVector<uint64_t, 3> Ops(
+ {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref});
+ auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op,
+ DI->getVariable(), NewExpr);
+ } else {
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op,
+ DI->getVariable(), DI->getExpression());
+ }
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (no materialized reg for address)\n");
+ }
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(II);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
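+ // Note: from here on, II refers to the DBG_VALUE instruction description and
+ // shadows the IntrinsicInst parameter of the same name.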
+ const Value *V = DI->getValue();
+ DIExpression *Expr = DI->getExpression();
+ DILocalVariable *Var = DI->getVariable();
+ assert(Var->isValidLocationForIntrinsic(MIMD.getDL()) &&
+ "Expected inlined-at fields to agree");
+ if (!V || isa<UndefValue>(V) || DI->hasArgList()) {
+ // DI is either undef or cannot produce a valid DBG_VALUE, so produce an
+ // undef DBG_VALUE to terminate any prior location.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U,
+ Var, Expr);
+ return true;
+ }
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ // See if there's an expression to constant-fold.
+ if (Expr)
+ std::tie(Expr, CI) = Expr->constantFold(CI);
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addCImm(CI)
+ .addImm(0U)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addImm(CI->getZExtValue())
+ .addImm(0U)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ return true;
+ }
+ if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addFPImm(CF)
+ .addImm(0U)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ return true;
+ }
+ if (const auto *Arg = dyn_cast<Argument>(V);
+ Arg && Expr && Expr->isEntryValue()) {
+ // As per the Verifier, this case is only valid for swift async Args.
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+
+ Register Reg = getRegForValue(Arg);
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (Reg == VirtReg || Reg == PhysReg) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II,
+ false /*IsIndirect*/, PhysReg, Var, Expr);
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << *DI << "\n");
+ return true;
+ }
+ if (Register Reg = lookUpRegForValue(V)) {
+ // FIXME: This does not handle register-indirect values at offset 0.
+ if (!FuncInfo.MF->useDebugInstrRef()) {
+ bool IsIndirect = false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
+ Reg, Var, Expr);
+ return true;
+ }
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs.
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ auto *NewExpr = DIExpression::prependOpcodes(Expr, Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
+ Var, NewExpr);
+ return true;
+ }
+ // We don't know how to handle other cases, so we drop.
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst *DI = cast<DbgLabelInst>(II);
+ assert(DI->getLabel() && "Missing label");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
+ return true;
+ }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::expect: {
+ Register ResultReg = getRegForValue(II->getArgOperand(0));
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::experimental_stackmap:
+ return selectStackmap(II);
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ return selectPatchpoint(II);
+
+ case Intrinsic::xray_customevent:
+ return selectXRayCustomEvent(II);
+ case Intrinsic::xray_typedevent:
+ return selectXRayTypedEvent(II);
+ }
+
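+ // Anything not handled above is handed to the target hook, which returns
+ // false by default so that unsupported intrinsics fall back to the default
+ // instruction selector.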
+ return fastLowerIntrinsicCall(II);
+}
+
+bool FastISel::selectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
+ !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ Register InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Opcode, InputReg);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectBitCast(const User *I) {
+ EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(DL, I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
+ Register Op0 = getRegForValue(I->getOperand(0));
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (SrcVT == DstVT) {
+ updateValueMap(I, Op0);
+ return true;
+ }
+
+ // Otherwise, select a BITCAST opcode.
+ Register ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectFreeze(const User *I) {
+ Register Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ // Unhandled operand.
+ return false;
+
+ EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType());
+ if (ETy == MVT::Other || !TLI.isTypeLegal(ETy))
+ // Unhandled type, bail out.
+ return false;
+
+ MVT Ty = ETy.getSimpleVT();
+ const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty);
+ Register ResultReg = createResultReg(TyRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Remove local value instructions starting from the instruction after
+// SavedLastLocalValue to the current function insert point.
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) {
+ MachineInstr *CurLastLocalValue = getLastLocalValue();
+ if (CurLastLocalValue != SavedLastLocalValue) {
+ // Find the first local value instruction to be deleted.
+ // This is the instruction after SavedLastLocalValue if it is non-NULL.
+ // Otherwise it's the first instruction in the block.
+ MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
+ if (SavedLastLocalValue)
+ ++FirstDeadInst;
+ else
+ FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
+ setLastLocalValue(SavedLastLocalValue);
+ removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
+ }
+}
+
+bool FastISel::selectInstruction(const Instruction *I) {
+ // Flush the local value map before starting each instruction.
+ // This improves locality and debugging, and can reduce spills.
+ // Reuse of values across IR instructions is relatively uncommon.
+ flushLocalValueMap();
+
+ MachineInstr *SavedLastLocalValue = getLastLocalValue();
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (I->isTerminator()) {
+ if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
+ // PHI node handling may have generated local value instructions,
+ // even though it failed to handle all PHI nodes.
+ // We remove these instructions because SelectionDAGISel will generate
+ // them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
+ return false;
+ }
+ }
+
+ // FastISel does not handle any operand bundles except OB_funclet.
+ if (auto *Call = dyn_cast<CallBase>(I))
+ for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i)
+ if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+ return false;
+
+ MIMD = MIMetadata(*I);
+
+ SavedInsertPt = FuncInfo.InsertPt;
+
+ if (const auto *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc Func;
+
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+
+ // Don't handle Intrinsic::trap if a trap function is specified.
+ if (F && F->getIntrinsicID() == Intrinsic::trap &&
+ Call->hasFnAttr("trap-func-name"))
+ return false;
+ }
+
+ // First, try doing target-independent selection.
+ if (!SkipTargetIndependentISel) {
+ if (selectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ MIMD = {};
+ return true;
+ }
+ // Remove dead code.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ SavedInsertPt = FuncInfo.InsertPt;
+ }
+ // Next, try calling the target to attempt to handle the instruction.
+ if (fastSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
+ MIMD = {};
+ return true;
+ }
+ // Remove dead code.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+
+ MIMD = {};
+ // Undo phi node updates, because they will be added again by SelectionDAG.
+ if (I->isTerminator()) {
+ // PHI node handling may have generated local value instructions.
+ // We remove them because SelectionDAGISel will generate them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ }
+ return false;
+}
+
+/// Emit an unconditional branch to the given block, unless it is the immediate
+/// (fall-through) successor, and update the CFG.
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
+ const DebugLoc &DbgLoc) {
+ if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // Nothing to emit: MSucc is the layout successor, so we simply fall through.
+ // The size check above ensures that if the branch would be the only
+ // non-debug instruction in the block we still emit it (in the else case
+ // below) to keep the line information accurate.
+ } else {
+ // The unconditional branch case.
+ TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
+ SmallVector<MachineOperand, 0>(), DbgLoc);
+ }
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB) {
+ // Add TrueMBB as a successor unless it is equal to FalseMBB: this can happen
+ // in degenerate IR, and MachineIR forbids having a block appear twice in the
+ // successor/predecessor lists.
+ if (TrueMBB != FalseMBB) {
+ if (FuncInfo.BPI) {
+ auto BranchProbability =
+ FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
+ }
+
+ fastEmitBranch(FalseMBB, MIMD.getDL());
+}
+
+/// Emit an FNeg operation.
+bool FastISel::selectFNeg(const User *I, const Value *In) {
+ Register OpReg = getRegForValue(In);
+ if (!OpReg)
+ return false;
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(DL, I->getType());
+ Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ OpReg);
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
+ if (VT.getSizeInBits() > 64)
+ return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg);
+ if (!IntReg)
+ return false;
+
+ Register IntResultReg = fastEmit_ri_(
+ IntVT.getSimpleVT(), ISD::XOR, IntReg,
+ UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
+ if (!IntResultReg)
+ return false;
+
+ ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
+ IntResultReg);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, DL, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ updateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool FastISel::selectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return selectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return selectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return selectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ return selectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return selectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return selectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return selectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return selectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return selectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return selectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return selectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return selectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return selectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return selectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return selectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return selectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return selectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return selectBinaryOp(I, ISD::XOR);
+
+ case Instruction::FNeg:
+ return selectFNeg(I, I->getOperand(0));
+
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ if (TM.Options.TrapUnreachable)
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ else
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ // On AIX, normal call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ // But fast isel still has the ability to do selection for intrinsics.
+ if (TM.getTargetTriple().isOSAIX() && !isa<IntrinsicInst>(I))
+ return false;
+ return selectCall(I);
+
+ case Instruction::BitCast:
+ return selectBitCast(I);
+
+ case Instruction::FPToSI:
+ return selectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return selectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return selectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return selectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return selectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return selectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return selectCast(I, ISD::TRUNCATE);
+ Register Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return selectExtractValue(I);
+
+ case Instruction::Freeze:
+ return selectFreeze(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+
+FastISel::~FastISel() = default;
+
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
+ return false;
+}
+
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
+
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ unsigned /*Op1*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// This method is a wrapper of fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// fastEmit_rr instead.
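+/// As a convenience it also strength-reduces some operations up front: a
+/// multiply by a power of two becomes a left shift (e.g. mul x, 8 -> shl x, 3)
+/// and an unsigned divide by a power of two becomes a logical right shift.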
+Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed): check that shift amounts are in range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ if (ResultReg)
+ return ResultReg;
+ Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (!MaterialReg) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ if (!MaterialReg)
+ return 0;
+ }
+ return fastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+Register FastISel::createResultReg(const TargetRegisterClass *RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
+ unsigned OpNum) {
+ if (Op.isVirtual()) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ Register NewOp = createResultReg(RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
+Register FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC) {
+ Register ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg);
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
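+ // If the instruction has no explicit definitions, its result is produced in
+ // an implicit physical register def; copy that into ResultReg so the caller
+ // always gets a virtual register. The other fastEmitInst_* helpers below
+ // follow the same pattern.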
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addReg(Op0);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addReg(Op0);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ unsigned Op1) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addReg(Op0)
+ .addReg(Op1);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addReg(Op0)
+ .addReg(Op1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ unsigned Op1, unsigned Op2) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addReg(Op0)
+ .addReg(Op1)
+ .addReg(Op2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addReg(Op0)
+ .addReg(Op1)
+ .addReg(Op2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ uint64_t Imm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addReg(Op0)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addReg(Op0)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ uint64_t Imm1, uint64_t Imm2) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addReg(Op0)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addReg(Op0)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ unsigned Op1, uint64_t Imm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ Register ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addReg(Op0)
+ .addReg(Op1)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
+ .addReg(Op0)
+ .addReg(Op1)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm) {
+ Register ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
+ }
+ return ResultReg;
+}
+
+Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+ uint32_t Idx) {
+ Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(Register::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
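+ // Constrain the source register to a class that actually supports the
+ // requested subregister index before copying out of it.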
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0, 0, Idx);
+ return ResultReg;
+}
+
+/// Emit MachineInstrs to compute the value of Op with all but the least
+/// significant bit set to zero.
+Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0) {
+ return fastEmit_ri(VT, VT, ISD::AND, Op0, 1);
+}
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBB's for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const Instruction *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin()))
+ continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (const PHINode &PN : SuccBB->phis()) {
+ // Ignore dead PHIs.
+ if (PN.use_empty())
+ continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Use the location of the operand if
+ // there is one; otherwise use no location and let flushLocalValueMap fix it.
+ MIMD = {};
+ if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
+ MIMD = MIMetadata(*Inst);
+
+ Register Reg = getRegForValue(PHIOp);
+ if (!Reg) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
+ MIMD = {};
+ }
+ }
+
+ return true;
+}
+
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+ assert(LI->hasOneUse() &&
+ "tryToFoldLoad expected a LoadInst with a single use");
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->user_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->user_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile())
+ return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ Register LoadReg = getRegForValue(LI);
+ if (!LoadReg)
+ return false;
+
+ // We can't fold if this vreg has no uses or more than one use. Multiple uses
+ // may mean that the instruction got lowered to multiple MIs, or the use of
+ // the loaded value ended up being multiple operands of the result.
+ if (!MRI.hasOneUse(LoadReg))
+ return false;
+
+ // If the register has fixups, there may be additional uses through a
+ // different alias of the register.
+ if (FuncInfo.RegsWithFixups.contains(LoadReg))
+ return false;
+
+ MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+ MachineInstr *User = RI->getParent();
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo.InsertPt = User;
+ FuncInfo.MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
+bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
+ // Must be an add.
+ if (!isa<AddOperator>(Add))
+ return false;
+ // Type size needs to match.
+ if (DL.getTypeSizeInBits(GEP->getType()) !=
+ DL.getTypeSizeInBits(Add->getType()))
+ return false;
+ // Must be in the same basic block.
+ if (isa<Instruction>(Add) &&
+ FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
+ return false;
+ // Must have a constant operand.
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
+}
+
+MachineMemOperand *
+FastISel::createMachineMemOperandFor(const Instruction *I) const {
+ const Value *Ptr;
+ Type *ValTy;
+ MaybeAlign Alignment;
+ MachineMemOperand::Flags Flags;
+ bool IsVolatile;
+
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ Alignment = LI->getAlign();
+ IsVolatile = LI->isVolatile();
+ Flags = MachineMemOperand::MOLoad;
+ Ptr = LI->getPointerOperand();
+ ValTy = LI->getType();
+ } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ Alignment = SI->getAlign();
+ IsVolatile = SI->isVolatile();
+ Flags = MachineMemOperand::MOStore;
+ Ptr = SI->getPointerOperand();
+ ValTy = SI->getValueOperand()->getType();
+ } else
+ return nullptr;
+
+ bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal);
+ bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load);
+ bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
+ const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
+
+ AAMDNodes AAInfo = I->getAAMetadata();
+
+ if (!Alignment) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlign(ValTy);
+
+ unsigned Size = DL.getTypeStoreSize(ValTy);
+
+ if (IsVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (IsNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (IsDereferenceable)
+ Flags |= MachineMemOperand::MODereferenceable;
+ if (IsInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
+ *Alignment, AAInfo, Ranges);
+}
+
+CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
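+ // With identical operands every predicate degenerates to one of FALSE, TRUE,
+ // ORD or UNO: e.g. "fcmp oeq x, x" only tests that x is not NaN (FCMP_ORD),
+ // while "icmp eq x, x" is always true (FCMP_TRUE).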
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 000000000000..1d0a03ccfcdc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,560 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "function-lowering-info"
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (const User *U : I->users())
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+
+ return false;
+}
+
+static ISD::NodeType getPreferredExtendForValue(const Instruction *I) {
+ // Look at the compare instructions that use the source value: if more of
+ // them use signed predicates than unsigned ones, prefer SIGN_EXTEND.
+ //
+ // This helps remove some redundant sign and zero extension instructions and
+ // eventually exposes more machine CSE opportunities.
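+ //
+ // For example, a value whose users include two signed compares and one
+ // unsigned compare is marked SIGN_EXTEND; otherwise the default ANY_EXTEND
+ // is kept.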
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ unsigned NumOfSigned = 0, NumOfUnsigned = 0;
+ for (const User *U : I->users()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ NumOfSigned += CI->isSigned();
+ NumOfUnsigned += CI->isUnsigned();
+ }
+ }
+ if (NumOfSigned > NumOfUnsigned)
+ ExtendKind = ISD::SIGN_EXTEND;
+
+ return ExtendKind;
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
+ SelectionDAG *DAG) {
+ Fn = &fn;
+ MF = &mf;
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ UA = DAG->getUniformityInfo();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ CallingConv::ID CC = Fn->getCallingConv();
+
+ GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
+ mf.getDataLayout());
+ CanLowerReturn =
+ TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext());
+
+ // If this personality uses funclets, we need to do a bit more work.
+ DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects;
+ EHPersonality Personality = classifyEHPersonality(
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr);
+ if (isFuncletEHPersonality(Personality)) {
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ // Collect the catch object allocas referenced by the WinEH handlers so that
+ // their frame indices can be filled in once the allocas are given stack
+ // slots below.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (const AllocaInst *AI = H.CatchObj.Alloca)
+ CatchObjects.insert({AI, {}}).first->second.push_back(
+ &H.CatchObj.FrameIndex);
+ else
+ H.CatchObj.FrameIndex = INT_MAX;
+ }
+ }
+ }
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ const Align StackAlign = TFI->getStackAlign();
+ for (const BasicBlock &BB : *Fn) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ Type *Ty = AI->getAllocatedType();
+ Align Alignment = AI->getAlign();
+
+ // Static allocas can be folded into the initial stack frame
+ // adjustment. For targets that don't realign the stack, don't
+ // do this if there is an extra alignment requirement.
+ if (AI->isStaticAlloca() &&
+ (TFI->isStackRealignable() || (Alignment <= StackAlign))) {
+ const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
+ uint64_t TySize =
+ MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinValue();
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ int FrameIndex = INT_MAX;
+ auto Iter = CatchObjects.find(AI);
+ if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
+ FrameIndex = MF->getFrameInfo().CreateFixedObject(
+ TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
+ MF->getFrameInfo().setObjectAlignment(FrameIndex, Alignment);
+ } else {
+ FrameIndex = MF->getFrameInfo().CreateStackObject(TySize, Alignment,
+ false, AI);
+ }
+
+ // Scalable vectors and structures that contain scalable vectors may
+ // need a special StackID to distinguish them from other (fixed size)
+ // stack objects.
+ if (Ty->isScalableTy())
+ MF->getFrameInfo().setStackID(FrameIndex,
+ TFI->getStackIDForScalableVectors());
+
+ StaticAllocaMap[AI] = FrameIndex;
+ // Update the catch handler information.
+ if (Iter != CatchObjects.end()) {
+ for (int *CatchObjPtr : Iter->second)
+ *CatchObjPtr = FrameIndex;
+ }
+ } else {
+ // FIXME: Overaligned static allocas should be grouped into
+ // a single dynamic allocation instead of using a separate
+ // stack allocation for each one.
+ // Inform the Frame Information that we have variable-sized objects.
+ MF->getFrameInfo().CreateVariableSizedObject(
+ Alignment <= StackAlign ? Align(1) : Alignment, AI);
+ }
+ } else if (auto *Call = dyn_cast<CallBase>(&I)) {
+ // Look for inline asm that clobbers the SP register.
+ if (Call->isInlineAsm()) {
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ std::vector<TargetLowering::AsmOperandInfo> Ops =
+ TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI,
+ *Call);
+ for (TargetLowering::AsmOperandInfo &Op : Ops) {
+ if (Op.Type == InlineAsm::isClobber) {
+ // Clobbers don't have SDValue operands, hence SDValue().
+ TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
+ Op.ConstraintVT);
+ if (PhysReg.first == SP)
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
+ }
+ }
+ }
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo().setHasVAStart(true);
+ }
+
+ // If we have a musttail call in a variadic function, we need to ensure
+ // we forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
+ }
+ }
+
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(&I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
+ InitializeRegForValue(&I);
+
+ // Decide the preferred extend type for a value.
+ PreferredExtendType[&I] = getPreferredExtendForValue(&I);
+ }
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (const BasicBlock &BB : *Fn) {
+ // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
+ // are really data, and no instructions can live here.
+ if (BB.isEHPad()) {
+ const Instruction *PadInst = BB.getFirstNonPHI();
+ // If this is a non-landingpad EH pad, mark this function as using
+ // funclets.
+ // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid
+ // setting this in such cases in order to improve frame layout.
+ if (!isa<LandingPadInst>(PadInst)) {
+ MF->setHasEHScopes(true);
+ MF->setHasEHFunclets(true);
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
+ }
+ if (isa<CatchSwitchInst>(PadInst)) {
+ assert(&*BB.begin() == PadInst &&
+ "WinEHPrepare failed to remove PHIs from imaginary BBs");
+ continue;
+ }
+ if (isa<FuncletPadInst>(PadInst))
+ assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs");
+ }
+
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB);
+ MBBMap[&BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB.hasAddressTaken())
+ MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));
+
+ // Mark landing pad blocks.
+ if (BB.isEHPad())
+ MBB->setIsEHPad();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (const PHINode &PN : BB.phis()) {
+ if (PN.use_empty())
+ continue;
+
+ // Skip empty types
+ if (PN.getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN.getDebugLoc();
+ unsigned PHIReg = ValueMap[&PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs);
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ if (isFuncletEHPersonality(Personality)) {
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.Handler)
+ H.Handler = MBBMap[cast<const BasicBlock *>(H.Handler)];
+ }
+ }
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[cast<const BasicBlock *>(UME.Cleanup)];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const auto *BB = cast<const BasicBlock *>(UME.Handler);
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const auto *BB = cast<const BasicBlock *>(CME.Handler);
+ CME.Handler = MBBMap[BB];
+ }
+ } else if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ calculateWasmEHInfo(&fn, EHInfo);
+
+ // Map all BB references in the Wasm EH data to MBBs.
+ DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest;
+ for (auto &KV : EHInfo.SrcToUnwindDest) {
+ const auto *Src = cast<const BasicBlock *>(KV.first);
+ const auto *Dest = cast<const BasicBlock *>(KV.second);
+ SrcToUnwindDest[MBBMap[Src]] = MBBMap[Dest];
+ }
+ EHInfo.SrcToUnwindDest = std::move(SrcToUnwindDest);
+ DenseMap<BBOrMBB, SmallPtrSet<BBOrMBB, 4>> UnwindDestToSrcs;
+ for (auto &KV : EHInfo.UnwindDestToSrcs) {
+ const auto *Dest = cast<const BasicBlock *>(KV.first);
+ UnwindDestToSrcs[MBBMap[Dest]] = SmallPtrSet<BBOrMBB, 4>();
+ for (const auto P : KV.second)
+ UnwindDestToSrcs[MBBMap[Dest]].insert(
+ MBBMap[cast<const BasicBlock *>(P)]);
+ }
+ EHInfo.UnwindDestToSrcs = std::move(UnwindDestToSrcs);
+ }
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ VirtReg2Value.clear();
+ StaticAllocaMap.clear();
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ DescribedArgs.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+ RegsWithFixups.clear();
+ StatepointStackSlots.clear();
+ StatepointRelocationMaps.clear();
+ PreferredExtendType.clear();
+ PreprocessedDbgDeclares.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
+ return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
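+/// For example, a value of type {i32, i32} on a target where i32 is a single
+/// legal register (an illustrative assumption) gets two consecutive virtual
+/// registers, and the number of the first one is returned.
+///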
+Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+
+ Register FirstReg;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ Register R = CreateReg(RegisterVT, isDivergent);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+Register FunctionLoweringInfo::CreateRegs(const Value *V) {
+ return CreateRegs(V->getType(), UA && UA->isDivergent(V) &&
+ !TLI->requiresUniformRegister(*MF, V));
+}
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(Register Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return nullptr;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return nullptr;
+
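+ // When widening, the new high bits are completely unknown (any-extend), so
+ // reset the sign bit count and widen the known bits conservatively.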
+ if (BitWidth > LOI->Known.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->Known = LOI->Known.anyext(BitWidth);
+ }
+
+ return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ auto It = ValueMap.find(PN);
+ if (It == ValueMap.end())
+ return;
+
+ Register DestReg = It->second;
+ if (DestReg == 0)
+ return;
+ assert(DestReg.isVirtual() && "Expected a virtual reg");
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
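+ // An undef or constant-expression incoming value tells us nothing useful, so
+ // record fully conservative information for the destination and return.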
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ DestLOI.Known = KnownBits(BitWidth);
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val;
+ if (TLI->signExtendConstant(CI))
+ Val = CI->getValue().sext(BitWidth);
+ else
+ Val = CI->getValue().zext(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.Known = KnownBits::makeConstant(Val);
+ } else {
+    assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+                                "its CopyToReg node was created.");
+ Register SrcReg = ValueMap[V];
+ if (!SrcReg.isVirtual()) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.Known.Zero.getBitWidth() == BitWidth &&
+ DestLOI.Known.One.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ DestLOI.Known = KnownBits(BitWidth);
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val;
+ if (TLI->signExtendConstant(CI))
+ Val = CI->getValue().sext(BitWidth);
+ else
+ Val = CI->getValue().zext(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.Known.Zero &= ~Val;
+ DestLOI.Known.One &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ Register SrcReg = ValueMap[V];
+ if (!SrcReg.isVirtual()) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.Known = DestLOI.Known.intersectWith(SrcLOI->Known);
+ }
+}
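+
+// Worked example (sketch, assuming i8 is promoted to i32 on the target): for
+//   %p = phi i8 [ 1, %bb0 ], [ 3, %bb1 ]
+// the first constant seeds Known with the value 1, and the loop above then
+// intersects it with the value 3. The result: bit 0 is known one, bit 1 is
+// unknown, bits 2..31 are known zero, and NumSignBits = min(31, 30) = 30.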
+
+/// setArgumentFrameIndex - Record the frame index for the byval argument.
+/// This overrides any previous frame index entry for this argument.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get the frame index for the byval argument.
+/// If the argument does not have an assigned frame index, INT_MAX is
+/// returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
+ auto I = ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ return INT_MAX;
+}
+
+Register FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+ const Value *CPI, const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto I = CatchPadExceptionPointers.insert({CPI, 0});
+ Register &VReg = I.first->second;
+ if (I.second)
+ VReg = MRI.createVirtualRegister(RC);
+ assert(VReg && "null vreg in exception pointer table!");
+ return VReg;
+}
+
+const Value *
+FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) {
+ if (VirtReg2Value.empty()) {
+ SmallVector<EVT, 4> ValueVTs;
+ for (auto &P : ValueMap) {
+ ValueVTs.clear();
+ ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ P.first->getType(), ValueVTs);
+ unsigned Reg = P.second;
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ VirtReg2Value[Reg++] = P.first;
+ }
+ }
+ }
+ return VirtReg2Value.lookup(Vreg);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 000000000000..4e7895c0b3cf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,1414 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "instr-emitter"
+
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// countOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (RN->getReg().isPhysical())
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
+ return N;
+}
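+
+// Example (sketch): for a call-like node whose operands, after dropping the
+// trailing chain and glue, are (sym, $physreg0, $physreg1, regmask) with
+// NumExpUses == 1, the loop above treats the two physreg operands and the
+// register mask as implicit uses, so N == 4 and NumImpUses == 3.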
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ Register SrcReg,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ Register VRBase;
+ if (SrcReg.isVirtual()) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = nullptr;
+ MVT VT = Node->getSimpleValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+
+ for (SDNode *User : Node->uses()) {
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = nullptr;
+ if (i + II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(TRI->isTypeLegalForClass(*UseRC, VT) &&
+ "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else
+ DstRC = SrcRC;
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
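+
+// In MIR terms (sketch): when every use can read the physical register
+// directly and its class reports a negative copy cost, the physreg is reused
+// as-is; otherwise a fresh vreg is created and a
+//   %newvreg = COPY $physreg
+// is emitted, and %newvreg becomes the mapping for this node result.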
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ unsigned NumResults = CountResults(Node);
+ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+ II.isVariadic() && II.variadicOpsAreDefs();
+ unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+ if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT)
+ NumVRegs = NumResults;
+ for (unsigned i = 0; i < NumVRegs; ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ Register VRBase;
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ // Always let the value type influence the used register class. The
+ // constraints on the instruction may be too lax to represent the value
+ // type correctly. For example, a 64-bit float (X86::FR64) can't live in
+ // the 32-bit float super-class (X86::FR32).
+ if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
+ const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+ Node->getSimpleValueType(i),
+ (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
+ if (RC)
+ VTRC = TRI->getCommonSubClass(RC, VTRC);
+ if (VTRC)
+ RC = VTRC;
+ }
+
+ if (!II.operands().empty() && II.operands()[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(VRBase.isPhysical());
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode *User : Node->uses()) {
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MIB.addReg(VRBase, RegState::Define);
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ // If this def corresponds to a result of the SDNode insert the VRBase into
+ // the lookup map.
+ if (i < NumResults) {
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+Register InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ const TargetRegisterClass *RC = TLI->getRegClassFor(
+ Op.getSimpleValueType(), Op.getNode()->isDivergent());
+ Register VReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ Register VReg = getVR(Op, VRBaseMap);
+
+ const MCInstrDesc &MCID = MIB->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.operands()[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
+ if (II) {
+ const TargetRegisterClass *OpRC = nullptr;
+ if (IIOpNum < II->getNumOperands())
+ OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
+
+ if (OpRC) {
+ unsigned MinNumRegs = MinRCSize;
+ // Don't apply any RC size limit for IMPLICIT_DEF. Each use has a unique
+ // virtual register.
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ MinNumRegs = 0;
+
+ const TargetRegisterClass *ConstrainedRC
+ = MRI->constrainRegClass(VReg, OpRC, MinNumRegs);
+ if (!ConstrainedRC) {
+ OpRC = TRI->getAllocatableClass(OpRC);
+ assert(OpRC && "Constraints cannot be fulfilled for allocation");
+ Register NewVReg = MRI->createVirtualRegister(OpRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ } else {
+ assert(ConstrainedRC->isAllocatable() &&
+ "Constraining an allocatable VReg produced an unallocatable class?");
+ }
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+  // Avoid kill flags on scheduler-cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MIB->getNumOperands();
+ while (Idx > 0 &&
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
+}
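+
+// Sketch of the fallback path, using the X86 classes named in the comment
+// above: if %v:GR32 cannot be constrained to the required GR32_NOSP class
+// without dropping below MinRCSize allocatable registers, the code emits
+//   %tmp:GR32_NOSP = COPY %v
+// and uses %tmp as the instruction operand instead of %v.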
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding.
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MIB.addImm(C->getSExtValue());
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ MIB.addFPImm(F->getConstantFPValue());
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ Register VReg = R->getReg();
+ MVT OpVT = Op.getSimpleValueType();
+ const TargetRegisterClass *IIRC =
+ II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
+ : nullptr;
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT)
+ ? TLI->getRegClassFor(OpVT,
+ Op.getNode()->isDivergent() ||
+ (IIRC && TRI->isDivergentRegClass(IIRC)))
+ : nullptr;
+
+ if (OpRC && IIRC && OpRC != IIRC && VReg.isVirtual()) {
+ Register NewVReg = MRI->createVirtualRegister(IIRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MIB.addReg(VReg, getImplRegState(Imp));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MIB.addRegMask(RM->getRegMask());
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MIB.addMBB(BBNode->getBasicBlock());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MIB.addFrameIndex(FI->getIndex());
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ Align Alignment = CP->getAlign();
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Alignment);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Alignment);
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
+ } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) {
+ MIB.addSym(SymNode->getMCSymbol());
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx,
+ MVT VT, bool isDivergent, const DebugLoc &DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ Register NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ Register VRBase;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode *User : Node->uses()) {
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+    // constraints on the %dst register; COPY can target all legal register
+ // classes.
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+
+ Register Reg;
+ MachineInstr *DefMI;
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
+ if (R && R->getReg().isPhysical()) {
+ Reg = R->getReg();
+ DefMI = nullptr;
+ } else {
+ Reg = R ? R->getReg() : getVR(Node->getOperand(0), VRBaseMap);
+ DefMI = MRI->getVRegDef(Reg);
+ }
+
+ Register SrcReg, DstReg;
+ unsigned DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ } else {
+ // Reg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ if (Reg.isVirtual())
+ Reg = ConstrainForSubReg(Reg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->isDivergent(), Node->getDebugLoc());
+ // Create the destreg if it is missing.
+ if (!VRBase)
+ VRBase = MRI->createVirtualRegister(TRC);
+
+ // Create the extract_subreg machine instruction.
+ MachineInstrBuilder CopyMI =
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+ if (Reg.isVirtual())
+ CopyMI.addReg(Reg, 0, SubIdx);
+ else
+ CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
+ VRBase = MRI->createVirtualRegister(SRC);
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
+
+    // If creating a subreg_to_reg, the first input operand is an immediate
+    // (the implicit value); otherwise it's a register.
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MIB.addImm(SD->getZExtValue());
+ } else
+ AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregister being inserted
+ AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
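+
+// Summary of the emitted forms (sketch):
+//   EXTRACT_SUBREG                -> %dst = COPY %src:sub (or a physreg's
+//                                    sub-register when the source is physical)
+//   INSERT_SUBREG / SUBREG_TO_REG -> kept as machine instructions carrying the
+//                                    inserted value and the sub-register index;
+//                                    TwoAddressInstructionPass lowers
+//                                    INSERT_SUBREG into COPYs later.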
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ // If the input pattern has a chain, then the root of the corresponding
+ // output pattern will get a chain as well. This can happen to be a
+ // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults).
+ if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ --NumOps; // Ignore chain if it exists.
+
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !R->getReg().isPhysical()) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ }
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MIB);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
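+
+// Operand layout reminder (sketch, register names illustrative): after the
+// register-class id operand, a REG_SEQUENCE carries (value, subreg-index)
+// pairs, e.g.
+//   %dst = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
+// which is why the loop above insists on an odd total operand count.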
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILocalVariable>(SD->getVariable())
+ ->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ SD->setIsEmitted();
+
+ assert(!SD->getLocationOps().empty() &&
+ "dbg_value with no location operands?");
+
+ if (SD->isInvalidated())
+ return EmitDbgNoLocation(SD);
+
+ // Attempt to produce a DBG_INSTR_REF if we've been asked to.
+ if (EmitDebugInstrRefs)
+ if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap))
+ return InstrRef;
+
+ // Emit variadic dbg_value nodes as DBG_VALUE_LIST if they have not been
+ // emitted as instruction references.
+ if (SD->isVariadic())
+ return EmitDbgValueList(SD, VRBaseMap);
+
+ // Emit single-location dbg_value nodes as DBG_VALUE if they have not been
+ // emitted as instruction references.
+ return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+}
+
+MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {
+ const Value *V = Op.getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ return MachineOperand::CreateCImm(CI);
+ return MachineOperand::CreateImm(CI->getSExtValue());
+ }
+ if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
+ return MachineOperand::CreateFPImm(CF);
+ // Note: This assumes that all nullptr constants are zero-valued.
+ if (isa<ConstantPointerNull>(V))
+ return MachineOperand::CreateImm(0);
+ // Undef or unhandled value type, so return an undef operand.
+ return MachineOperand::CreateReg(
+ /* Reg */ 0U, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
+}
+
+void InstrEmitter::AddDbgValueLocationOps(
+ MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc,
+ ArrayRef<SDDbgOperand> LocationOps,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ for (const SDDbgOperand &Op : LocationOps) {
+ switch (Op.getKind()) {
+ case SDDbgOperand::FRAMEIX:
+ MIB.addFrameIndex(Op.getFrameIx());
+ break;
+ case SDDbgOperand::VREG:
+ MIB.addReg(Op.getVReg());
+ break;
+ case SDDbgOperand::SDNODE: {
+ SDValue V = SDValue(Op.getSDNode(), Op.getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ if (VRBaseMap.count(V) == 0)
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } break;
+ case SDDbgOperand::CONST:
+ MIB.add(GetMOForConstDbgOp(Op));
+ break;
+ }
+ }
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ MDNode *Var = SD->getVariable();
+ const DIExpression *Expr = (DIExpression *)SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
+
+ // Returns true if the given operand is not a legal debug operand for a
+ // DBG_INSTR_REF.
+ auto IsInvalidOp = [](SDDbgOperand DbgOp) {
+ return DbgOp.getKind() == SDDbgOperand::FRAMEIX;
+ };
+ // Returns true if the given operand is not itself an instruction reference
+ // but is a legal debug operand for a DBG_INSTR_REF.
+ auto IsNonInstrRefOp = [](SDDbgOperand DbgOp) {
+ return DbgOp.getKind() == SDDbgOperand::CONST;
+ };
+
+ // If this variable location does not depend on any instructions or contains
+ // any stack locations, produce it as a standard debug value instead.
+ if (any_of(SD->getLocationOps(), IsInvalidOp) ||
+ all_of(SD->getLocationOps(), IsNonInstrRefOp)) {
+ if (SD->isVariadic())
+ return EmitDbgValueList(SD, VRBaseMap);
+ return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+ }
+
+ // Immediately fold any indirectness from the LLVM-IR intrinsic into the
+ // expression:
+ if (SD->isIndirect())
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
+ // If this is not already a variadic expression, it must be modified to become
+ // one.
+ if (!SD->isVariadic())
+ Expr = DIExpression::convertToVariadicExpression(Expr);
+
+ SmallVector<MachineOperand> MOs;
+
+ // It may not be immediately possible to identify the MachineInstr that
+ // defines a VReg, it can depend for example on the order blocks are
+ // emitted in. When this happens, or when further analysis is needed later,
+ // produce an instruction like this:
+ //
+ // DBG_INSTR_REF !123, !456, %0:gr64
+ //
+ // i.e., point the instruction at the vreg, and patch it up later in
+ // MachineFunction::finalizeDebugInstrRefs.
+ auto AddVRegOp = [&](unsigned VReg) {
+ MOs.push_back(MachineOperand::CreateReg(
+ /* Reg */ VReg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true));
+ };
+ unsigned OpCount = SD->getLocationOps().size();
+ for (unsigned OpIdx = 0; OpIdx < OpCount; ++OpIdx) {
+ SDDbgOperand DbgOperand = SD->getLocationOps()[OpIdx];
+
+ // Try to find both the defined register and the instruction defining it.
+ MachineInstr *DefMI = nullptr;
+ unsigned VReg;
+
+ if (DbgOperand.getKind() == SDDbgOperand::VREG) {
+ VReg = DbgOperand.getVReg();
+
+ // No definition means that block hasn't been emitted yet. Leave a vreg
+ // reference to be fixed later.
+ if (!MRI->hasOneDef(VReg)) {
+ AddVRegOp(VReg);
+ continue;
+ }
+
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else if (DbgOperand.getKind() == SDDbgOperand::SDNODE) {
+ // Look up the corresponding VReg for the given SDNode, if any.
+ SDNode *Node = DbgOperand.getSDNode();
+ SDValue Op = SDValue(Node, DbgOperand.getResNo());
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ // No VReg -> produce a DBG_VALUE $noreg instead.
+ if (I == VRBaseMap.end())
+ break;
+
+ // Try to pick out a defining instruction at this point.
+ VReg = getVR(Op, VRBaseMap);
+
+ // Again, if there's no instruction defining the VReg right now, fix it up
+ // later.
+ if (!MRI->hasOneDef(VReg)) {
+ AddVRegOp(VReg);
+ continue;
+ }
+
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else {
+ assert(DbgOperand.getKind() == SDDbgOperand::CONST);
+ MOs.push_back(GetMOForConstDbgOp(DbgOperand));
+ continue;
+ }
+
+    // Avoid copy-like instructions: they don't define values, only move them.
+ // Leave a virtual-register reference until it can be fixed up later, to
+ // find the underlying value definition.
+ if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) {
+ AddVRegOp(VReg);
+ continue;
+ }
+
+ // Find the operand number which defines the specified VReg.
+ unsigned OperandIdx = 0;
+ for (const auto &MO : DefMI->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI->getNumOperands());
+
+ // Make the DBG_INSTR_REF refer to that instruction, and that operand.
+ unsigned InstrNum = DefMI->getDebugInstrNum();
+ MOs.push_back(MachineOperand::CreateDbgInstrRef(InstrNum, OperandIdx));
+ }
+
+ // If we haven't created a valid MachineOperand for every DbgOp, abort and
+ // produce an undef DBG_VALUE.
+ if (MOs.size() != OpCount)
+ return EmitDbgNoLocation(SD);
+
+ return BuildMI(*MF, DL, RefII, false, MOs, Var, Expr);
+}
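+
+// Resulting forms (sketch; exact MIR spelling may differ by version): a fully
+// resolved operand becomes an instruction reference such as
+//   DBG_INSTR_REF !var, !expr, dbg-instr-ref(12, 0)
+// while an unresolved operand is left as a plain vreg, e.g. %0:gr64, to be
+// patched up later in MachineFunction::finalizeDebugInstrRefs.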
+
+MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+  // original value is no longer computed, the live ranges of earlier
+  // DBG_VALUEs must not leak into later code.
+ DIVariable *Var = SD->getVariable();
+ const DIExpression *Expr =
+ DIExpression::convertToUndefExpression(SD->getExpression());
+ DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &Desc = TII->get(TargetOpcode::DBG_VALUE);
+ return BuildMI(*MF, DL, Desc, false, 0U, Var, Expr);
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgValueList(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ MDNode *Var = SD->getVariable();
+ DIExpression *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)*
+ const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST);
+ // Build the DBG_VALUE_LIST instruction base.
+ auto MIB = BuildMI(*MF, DL, DbgValDesc);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ AddDbgValueLocationOps(MIB, DbgValDesc, SD->getLocationOps(), VRBaseMap);
+ return &*MIB;
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ MDNode *Var = SD->getVariable();
+ DIExpression *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+
+ assert(SD->getLocationOps().size() == 1 &&
+ "Non variadic dbg_value should have only one location op");
+
+ // See about constant-folding the expression.
+ // Copy the location operand in case we replace it.
+ SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]);
+ if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) {
+ const Value *V = LocationOps[0].getConst();
+ if (auto *C = dyn_cast<ConstantInt>(V)) {
+ std::tie(Expr, C) = Expr->constantFold(C);
+ LocationOps[0] = SDDbgOperand::fromConst(C);
+ }
+ }
+
+ // Emit non-variadic dbg_value nodes as DBG_VALUE.
+ // DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr
+ auto MIB = BuildMI(*MF, DL, II);
+ AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);
+
+ if (SD->isIndirect())
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U);
+
+ return MIB.addMetadata(Var).addMetadata(Expr);
+}
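+
+// Emitted shapes (sketch):
+//   DBG_VALUE %loc, $noreg, !var, !expr   (direct location)
+//   DBG_VALUE %loc, 0, !var, !expr        (indirect location)
+// where the second operand encodes the isIndirect flag handled just above.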
+
+MachineInstr *
+InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
+ MDNode *Label = SD->getLabel();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ MIB.addMetadata(Label);
+
+ return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NumDefs = II.getNumDefs();
+ const MCPhysReg *ScratchRegs = nullptr;
+
+ // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
+ if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+ // Stackmaps do not have arguments and do not preserve their calling
+ // convention. However, to simplify runtime support, they clobber the same
+ // scratch registers as AnyRegCC.
+ unsigned CC = CallingConv::AnyReg;
+ if (Opc == TargetOpcode::PATCHPOINT) {
+ CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+ NumDefs = NumResults;
+ }
+ ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ } else if (Opc == TargetOpcode::STATEPOINT) {
+ NumDefs = NumResults;
+ }
+
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
+ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+ II.isVariadic() && II.variadicOpsAreDefs();
+ bool HasPhysRegOuts = NumResults > NumDefs && !II.implicit_defs().empty() &&
+ !HasVRegVariadicDefs;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <=
+ II.getNumOperands() + II.implicit_defs().size() + NumImpUses &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults) {
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+
+ // Transfer any IR flags from the SDNode to the MachineInstr
+ MachineInstr *MI = MIB.getInstr();
+ const SDNodeFlags Flags = Node->getFlags();
+ if (Flags.hasNoSignedZeros())
+ MI->setFlag(MachineInstr::MIFlag::FmNsz);
+
+ if (Flags.hasAllowReciprocal())
+ MI->setFlag(MachineInstr::MIFlag::FmArcp);
+
+ if (Flags.hasNoNaNs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoNans);
+
+ if (Flags.hasNoInfs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoInfs);
+
+ if (Flags.hasAllowContract())
+ MI->setFlag(MachineInstr::MIFlag::FmContract);
+
+ if (Flags.hasApproximateFuncs())
+ MI->setFlag(MachineInstr::MIFlag::FmAfn);
+
+ if (Flags.hasAllowReassociation())
+ MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+
+ if (Flags.hasNoUnsignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoUWrap);
+
+ if (Flags.hasNoSignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoSWrap);
+
+ if (Flags.hasExact())
+ MI->setFlag(MachineInstr::MIFlag::IsExact);
+
+ if (Flags.hasNoFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ if (Flags.hasUnpredictable())
+ MI->setFlag(MachineInstr::MIFlag::Unpredictable);
+ }
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = NumDefs > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Add scratch registers as implicit def and early clobber
+ if (ScratchRegs)
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
+ RegState::EarlyClobber);
+
+ // Set the memory reference descriptions of this instruction now that it is
+ // part of the function.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
+
+ // Set the CFI type.
+ MIB->setCFIType(*MF, Node->getCFIType());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MIB);
+
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<Register, 8> UsedRegs;
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = NumDefs; i < NumResults; ++i) {
+ Register Reg = II.implicit_defs()[i - NumDefs];
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap);
+ }
+ }
+
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ append_range(UsedRegs, MCID.implicit_uses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ Register Reg = R->getReg();
+ if (Reg.isPhysical())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ // Add rounding control registers as implicit def for function call.
+ if (II.isCall() && MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ UsedRegs.push_back(Reg);
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef())
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+  // STATEPOINT is too 'dynamic' to have a meaningful machine description.
+ // We have to manually tie operands.
+ if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) {
+ assert(!HasPhysRegOuts && "STATEPOINT mishandled");
+ MachineInstr *MI = MIB;
+ unsigned Def = 0;
+ int First = StatepointOpers(MI).getFirstGCPtrIdx();
+ assert(First > 0 && "Statepoint has Defs but no GC ptr list");
+ unsigned Use = (unsigned)First;
+ while (Def < NumDefs) {
+ if (MI->getOperand(Use).isReg())
+ MI->tieOperands(Def++, Use);
+ Use = StackMaps::getNextMetaArgIdx(MI, Use);
+ }
+ }
+
+ // Run post-isel target hook to adjust this instruction if needed.
+ if (II.hasPostISelHook())
+ TLI->AdjustInstrPostInstrSelection(*MIB, Node);
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ case ISD::EntryToken:
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ SDValue SrcVal = Node->getOperand(2);
+ if (DestReg.isVirtual() && SrcVal.isMachineOpcode() &&
+ SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+      // Instead of building a COPY to that vreg destination, build an
+      // IMPLICIT_DEF instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ break;
+ }
+ Register SrcReg;
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL: {
+ unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL)
+ ? TargetOpcode::EH_LABEL
+ : TargetOpcode::ANNOTATION_LABEL;
+ MCSymbol *S = cast<LabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(Opc)).addSym(S);
+ break;
+ }
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START)
+ ? TargetOpcode::LIFETIME_START
+ : TargetOpcode::LIFETIME_END;
+ auto *FI = cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
+ case ISD::PSEUDO_PROBE: {
+ unsigned TarOp = TargetOpcode::PSEUDO_PROBE;
+ auto Guid = cast<PseudoProbeSDNode>(Node)->getGuid();
+ auto Index = cast<PseudoProbeSDNode>(Node)->getIndex();
+ auto Attr = cast<PseudoProbeSDNode>(Node)->getAttributes();
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addImm(Guid)
+ .addImm(Index)
+ .addImm((uint8_t)PseudoProbeType::Block)
+ .addImm(Attr);
+ break;
+ }
+
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
+ ? TargetOpcode::INLINEASM_BR
+ : TargetOpcode::INLINEASM;
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MIB.addExternalSymbol(AsmStr);
+
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MIB.addImm(ExtraInfo);
+
+    // Remember the operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
+ // Remember registers that are part of early-clobber defs.
+ SmallVector<unsigned, 8> ECRegs;
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical()));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Reg.isPhysical()));
+ ECRegs.push_back(Reg);
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Non-function addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
+ AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
+ break;
+ case InlineAsm::Kind_Func: // Function addressing mode.
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ SDValue Op = Node->getOperand(i);
+ AddOperand(MIB, Op, 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Adjust Target Flags for function reference.
+ if (auto *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ unsigned NewFlags =
+ MF->getSubtarget().classifyGlobalFunctionReference(
+ TGA->getGlobal());
+ unsigned LastIdx = MIB.getInstr()->getNumOperands() - 1;
+ MIB.getInstr()->getOperand(LastIdx).setTargetFlags(NewFlags);
+ }
+ }
+ }
+ }
+
+ // GCC inline assembly allows input operands to also be early-clobber
+ // output operands (so long as the operand is written only after it's
+ // used), but this does not match the semantics of our early-clobber flag.
+ // If an early-clobber operand register is also an input operand register,
+ // then remove the early-clobber flag.
+ for (unsigned Reg : ECRegs) {
+ if (MIB->readsRegister(Reg, TRI)) {
+ MachineOperand *MO =
+ MIB->findRegisterDefOperand(Reg, false, false, TRI);
+ assert(MO && "No def operand for clobbered register?");
+ MO->setIsEarlyClobber(false);
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MIB.addMetadata(MD);
+
+ MBB->insert(InsertPos, MIB);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()), MRI(&MF->getRegInfo()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()),
+ TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
+ InsertPos(insertpos) {
+ EmitDebugInstrRefs = mbb->getParent()->useDebugInstrRef();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 000000000000..959bce31c8b2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,169 @@
+//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+class MachineInstrBuilder;
+class MCInstrDesc;
+class SDDbgLabel;
+class SDDbgValue;
+class SDDbgOperand;
+class TargetLowering;
+class TargetMachine;
+
+class LLVM_LIBRARY_VISIBILITY InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// Should we try to produce DBG_INSTR_REF instructions?
+ bool EmitDebugInstrRefs;
+
+  /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap);
+
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ Register getVR(SDValue Op,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ Register ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT,
+ bool isDivergent, const DebugLoc &DL);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional flag operands
+ /// (which do not go into the machine instrs.)
+ static unsigned CountResults(SDNode *Node);
+
+ void AddDbgValueLocationOps(MachineInstrBuilder &MIB,
+ const MCInstrDesc &DbgValDesc,
+ ArrayRef<SDDbgOperand> Locations,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// Emit a dbg_value as a DBG_INSTR_REF. May produce DBG_VALUE $noreg instead
+ /// if there is no variable location; alternatively a half-formed DBG_INSTR_REF
+ /// that refers to a virtual register and is corrected later in isel.
+ MachineInstr *EmitDbgInstrRef(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// Emit a DBG_VALUE $noreg, indicating a variable has no location.
+ MachineInstr *EmitDbgNoLocation(SDDbgValue *SD);
+
+ /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue.
+ MachineInstr *EmitDbgValueList(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// Emit a DBG_VALUE from the operands to SDDbgValue.
+ MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
+ /// Generate machine instruction for a dbg_label node.
+ MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap);
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 000000000000..61fc31715d71
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,5533 @@
+//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "legalizedag"
+
+namespace {
+
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+ uint8_t SignBit;
+};
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch instructions into brcc's.
+class SelectionDAGLegalize {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ /// The set of nodes which have already been legalized. We hold a
+ /// reference to it in order to update as necessary on node deletion.
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes;
+
+ /// A set of all the nodes updated during legalization.
+ SmallSetVector<SDNode *, 16> *UpdatedNodes;
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ // Libcall insertion helpers.
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG,
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes,
+ SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
+ : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
+ LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
+
+ /// Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
+
+private:
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
+ /// Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+
+ /// Return a vector shuffle operation which
+/// performs the same shuffle in terms of order or result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const;
+
+ std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ TargetLowering::ArgListTy &&Args, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+
+ void ExpandFrexpLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC,
+ SmallVectorImpl<SDValue> &Results);
+ void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ void ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl);
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl, SDValue ChainIn);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSPLAT_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL,
+ SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL,
+ SDValue NewIntValue) const;
+ SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+ SDValue ExpandFABS(SDNode *Node) const;
+ SDValue ExpandFNEG(SDNode *Node) const;
+ SDValue expandLdexp(SDNode *Node) const;
+ SDValue expandFrexp(SDNode *Node) const;
+
+ SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
+ void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
+ void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
+
+ SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+ SDValue ExpandConstant(ConstantSDNode *CP);
+
+ // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall
+ bool ExpandNode(SDNode *Node);
+ void ConvertNodeToLibcall(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+public:
+ // Node replacement helpers
+
+ void ReplacedNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(N);
+ }
+
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ assert(Old->getNumValues() == New->getNumValues() &&
+ "Replacing one node with another that produces a different number "
+ "of values!");
+ DAG.ReplaceAllUsesWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New);
+ ReplacedNode(Old);
+ }
+
+ void ReplaceNode(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
+
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+
+ DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: ");
+ New[i]->dump(&DAG));
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New[i].getNode());
+ }
+ ReplacedNode(Old);
+ }
+
+ void ReplaceNodeWithValue(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesOfValueWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
+};
+
+} // end anonymous namespace
+
+/// Return a vector shuffle operation which
+/// performs the same shuffle in terms of order or result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
+ EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask);
+}
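A minimal standalone sketch of the mask-widening step above, using plain C++ containers and example values rather than LLVM types (the growth factor and mask below are assumptions for illustration only):

    // Each original mask index Idx expands into NumEltsGrowth consecutive
    // indices Idx*NumEltsGrowth .. Idx*NumEltsGrowth+NumEltsGrowth-1;
    // an undef sentinel of -1 stays -1.
    #include <cassert>
    #include <cstdio>
    #include <vector>

    std::vector<int> widenShuffleMask(const std::vector<int> &Mask,
                                      unsigned NumEltsGrowth) {
      std::vector<int> NewMask;
      for (int Idx : Mask)
        for (unsigned j = 0; j != NumEltsGrowth; ++j)
          NewMask.push_back(Idx < 0 ? -1 : Idx * (int)NumEltsGrowth + (int)j);
      return NewMask;
    }

    int main() {
      // v4i32 mask <0,1,0,1> promoted to v8i16: growth factor 2.
      std::vector<int> NewMask = widenShuffleMask({0, 1, 0, 1}, 2);
      std::vector<int> Expected = {0, 1, 2, 3, 0, 1, 2, 3};
      assert(NewMask == Expected);
      for (int Idx : NewMask)
        std::printf("%d ", Idx); // prints: 0 1 2 3 0 1 2 3
      std::printf("\n");
    }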
+
+/// Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
+ bool Extend = false;
+ SDLoc dl(CFP);
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl,
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ APFloat APF = CFP->getValueAPF();
+ EVT OrigVT = VT;
+ EVT SVT = VT;
+
+ // We don't want to shrink SNaNs. Converting the SNaN back to its real type
+ // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
+ if (!APF.isSignaling()) {
+ while (SVT != MVT::f32 && SVT != MVT::f16 && SVT != MVT::bf16) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+ }
+
+ SDValue CPIdx =
+ DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
+ if (Extend) {
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ Alignment);
+ return Result;
+ }
+ SDValue Result = DAG.getLoad(
+ OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
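A rough standalone illustration of the shrink-to-float idea above: a double constant that round-trips exactly through float (and is not a signaling NaN) could be pooled as f32 and widened again with an extending load. Plain C++ only; the target-legality queries (isLoadExtLegal, ShouldShrinkFPConstant) are deliberately left out of this sketch:

    #include <cmath>
    #include <cstdio>

    // True if D could be stored as a float without changing its value.
    // The real code only refuses signaling NaNs; this sketch skips all NaNs.
    bool fitsInFloat(double D) {
      if (std::isnan(D))
        return false;
      float F = static_cast<float>(D);
      return static_cast<double>(F) == D;
    }

    int main() {
      std::printf("1.5 -> %s\n", fitsInFloat(1.5) ? "shrinkable" : "keep f64");
      std::printf("0.1 -> %s\n", fitsInFloat(0.1) ? "shrinkable" : "keep f64");
      // 1.5 is exact in f32; 0.1 is not, so it stays an f64 constant.
    }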
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
+ SDValue Result = DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Some targets cannot handle a variable insertion index for the
+/// INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
+ SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or an "extload into register" instruction),
+ // then permute it into place, if the idx is a constant and is supported
+ // by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+
+ SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
+
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(
+ Ch, dl, Tmp2, StackPtr2,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI));
+}
+
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers in which case the inserted value can be over width.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
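For the constant-index path above, the shuffle mask keeps lane i of the original vector everywhere except the insertion position, which takes lane 0 of the scalar-to-vector operand, i.e. index NumElts in the two-vector mask space. A small plain-C++ sketch with assumed sizes:

    #include <cstdio>
    #include <vector>

    // Build the mask used by the INSERT_VECTOR_ELT -> shuffle expansion.
    std::vector<int> insertEltMask(unsigned NumElts, unsigned InsertPos) {
      std::vector<int> Mask;
      for (unsigned i = 0; i != NumElts; ++i)
        Mask.push_back(i != InsertPos ? (int)i : (int)NumElts);
      return Mask;
    }

    int main() {
      // Inserting into lane 2 of a 4-element vector gives mask <0,1,4,3>.
      for (int Idx : insertEltMask(4, 2))
        std::printf("%d ", Idx);
      std::printf("\n");
    }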
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
+ LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc dl(ST);
+
+ // Don't optimise TargetConstantFP
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ SDLoc(CFP), MVT::i32);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl);
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue();
+}
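A standalone sketch of the rewrite above: the FP constant is re-expressed as its integer bit pattern, so 'store float 1.0' becomes a store of i32 0x3F800000, and an f64 constant becomes one i64 store or two i32 stores (low/high placement depending on endianness). Plain C++, with little-endian layout assumed in the comments:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float F = 1.0f;
      uint32_t Bits32;
      std::memcpy(&Bits32, &F, sizeof(Bits32));
      std::printf("store float 1.0  -> store i32 0x%08X\n", (unsigned)Bits32);

      double D = 1.0;
      uint64_t Bits64;
      std::memcpy(&Bits64, &D, sizeof(Bits64));
      uint32_t Lo = (uint32_t)Bits64;         // stored at +0 on little-endian
      uint32_t Hi = (uint32_t)(Bits64 >> 32); // stored at +4 on little-endian
      std::printf("store double 1.0 -> store i32 0x%08X, i32 0x%08X\n",
                  (unsigned)Lo, (unsigned)Hi);
    }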
+
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDLoc dl(Node);
+
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ if (!ST->isTruncatingStore()) {
+ LLVM_DEBUG(dbgs() << "Legalizing store operation\n");
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ EVT MemVT = ST->getMemoryVT();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
+ LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ } else
+ LLVM_DEBUG(dbgs() << "Legal store\n");
+ break;
+ }
+ case TargetLowering::Custom: {
+ LLVM_DEBUG(dbgs() << "Trying custom lowering\n");
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+ TypeSize StWidth = StVT.getSizeInBits();
+ TypeSize StSize = StVT.getStoreSizeInBits();
+ auto &DL = DAG.getDataLayout();
+
+ if (StWidth != StSize) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedValue());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedValue())) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned StWidthBits = StWidth.getFixedValue();
+ unsigned LogStWidth = Log2_32(StWidthBits);
+ assert(LogStWidth < 32);
+ unsigned RoundWidth = 1 << LogStWidth;
+ assert(RoundWidth < StWidthBits);
+ unsigned ExtraWidth = StWidthBits - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (DL.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ SDValue Result;
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ if (TLI.isTypeLegal(StVT)) {
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ } else {
+ // The in-memory type isn't legal. Truncate to the type it would promote
+ // to, and then do a truncstore.
+ Value = DAG.getNode(ISD::TRUNCATE, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
+ Value);
+ Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ }
+
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+}
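The non-power-of-two truncating-store path above splits, for example, a 24-bit store into a 16-bit store of the low bits plus an 8-bit store of the value shifted right by 16 at offset +2 (little-endian case; big-endian stores the top bits first). A plain-C++ sketch of the width arithmetic and resulting byte layout, with an assumed payload value:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned StWidth = 24;                       // bits being stored
      unsigned RoundWidth = 1;                     // largest power of 2 below
      while (RoundWidth * 2 <= StWidth)
        RoundWidth *= 2;                           // -> 16
      unsigned ExtraWidth = StWidth - RoundWidth;  // -> 8
      assert(RoundWidth % 8 == 0 && ExtraWidth % 8 == 0);

      uint32_t X = 0xABCDEF;                       // 24-bit payload
      uint16_t Lo = (uint16_t)X;                   // TRUNCSTORE:i16 at Ptr
      uint8_t  Hi = (uint8_t)(X >> RoundWidth);    // TRUNCSTORE:i8  at Ptr+2
      std::printf("lo16=0x%04X at +0, hi8=0x%02X at +%u\n",
                  (unsigned)Lo, (unsigned)Hi, RoundWidth / 8);
      // prints: lo16=0xCDEF at +0, hi8=0xAB at +2
    }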
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ SDLoc dl(Node);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n");
+ MVT VT = Node->getSimpleValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = LD->getMemoryVT();
+ const DataLayout &DL = DAG.getDataLayout();
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
+ std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
+ break;
+ }
+ case TargetLowering::Custom:
+ if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(RVal.getNode());
+ UpdatedNodes->insert(RChain.getNode());
+ }
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
+ EVT SrcVT = LD->getMemoryVT();
+ TypeSize SrcWidth = SrcVT.getSizeInBits();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) ==
+ TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), NVT,
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (!isPowerOf2_64(SrcWidth.getKnownMinValue())) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned SrcWidthBits = SrcWidth.getFixedValue();
+ unsigned LogSrcWidth = Log2_32(SrcWidthBits);
+ assert(LogSrcWidth < 32);
+ unsigned RoundWidth = 1 << LogSrcWidth;
+ assert(RoundWidth < SrcWidthBits);
+ unsigned ExtraWidth = SrcWidthBits - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+ auto &DL = DAG.getDataLayout();
+
+ if (DL.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0),
+ SrcVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ [[fallthrough]];
+ case TargetLowering::Legal:
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ EVT MemVT = LD->getMemoryVT();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
+ std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
+ }
+ break;
+
+ case TargetLowering::Expand: {
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
+ // If the source type is not legal, see if there is a legal extload to
+ // an intermediate type that we can then extend further.
+ EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
+ if ((LoadVT.isFloatingPoint() == SrcVT.isFloatingPoint()) &&
+ (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT))) {
+ // If we are loading a legal type, this is a non-extload followed by a
+ // full extend.
+ ISD::LoadExtType MidExtType =
+ (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
+
+ SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
+ SrcVT, LD->getMemOperand());
+ unsigned ExtendOp =
+ ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined upper bits behavior to allow using an in-reg extend
+ // with the illegal FP type, so load as an integer and do the
+ // from-integer conversion.
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain,
+ Ptr, ISrcVT, LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign-
+ // and zero-extend operations are currently folded into extending
+ // loads, whether they are legal or not, and then we end up here
+ // without any support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an
+ // explicit zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0),
+ Chain, Ptr, SrcVT,
+ LD->getMemOperand());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Value.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
+ ReplacedNode(Node);
+ }
+}
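The non-power-of-two extending-load path above reassembles, for example, a 24-bit value from two partial loads: a zero-extending load of the low 16 bits, an extending load of the high 8 bits at offset +2, then a shift and an OR (little-endian case shown). A plain-C++ sketch with an assumed in-memory value:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t Mem[3] = {0xEF, 0xCD, 0xAB}; // i24 0xABCDEF, little-endian
      uint32_t Lo = (uint32_t)Mem[0] | ((uint32_t)Mem[1] << 8); // ZEXTLOAD:i16
      uint32_t Hi = (uint32_t)Mem[2];                           // EXTLOAD:i8 at +2
      uint32_t Value = Lo | (Hi << 16);                         // shl + or
      std::printf("reloaded i24 = 0x%06X\n", (unsigned)Value);  // 0xABCDEF
      // Any final sign/zero extension of the full 24-bit value is applied
      // on top of this by the surrounding legalization code.
    }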
+
+/// Return a legal replacement for the given operation, with all legal operands.
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+
+ // Allow illegal target nodes and illegal registers.
+ if (Node->getOpcode() == ISD::TargetConstant ||
+ Node->getOpcode() == ISD::Register)
+ return;
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
+ "Unexpected illegal type!");
+
+ for (const SDValue &Op : Node->op_values())
+ assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal ||
+ Op.getOpcode() == ISD::TargetConstant ||
+ Op.getOpcode() == ISD::Register) &&
+ "Unexpected illegal type!");
+#endif
+
+ // Figure out the correct action; the way to query this varies by opcode
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SET_FPENV:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::STRICT_FP_TO_FP16:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::ATOMIC_STORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ case ISD::SELECT_CC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ case ISD::SETCC:
+ case ISD::SETCCCARRY:
+ case ISD::VP_SETCC:
+ case ISD::BR_CC: {
+ unsigned Opc = Node->getOpcode();
+ unsigned CCOperand = Opc == ISD::SELECT_CC ? 4
+ : Opc == ISD::STRICT_FSETCC ? 3
+ : Opc == ISD::STRICT_FSETCCS ? 3
+ : Opc == ISD::SETCCCARRY ? 3
+ : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2
+ : 1;
+ unsigned CompareOperand = Opc == ISD::BR_CC ? 2
+ : Opc == ISD::STRICT_FSETCC ? 1
+ : Opc == ISD::STRICT_FSETCCS ? 1
+ : 0;
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::GET_ROUNDING:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_DWARF_CFA:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::ADDROFRETURNADDR:
+ case ISD::SPONENTRY:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::READCYCLECOUNTER:
+ // READCYCLECOUNTER returns an i64, even if type legalization might have
+ // expanded that to several smaller types.
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+ break;
+ case ISD::READ_REGISTER:
+ case ISD::WRITE_REGISTER:
+ // A named register is legal in the DAG, but blocked by register name
+ // selection if not implemented by the target (to choose the correct
+ // register). They'll be converted to Copy(To/From)Reg.
+ Action = TargetLowering::Legal;
+ break;
+ case ISD::UBSANTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::UBSANTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::DEBUGTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::MSCATTER:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::MSTORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VP_SCATTER:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VP_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStoreSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStridedStoreSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ case ISD::IS_FPCLASS:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(0).getValueType());
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(1).getValueType());
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TLI.getCustomOperationAction(*Node);
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SDNode *NewNode = Node;
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!Op1.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1);
+ // The getShiftAmountOperand() may create a new operand node or
+ // return the existing one. If a new operand is created we need
+ // to update the parent node.
+ // Do not try to legalize SAO here! It will be automatically legalized
+ // in the next round.
+ if (SAO != Op1)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO);
+ }
+ }
+ break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS: {
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ if (!Op2.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2);
+ // The getShiftAmountOperand() may create a new operand node or
+ // return the existing one. If a new operand is created we need
+ // to update the parent node.
+ if (SAO != Op2)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
+ }
+ break;
+ }
+ }
+
+ if (NewNode != Node) {
+ ReplaceNode(Node, NewNode);
+ Node = NewNode;
+ }
+ switch (Action) {
+ case TargetLowering::Legal:
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
+ return;
+ case TargetLowering::Custom:
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ if (!(Res.getNode() != Node || Res.getResNo() != 0))
+ return;
+
+ if (Node->getNumValues() == 1) {
+ // Verify the new types match the original. Glue is waived because
+ // ISD::ADDC can be legalized by replacing Glue with an integer type.
+ assert((Res.getValueType() == Node->getValueType(0) ||
+ Node->getValueType(0) == MVT::Glue) &&
+ "Type mismatch for custom legalized operation");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
+ // We can just directly replace this node with the lowered value.
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ // Verify the new types match the original. Glue is waived because
+ // ISD::ADDC can be legalized by replacing Glue with an integer type.
+ assert((Res->getValueType(i) == Node->getValueType(i) ||
+ Node->getValueType(i) == MVT::Glue) &&
+ "Type mismatch for custom legalized operation");
+ ResultVals.push_back(Res.getValue(i));
+ }
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
+ ReplaceNode(Node, ResultVals.data());
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
+ [[fallthrough]];
+ case TargetLowering::Expand:
+ if (ExpandNode(Node))
+ return;
+ [[fallthrough]];
+ case TargetLowering::LibCall:
+ ConvertNodeToLibcall(Node);
+ return;
+ case TargetLowering::Promote:
+ PromoteNode(Node);
+ return;
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+ Node->dump( &DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to legalize this operator!");
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ break;
+ case ISD::LOAD:
+ return LegalizeLoadOps(Node);
+ case ISD::STORE:
+ return LegalizeStoreOps(Node);
+ }
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Before we generate a new store to a temporary stack slot, see if there is
+ // already one that we can use. There often is because when we scalarize
+ // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole
+ // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
+ // the vector. If all are expanded here, we don't want one store per vector
+ // element.
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Op.getNode());
+ Worklist.push_back(Idx.getNode());
+ SDValue StackPtr, Ch;
+ for (SDNode *User : Vec.getNode()->uses()) {
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
+ if (ST->isIndexed() || ST->isTruncatingStore() ||
+ ST->getValue() != Vec)
+ continue;
+
+ // Make sure that nothing else could have stored into the destination of
+ // this store.
+ if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
+ continue;
+
+ // If the index is dependent on the store we will introduce a cycle when
+ // creating the load (the load uses the index, and by replacing the chain
+ // we will make the index dependent on the load). Also, the store might be
+ // dependent on the extractelement and introduce a cycle when creating
+ // the load.
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
+ ST->hasPredecessor(Op.getNode()))
+ continue;
+
+ StackPtr = ST->getBasePtr();
+ Ch = SDValue(ST, 0);
+ break;
+ }
+ }
+
+ EVT VecVT = Vec.getValueType();
+
+ if (!Ch.getNode()) {
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ StackPtr = DAG.CreateStackTemporary(VecVT);
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo());
+ }
+
+ SDValue NewLoad;
+ Align ElementAlignment =
+ std::min(cast<StoreSDNode>(Ch)->getAlign(),
+ DAG.getDataLayout().getPrefTypeAlign(
+ Op.getValueType().getTypeForEVT(*DAG.getContext())));
+
+ if (Op.getValueType().isVector()) {
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT,
+ Op.getValueType(), Idx);
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), ElementAlignment);
+ } else {
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(), VecVT.getVectorElementType(),
+ ElementAlignment);
+ }
+
+ // Replace the chain going out of the store, by the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+ // We introduced a cycle though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+ NewLoad->op_end());
+ NewLoadOperands[0] = Ch;
+ NewLoad =
+ SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+ return NewLoad;
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ SDLoc dl(Op);
+
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ EVT VecVT = Vec.getValueType();
+ EVT SubVecVT = Part.getValueType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+ // Then store the inserted part.
+ SDValue SubStackPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
+
+ // Store the subvector.
+ Ch = DAG.getStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ assert((Node->getOpcode() == ISD::BUILD_VECTOR ||
+ Node->getOpcode() == ISD::CONCAT_VECTORS) &&
+ "Unexpected opcode!");
+
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each operand into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT MemVT = isa<BuildVectorSDNode>(Node) ? VT.getVectorElementType()
+ : Node->getOperand(0).getValueType();
+ SDLoc dl(Node);
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
+ assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
+
+ // If the destination vector element type of a BUILD_VECTOR is narrower than
+ // the source element type, only store the bits necessary.
+ bool Truncate = isa<BuildVectorSDNode>(Node) &&
+ MemVT.bitsLT(Node->getOperand(0).getValueType());
+
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).isUndef()) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl);
+
+ if (Truncate)
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset), MemVT));
+ else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, PtrInfo.getWithOffset(Offset)));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
+}
+
+ /// Bitcast a floating-point value to an integer value. Only bitcast the part
+ /// containing the sign bit if the target has no integer type capable of
+ /// holding all bits of the floating-point value.
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+ const SDLoc &DL,
+ SDValue Value) const {
+ EVT FloatVT = Value.getValueType();
+ unsigned NumBits = FloatVT.getScalarSizeInBits();
+ State.FloatVT = FloatVT;
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+ // Convert to an integer of the same size.
+ if (TLI.isTypeLegal(IVT)) {
+ State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+ State.SignMask = APInt::getSignMask(NumBits);
+ State.SignBit = NumBits - 1;
+ return;
+ }
+
+ auto &DataLayout = DAG.getDataLayout();
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getRegisterType(MVT::i8);
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ // Then store the float to it.
+ State.FloatPtr = StackPtr;
+ MachineFunction &MF = DAG.getMachineFunction();
+ State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+ State.FloatPointerInfo);
+
+ SDValue IntPtr;
+ if (DataLayout.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ IntPtr = StackPtr;
+ State.IntPointerInfo = State.FloatPointerInfo;
+ } else {
+ // Advance the pointer so that the loaded byte will contain the sign bit.
+ unsigned ByteOffset = (NumBits / 8) - 1;
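+ // For example, for a little-endian f64 on a target with no legal i64 this is
+ // byte 7, whose bit 7 holds the IEEE sign bit, matching the SignBit and
+ // SignMask values set below.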
+ IntPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL);
+ State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
+ ByteOffset);
+ }
+
+ State.IntPtr = IntPtr;
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr,
+ State.IntPointerInfo, MVT::i8);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getScalarSizeInBits(), 7);
+ State.SignBit = 7;
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floating-point type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+ const SDLoc &DL,
+ SDValue NewIntValue) const {
+ if (!State.Chain)
+ return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+
+ // Override the part containing the sign bit in the value stored on the stack.
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
+ State.IntPointerInfo, MVT::i8);
+ return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+ State.FloatPointerInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Mag = Node->getOperand(0);
+ SDValue Sign = Node->getOperand(1);
+
+ // Get sign bit into an integer value.
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Sign);
+
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+ SignMask);
+
+ // If FABS is legal, transform FCOPYSIGN(x, y) => sign(y) ? -FABS(x) : FABS(x)
+ EVT FloatVT = Mag.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+ SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+ SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+ SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+ DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+ return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+ }
+
+ // Transform Mag value to integer, and clear the sign bit.
+ FloatSignAsInt MagAsInt;
+ getSignAsIntValue(MagAsInt, DL, Mag);
+ EVT MagVT = MagAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue,
+ ClearSignMask);
+
+ // Get the sign bit at the right position for MagAsInt.
+ int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
+ EVT ShiftVT = IntVT;
+ if (SignBit.getScalarValueSizeInBits() <
+ ClearedSign.getScalarValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+ ShiftVT = MagVT;
+ }
+ if (ShiftAmount > 0) {
+ SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst);
+ } else if (ShiftAmount < 0) {
+ SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
+ }
+ if (SignBit.getScalarValueSizeInBits() >
+ ClearedSign.getScalarValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
+ }
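+
+ // For example, for an FCOPYSIGN with an f64 magnitude and an f32 sign (both
+ // with legal integer counterparts), SignAsInt.SignBit is 31 and
+ // MagAsInt.SignBit is 63: the sign bit is zero-extended to i64 and shifted
+ // left by 32 so that the OR below places it in bit 63 of the magnitude.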
+
+ // Store the part with the modified sign and convert back to float.
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
+ return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const {
+ // Get the sign bit as an integer.
+ SDLoc DL(Node);
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Node->getOperand(0));
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+
+ // Flip the sign.
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignFlip =
+ DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask);
+
+ // Convert back to float.
+ return modifySignAsInt(SignAsInt, DL, SignFlip);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Value = Node->getOperand(0);
+
+ // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+ EVT FloatVT = Value.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+ }
+
+ // Transform value to integer, clear the sign bit and transform back.
+ FloatSignAsInt ValueAsInt;
+ getSignAsIntValue(ValueAsInt, DL, Value);
+ EVT IntVT = ValueAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+ ClearSignMask);
+ return modifySignAsInt(ValueAsInt, DL, ClearedSign);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue();
+ const TargetFrameLowering *TFL = DAG.getSubtarget().getFrameLowering();
+ unsigned Opc =
+ TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
+ ISD::ADD : ISD::SUB;
+
+ Align StackAlign = TFL->getStackAlign();
+ Tmp1 = DAG.getNode(Opc, dl, VT, SP, Size); // Value
+ if (Alignment > StackAlign)
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-Alignment.value(), dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl) {
+ return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
+}
+
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl,
+ SDValue Chain) {
+ EVT SrcVT = SrcOp.getValueType();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
+
+ // Don't convert with stack if the load/store is expensive.
+ if ((SrcVT.bitsGT(SlotVT) &&
+ !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
+ (SlotVT.bitsLT(DestVT) &&
+ !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
+ return SDValue();
+
+ // Create the stack frame object.
+ Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign(
+ SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT.getStoreSize(), SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // larger than SlotVT.
+ SDValue Store;
+
+ if (SrcVT.bitsGT(SlotVT))
+ Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
+ SlotVT, SrcAlign);
+ else {
+ assert(SrcVT.bitsEq(SlotVT) && "Invalid store");
+ Store = DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotVT.bitsEq(DestVT))
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
+
+ assert(SlotVT.bitsLT(DestVT) && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
+ DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ SDLoc dl(Node);
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(
+ DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(
+ Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+}
+
+static bool
+ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue &Res) {
+ unsigned NumElems = Node->getNumOperands();
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Try to group the scalars into pairs, shuffle the pairs together, then
+ // shuffle the pairs of pairs together, etc. until the vector has
+ // been built. This will work only if all of the necessary shuffle masks
+ // are legal.
+
+ // We do this in two phases; first to check the legality of the shuffles,
+ // and next, assuming that all shuffles are legal, to create the new nodes.
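+ //
+ // For example, with eight defined scalars the second phase builds eight
+ // single-element vectors via SCALAR_TO_VECTOR, concatenates them pairwise
+ // with shuffles into four and then two intermediate vectors, and finally
+ // emits one last shuffle that scatters the elements into their positions.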
+ for (int Phase = 0; Phase < 2; ++Phase) {
+ SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals,
+ NewIntermedVals;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+
+ SDValue Vec;
+ if (Phase)
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V);
+ IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i)));
+ }
+
+ while (IntermedVals.size() > 2) {
+ NewIntermedVals.clear();
+ for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) {
+ // This vector and the next vector are shuffled together (simply to
+ // append the one to the other).
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+
+ SmallVector<int, 16> FinalIndices;
+ FinalIndices.reserve(IntermedVals[i].second.size() +
+ IntermedVals[i+1].second.size());
+
+ int k = 0;
+ for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = j;
+ FinalIndices.push_back(IntermedVals[i].second[j]);
+ }
+ for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = NumElems + j;
+ FinalIndices.push_back(IntermedVals[i+1].second[j]);
+ }
+
+ SDValue Shuffle;
+ if (Phase)
+ Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first,
+ IntermedVals[i+1].first,
+ ShuffleVec);
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ NewIntermedVals.push_back(
+ std::make_pair(Shuffle, std::move(FinalIndices)));
+ }
+
+ // If we had an odd number of defined values, then append the last
+ // element to the array of new vectors.
+ if ((IntermedVals.size() & 1) != 0)
+ NewIntermedVals.push_back(IntermedVals.back());
+
+ IntermedVals.swap(NewIntermedVals);
+ }
+
+ assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 &&
+ "Invalid number of intermediate vectors");
+ SDValue Vec1 = IntermedVals[0].first;
+ SDValue Vec2;
+ if (IntermedVals.size() > 1)
+ Vec2 = IntermedVals[1].first;
+ else if (Phase)
+ Vec2 = DAG.getUNDEF(VT);
+
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[0].second[i]] = i;
+ for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[1].second[i]] = NumElems + i;
+
+ if (Phase)
+ Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ }
+
+ return true;
+}
+
+/// Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ SmallVector<Constant*, 16> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).isUndef());
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
+ return DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ }
+
+ SmallSet<SDValue, 16> DefinedValues;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Node->getOperand(i).isUndef())
+ continue;
+ DefinedValues.insert(Node->getOperand(i));
+ }
+
+ if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) {
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
+ }
+ } else {
+ SDValue Res;
+ if (ExpandBVWithShuffles(Node, DAG, TLI, Res))
+ return Res;
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue SplatVal = Node->getOperand(0);
+
+ return DAG.getSplatBuildVector(VT, DL, SplatVal);
+}
+
+ // Expand a node into a call to a libcall, returning the value as the first
+ // result and the chain as the second. If the result value does not fit into a
+ // register, return the lo part and pass the hi part back through a by-ref
+ // argument. If it does fit into a single register, return the result and
+ // leave the Hi part unset.
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ TargetLowering::ArgListTy &&Args,
+ bool isSigned) {
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check if it's in the right position and that the return types
+ // match.
+ SDValue TCChain = InChain;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Node, TCChain) &&
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setTailCall(isTailCall)
+ .setSExtResult(signExtend)
+ .setZExtResult(!signExtend)
+ .setIsPostTypeLegalization(true);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode()) {
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
+ // It's a tailcall, return the chain (which is the DAG root).
+ return {DAG.getRoot(), DAG.getRoot()};
+ }
+
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
+ return CallInfo;
+}
+
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
+ Entry.IsZExt = !Entry.IsSExt;
+ Args.push_back(Entry);
+ }
+
+ return ExpandLibCall(LC, Node, std::move(Args), isSigned);
+}
+
+void SelectionDAGLegalize::ExpandFrexpLibCall(
+ SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ExpVT = Node->getValueType(1);
+
+ SDValue FPOp = Node->getOperand(0);
+
+ EVT ArgVT = FPOp.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListEntry FPArgEntry;
+ FPArgEntry.Node = FPOp;
+ FPArgEntry.Ty = ArgTy;
+
+ SDValue StackSlot = DAG.CreateStackTemporary(ExpVT);
+ TargetLowering::ArgListEntry PtrArgEntry;
+ PtrArgEntry.Node = StackSlot;
+ PtrArgEntry.Ty = PointerType::get(*DAG.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace());
+
+ TargetLowering::ArgListTy Args = {FPArgEntry, PtrArgEntry};
+
+ RTLIB::Libcall LC = RTLIB::getFREXP(VT);
+ auto [Call, Chain] = ExpandLibCall(LC, Node, std::move(Args), false);
+
+ // FIXME: Get type of int for libcall declaration and cast
+
+ int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(ExpVT, dl, Chain, StackSlot, PtrInfo);
+ SDValue OutputChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ LoadExp.getValue(1), DAG.getRoot());
+ DAG.setRoot(OutputChain);
+
+ Results.push_back(Call);
+ Results.push_back(LoadExp);
+}
+
+void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<SDValue> &Results) {
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ llvm_unreachable("Can't create an unknown libcall!");
+
+ if (Node->isStrictFPOpcode()) {
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(drop_begin(Node->ops()));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+ Results.push_back(Tmp);
+ }
+}
+
+/// Expand the node to a libcall based on the result type.
+void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC = RTLIB::getFPLibCall(Node->getSimpleValueType(0),
+ Call_F32, Call_F64, Call_F80,
+ Call_F128, Call_PPCF128);
+ ExpandFPLibCall(Node, LC, Results);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned).first;
+}
+
+ /// Expand the node to a libcall based on the first argument type (for
+ /// instance lround and its variants).
+void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType();
+ RTLIB::Libcall LC = RTLIB::getFPLibCall(InVT.getSimpleVT(),
+ Call_F32, Call_F64, Call_F80,
+ Call_F128, Call_PPCF128);
+ ExpandFPLibCall(Node, LC, Results);
+}
+
+/// Issue libcalls to __{u}divmod to compute div / rem pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add a dependence on the previous
+ // call.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address used to return the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
+ Args.push_back(Entry);
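+ // The emitted call thus has the usual compiler-rt divmod shape, e.g. for i32
+ // roughly quot = __divmodsi4(a, b, &rem) (the exact libcall name is
+ // target-dependent), with the remainder read back from the stack slot below.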
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDLoc dl(Node);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem =
+ DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo());
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
+/// Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Only issue sincos libcall if both sin and cos are needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (const SDNode *User : Op0.getNode()->uses()) {
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// Issue libcalls to sincos to compute sin / cos pairs.
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add a dependence on the previous
+ // call.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ // Pass the address used to return the sin result.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ // Also pass the address used to return the cos result.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDLoc dl(Node);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
+ TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo()));
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
+}
+
+SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue X = Node->getOperand(0);
+ SDValue N = Node->getOperand(1);
+ EVT ExpVT = N.getValueType();
+ EVT AsIntVT = VT.changeTypeToInteger();
+ if (AsIntVT == EVT()) // TODO: How to handle f80?
+ return SDValue();
+
+ if (Node->getOpcode() == ISD::STRICT_FLDEXP) // TODO
+ return SDValue();
+
+ SDNodeFlags NSW;
+ NSW.setNoSignedWrap(true);
+ SDNodeFlags NUW_NSW;
+ NUW_NSW.setNoUnsignedWrap(true);
+ NUW_NSW.setNoSignedWrap(true);
+
+ EVT SetCCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ExpVT);
+ const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);
+
+ const APFloat::ExponentType MaxExpVal = APFloat::semanticsMaxExponent(FltSem);
+ const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
+ const int Precision = APFloat::semanticsPrecision(FltSem);
+
+ const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT);
+ const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+
+ const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT);
+
+ const APFloat One(FltSem, "1.0");
+ APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven);
+
+ // Offset by precision to avoid denormal range.
+ APFloat ScaleDownK =
+ scalbn(One, MinExpVal + Precision, APFloat::rmNearestTiesToEven);
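+ // For IEEE single precision, for instance, MaxExpVal = 127, MinExpVal = -126
+ // and Precision = 24, so ScaleUpK = 0x1p+127 and ScaleDownK = 0x1p-102.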
+
+ // TODO: Should really introduce control flow and use a block for the >
+ // MaxExp, < MinExp cases
+
+ // First, handle exponents Exp > MaxExp and scale down.
+ SDValue NGtMaxExp = DAG.getSetCC(dl, SetCCVT, N, MaxExp, ISD::SETGT);
+
+ SDValue DecN0 = DAG.getNode(ISD::SUB, dl, ExpVT, N, MaxExp, NSW);
+ SDValue ClampMaxVal = DAG.getConstant(3 * MaxExpVal, dl, ExpVT);
+ SDValue ClampN_Big = DAG.getNode(ISD::SMIN, dl, ExpVT, N, ClampMaxVal);
+ SDValue DecN1 =
+ DAG.getNode(ISD::SUB, dl, ExpVT, ClampN_Big, DoubleMaxExp, NSW);
+
+ SDValue ScaleUpTwice =
+ DAG.getSetCC(dl, SetCCVT, N, DoubleMaxExp, ISD::SETUGT);
+
+ const SDValue ScaleUpVal = DAG.getConstantFP(ScaleUpK, dl, VT);
+ SDValue ScaleUp0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleUpVal);
+ SDValue ScaleUp1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleUp0, ScaleUpVal);
+
+ SDValue SelectN_Big =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleUpTwice, DecN1, DecN0);
+ SDValue SelectX_Big =
+ DAG.getNode(ISD::SELECT, dl, VT, ScaleUpTwice, ScaleUp1, ScaleUp0);
+
+ // Now handle exponents Exp < MinExp
+ SDValue NLtMinExp = DAG.getSetCC(dl, SetCCVT, N, MinExp, ISD::SETLT);
+
+ SDValue Increment0 = DAG.getConstant(-(MinExpVal + Precision), dl, ExpVT);
+ SDValue Increment1 = DAG.getConstant(-2 * (MinExpVal + Precision), dl, ExpVT);
+
+ SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW);
+
+ SDValue ClampMinVal =
+ DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT);
+ SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal);
+ SDValue IncN1 =
+ DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW);
+
+ const SDValue ScaleDownVal = DAG.getConstantFP(ScaleDownK, dl, VT);
+ SDValue ScaleDown0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleDownVal);
+ SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal);
+
+ SDValue ScaleDownTwice = DAG.getSetCC(
+ dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT),
+ ISD::SETULT);
+
+ SDValue SelectN_Small =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0);
+ SDValue SelectX_Small =
+ DAG.getNode(ISD::SELECT, dl, VT, ScaleDownTwice, ScaleDown1, ScaleDown0);
+
+ // Now combine the two out of range exponent handling cases with the base
+ // case.
+ SDValue NewX = DAG.getNode(
+ ISD::SELECT, dl, VT, NGtMaxExp, SelectX_Big,
+ DAG.getNode(ISD::SELECT, dl, VT, NLtMinExp, SelectX_Small, X));
+
+ SDValue NewN = DAG.getNode(
+ ISD::SELECT, dl, ExpVT, NGtMaxExp, SelectN_Big,
+ DAG.getNode(ISD::SELECT, dl, ExpVT, NLtMinExp, SelectN_Small, N));
+
+ SDValue BiasedN = DAG.getNode(ISD::ADD, dl, ExpVT, NewN, MaxExp, NSW);
+
+ SDValue ExponentShiftAmt =
+ DAG.getShiftAmountConstant(Precision - 1, ExpVT, dl);
+ SDValue CastExpToValTy = DAG.getZExtOrTrunc(BiasedN, dl, AsIntVT);
+
+ SDValue AsInt = DAG.getNode(ISD::SHL, dl, AsIntVT, CastExpToValTy,
+ ExponentShiftAmt, NUW_NSW);
+ SDValue AsFP = DAG.getNode(ISD::BITCAST, dl, VT, AsInt);
+ return DAG.getNode(ISD::FMUL, dl, VT, NewX, AsFP);
+}
+
+SDValue SelectionDAGLegalize::expandFrexp(SDNode *Node) const {
+ SDLoc dl(Node);
+ SDValue Val = Node->getOperand(0);
+ EVT VT = Val.getValueType();
+ EVT ExpVT = Node->getValueType(1);
+ EVT AsIntVT = VT.changeTypeToInteger();
+ if (AsIntVT == EVT()) // TODO: How to handle f80?
+ return SDValue();
+
+ const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);
+ const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
+ const unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ const unsigned BitSize = VT.getScalarSizeInBits();
+
+ // TODO: Could introduce control flow and skip over the denormal handling.
+
+ // scale_up = fmul value, scalbn(1.0, precision + 1)
+ // extracted_exp = (bitcast value to uint) >> precision - 1
+ // biased_exp = extracted_exp + min_exp
+ // extracted_fract = (bitcast value to uint) & (fract_mask | sign_mask)
+ //
+ // is_denormal = val < smallest_normalized
+ // computed_fract = is_denormal ? scale_up : extracted_fract
+ // computed_exp = is_denormal ? biased_exp + (-precision - 1) : biased_exp
+ //
+ // result_0 = (!isfinite(val) || iszero(val)) ? val : computed_fract
+ // result_1 = (!isfinite(val) || iszero(val)) ? 0 : computed_exp
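+ //
+ // For IEEE single precision, for instance, Precision = 24 and MinExpVal is
+ // -126, so a denormal input is first multiplied by 0x1p+25 and its reported
+ // exponent is corrected by -25; e.g. frexp(8.0f) yields 0.5f and 4.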
+
+ SDValue NegSmallestNormalizedInt = DAG.getConstant(
+ APFloat::getSmallestNormalized(FltSem, true).bitcastToAPInt(), dl,
+ AsIntVT);
+
+ SDValue SmallestNormalizedInt = DAG.getConstant(
+ APFloat::getSmallestNormalized(FltSem, false).bitcastToAPInt(), dl,
+ AsIntVT);
+
+ // Mask that selects only the exponent bits.
+ SDValue ExpMask =
+ DAG.getConstant(APFloat::getInf(FltSem).bitcastToAPInt(), dl, AsIntVT);
+
+ // Mask out the exponent part of the value.
+ //
+ // e.g., for f32 FractSignMaskVal = 0x807fffff
+ APInt FractSignMaskVal = APInt::getBitsSet(BitSize, 0, Precision - 1);
+ FractSignMaskVal.setBit(BitSize - 1); // Set the sign bit
+
+ APInt SignMaskVal = APInt::getSignedMaxValue(BitSize);
+ SDValue SignMask = DAG.getConstant(SignMaskVal, dl, AsIntVT);
+
+ SDValue FractSignMask = DAG.getConstant(FractSignMaskVal, dl, AsIntVT);
+
+ const APFloat One(FltSem, "1.0");
+ // Scale a possible denormal input.
+ // e.g., for f64, 0x1p+54
+ APFloat ScaleUpKVal =
+ scalbn(One, Precision + 1, APFloat::rmNearestTiesToEven);
+
+ SDValue ScaleUpK = DAG.getConstantFP(ScaleUpKVal, dl, VT);
+ SDValue ScaleUp = DAG.getNode(ISD::FMUL, dl, VT, Val, ScaleUpK);
+
+ EVT SetCCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue AsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, Val);
+
+ SDValue Abs = DAG.getNode(ISD::AND, dl, AsIntVT, AsInt, SignMask);
+
+ SDValue AddNegSmallestNormal =
+ DAG.getNode(ISD::ADD, dl, AsIntVT, Abs, NegSmallestNormalizedInt);
+ SDValue DenormOrZero = DAG.getSetCC(dl, SetCCVT, AddNegSmallestNormal,
+ NegSmallestNormalizedInt, ISD::SETULE);
+
+ SDValue IsDenormal =
+ DAG.getSetCC(dl, SetCCVT, Abs, SmallestNormalizedInt, ISD::SETULT);
+
+ SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+ SDValue Zero = DAG.getConstant(0, dl, ExpVT);
+
+ SDValue ScaledAsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, ScaleUp);
+ SDValue ScaledSelect =
+ DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ScaledAsInt, AsInt);
+
+ SDValue ExpMaskScaled =
+ DAG.getNode(ISD::AND, dl, AsIntVT, ScaledAsInt, ExpMask);
+
+ SDValue ScaledValue =
+ DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ExpMaskScaled, Abs);
+
+ // Extract the exponent bits.
+ SDValue ExponentShiftAmt =
+ DAG.getShiftAmountConstant(Precision - 1, AsIntVT, dl);
+ SDValue ShiftedExp =
+ DAG.getNode(ISD::SRL, dl, AsIntVT, ScaledValue, ExponentShiftAmt);
+ SDValue Exp = DAG.getSExtOrTrunc(ShiftedExp, dl, ExpVT);
+
+ SDValue NormalBiasedExp = DAG.getNode(ISD::ADD, dl, ExpVT, Exp, MinExp);
+ SDValue DenormalOffset = DAG.getConstant(-Precision - 1, dl, ExpVT);
+ SDValue DenormalExpBias =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, IsDenormal, DenormalOffset, Zero);
+
+ SDValue MaskedFractAsInt =
+ DAG.getNode(ISD::AND, dl, AsIntVT, ScaledSelect, FractSignMask);
+ const APFloat Half(FltSem, "0.5");
+ SDValue FPHalf = DAG.getConstant(Half.bitcastToAPInt(), dl, AsIntVT);
+ SDValue Or = DAG.getNode(ISD::OR, dl, AsIntVT, MaskedFractAsInt, FPHalf);
+ SDValue MaskedFract = DAG.getNode(ISD::BITCAST, dl, VT, Or);
+
+ SDValue ComputedExp =
+ DAG.getNode(ISD::ADD, dl, ExpVT, NormalBiasedExp, DenormalExpBias);
+
+ SDValue Result0 =
+ DAG.getNode(ISD::SELECT, dl, VT, DenormOrZero, Val, MaskedFract);
+
+ SDValue Result1 =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, DenormOrZero, Zero, ComputedExp);
+
+ return DAG.getMergeValues({Result0, Result1}, dl);
+}
+
+/// This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
+ SDValue &Chain) {
+ bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP);
+ EVT DestVT = Node->getValueType(0);
+ SDLoc dl(Node);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Op0 = Node->getOperand(OpNo);
+ EVT SrcVT = Op0.getValueType();
+
+ // TODO: Should any fast-math-flags be set for the created nodes?
+ LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64) &&
+ (DestVT.bitsLE(MVT::f64) ||
+ TLI.isOperationLegal(Node->isStrictFPOpcode() ? ISD::STRICT_FP_EXTEND
+ : ISD::FP_EXTEND,
+ DestVT))) {
+ LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+ "expansion\n");
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ SDValue Lo = Op0;
+ // If signed, map into the unsigned space.
+ if (isSigned) {
+ // Invert sign bit (signed to unsigned mapping).
+ Lo = DAG.getNode(ISD::XOR, dl, MVT::i32, Lo,
+ DAG.getConstant(0x80000000u, dl, MVT::i32));
+ }
+ // Initial hi portion of constructed double.
+ SDValue Hi = DAG.getConstant(0x43300000u, dl, MVT::i32);
+
+ // If this is a big-endian target, swap the lo and hi data.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue MemChain = DAG.getEntryNode();
+
+ // Store the lo of the constructed double.
+ SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
+ MachinePointerInfo());
+ // Store the hi of the constructed double.
+ SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
+ SDValue Store2 =
+ DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
+ MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+
+ // Load the constructed double.
+ SDValue Load =
+ DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo());
+ // FP constant to bias-correct the final result.
+ SDValue Bias = DAG.getConstantFP(
+ isSigned ? llvm::bit_cast<double>(0x4330000080000000ULL)
+ : llvm::bit_cast<double>(0x4330000000000000ULL),
+ dl, MVT::f64);
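+ // Why this works: with high word 0x43300000 and low word Lo, the buffer holds
+ // the double 2^52 + Lo exactly (any 32-bit value fits in the 52-bit
+ // significand), so subtracting 2^52 (or 2^52 + 2^31 in the sign-flipped
+ // signed case) leaves precisely the original integer value.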
+ // Subtract the bias and get the final result.
+ SDValue Sub;
+ SDValue Result;
+ if (Node->isStrictFPOpcode()) {
+ Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+ {Node->getOperand(0), Load, Bias});
+ Chain = Sub.getValue(1);
+ if (DestVT != Sub.getValueType()) {
+ std::pair<SDValue, SDValue> ResultPair;
+ ResultPair =
+ DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT);
+ Result = ResultPair.first;
+ Chain = ResultPair.second;
+ }
+ else
+ Result = Sub;
+ } else {
+ Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ }
+ return Result;
+ }
+
+ if (isSigned)
+ return SDValue();
+
+ // TODO: Generalize this for use with other types.
+ if (((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) ||
+ (SrcVT == MVT::i64 && DestVT == MVT::f64)) {
+ LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32/f64\n");
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundisf in compiler_rt. That method
+ // should be valid for i32->f32 as well.
+
+ // More generally this transform should be valid if there are 3 more bits
+ // in the integer type than the significand. Rounding uses the first bit
+ // after the width of the significand and the OR of all bits after that. So
+ // we need to be able to OR the shifted out bit into one of the bits that
+ // participate in the OR.
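+ //
+ // Concretely, for an i64 input with the sign bit set, the code below keeps
+ // the low bit, shifts the value right by one, ORs the two together, performs
+ // a signed conversion of that halved value, and doubles the result with an
+ // fadd; inputs with a clear sign bit take the plain SINT_TO_FP path.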
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT = getSetCCResultType(SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+
+ EVT ShiftVT = TLI.getShiftAmountTy(SrcVT, DAG.getDataLayout());
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue Slow, Fast;
+ if (Node->isStrictFPOpcode()) {
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Op0);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Fast.getValue(1), Fast, Fast });
+ Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
+ Fast->setFlags(Flags);
+ Flags.setNoFPExcept(true);
+ Slow->setFlags(Flags);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DestVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+ }
+
+ return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast);
+ }
+
+ // Don't expand it if there isn't cheap fadd.
+ if (!TLI.isOperationLegalOrCustom(
+ Node->isStrictFPOpcode() ? ISD::STRICT_FADD : ISD::FADD, DestVT))
+ return SDValue();
+
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DestVT. Check that the mantissa
+ // size of DestVT is at least the number of bits in SrcVT minus 1.
+ assert(APFloat::semanticsPrecision(DAG.EVTToAPFloatSemantics(DestVT)) >=
+ SrcVT.getSizeInBits() - 1 &&
+ "Cannot perform lossless SINT_TO_FP!");
+
+ SDValue Tmp1;
+ if (Node->isStrictFPOpcode()) {
+ Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), Op0 });
+ } else
+ Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
+ DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0, dl),
+ Four = DAG.getIntPtrConstant(4, dl);
+ SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
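+ // Each constant above is the IEEE single-precision bit pattern of the
+ // corresponding power of two; e.g. 2^32 has exponent field 32 + 127 = 0x9F
+ // and a zero significand, giving 0x4F800000.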
+ if (DAG.getDataLayout().isLittleEndian())
+ FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx =
+ DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout()));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
+ CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
+ Alignment = commonAlignment(Alignment, 4);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(
+ MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ else {
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
+ }
+
+ if (Node->isStrictFPOpcode()) {
+ SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Tmp1.getValue(1), Tmp1, FudgeInReg });
+ Chain = Result.getValue(1);
+ return Result;
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+ SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
+ unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+ unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (true) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
+ OpToUse = SIntOp;
+ break;
+ }
+ if (IsSigned)
+ continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
+ OpToUse = UIntOp;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ if (IsStrict) {
+ SDValue Res =
+ DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
+ {N->getOperand(0),
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
+ Results.push_back(
+ DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)));
+}
+
+/// This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (true) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ // A larger signed type can hold all unsigned values of the requested type,
+ // so using FP_TO_SINT is valid.
+ OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT;
+ if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
+ break;
+
+ // However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
+ OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT;
+ if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
+ break;
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation;
+ if (IsStrict) {
+ SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other);
+ Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp);
+ } else
+ Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+ Results.push_back(Trunc);
+ if (IsStrict)
+ Results.push_back(Operation.getValue(1));
+}
+
+/// Promote FP_TO_*INT_SAT operation to a larger result type. At this point
+/// the result and operand types are legal and there must be a legal
+/// FP_TO_*INT_SAT operation for a larger result type.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node,
+ const SDLoc &dl) {
+ unsigned Opcode = Node->getOpcode();
+
+ // Scan for the appropriate larger type to use.
+ EVT NewOutTy = Node->getValueType(0);
+ while (true) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy + 1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(Opcode, NewOutTy))
+ break;
+ }
+
+ // Saturation width is determined by the second operand, so we don't have to
+ // perform any fixup and can directly truncate the result.
+ SDValue Result = DAG.getNode(Opcode, dl, NewOutTy, Node->getOperand(0),
+ Node->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result);
+}
+
+ /// Open code the operations for PARITY of the specified value.
+SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ // If CTPOP is legal, use it. Otherwise use shifts and xor.
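+ // The fallback folds the value onto itself, e.g. for a 32-bit value v it
+ // emits v ^= v >> 16; v ^= v >> 8; v ^= v >> 4; v ^= v >> 2; v ^= v >> 1;
+ // the final AND with 1 below then extracts the accumulated parity bit.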
+ SDValue Result;
+ if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ } else {
+ Result = Op;
+ for (unsigned i = Log2_32_Ceil(Sz); i != 0;) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result,
+ DAG.getConstant(1ULL << (--i), dl, ShVT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift);
+ }
+ }
+
+ return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT));
+}
+
+bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "Trying to expand node\n");
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ bool NeedInvert;
+ switch (Node->getOpcode()) {
+ case ISD::ABS:
+ if ((Tmp1 = TLI.expandABS(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ if ((Tmp1 = TLI.expandABD(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::CTPOP:
+ if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BITREVERSE:
+ if ((Tmp1 = TLI.expandBITREVERSE(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ if ((Tmp1 = TLI.expandBSWAP(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::PARITY:
+ Results.push_back(ExpandPARITY(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ break;
+ case ISD::EH_DWARF_CFA: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ CfaArg.getValueType(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ CfaArg.getValueType()),
+ CfaArg);
+ SDValue FA = DAG.getNode(
+ ISD::FRAMEADDR, dl, TLI.getPointerTy(DAG.getDataLayout()),
+ DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(),
+ FA, Offset));
+ break;
+ }
+ case ISD::GET_ROUNDING:
+ Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::READCYCLECOUNTER:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.append(Node->getNumValues() - 1,
+ DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, dl, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
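+ // A cmpxchg of (expected = 0, new = 0) returns the current memory value
+ // without ever changing it: if the value is 0 it stores 0 back, otherwise
+ // it stores nothing, so either way the load result is what we want.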
+ SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
+ // splits out the success value as a comparison. Expanding the resulting
+ // ATOMIC_CMP_SWAP will produce a libcall.
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand());
+
+ SDValue ExtRes = Res;
+ SDValue LHS = Res;
+ SDValue RHS = Node->getOperand(1);
+
+ EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT();
+ EVT OuterType = Node->getValueType(0);
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
+ LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType,
+ Node->getOperand(2), DAG.getValueType(AtomicType));
+ ExtRes = LHS;
+ break;
+ case ISD::ZERO_EXTEND:
+ LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
+ ExtRes = LHS;
+ break;
+ case ISD::ANY_EXTEND:
+ LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+
+ SDValue Success =
+ DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ);
+
+ Results.push_back(ExtRes.getValue(0));
+ Results.push_back(Success);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, dl, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, dl, VT));
+ }
+ break;
+ }
+ case ISD::STRICT_FP_ROUND:
+ // When strict mode is enforced we can't do this expansion because it
+ // does not honor the "strict" properties. Only a libcall is allowed.
+ if (TLI.isStrictFPEnabled())
+ break;
+ // We might as well mutate to FP_ROUND when the FP_ROUND operation is
+ // legal, since it is more efficient than going through the stack.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
+ // Fall back to a stack-based conversion when the FP_ROUND operation
+ // isn't available.
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0),
+ Node->getValueType(0), dl,
+ Node->getOperand(0)))) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+ return true;
+ }
+ break;
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::STRICT_FP_EXTEND:
+ // When strict mode is enforced we can't do this expansion because it
+ // does not honor the "strict" properties. Only a libcall is allowed.
+ if (TLI.isStrictFPEnabled())
+ break;
+ // We might as well mutate to FP_EXTEND when the FP_EXTEND operation is
+ // legal, since it is more efficient than going through the stack.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
+ // Fall back to a stack-based conversion when the FP_EXTEND operation
+ // isn't available.
+ if ((Tmp1 = EmitStackConvert(
+ Node->getOperand(1), Node->getOperand(1).getValueType(),
+ Node->getValueType(0), dl, Node->getOperand(0)))) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+ return true;
+ }
+ break;
+ case ISD::FP_EXTEND:
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl)))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BF16_TO_FP: {
+ // Always expand bf16 to f32 casts, they lower to ext + shift.
+ //
+ // Note that the operand of this node can be bf16, or an integer type if
+ // bf16 is not supported on the target and was softened.
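+ // bf16 is the upper half of an f32 bit pattern, so the expansion below is:
+ // bitcast the bf16 to i16, extend to i32, shift left by 16, and bitcast the
+ // result to f32. For instance, the bf16 pattern 0x3F80 (1.0) becomes the
+ // i32 0x3F800000, which is exactly 1.0f.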
+ SDValue Op = Node->getOperand(0);
+ if (Op.getValueType() == MVT::bf16) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i16, Op));
+ } else {
+ Op = DAG.getAnyExtOrTrunc(Op, dl, MVT::i32);
+ }
+ Op = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, Op,
+ DAG.getConstant(16, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op);
+ // Add fp_extend in case the output is bigger than f32.
+ if (Node->getValueType(0) != MVT::f32)
+ Op = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Op);
+ Results.push_back(Op);
+ break;
+ }
+ case ISD::FP_TO_BF16: {
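+ // The reverse direction: a bf16 bit pattern is the top 16 bits of the
+ // corresponding f32, so this expansion simply shifts the f32 bits right by
+ // 16. Note that this simple expansion truncates the discarded mantissa bits
+ // rather than rounding them.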
+ SDValue Op = Node->getOperand(0);
+ if (Op.getValueType() != MVT::f32)
+ Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ Op = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op),
+ DAG.getConstant(16, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ // The result of this node can be bf16 or an integer type in case bf16 is
+ // not supported on the target and was softened to i16 for storage.
+ if (Node->getValueType(0) == MVT::bf16) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::bf16,
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Op));
+ } else {
+ Op = DAG.getAnyExtOrTrunc(Op, dl, Node->getValueType(0));
+ }
+ Results.push_back(Op);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ // An in-register sign-extend of a boolean is a negation:
+ // 'true' (1) sign-extended is -1.
+ // 'false' (0) sign-extended is 0.
+ // However, we must mask the high bits of the source operand because the
+ // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
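+ // In other words, the expansion below computes 0 - (Op & 1), which is a
+ // small sketch of: (Op & 1) == 1 ? -1 : 0.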
+
+ // TODO: Do this for vectors too?
+ if (ExtraVT.isScalarInteger() && ExtraVT.getSizeInBits() == 1) {
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And);
+ Results.push_back(Neg);
+ break;
+ }
+
+ // NOTE: we could fall back on load/store here too for targets without
+ // SRA. However, it is doubtful that any exist.
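+ // The general expansion is a shift pair; e.g. sign-extending the low 8 bits
+ // of an i32 becomes (X << 24) >>s 24 (arithmetic shift right).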
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned BitsDiff = VT.getScalarSizeInBits() -
+ ExtraVT.getScalarSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
+ Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
+ break;
+ }
+ [[fallthrough]];
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) {
+ Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
+ }
+ break;
+ case ISD::FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::STRICT_FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n");
+ return true;
+ }
+ break;
+ case ISD::FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::STRICT_FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) {
+ // Relink the chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2);
+ // Replace the new UINT result.
+ ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n");
+ return true;
+ }
+ break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Results.push_back(TLI.expandFP_TO_INT_SAT(Node, DAG));
+ break;
+ case ISD::VAARG:
+ Results.push_back(DAG.expandVAArg(Node));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ case ISD::VACOPY:
+ Results.push_back(DAG.expandVACopy(Node));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS:
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not
+ // accept it.
+ if (NewEltVT.bitsLT(EltVT)) {
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
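+ // For example, with factor 2 a v4i64 mask of <1, -1, 3, 0> would become
+ // the v8i32 mask <2, 3, -1, -1, 6, 7, 0, 1>.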
+
+ // Calculate the new VT; its size should equal that of the original VT.
+ EVT NewVT =
+ EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits() / NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor =
+ NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getVectorIdxConstant(Idx, dl)));
+ else
+ Ops.push_back(
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+ DAG.getVectorIdxConstant(Idx - NumElems, dl)));
+ }
+
+ Tmp1 = DAG.getBuildVector(VT, dl, Ops);
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SPLICE: {
+ Results.push_back(TLI.expandVectorSplice(Node, DAG));
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
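+ // EXTRACT_ELEMENT selects the low (index 0) or high (index 1) half of a
+ // double-wide integer; e.g. the high i32 of an i64 X is trunc(X >>u 32).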
+ if (Node->getConstantOperandVal(1)) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(
+ Node->getOperand(0).getValueType(),
+ DAG.getDataLayout())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(0));
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ Results.push_back(ExpandFNEG(Node));
+ break;
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
+ break;
+ case ISD::IS_FPCLASS: {
+ auto CNode = cast<ConstantSDNode>(Node->getOperand(1));
+ auto Test = static_cast<FPClassTest>(CNode->getZExtValue());
+ if (SDValue Expanded =
+ TLI.expandIS_FPCLASS(Node->getValueType(0), Node->getOperand(0),
+ Test, Node->getFlags(), SDLoc(Node), DAG))
+ Results.push_back(Expanded);
+ break;
+ }
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: {
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ ISD::CondCode Pred;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX: Pred = ISD::SETGT; break;
+ case ISD::SMIN: Pred = ISD::SETLT; break;
+ case ISD::UMAX: Pred = ISD::SETUGT; break;
+ case ISD::UMIN: Pred = ISD::SETULT; break;
+ }
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+ // Turn fsin / fcos into an ISD::FSINCOS node if there is a pair of fsin /
+ // fcos which share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ isSinCosLibcallAvailable(Node, TLI))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::FCOS)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: {
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getLDEXP(VT);
+ // Use the LibCall instead; it is very likely faster.
+ // FIXME: Use separate LibCall action.
+ if (TLI.getLibcallName(LC))
+ break;
+
+ if (SDValue Expanded = expandLdexp(Node)) {
+ Results.push_back(Expanded);
+ if (Node->getOpcode() == ISD::STRICT_FLDEXP)
+ Results.push_back(Expanded.getValue(1));
+ }
+
+ break;
+ }
+ case ISD::FFREXP: {
+ RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0));
+ // Use the LibCall instead; it is very likely faster.
+ // FIXME: Use separate LibCall action.
+ if (TLI.getLibcallName(LC))
+ break;
+
+ if (SDValue Expanded = expandFrexp(Node)) {
+ Results.push_back(Expanded);
+ Results.push_back(Expanded.getValue(1));
+ }
+ break;
+ }
+ case ISD::FMAD:
+ llvm_unreachable("Illegal fmad should never be formed");
+
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
+ }
+ break;
+ case ISD::STRICT_FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ {Node->getValueType(0), MVT::Other},
+ {Res.getValue(1), Res});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ }
+ break;
+ case ISD::FP_TO_FP16:
+ LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
+ if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ SDValue Op = Node->getOperand(0);
+ MVT SVT = Op.getSimpleValueType();
+ if ((SVT == MVT::f64 || SVT == MVT::f80) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
+ // Under fastmath, we can expand this node into a fround followed by
+ // a float-half conversion.
+ SDValue FloatVal =
+ DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ Results.push_back(
+ DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal));
+ }
+ }
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
+ DAG.shouldOptForSize()))
+ Results.push_back(ExpandConstantFP(CFP, true));
+ break;
+ }
+ case ISD::Constant: {
+ ConstantSDNode *CP = cast<ConstantSDNode>(Node);
+ Results.push_back(ExpandConstant(CP));
+ break;
+ }
+ case ISD::FSUB: {
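+ // Expand a - b as a + (-b) when both FADD and FNEG are available.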
+ EVT VT = Node->getValueType(0);
+ if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags Flags = Node->getFlags();
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::SUB: {
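+ // Expand a - b as a + (~b + 1), i.e. add the two's-complement negation of b.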
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM:
+ if (TLI.expandREM(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
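+ // MULHU/MULHS produce only the high half of the product, so expand to the
+ // corresponding *MUL_LOHI node and take result value 1 (the high part).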
+ unsigned ExpandOpcode =
+ Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ MVT VT = LHS.getSimpleValueType();
+ unsigned MULHOpcode =
+ Node->getOpcode() == ISD::UMUL_LOHI ? ISD::MULHU : ISD::MULHS;
+
+ if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) {
+ Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS));
+ Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS));
+ break;
+ }
+
+ SmallVector<SDValue, 4> Halves;
+ EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext());
+ assert(TLI.isTypeLegal(HalfType));
+ if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, dl, LHS, RHS, Halves,
+ HalfType, DAG,
+ TargetLowering::MulExpansionKind::Always)) {
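+ // Each full-width result is rebuilt from a (lo, hi) pair of half-width
+ // values: zero-extend the low piece, any-extend the high piece, shift it
+ // up by the half width, and OR the two together.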
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]);
+ SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]);
+ SDValue Shift = DAG.getConstant(
+ HalfType.getScalarSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ break;
+ }
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if the multiply can be lowered using a two-result operation.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, prefer the form whose plain MULH is *not* supported,
+ // since that two-result form is the one that is genuinely needed.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+
+ SDValue Lo, Hi;
+ EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext());
+ if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
+ TLI.expandMUL(Node, Lo, Hi, HalfType, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
+ SDValue Shift =
+ DAG.getConstant(HalfType.getSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
+ Results.push_back(Expanded);
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ Results.push_back(TLI.expandAddSubSat(Node, DAG));
+ break;
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ Results.push_back(TLI.expandShlSat(Node, DAG));
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ Results.push_back(TLI.expandFixedPointMul(Node, DAG));
+ break;
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
+ if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
+ Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getConstantOperandVal(2),
+ DAG)) {
+ Results.push_back(V);
+ break;
+ }
+ // FIXME: We might want to retry here with a wider type if we fail, if that
+ // type is legal.
+ // FIXME: Technically, so long as we only have sdivfixes where BW+Scale is
+ // <= 128 (which is the case for all of the default Embedded-C types),
+ // we will only get here with types and scales that we could always expand
+ // if we were allowed to generate libcalls to division functions of illegal
+ // type. But we cannot do that.
+ llvm_unreachable("Cannot expand DIVFIX!");
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Carry = Node->getOperand(2);
+
+ bool IsAdd = Node->getOpcode() == ISD::UADDO_CARRY;
+
+ // Initial add of the 2 operands.
+ unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+ EVT VT = LHS.getValueType();
+ SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
+
+ // Initial check for overflow.
+ EVT CarryType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ // Add of the sum and the carry.
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue CarryExt =
+ DAG.getNode(ISD::AND, dl, VT, DAG.getZExtOrTrunc(Carry, dl, VT), One);
+ SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+ // Second check for overflow. If we are adding, we can only overflow if the
+ // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+ // If we are subtracting, we can only overflow if the initial sum is 0 and
+ // the carry is set, resulting in a new sum of all 1s.
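+ // A small worked example for an i8 add: LHS = 0xFF, RHS = 0x00, Carry = 1
+ // gives Sum = 0xFF (Overflow is false) and Sum2 = 0x00, so Overflow2 and
+ // hence the result carry are set.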
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Overflow2 =
+ IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
+ : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
+ Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
+ DAG.getZExtOrTrunc(Carry, dl, SetCCType));
+
+ SDValue ResultCarry =
+ DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
+
+ Results.push_back(Sum2);
+ Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ SDValue Result, Overflow;
+ if (TLI.expandMULO(Node, Result, Overflow, DAG)) {
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ }
+ break;
+ }
+ case ISD::BUILD_PAIR: {
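+ // Combine the two half-width operands into one value: zero-extend the low
+ // half, any-extend the high half, shift the high half up by half the pair
+ // width, and OR them. E.g. an i64 pair is (zext Lo) | (anyext Hi << 32).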
+ EVT PairTy = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(
+ ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(PairTy, DAG.getDataLayout())));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, dl, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Tmp1->setFlags(Node->getFlags());
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ const DataLayout &TD = DAG.getDataLayout();
+ EVT PTy = TLI.getPointerTy(TD);
+
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
+ // For power-of-two jumptable entry sizes, convert the multiplication to a
+ // shift. This transformation needs to be done here since otherwise the MIPS
+ // backend will end up emitting a three-instruction multiply sequence
+ // instead of a single shift, and MSP430 will call a runtime function.
+ if (llvm::isPowerOf2_32(EntrySize))
+ Index = DAG.getNode(
+ ISD::SHL, dl, Index.getValueType(), Index,
+ DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
+ else
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EntrySize, dl, Index.getValueType()));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
+ Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(
+ ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
+ Addr = LD;
+ if (TLI.isJumpTableRelative()) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+
+ Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ Tmp2.getOperand(0).getValueType())) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ // We test only the i1 bit. Skip the AND if UNDEF or another AND.
+ if (Tmp2.isUndef() ||
+ (Tmp2.getOpcode() == ISD::AND && isOneConstant(Tmp2.getOperand(1))))
+ Tmp3 = Tmp2;
+ else
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, dl, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, dl, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC:
+ case ISD::VP_SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
+ bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS;
+ bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ unsigned Offset = IsStrict ? 1 : 0;
+ Tmp1 = Node->getOperand(0 + Offset);
+ Tmp2 = Node->getOperand(1 + Offset);
+ Tmp3 = Node->getOperand(2 + Offset);
+ SDValue Mask, EVL;
+ if (IsVP) {
+ Mask = Node->getOperand(3 + Offset);
+ EVL = Node->getOperand(4 + Offset);
+ }
+ bool Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Mask, EVL, NeedInvert, dl,
+ Chain, IsSignaling);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (Tmp3.getNode()) {
+ if (IsStrict) {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
+ {Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags());
+ Chain = Tmp1.getValue(1);
+ } else if (IsVP) {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0),
+ {Tmp1, Tmp2, Tmp3, Mask, EVL}, Node->getFlags());
+ } else {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1,
+ Tmp2, Tmp3, Node->getFlags());
+ }
+ }
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert) {
+ if (!IsVP)
+ Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+ else
+ Tmp1 =
+ DAG.getVPLogicalNOT(dl, Tmp1, Mask, EVL, Tmp1->getValueType(0));
+ }
+
+ Results.push_back(Tmp1);
+ if (IsStrict)
+ Results.push_back(Chain);
+
+ break;
+ }
+
+ // FIXME: It seems Legalized is false iff CCCode is Legal. I don't
+ // understand if this code is useful for strict nodes.
+ assert(!IsStrict && "Don't know how to expand for strict nodes.");
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ // FIXME: This drops the mask/evl for VP_SETCC.
+ EVT VT = Node->getValueType(0);
+ EVT Tmp1VT = Tmp1.getValueType();
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getBoolConstant(true, dl, VT, Tmp1VT),
+ DAG.getBoolConstant(false, dl, VT, Tmp1VT), Tmp3);
+ Tmp1->setFlags(Node->getFlags());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ // TODO: need to add STRICT_SELECT_CC and STRICT_SELECT_CCS
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ EVT VT = Node->getValueType(0);
+ SDValue Chain;
+ SDValue CC = Node->getOperand(4);
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) {
+ // If the condition code is legal, then we need to expand this
+ // node using SETCC and SELECT.
+ EVT CmpVT = Tmp1.getValueType();
+ assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
+ "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
+ "expanded.");
+ EVT CCVT = getSetCCResultType(CmpVT);
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
+ Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+ break;
+ }
+
+ // SELECT_CC is legal, so the condition code must not be.
+ bool Legalized = false;
+ // Try to legalize by inverting the condition. This is for targets that
+ // might support an ordered version of a condition, but not the unordered
+ // version (or vice versa).
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType());
+ if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
+ // Use the new condition code and swap true and false
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ Tmp1->setFlags(Node->getFlags());
+ } else {
+ // If the inverse is not legal, then try to swap the arguments using
+ // the swapped inverse condition code.
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+ if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) {
+ // The swapped inverse condition is legal, so swap true and false,
+ // lhs and rhs.
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ Tmp1->setFlags(Node->getFlags());
+ }
+ }
+
+ if (!Legalized) {
+ Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
+ /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain);
+
+ assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then swap
+ // the True/False operands to match.
+ if (NeedInvert)
+ std::swap(Tmp3, Tmp4);
+
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SELECT_CC node.
+ if (CC.getNode()) {
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3, Tmp4, CC);
+ } else {
+ Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Tmp3, Tmp4, CC);
+ }
+ Tmp1->setFlags(Node->getFlags());
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ // TODO: need to add STRICT_BR_CC and STRICT_BR_CCS
+ SDValue Chain;
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ bool Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4,
+ /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain);
+ (void)Legalized;
+ assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+ // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+ // node.
+ if (Tmp4.getNode()) {
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
+
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+ } else {
+ Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(NeedInvert ? ISD::SETEQ : ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
+ Tmp2, Tmp3, Node->getOperand(4));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SPLAT_VECTOR:
+ Results.push_back(ExpandSPLAT_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(),
+ Node->getOperand(0), DAG.getVectorIdxConstant(Idx, dl));
+ SDValue Sh =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(),
+ Node->getOperand(1), DAG.getVectorIdxConstant(Idx, dl));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+
+ SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
+ Results.push_back(Result);
+ break;
+ }
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ break;
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ // Return true so that we don't call ConvertNodeToLibcall which also won't
+ // do anything.
+ return true;
+ }
+
+ if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) {
+ // FIXME: We were asked to expand a strict floating-point operation,
+ // but there is currently no expansion implemented that would preserve
+ // the "strict" properties. For now, we just fall back to the non-strict
+ // version if that is legal on the target. The actual mutation of the
+ // operation will happen in SelectionDAGISel::DoInstructionSelection.
+ switch (Node->getOpcode()) {
+ default:
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ return true;
+ break;
+ case ISD::STRICT_FSUB: {
+ if (TLI.getStrictFPOperationAction(
+ ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal)
+ return true;
+ if (TLI.getStrictFPOperationAction(
+ ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal)
+ break;
+
+ EVT VT = Node->getValueType(0);
+ const SDNodeFlags Flags = Node->getFlags();
+ SDValue Neg = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(2), Flags);
+ SDValue Fadd = DAG.getNode(ISD::STRICT_FADD, dl, Node->getVTList(),
+ {Node->getOperand(0), Node->getOperand(1), Neg},
+ Flags);
+
+ Results.push_back(Fadd);
+ Results.push_back(Fadd.getValue(1));
+ break;
+ }
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These are registered by the operand type instead of the value
+ // type. Reflect that here.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType())
+ == TargetLowering::Legal)
+ return true;
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (Results.empty()) {
+ LLVM_DEBUG(dbgs() << "Cannot expand node\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Successfully expanded node\n");
+ ReplaceNode(Node, Results.data());
+ return true;
+}
+
+void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ // FIXME: Check flags on the node to see if we can use a finite call.
+ unsigned Opc = Node->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_FENCE: {
+ // If the target didn't lower this, lower it to a '__sync_synchronize()' call.
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ AtomicOrdering Order = cast<AtomicSDNode>(Node)->getMergedOrdering();
+ RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
+ EVT RetVT = Node->getValueType(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SmallVector<SDValue, 4> Ops;
+ if (TLI.getLibcallName(LC)) {
+ // If an outline atomic libcall is available, prepare its arguments and expand.
+ Ops.append(Node->op_begin() + 2, Node->op_end());
+ Ops.push_back(Node->getOperand(1));
+
+ } else {
+ LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected atomic op or value type!");
+ // Arguments for expansion to sync libcall
+ Ops.append(Node->op_begin() + 1, Node->op_end());
+ }
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to an 'abort()' call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "abort", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FMINNUM:
+ case ISD::STRICT_FMINNUM:
+ ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128, Results);
+ break;
+ // FIXME: We do not have libcalls for FMAXIMUM and FMINIMUM. So, we cannot use
+ // libcall legalization for these nodes, but there is no default expansion for
+ // these nodes either (see PR63267 for example).
+ case ISD::FMAXNUM:
+ case ISD::STRICT_FMAXNUM:
+ ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128, Results);
+ break;
+ case ISD::FSQRT:
+ case ISD::STRICT_FSQRT:
+ ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128, Results);
+ break;
+ case ISD::FCBRT:
+ ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128, Results);
+ break;
+ case ISD::FSIN:
+ case ISD::STRICT_FSIN:
+ ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128, Results);
+ break;
+ case ISD::FCOS:
+ case ISD::STRICT_FCOS:
+ ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128, Results);
+ break;
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ case ISD::STRICT_FLOG:
+ ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80,
+ RTLIB::LOG_F128, RTLIB::LOG_PPCF128, Results);
+ break;
+ case ISD::FLOG2:
+ case ISD::STRICT_FLOG2:
+ ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128, Results);
+ break;
+ case ISD::FLOG10:
+ case ISD::STRICT_FLOG10:
+ ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128, Results);
+ break;
+ case ISD::FEXP:
+ case ISD::STRICT_FEXP:
+ ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80,
+ RTLIB::EXP_F128, RTLIB::EXP_PPCF128, Results);
+ break;
+ case ISD::FEXP2:
+ case ISD::STRICT_FEXP2:
+ ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128, Results);
+ break;
+ case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
+ ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128, Results);
+ break;
+ case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
+ ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128, Results);
+ break;
+ case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
+ ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128, Results);
+ break;
+ case ISD::FRINT:
+ case ISD::STRICT_FRINT:
+ ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128, Results);
+ break;
+ case ISD::FNEARBYINT:
+ case ISD::STRICT_FNEARBYINT:
+ ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128, Results);
+ break;
+ case ISD::FROUND:
+ case ISD::STRICT_FROUND:
+ ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128, Results);
+ break;
+ case ISD::FROUNDEVEN:
+ case ISD::STRICT_FROUNDEVEN:
+ ExpandFPLibCall(Node, RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128, Results);
+ break;
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP:
+ ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80,
+ RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results);
+ break;
+ case ISD::FFREXP: {
+ ExpandFrexpLibCall(Node, Results);
+ break;
+ }
+ case ISD::FPOWI:
+ case ISD::STRICT_FPOWI: {
+ RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ if (Node->isStrictFPOpcode()) {
+ SDValue Exponent =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(Node),
+ {Node->getValueType(0), Node->getValueType(1)},
+ {Node->getOperand(0), Node->getOperand(2)});
+ SDValue FPOW =
+ DAG.getNode(ISD::STRICT_FPOW, SDLoc(Node),
+ {Node->getValueType(0), Node->getValueType(1)},
+ {Exponent.getValue(1), Node->getOperand(1), Exponent});
+ Results.push_back(FPOW);
+ Results.push_back(FPOW.getValue(1));
+ } else {
+ SDValue Exponent =
+ DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), Node->getValueType(0),
+ Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
+ Node->getValueType(0),
+ Node->getOperand(0), Exponent));
+ }
+ break;
+ }
+ unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0;
+ bool ExponentHasSizeOfInt =
+ DAG.getLibInfo().getIntSize() ==
+ Node->getOperand(1 + Offset).getValueType().getSizeInBits();
+ if (!ExponentHasSizeOfInt) {
+ // If the exponent does not match sizeof(int), a libcall to
+ // RTLIB::POWI would use the wrong type for the argument.
+ DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ break;
+ }
+ ExpandFPLibCall(Node, LC, Results);
+ break;
+ }
+ case ISD::FPOW:
+ case ISD::STRICT_FPOW:
+ ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80,
+ RTLIB::POW_F128, RTLIB::POW_PPCF128, Results);
+ break;
+ case ISD::LROUND:
+ case ISD::STRICT_LROUND:
+ ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128, Results);
+ break;
+ case ISD::LLROUND:
+ case ISD::STRICT_LLROUND:
+ ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128, Results);
+ break;
+ case ISD::LRINT:
+ case ISD::STRICT_LRINT:
+ ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128, Results);
+ break;
+ case ISD::LLRINT:
+ case ISD::STRICT_LLRINT:
+ ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128, Results);
+ break;
+ case ISD::FDIV:
+ case ISD::STRICT_FDIV:
+ ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128, Results);
+ break;
+ case ISD::FREM:
+ case ISD::STRICT_FREM:
+ ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128, Results);
+ break;
+ case ISD::FMA:
+ case ISD::STRICT_FMA:
+ ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128, Results);
+ break;
+ case ISD::FADD:
+ case ISD::STRICT_FADD:
+ ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128, Results);
+ break;
+ case ISD::FMUL:
+ case ISD::STRICT_FMUL:
+ ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128, Results);
+ break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
+ }
+ break;
+ case ISD::STRICT_FP16_TO_FP: {
+ if (Node->getValueType(0) == MVT::f32) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Node->getOperand(1), CallOptions,
+ SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ }
+ break;
+ }
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false).first);
+ break;
+ }
+ case ISD::FP_TO_BF16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16");
+ Results.push_back(ExpandLibCall(LC, Node, false).first);
+ break;
+ }
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: {
+ // TODO - Common the code with DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP
+ bool IsStrict = Node->isStrictFPOpcode();
+ bool Signed = Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT SVT = Node->getOperand(IsStrict ? 1 : 0).getValueType();
+ EVT RVT = Node->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(Node);
+
+ // Even if the input is legal, no libcall may exactly match; e.g. we don't
+ // have i1 -> fp conversions. So it needs to be promoted to a larger type,
+ // e.g. i13 -> fp. Then, look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source type needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+ : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, Node->getOperand(IsStrict ? 1 : 0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain);
+ Results.push_back(Tmp.first);
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT: {
+ // TODO - Common the code with DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT.
+ bool IsStrict = Node->isStrictFPOpcode();
+ bool Signed = Node->getOpcode() == ISD::FP_TO_SINT ||
+ Node->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ EVT RVT = Node->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(Node);
+
+ // Even if the result is legal, no libcall may exactly match; e.g. we don't
+ // have fp -> i1 conversions. So the result needs to be promoted to a larger
+ // type, e.g. fp -> i32. Then, look for an appropriate libcall.
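+ // Illustrative example (not part of the original comment): an f32 -> i1
+ // conversion settles on NVT = i32 below, calls the f32 -> i32 libcall, and
+ // truncates the i32 result back to i1 afterwards.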
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT)
+ : RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
+
+ // Truncate the result if the libcall returns a larger type.
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first));
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND: {
+ // X = FP_ROUND(Y, TRUNC)
+ // TRUNC is a flag, which is always an integer that is zero or one.
+ // If TRUNC is 0, this is a normal rounding; if it is 1, this FP_ROUND
+ // is known not to change the value of Y.
+ // We can only expand it into a libcall if TRUNC is 0.
+ bool IsStrict = Node->isStrictFPOpcode();
+ SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ EVT VT = Node->getValueType(0);
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))->isZero() &&
+ "Unable to expand as libcall if it is not normal rounding");
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, SDLoc(Node), Chain);
+ Results.push_back(Tmp.first);
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ Results.push_back(
+ ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
+ Node->getValueType(0)),
+ Node, false).first);
+ break;
+ }
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ Node->getOpcode() == ISD::STRICT_FP_TO_FP16
+ ? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
+ : RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+ Node->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
+ CallOptions, SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::FSUB:
+ case ISD::STRICT_FSUB:
+ ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128, Results);
+ break;
+ case ISD::SREM:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
+ break;
+ case ISD::UREM:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
+ break;
+ case ISD::SDIV:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ break;
+ case ISD::UDIV:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
+ break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("LibCall explicitly requested, but not available");
+ case MVT::i32:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
+ break;
+ case MVT::i64:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
+ break;
+ case MVT::i128:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
+ break;
+ }
+ break;
+ case ISD::RESET_FPENV: {
+ // It is legalized to a call to 'fesetenv(FE_DFL_ENV)'. In glibc,
+ // FE_DFL_ENV is defined as '((const fenv_t *) -1)' on most targets.
+ SDValue Ptr = DAG.getIntPtrConstant(-1LL, dl);
+ SDValue Chain = Node->getOperand(0);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETENV, Ptr, Chain, dl));
+ break;
+ }
+ case ISD::GET_FPENV_MEM: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue EnvPtr = Node->getOperand(1);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FEGETENV, EnvPtr, Chain, dl));
+ break;
+ }
+ case ISD::SET_FPENV_MEM: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue EnvPtr = Node->getOperand(1);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty()) {
+ LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n");
+ ReplaceNode(Node, Results.data());
+ } else
+ LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n");
+}
+
+// Determine the vector type to use in place of an original scalar element when
+// promoting equally sized vectors.
+static MVT getPromotedVectorElementType(const TargetLowering &TLI,
+ MVT EltVT, MVT NewEltVT) {
+ unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
+ MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ assert(TLI.isTypeLegal(MidVT) && "unexpected");
+ return MidVT;
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "Trying to promote node\n");
+ SmallVector<SDValue, 8> Results;
+ MVT OVT = Node->getSimpleValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ OVT = Node->getOperand(0).getSimpleValueType();
+ }
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ OVT = Node->getOperand(1).getSimpleValueType();
+ if (Node->getOpcode() == ISD::BR_CC ||
+ Node->getOpcode() == ISD::SELECT_CC)
+ OVT = Node->getOperand(2).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ // Zero extend the argument unless it's cttz, in which case use any_extend.
+ if (Node->getOpcode() == ISD::CTTZ ||
+ Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ else
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
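+ // Illustration: when promoting an i16 cttz to i32, the bit at position 16
+ // is set, so a zero input still produces a count of 16, matching the
+ // semantics of cttz on the original i16 type.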
+ auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
+ OVT.getSizeInBits());
+ Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
+ DAG.getConstant(TopBit, dl, NVT));
+ }
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), dl, NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BITREVERSE:
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
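+ // After the wider bswap/bitreverse, the original bits sit in the high part
+ // of the promoted value, so shift them back down by the size difference
+ // before truncating (e.g. an i16 bswap performed in i32 needs a shift by 16).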
+ Tmp1 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
+ PromoteLegalFP_TO_INT(Node, dl, Results);
+ break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ Results.push_back(PromoteLegalFP_TO_INT_SAT(Node, dl));
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ PromoteLegalINT_TO_FP(Node, dl, Results);
+ break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Tmp2.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
+ ReplacedNode(Node);
+ break;
+ }
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+
+ switch (Node->getOpcode()) {
+ default:
+ ExtOp = ISD::ANY_EXTEND;
+ break;
+ case ISD::SDIV:
+ case ISD::SREM:
+ ExtOp = ISD::SIGN_EXTEND;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ ExtOp = ISD::ZERO_EXTEND;
+ break;
+ }
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ // Promote to a multiply in a wider integer type.
+ unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2);
+
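+ // The low half of the result is simply the truncated product; the high half
+ // is the product shifted right by the original bit width, then truncated.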
+ auto &DL = DAG.getDataLayout();
+ unsigned OriginalSize = OVT.getScalarSizeInBits();
+ Tmp2 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT)));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector() ||
+ Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+ Tmp1->setFlags(Node->getFlags());
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SPLICE: {
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::VECTOR_SPLICE, dl, NVT, Tmp1, Tmp2,
+ Node->getOperand(2));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp3));
+ break;
+ }
+ case ISD::SELECT_CC: {
+ SDValue Cond = Node->getOperand(4);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Cond)->get();
+ // Type of the comparison operands.
+ MVT CVT = Node->getSimpleValueType(0);
+ assert(CVT == OVT && "not handled");
+
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+
+ // Promote the comparison operands, if needed.
+ if (TLI.isCondCodeLegal(CCCode, CVT)) {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ } else {
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ }
+ // Cast the true/false operands.
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp4 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, NVT, {Tmp1, Tmp2, Tmp3, Tmp4, Cond},
+ Node->getFlags());
+
+ // Cast the result back to the original type.
+ if (ExtOp != ISD::FP_EXTEND)
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1);
+ else
+ Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ if (Node->isStrictFPOpcode()) {
+ SDValue InChain = Node->getOperand(0);
+ std::tie(Tmp1, std::ignore) =
+ DAG.getStrictFPExtendOrRound(Node->getOperand(1), InChain, dl, NVT);
+ std::tie(Tmp2, std::ignore) =
+ DAG.getStrictFPExtendOrRound(Node->getOperand(2), InChain, dl, NVT);
+ SmallVector<SDValue, 2> TmpChains = {Tmp1.getValue(1), Tmp2.getValue(1)};
+ SDValue OutChain = DAG.getTokenFactor(dl, TmpChains);
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ Results.push_back(DAG.getNode(Node->getOpcode(), dl, VTs,
+ {OutChain, Tmp1, Tmp2, Node->getOperand(3)},
+ Node->getFlags()));
+ Results.push_back(Results.back().getValue(1));
+ break;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Node->getOperand(2), Node->getFlags()));
+ break;
+ }
+ case ISD::BR_CC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(1))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0),
+ Node->getOperand(0), Node->getOperand(1),
+ Tmp1, Tmp2, Node->getOperand(4)));
+ break;
+ }
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::FPOW:
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FPOW:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp3, Tmp1, Tmp2});
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ case ISD::FMA:
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ case ISD::STRICT_FMA:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(3)});
+ Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1), Tmp3.getValue(1));
+ Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp4, Tmp1, Tmp2, Tmp3});
+ Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp4);
+ Results.push_back(Tmp4.getValue(1));
+ break;
+ case ISD::FCOPYSIGN:
+ case ISD::FLDEXP:
+ case ISD::FPOWI: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+
+ // fcopysign doesn't change anything but the sign bit, so
+ // (fp_round (fcopysign (fpext a), b))
+ // is as precise as
+ // (fp_round (fpext a))
+ // which is a no-op. Mark it as a TRUNCating FP_ROUND.
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
+ DAG.getIntPtrConstant(isTrunc, dl, /*isTarget=*/true)));
+ break;
+ }
+ case ISD::STRICT_FPOWI:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
+ case ISD::FFREXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FFREXP, dl, {NVT, Node->getValueType(1)}, Tmp1);
+
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+
+ Results.push_back(Tmp2.getValue(1));
+ break;
+ }
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FTRUNC:
+ case ISD::FNEG:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FABS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
+ case ISD::BUILD_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
+ // =>
+ // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for build_vector");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
+ SDValue Op = Node->getOperand(I);
+ NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
+ }
+
+ SDLoc SL(Node);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1)))
+ //
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for extract_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Idx = Node->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVec, TmpIdx);
+ NewOps.push_back(Elt);
+ }
+
+ SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ // v2i32:casty = bitcast y:i64
+ //
+ // v2i64 = bitcast
+ // (v4i32 insert_vector_elt
+ // (v4i32 insert_vector_elt v4i32:castx,
+ // (extract_vector_elt casty, 0), 2 * z),
+ // (extract_vector_elt casty, 1), (2 * z + 1))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for insert_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Val = Node->getOperand(1);
+ SDValue Idx = Node->getOperand(2);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+
+ SDValue NewVec = CastVec;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVal, IdxOffset);
+
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
+ NewVec, Elt, InEltIdx);
+ }
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = scalar_to_vector x:i64
+ // =>
+ // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
+ //
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ SDValue Val = Node->getOperand(0);
+ SDLoc SL(Node);
+
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+ SDValue Undef = DAG.getUNDEF(MidVT);
+
+ SmallVector<SDValue, 8> NewElts;
+ NewElts.push_back(CastVal);
+ for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
+ NewElts.push_back(Undef);
+
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::ATOMIC_SWAP: {
+ AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+ SDLoc SL(Node);
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
+ assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+ "unexpected promotion type");
+ assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+ "unexpected atomic_swap with illegal type");
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
+ DAG.getVTList(NVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+ Results.push_back(NewAtomic.getValue(1));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty()) {
+ LLVM_DEBUG(dbgs() << "Successfully promoted node\n");
+ ReplaceNode(Node, Results.data());
+ } else
+ LLVM_DEBUG(dbgs() << "Could not promote node\n");
+}
+
+/// This is the entry point for the file.
+void SelectionDAG::Legalize() {
+ AssignTopologicalOrder();
+
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ // Use a delete listener to remove nodes which were deleted during
+ // legalization from LegalizedNodes. This is needed to handle the situation
+ // where a new node is allocated by the object pool at the same address as a
+ // previously deleted node.
+ DAGNodeDeletedListener DeleteListener(
+ *this,
+ [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); });
+
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ while (true) {
+ bool AnyLegalized = false;
+ for (auto NI = allnodes_end(); NI != allnodes_begin();) {
+ --NI;
+
+ SDNode *N = &*NI;
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ continue;
+ }
+
+ if (LegalizedNodes.insert(N).second) {
+ AnyLegalized = true;
+ Legalizer.LegalizeOp(N);
+
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ }
+ }
+ }
+ if (!AnyLegalized)
+ break;
+
+ }
+
+ // Remove dead nodes now.
+ RemoveDeadNodes();
+}
+
+bool SelectionDAG::LegalizeOp(SDNode *N,
+ SmallSetVector<SDNode *, 16> &UpdatedNodes) {
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes);
+
+ // Directly insert the node in question, and legalize it. This will recurse
+ // as needed through operands.
+ LegalizedNodes.insert(N);
+ Legalizer.LegalizeOp(N);
+
+ return LegalizedNodes.count(N);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 000000000000..7e035d21ef71
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,3207 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+/// FIXME: This is a local version of RTLIB::getFPLibCall that should be
+/// refactored away (see RTLIB::getPOWI for an example).
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Convert Float Results to Integer
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to soften the result of this "
+ "operator!");
+
+ case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
+ case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::STRICT_FMINNUM:
+ case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::STRICT_FMAXNUM:
+ case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
+ case ISD::STRICT_FADD:
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break;
+ case ISD::STRICT_FCEIL:
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::STRICT_FCOS:
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::STRICT_FDIV:
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::STRICT_FEXP:
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::STRICT_FEXP2:
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::STRICT_FFLOOR:
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::STRICT_FLOG:
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::STRICT_FLOG2:
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::STRICT_FLOG10:
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::STRICT_FMA:
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::STRICT_FMUL:
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::BF16_TO_FP: R = SoftenFloatRes_BF16_TO_FP(N); break;
+ case ISD::STRICT_FPOW:
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::STRICT_FPOWI:
+ case ISD::FPOWI:
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
+ case ISD::FFREXP:
+ R = SoftenFloatRes_FFREXP(N);
+ break;
+ case ISD::STRICT_FREM:
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::STRICT_FRINT:
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::STRICT_FROUND:
+ case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break;
+ case ISD::STRICT_FSIN:
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::STRICT_FSQRT:
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::STRICT_FSUB:
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::STRICT_FTRUNC:
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ R = SoftenFloatRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = SoftenFloatRes_VECREDUCE_SEQ(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode()) {
+ assert(R.getNode() != N);
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+ }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getNumOperands() == (1 + Offset) &&
+ "Unexpected number of operands!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpVT = N->getOperand(0 + Offset).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getNumOperands() == (2 + Offset) &&
+ "Unexpected number of operands!");
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ GetSoftenedFloat(N->getOperand(1 + Offset)) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::FREEZE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+ return NewFence;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ // In ppcf128, the high 64 bits are always first in memory regardless
+ // of Endianness. LLVM's APFloat representation is not Endian sensitive,
+ // and so always converts into a 128-bit APInt in a non-Endian-sensitive
+ // way. However, APInts are serialized in an Endian-sensitive fashion,
+ // so on big-Endian targets, the two doubles are output in the wrong
+ // order. Fix this by manually flipping the order of the high 64 bits
+ // and the low 64 bits here.
+ if (DAG.getDataLayout().isBigEndian() &&
+ CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) {
+ uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1],
+ CN->getValueAPF().bitcastToAPInt().getRawData()[0] };
+ APInt Val(128, words);
+ return DAG.getConstant(Val, SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ } else {
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ APInt API = APInt::getAllOnes(Size);
+ API.clearBit(Size - 1);
+ SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
+ return SoftenFloatRes_SELECT_CC(SelCC.getNode());
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
+ return SoftenFloatRes_SELECT_CC(SelCC.getNode());
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CBRT_F32,
+ RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80,
+ RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift right or sign-extend it if the two operands have different types.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ GetSoftenedFloat(N->getOperand(1 + Offset)),
+ GetSoftenedFloat(N->getOperand(2 + Offset)) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType(),
+ N->getOperand(2 + Offset).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG,
+ GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N), Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ // Expand Y = FNEG(X) -> Y = X ^ sign mask
+ APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+ DAG.getConstant(SignMask, dl, NVT));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0))
+ return BitConvertToInteger(Op);
+ }
+
+ // There's only a libcall for f16 -> f32 and shifting is only valid for bf16
+ // -> f32, so proceed in two stages. Also, it's entirely possible for both
+ // f16 and f32 to be legal, so use the fully hard-float FP_EXTEND rather
+ // than FP16_TO_FP.
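+ // Illustration: softening an f16 -> f64 extend therefore becomes an
+ // f16 -> f32 FP_EXTEND (legalized on its own) followed by the f32 -> f64
+ // extension libcall selected below.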
+ if ((Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16) &&
+ N->getValueType(0) != MVT::f32) {
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
+ { MVT::f32, MVT::Other }, { Chain, Op });
+ Chain = Op.getValue(1);
+ } else {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+ }
+ }
+
+ if (Op.getValueType() == MVT::bf16)
+ return SoftenFloatRes_BF16_TO_FP(N);
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+ EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
+ SDValue Op = N->getOperand(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
+ CallOptions, SDLoc(N)).first;
+ if (N->getValueType(0) == MVT::f32)
+ return Res32;
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_BF16_TO_FP(SDNode *N) {
+ assert(N->getValueType(0) == MVT::f32 &&
+ "Can only soften BF16_TO_FP with f32 result");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
+ SDValue Op = N->getOperand(0);
+ SDLoc DL(N);
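+ // bf16 occupies the upper 16 bits of the corresponding f32, so widening the
+ // raw bits and shifting left by 16 reconstructs the f32 bit pattern.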
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, NVT,
+ DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op));
+ SDValue Res = DAG.getNode(ISD::SHL, DL, NVT, Op,
+ DAG.getShiftAmountConstant(16, NVT, DL));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
+ N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
+ "Unsupported power type!");
+ bool IsPowI =
+ N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;
+
+ RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
+ : RTLIB::getLDEXP(N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ // FIXME: Implement this if some target needs it.
+ DAG.getContext()->emitError("Don't know how to soften fpowi to fpow");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
+ if (DAG.getLibInfo().getIntSize() !=
+ N->getOperand(1 + Offset).getValueType().getSizeInBits()) {
+ // If the exponent does not match sizeof(int), a libcall to RTLIB::POWI
+ // would use the wrong type for the argument.
+ DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ N->getOperand(1 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
+ assert(!N->isStrictFPOpcode() && "strictfp not implemented for frexp");
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+ RTLIB::Libcall LC = RTLIB::getFREXP(VT0);
+
+ if (DAG.getLibInfo().getIntSize() != VT1.getSizeInBits()) {
+ // If the exponent does not match sizeof(int), a libcall would use the
+ // wrong type for the argument.
+ // TODO: Should be able to handle mismatches.
+ DAG.getContext()->emitError("ffrexp exponent does not match sizeof(int)");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
+ EVT NVT0 = TLI.getTypeToTransformTo(*DAG.getContext(), VT0);
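+ // frexp reports the exponent through its int* out-parameter, so pass a
+ // stack temporary as that pointer and load the exponent back after the call.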
+ SDValue StackSlot = DAG.CreateStackTemporary(VT1);
+
+ SDLoc DL(N);
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Ops[2] = {GetSoftenedFloat(N->getOperand(0)), StackSlot};
+ EVT OpsVT[2] = {VT0, StackSlot.getValueType()};
+
+ // TODO: setTypeListBeforeSoften can't properly express multiple return types,
+ // but we only really need to handle the 0th one for softening anyway.
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT0, true);
+
+ auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT0, Ops, CallOptions, DL,
+ /*Chain=*/SDValue());
+ int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(VT1, DL, Chain, StackSlot, PtrInfo);
+
+ ReplaceValueWith(SDValue(N, 1), LoadExp);
+ return ReturnVal;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUNDEVEN(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ auto MMOFlags =
+ L->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ SDValue NewL;
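+ // A non-extending load can simply reload the same bits as the softened
+ // integer type.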
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
+ L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT, L->getOriginalAlign(),
+ MMOFlags, L->getAAInfo());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
+ dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), L->getMemoryVT(),
+ L->getOriginalAlign(), MMOFlags, L->getAAInfo());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+ return BitConvertToInteger(ExtendNode);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ if (N != NewVAARG.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT SVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+ // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(IsStrict ? 1 : 0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ Op, CallOptions, dl, Chain);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE(SDNode *N) {
+ // Expand and soften recursively.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) {
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Convert Float Operand to Integer
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::STRICT_FP_TO_FP16:
+ case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::FP_TO_BF16:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SoftenFloatOp_FP_TO_XINT_SAT(N); break;
+ case ISD::STRICT_LROUND:
+ case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
+ case ISD::STRICT_LLROUND:
+ case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
+ case ISD::STRICT_LRINT:
+ case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
+ case ISD::STRICT_LLRINT:
+ case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this so it re-analyzes the node.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand softening");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ // We actually deal with the partially-softened FP_TO_FP16 node too, which
+ // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
+ N->getOpcode() == ISD::FP_TO_BF16 ||
+ N->getOpcode() == ISD::STRICT_FP_ROUND);
+
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT FloatRVT = RVT;
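+ // FP_TO_FP16 and FP_TO_BF16 return an integer, so pick the libcall as if
+ // rounding to the corresponding half-precision FP type.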
+ if (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ FloatRVT = MVT::f16;
+ else if (N->getOpcode() == ISD::FP_TO_BF16)
+ FloatRVT = MVT::bf16;
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ Op = GetSoftenedFloat(Op);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+ }
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(2), N->getOperand(3));
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
+
+// Even if the result type is legal, no libcall may exactly match (e.g. we
+// don't have FP-to-i8 conversions). This helper method looks for an
+// appropriate promoted libcall.
+static RTLIB::Libcall findFPToIntLibcall(EVT SrcVT, EVT RetVT, EVT &Promoted,
+ bool Signed) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ Promoted = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (Promoted.bitsGE(RetVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SrcVT, Promoted)
+ : RTLIB::getFPTOUINT(SrcVT, Promoted);
+ }
+ return LC;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the result is not legal, eg: fp -> i1, then it needs to be promoted to
+ // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
+ // match, eg. we don't have fp -> i8 conversions.
+ // Look for an appropriate libcall.
+ RTLIB::Libcall LC = findFPToIntLibcall(SVT, RVT, NVT, Signed);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() &&
+ "Unsupported FP_TO_XINT!");
+
+ Op = GetSoftenedFloat(Op);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, dl, Chain);
+
+ // Truncate the result if the libcall returns a larger type.
+ SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first);
+
+ if (!IsStrict)
+ return Res;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Op1 = N->getOperand(IsStrict ? 2 : 1);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
+
+ EVT VT = Op0.getValueType();
+ SDValue NewLHS = GetSoftenedFloat(Op0);
+ SDValue NewRHS = GetSoftenedFloat(Op1);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), Op0, Op1,
+ Chain, N->getOpcode() == ISD::STRICT_FSETCCS);
+
+ // Update N to have the operands specified.
+ if (NewRHS.getNode()) {
+ if (IsStrict)
+ NewLHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), NewLHS,
+ NewRHS, DAG.getCondCode(CCCode));
+ else
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+ }
+
+ // Otherwise, softenSetCCOperands returned a scalar, use it.
+ assert((NewRHS.getNode() || NewLHS.getValueType() == N->getValueType(0)) &&
+ "Unexpected setcc expansion!");
+
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 0), NewLHS);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ return SDValue();
+ }
+ return NewLHS;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc dl(N);
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(
+ DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), Val,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits());
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // If the two operands have different sizes, line up the sign bit of RHS:
+ // shift right and truncate when RHS is wider, extend and shift left otherwise.
+ int SizeDiff = RSize - LSize;
+ if (SizeDiff > 0) {
+ RHS =
+ DAG.getNode(ISD::SRL, dl, RVT, RHS,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(RHS.getValueType(),
+ DAG.getDataLayout())));
+ RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS);
+ } else if (SizeDiff < 0) {
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS);
+ RHS =
+ DAG.getNode(ISD::SHL, dl, ILVT, RHS,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(RHS.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ RHS = DAG.getBitcast(LVT, RHS);
+ return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpVT = N->getOperand(0 + Offset).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+ }
+
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
+
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::STRICT_FMINNUM:
+ case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::STRICT_FMAXNUM:
+ case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+ case ISD::STRICT_FADD:
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break;
+ case ISD::STRICT_FCEIL:
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::STRICT_FCOS:
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::STRICT_FDIV:
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::STRICT_FEXP:
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::STRICT_FEXP2:
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::STRICT_FFLOOR:
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG:
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG2:
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG10:
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::STRICT_FMA:
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::STRICT_FMUL:
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::STRICT_FPOW:
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::STRICT_FPOWI:
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: ExpandFloatRes_FLDEXP(N, Lo, Hi); break;
+ case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break;
+ case ISD::STRICT_FRINT:
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::STRICT_FROUND:
+ case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break;
+ case ISD::STRICT_FSIN:
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::STRICT_FSQRT:
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::STRICT_FSUB:
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::STRICT_FTRUNC:
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::STRICT_FREM:
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == 64 &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ SDLoc dl(N);
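+ // A ppcf128 constant is a pair of doubles: the low 64 bits of the bit
+ // pattern hold the high-order double and the upper 64 bits the low-order one.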
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(64, C.getRawData()[1])),
+ dl, NVT);
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(64, C.getRawData()[0])),
+ dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Op = N->getOperand(0 + Offset);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ Op, CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ Ops, CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDLoc dl(N);
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ ISD::SETEQ);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
+ RTLIB::CBRT_F64, RTLIB::CBRT_F80,
+ RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset),
+ N->getOperand(2 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, CallOptions,
+ SDLoc(N), Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ bool IsStrict = N->isStrictFPOpcode();
+
+ SDValue Chain;
+ if (IsStrict) {
+ // If the expanded type is the same as the input type, just bypass the node.
+ if (NVT == N->getOperand(1).getValueType()) {
+ Hi = N->getOperand(1);
+ Chain = N->getOperand(0);
+ } else {
+ // Otherwise we need to extend.
+ Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ Chain = Hi.getValue(1);
+ }
+ } else {
+ Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
+ }
+
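+ // The low half of the expanded ppcf128 result is zero; the extended value
+ // fits entirely in the high-order double.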
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLDEXP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, RTLIB::getLDEXP(N->getValueType(0)), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FREEZE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FREEZE, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FROUNDEVEN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getMemoryVT(), LD->getMemOperand());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ bool Strict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(Strict ? 1 : 0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ SDLoc dl(N);
+ SDValue Chain = Strict ? N->getOperand(0) : DAG.getEntryNode();
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(N->getFlags().hasNoFPExcept());
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+ if (Strict) {
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
+ {Chain, Src}, Flags);
+ Chain = Hi.getValue(1);
+ } else
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
+ if (Strict)
+ Chain = Tmp.second;
+ GetPairElements(Tmp.first, Lo, Hi);
+ }
+
+ // No need to complement for unsigned 32-bit integers
+ if (isSigned || SrcVT.bitsLE(MVT::i32)) {
+ if (Strict)
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return;
+ }
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ // FIXME: For unsigned i128 to ppc_fp128 conversion, we need to carefully
+ // keep semantics correctness if the integer is not exactly representable
+ // here. See ExpandLegalINT_TO_FP.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
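+ // Each table below holds the IEEE-754 double bit pattern for 2^N in its
+ // first element; the second element (the low-order double of the ppcf128
+ // pair) is zero.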
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ ArrayRef<uint64_t> Parts;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ // TODO: Are there other fast-math-flags to propagate to this FADD?
+ SDValue NewLo = DAG.getConstantFP(
+ APFloat(APFloat::PPCDoubleDouble(), APInt(128, Parts)), dl, MVT::ppcf128);
+ if (Strict) {
+ Lo = DAG.getNode(ISD::STRICT_FADD, dl, DAG.getVTList(VT, MVT::Other),
+ {Chain, Hi, NewLo}, Flags);
+ Chain = Lo.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ } else
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, NewLo);
+ Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
+ Lo, Hi, ISD::SETLT);
+ GetPairElements(Lo, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_XINT(N); break;
+ case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
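+ // The expansion computes
+ //   (Hi1 == Hi2 && Lo1 <cc> Lo2) || (Hi1 != Hi2 && Hi1 <cc> Hi2):
+ // the low-order doubles decide the comparison only when the high-order
+ // doubles are equal.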
+ SDValue Tmp1, Tmp2, Tmp3, OutputChain;
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, ISD::SETOEQ, Chain, IsSignaling);
+ OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue();
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, CCCode, OutputChain, IsSignaling);
+ OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue();
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 =
+ DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi,
+ ISD::SETUNE, OutputChain, IsSignaling);
+ OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue();
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, OutputChain, IsSignaling);
+ OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue();
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+ Chain = OutputChain;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue Chain;
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain);
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(1), Lo, Hi);
+ // The ppcf128 value is providing only the sign; take it from the
+ // higher-order double (which must have the larger magnitude).
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N),
+ N->getValueType(0), N->getOperand(0), Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi);
+
+ if (!IsStrict)
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0), Hi, N->getOperand(1));
+
+ // Eliminate the node if the input float type is the same as the output float
+ // type.
+ if (Hi.getValueType() == N->getValueType(0)) {
+ // Connect the output chain to the input chain, unlinking the node.
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Hi);
+ return SDValue();
+ }
+
+ SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), Hi, N->getOperand(2)});
+ ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Expansion);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_XINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+
+ EVT NVT;
+ RTLIB::Libcall LC = findFPToIntLibcall(Op.getValueType(), RVT, NVT, Signed);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() &&
+ "Unsupported FP_TO_XINT!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ SDValue Chain;
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain);
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue NewLHS = N->getOperand(IsStrict ? 1 : 0);
+ SDValue NewRHS = N->getOperand(IsStrict ? 2 : 1);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain,
+ N->getOpcode() == ISD::STRICT_FSETCCS);
+
+ // FloatExpandSetCCOperands always returned a scalar.
+ assert(!NewRHS.getNode() && "Expect to return scalar");
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ if (Chain) {
+ ReplaceValueWith(SDValue(N, 0), NewLHS);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ return SDValue();
+ }
+ return NewLHS;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+//===----------------------------------------------------------------------===//
+// Float Operand Promotion
+//===----------------------------------------------------------------------===//
+//
+
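+// Return the conversion opcode used to move between an f16/bf16 value and
+// its promoted representation.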
+static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ return ISD::FP16_TO_FP;
+ } else if (RetVT == MVT::f16) {
+ return ISD::FP_TO_FP16;
+ } else if (OpVT == MVT::bf16) {
+ return ISD::BF16_TO_FP;
+ } else if (RetVT == MVT::bf16) {
+ return ISD::FP_TO_BF16;
+ }
+
+ report_fatal_error("Attempt at an invalid promotion-related conversion");
+}
+
+bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+ // Nodes that use a promotion-requiring floating point operand, but don't
+ // produce a promotion-requiring floating point result, need to be legalized
+ // to use the promoted float operand. Nodes that produce at least one
+ // promotion-requiring floating point result have their operands legalized as
+ // a part of PromoteFloatResult.
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ report_fatal_error("Do not know how to promote this operator's operand!");
+
+ case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
+ case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+ case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
+ case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
+ }
+
+ if (R.getNode())
+ ReplaceValueWith(SDValue(N, 0), R);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op->getValueType(0);
+
+ SDValue Promoted = GetPromotedFloat(N->getOperand(0));
+ EVT PromotedVT = Promoted->getValueType(0);
+
+ // Convert the promoted float value to the desired IVT.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N),
+ IVT, Promoted);
+ // The final result type might not be a scalar so we need a bitcast. The
+ // bitcast will be further legalized if needed.
+ return DAG.getBitcast(N->getValueType(0), Convert);
+}
+
+// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
+// PromoteFloatRes_FCOPYSIGN.
+SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), Op1);
+}
+
+// Convert the promoted float value to the desired integer type
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N,
+ unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ // Desired VT is same as promoted type. Use promoted float directly.
+ if (VT == Op->getValueType(0))
+ return Op;
+
+ // Else, extend the promoted float value to the desired VT.
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
+}
+
+// Promote the float operands used for comparison. The true and false
+// operands have the same type as the result and are promoted, if needed, by
+// PromoteFloatRes_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ SDValue LHS = GetPromotedFloat(N->getOperand(0));
+ SDValue RHS = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ LHS, RHS, N->getOperand(2), N->getOperand(3),
+ N->getOperand(4));
+}
+
+// Construct a SETCC that compares the promoted values and sets the conditional
+// code.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ return DAG.getSetCC(SDLoc(N), VT, Op0, Op1, CCCode);
+}
+
+// Lower the promoted float down to the integer value of the same size and
+// construct a STORE of the integer value.
+SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc DL(N);
+
+ SDValue Promoted = GetPromotedFloat(Val);
+ EVT VT = ST->getOperand(1).getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ SDValue NewVal;
+ NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL,
+ IVT, Promoted);
+
+ return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ // These opcodes cannot appear if promotion of FP16 is done in the backend
+ // instead of Clang
+ case ISD::FP16_TO_FP:
+ case ISD::FP_TO_FP16:
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to promote this operator's result!");
+
+ case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCBRT:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break;
+
+ case ISD::FMA: // FMA is the same as FMAD
+ case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;
+
+ case ISD::FPOWI:
+ case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
+ case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
+
+ case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
+ case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ R = PromoteFloatRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = PromoteFloatRes_VECREDUCE_SEQ(N);
+ break;
+ }
+
+ if (R.getNode())
+ SetPromotedFloat(SDValue(N, ResNo), R);
+}
+
+// Bitcast from i16 to f16: convert the i16 to an f32 value instead.
+// At this point, it is not possible to determine whether the bitcast value is
+// eventually stored to memory, promoted to f32, or promoted to a
+// floating-point type of higher precision. Some of these cases are handled by
+// the FP_EXTEND and STORE promotion handlers.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ // Input type isn't guaranteed to be a scalar int so bitcast if not. The
+ // bitcast will be legalized further if necessary.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+ N->getOperand(0).getValueType().getSizeInBits());
+ SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL,
+ IVT);
+
+ // Convert the Constant to the desired FP type
+ // FIXME We might be able to do the conversion during compilation and get rid
+ // of it from the object code
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C);
+}
+
+// If the Index operand is a constant, try to redirect the extract operation to
+// the correct legalized vector. If not, bit-convert the input vector to the
+// equivalent integer vector, extract the element as a (bit-cast) integer
+// value, and convert it to the promoted type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc DL(N);
+
+ // If the index is constant, try to extract the value from the legalized
+ // vector type.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ switch (getTypeAction(VecVT)) {
+ default: break;
+ case TargetLowering::TypeScalarizeVector: {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeWidenVector: {
+ Vec = GetWidenedVector(Vec);
+ SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeSplitVector: {
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ SDValue Res;
+ if (IdxVal < LoElts)
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx);
+ else
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, DL,
+ Idx.getValueType()));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+
+ }
+ }
+
+ // Bit-convert the input vector to the equivalent integer vector
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ EVT IVT = NewOp.getValueType().getVectorElementType();
+
+ // Extract the element as a (bit-cast) integer value
+ SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT,
+ NewOp, N->getOperand(1));
+
+ // Convert the element to the desired FP type
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal);
+}
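+
+// A minimal standalone sketch (not an SDNode transform) of the constant-index
+// redirection above: once the source vector has been split, a constant index
+// simply selects the low or high half and is rebased for the high half. The
+// helper name is illustrative only.
+static inline float ExtractFromSplitSketch(const float *Lo, const float *Hi,
+ unsigned LoElts, unsigned Idx) {
+ return Idx < LoElts ? Lo[Idx] : Hi[Idx - LoElts];
+}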
+
+// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result
+// needs promotion, so does the argument X. Note that Y, if needed, will be
+// handled during operand promotion.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Unary operation where the result and the operand have PromoteFloat type
+// action. Construct a new SDNode with the promoted float value of the old
+// operand.
+SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op);
+}
+
+// Binary operations where the result and both operands have PromoteFloat type
+// action. Construct a new SDNode with the promoted float values of the old
+// operands.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ SDValue Op2 = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2);
+}
+
+// Promote the Float (first) operand and retain the Integer (second) operand
+SDValue DAGTypeLegalizer::PromoteFloatRes_ExpOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, N->getValueType(1)}, Op);
+
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+// Explicit operation to reduce precision. Reduce the value to half precision
+// and promote it back to the legal type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = Op->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ // Round promoted float to desired precision
+ SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op);
+ // Promote it back to the legal output type
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+
+ // Load the value as an integer value with the same number of bits.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue newL = DAG.getLoad(
+ L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N),
+ L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT,
+ L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo());
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
+
+ // Convert the integer value to the desired FP type
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
+}
+
+// Construct a new SELECT node with the promoted true- and false- values.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0),
+ N->getOperand(0), TrueVal, FalseVal);
+}
+
+// Construct a new SELECT_CC node with the promoted true- and false- values.
+// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ TrueVal.getNode()->getValueType(0), N->getOperand(0),
+ N->getOperand(1), TrueVal, FalseVal, N->getOperand(4));
+}
+
+// Construct an SDNode that converts the SINT or UINT operand to the promoted
+// float type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0));
+ // Round the value to the desired precision (that of the source type).
+ return DAG.getNode(
+ ISD::FP_EXTEND, DL, NVT,
+ DAG.getNode(ISD::FP_ROUND, DL, VT, NV,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)));
+}
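+
+// A sketch of the round trip above, one step up the precision ladder: the
+// value an i64 -> float conversion would produce, computed entirely in a
+// double "register" by rounding through float and extending back. The helper
+// name is illustrative only.
+static inline double IntToFloatPromotedSketch(long long X) {
+ double Wide = (double)X; // XINT_TO_FP in the promoted type
+ float Narrow = (float)Wide; // FP_ROUND to the original precision
+ return (double)Narrow; // FP_EXTEND back to the promoted type
+}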
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE(SDNode *N) {
+ // Expand and promote recursively.
+ // TODO: This is non-optimal, but dealing with the concurrently happening
+ // vector-legalization is non-trivial. We could do something similar to
+ // PromoteFloatRes_EXTRACT_VECTOR_ELT here.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE_SEQ(SDNode *N) {
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ AtomicSDNode *AM = cast<AtomicSDNode>(N);
+ SDLoc SL(N);
+
+ SDValue CastVal = BitConvertToInteger(AM->getVal());
+ EVT CastVT = CastVal.getValueType();
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
+ DAG.getVTList(CastVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+
+ SDValue Result = NewAtomic;
+
+ if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+ NewAtomic);
+ }
+
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Half Result Soft Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n");
+ SDValue R = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftPromoteHalfResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to soft promote this operator's "
+ "result!");
+
+ case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = SoftPromoteHalfRes_FCOPYSIGN(N); break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCBRT:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FREEZE:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break;
+
+ case ISD::FMA: // FMA is the same as FMAD
+ case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break;
+
+ case ISD::FPOWI:
+ case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break;
+
+ case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ R = SoftPromoteHalfRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = SoftPromoteHalfRes_VECREDUCE_SEQ(N);
+ break;
+ }
+
+ if (R.getNode())
+ SetSoftPromotedHalf(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ MVT::i16);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ NewOp.getValueType().getVectorElementType(), NewOp,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift it right and truncate, or extend it and shift left, if the two
+ // operands have different sizes.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
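+
+// A minimal sketch of the same mask-and-merge on raw f16 bit patterns for the
+// common case where both operands are 16 bits wide (no size adjustment
+// needed). The helper name is illustrative only.
+static inline uint16_t CopySignHalfBitsSketch(uint16_t Mag, uint16_t Sgn) {
+ const uint16_t SignBit = 1u << 15; // 1 << (LSize - 1)
+ return (uint16_t)((Mag & (SignBit - 1)) | (Sgn & SignBit));
+}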
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ auto PromotionOpcode = GetPromotionOpcode(OVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
+ Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op0);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT SVT = N->getOperand(0).getValueType();
+
+ if (N->isStrictFPOpcode()) {
+ assert(RVT == MVT::f16);
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+ }
+
+ return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), MVT::i16,
+ N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+
+ // Load the value as an integer value with the same number of bits.
+ assert(L->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension!");
+ SDValue NewL =
+ DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), MVT::i16,
+ SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), MVT::i16, L->getOriginalAlign(),
+ L->getMemOperand()->getFlags(), L->getAAInfo());
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N), Op1.getValueType(), N->getOperand(0), Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) {
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ SDValue Op3 = GetSoftPromotedHalf(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), Op2.getValueType(),
+ N->getOperand(0), N->getOperand(1), Op2, Op3,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ // Round the value to the softened type.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(MVT::i16);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ auto PromotionOpcode = GetPromotionOpcode(OVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
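+
+// A sketch of the promote/operate/demote round trip this lowering performs at
+// run time. HalfBitsToFloatSketch and FloatToHalfBitsSketch are hypothetical
+// stand-ins for the FP16_TO_FP / FP_TO_FP16 conversions selected by
+// GetPromotionOpcode; only their declarations are given here.
+float HalfBitsToFloatSketch(uint16_t Bits);
+uint16_t FloatToHalfBitsSketch(float Val);
+static inline uint16_t SoftFAddHalfSketch(uint16_t A, uint16_t B) {
+ float PromotedA = HalfBitsToFloatSketch(A); // promote operand 0 to f32
+ float PromotedB = HalfBitsToFloatSketch(B); // promote operand 1 to f32
+ float Result = PromotedA + PromotedB; // the FADD in the promoted type
+ return FloatToHalfBitsSketch(Result); // back to f16 bits held in an i16
+}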
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) {
+ // Expand and soften recursively.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) {
+ // Expand and soften.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Half Operand Soft Promotion
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+ // Nodes that use a promotion-requiring floating point operand, but don't
+ // produce a soft promotion-requiring floating point result, need to be
+ // legalized to use the soft promoted float operand. Nodes that produce at
+ // least one soft promotion-requiring floating point result have their
+ // operands legalized as a part of SoftPromoteHalfResult.
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to soft promote this operator's "
+ "operand!");
+
+ case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break;
+ case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break;
+ case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+ case ISD::STACKMAP:
+ Res = SoftPromoteHalfOp_STACKMAP(N, OpNo);
+ break;
+ case ISD::PATCHPOINT:
+ Res = SoftPromoteHalfOp_PATCHPOINT(N, OpNo);
+ break;
+ }
+
+ if (!Res.getNode())
+ return false;
+
+ assert(Res.getNode() != N && "Expected a new node!");
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) {
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Only operand 1 needs promotion here");
+ SDValue Op1 = N->getOperand(1);
+ EVT RVT = Op1.getValueType();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType());
+
+ Op1 = GetSoftPromotedHalf(Op1);
+ Op1 = DAG.getNode(GetPromotionOpcode(RVT, NVT), dl, NVT, Op1);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0),
+ Op1);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
+
+ if (IsStrict) {
+ assert(SVT == MVT::f16);
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
+ {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+
+ return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT SVT = Op.getValueType();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+
+ Op = GetSoftPromotedHalf(Op);
+
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT SVT = Op.getValueType();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+
+ Op = GetSoftPromotedHalf(Op);
+
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 0 && "Can only soften the comparison values");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT SVT = Op0.getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), SVT);
+
+ Op0 = GetSoftPromotedHalf(Op0);
+ Op1 = GetSoftPromotedHalf(Op1);
+
+ // Promote to the larger FP type.
+ auto PromotionOpcode = GetPromotionOpcode(SVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1,
+ N->getOperand(2), N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDLoc dl(N);
+
+ EVT SVT = Op0.getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+
+ Op0 = GetSoftPromotedHalf(Op0);
+ Op1 = GetSoftPromotedHalf(Op1);
+
+ // Promote to the larger FP type.
+ auto PromotionOpcode = GetPromotionOpcode(SVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
+
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc dl(N);
+
+ assert(!ST->isTruncatingStore() && "Unexpected truncating store.");
+ SDValue Promoted = GetSoftPromotedHalf(Val);
+ return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Op = N->getOperand(OpNo);
+ NewOps[OpNo] = GetSoftPromotedHalf(Op);
+ SDValue NewNode =
+ DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we replaced the node ourselves.
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_PATCHPOINT(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo >= 7);
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Op = N->getOperand(OpNo);
+ NewOps[OpNo] = GetSoftPromotedHalf(Op);
+ SDValue NewNode =
+ DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we replaced the node ourselves.
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 000000000000..df5878fcdf2e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,5977 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on PowerPC).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
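+
+// A minimal sketch of the two strategies on concrete values: an i8 add done in
+// an i32 register (promotion), and an i64 add done as two i32 adds with an
+// explicit carry (expansion). The helper names are illustrative only.
+#include <cstdint>
+static inline uint8_t AddI8PromotedSketch(uint8_t A, uint8_t B) {
+ uint32_t Wide = (uint32_t)A + (uint32_t)B; // compute in the wider type
+ return (uint8_t)Wide; // only the low 8 bits are meaningful
+}
+static inline uint64_t AddI64ExpandedSketch(uint32_t ALo, uint32_t AHi,
+ uint32_t BLo, uint32_t BHi) {
+ uint32_t Lo = ALo + BLo;
+ uint32_t Hi = AHi + BHi + (Lo < ALo); // carry out of the low half
+ return ((uint64_t)Hi << 32) | Lo;
+}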
+
+#include "LegalizeTypes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to promote this operator!");
+ case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::PARITY:
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::VP_SELECT:
+ case ISD::VP_MERGE:
+ Res = PromoteIntRes_Select(N);
+ break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SMIN:
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::UMIN:
+ case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
+
+ case ISD::SHL:
+ case ISD::VP_SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA:
+ case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL:
+ case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break;
+ case ISD::VP_TRUNCATE:
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+ case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = PromoteIntRes_INSERT_SUBVECTOR(N); break;
+ case ISD::VECTOR_REVERSE:
+ Res = PromoteIntRes_VECTOR_REVERSE(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VECTOR_SPLICE:
+ Res = PromoteIntRes_VECTOR_SPLICE(N); break;
+ case ISD::VECTOR_INTERLEAVE:
+ case ISD::VECTOR_DEINTERLEAVE:
+ Res = PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(N);
+ return;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N);
+ break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_ScalarOp(N);
+ break;
+ case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
+
+ case ISD::FP_TO_BF16:
+ case ISD::FP_TO_FP16:
+ Res = PromoteIntRes_FP_TO_FP16_BF16(N);
+ break;
+
+ case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::VP_AND:
+ case ISD::VP_OR:
+ case ISD::VP_XOR:
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::VP_SMIN:
+ case ISD::VP_SMAX:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::VP_SDIV:
+ case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+
+ case ISD::VP_UMIN:
+ case ISD::VP_UMAX:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::VP_UDIV:
+ case ISD::VP_UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE:
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: Res = PromoteIntRes_UADDSUBO_CARRY(N, ResNo); break;
+
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
+
+ case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
+
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
+ break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Res = PromoteIntRes_VECREDUCE(N);
+ break;
+
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntRes_VP_REDUCE(N);
+ break;
+
+ case ISD::FREEZE:
+ Res = PromoteIntRes_FREEZE(N);
+ break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Res = PromoteIntRes_Rotate(N);
+ break;
+
+ case ISD::FSHL:
+ case ISD::FSHR:
+ Res = PromoteIntRes_FunnelShift(N);
+ break;
+
+ case ISD::IS_FPCLASS:
+ Res = PromoteIntRes_IS_FPCLASS(N);
+ break;
+ case ISD::FFREXP:
+ Res = PromoteIntRes_FFREXP(N);
+ break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
+ unsigned ResNo) {
+ if (ResNo == 1) {
+ assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
+ N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
+ N->getMemOperand());
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res.getValue(1);
+ }
+
+ // Op2 is used for the comparison and thus must be extended according to the
+ // target's atomic operations. Op3 is merely stored and so can be left alone.
+ SDValue Op2 = N->getOperand(2);
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ switch (TLI.getExtendForAtomicCmpSwapArg()) {
+ case ISD::SIGN_EXTEND:
+ Op2 = SExtPromotedInteger(Op2);
+ break;
+ case ISD::ZERO_EXTEND:
+ Op2 = ZExtPromotedInteger(Op2);
+ break;
+ case ISD::ANY_EXTEND:
+ Op2 = GetPromotedInteger(Op2);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+
+ SDVTList VTs =
+ DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
+ N->getBasePtr(), Op2, Op3, N->getMemOperand());
+ // Update the use to N with the newly created Res.
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
+ ReplaceValueWith(SDValue(N, i), Res.getValue(i));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeSoftPromoteHalf:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp));
+ case TargetLowering::TypePromoteFloat: {
+ // Convert the promoted float by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp));
+ break;
+ }
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
+ case TargetLowering::TypeSplitVector: {
+ if (!NOutVT.isVector()) {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ break;
+ }
+ case TargetLowering::TypeWidenVector:
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) {
+ SDValue Res =
+ DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+
+ // For big endian targets we need to shift the casted value or the
+ // interesting bits will end up at the wrong place.
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
+ assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
+ Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
+ DAG.getShiftAmountConstant(ShiftAmt, NOutVT, dl));
+ }
+ return Res;
+ }
+ // If the output type is also a vector and widening it to the same size
+ // as the widened input type would be a legal type, we can widen the bitcast
+ // and handle the promotion after.
+ if (NOutVT.isVector()) {
+ TypeSize WidenInSize = NInVT.getSizeInBits();
+ TypeSize OutSize = OutVT.getSizeInBits();
+ if (WidenInSize.hasKnownScalarFactor(OutSize)) {
+ unsigned Scale = WidenInSize.getKnownScalarFactor(OutSize);
+ EVT WideOutVT =
+ EVT::getVectorVT(*DAG.getContext(), OutVT.getVectorElementType(),
+ OutVT.getVectorElementCount() * Scale);
+ if (isTypeLegal(WideOutVT)) {
+ InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
+ DAG.getVectorIdxConstant(0, dl));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
+ }
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
+ SDValue V = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::FREEZE, SDLoc(N),
+ V.getValueType(), V);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ // If the larger BSWAP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. We only do this for scalars since we have a shuffle
+ // based lowering for vectors in LegalizeVectorOps.
+ if (!OVT.isVector() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::BSWAP, NVT)) {
+ if (SDValue Res = TLI.expandBSWAP(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
+ }
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+}
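+
+// A sketch of the shift-after-swap trick above, using the GCC/Clang
+// __builtin_bswap32 intrinsic: swap in the promoted i32 type, then shift the
+// interesting bytes back down by DiffBits (here 32 - 16 = 16). The helper
+// name is illustrative only.
+static inline uint16_t BSwap16ViaI32Sketch(uint16_t X) {
+ uint32_t Swapped = __builtin_bswap32((uint32_t)X);
+ return (uint16_t)(Swapped >> 16);
+}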
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ // If the larger BITREVERSE isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. We only do this for scalars since we have a shuffle
+ // based lowering for vectors in LegalizeVectorOps.
+ if (!OVT.isVector() && OVT.isSimple() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::BITREVERSE, NVT)) {
+ if (SDValue Res = TLI.expandBITREVERSE(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
+ }
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ SDLoc dl(N);
+ // Zero-extend things like i1 and sign-extend everything else. In theory it
+ // shouldn't matter which one we pick, but this tends to give better code.
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+
+ // If the larger CTLZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) {
+ if (SDValue Result = TLI.expandCTLZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(
+ ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
+ NVT));
+}
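+
+// A sketch of the subtract-the-extra-bits step above, using the GCC/Clang
+// __builtin_clz intrinsic: count in the promoted i32 type, then subtract the
+// 16 extra leading zeros introduced by zero extension. The helper name is
+// illustrative only.
+static inline unsigned CountLeadingZeros16ViaI32Sketch(uint16_t X) {
+ if (X == 0)
+ return 16; // CTLZ of zero in the original width
+ return (unsigned)__builtin_clz((uint32_t)X) - 16;
+}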
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If the larger CTPOP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to
+ // TargetLowering.
+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) {
+ if (SDValue Result = TLI.expandCTPOP(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result);
+ return Result;
+ }
+ }
+
+ // Zero extend to the promoted type and do the count or parity there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ // If the larger CTTZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. Don't expand if we can use CTPOP or CTLZ expansion on the
+ // larger type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) &&
+ !TLI.isOperationLegal(ISD::CTPOP, NVT) &&
+ !TLI.isOperationLegal(ISD::CTLZ, NVT)) {
+ if (SDValue Result = TLI.expandCTTZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
+ OVT.getScalarSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+}
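+
+// A sketch of the TopBit trick above, using the GCC/Clang __builtin_ctz
+// intrinsic: OR in the bit just above the original width so that a zero input
+// yields 16, then count in the promoted i32 type. The helper name is
+// illustrative only.
+static inline unsigned CountTrailingZeros16ViaI32Sketch(uint16_t X) {
+ return (unsigned)__builtin_ctz((uint32_t)X | 0x10000u);
+}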
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // If the input also needs to be promoted, do that first so we can get a
+ // good idea of the output type.
+ if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue In = GetPromotedInteger(Op0);
+
+ // If the new type is larger than NVT, use it. We probably won't need to
+ // promote it again.
+ EVT SVT = In.getValueType().getScalarType();
+ if (SVT.bitsGE(NVT)) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
+ return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
+ }
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ SDLoc dl(N);
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
+ NewOpc = ISD::VP_FP_TO_SINT;
+
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else if (NewOpc == ISD::VP_FP_TO_SINT || NewOpc == ISD::VP_FP_TO_UINT) {
+ Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2)});
+ } else {
+ Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+ }
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ //
+ // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
+ // before legalization: fp-to-uint16, 65534. -> 0xfffe
+ // after legalization: fp-to-sint32, 65534. -> 0x0000fffe
+ return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT ||
+ N->getOpcode() == ISD::VP_FP_TO_UINT)
+ ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
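The AssertZext on the promoted result is justified by the NOTE above: whenever the original FP_TO_UINT was defined, doing the conversion through the wider signed opcode produces the zero-extended value. A standalone sketch with the same concrete numbers, using plain C++ casts in place of the DAG nodes:

#include <cassert>
#include <cstdint>

int main() {
  double V = 65534.0;
  // fp_to_uint i16 of 65534.0 is 0xfffe; doing the conversion as a wider
  // fp_to_sint i32 yields 0x0000fffe, i.e. the same value zero extended.
  int32_t Promoted = static_cast<int32_t>(V);
  assert(Promoted == 0x0000FFFE);
  // Truncating back recovers the original i16 result.
  assert(static_cast<uint16_t>(Promoted) == 0xFFFEu);
  return 0;
}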
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
+ // Promote the result type, while keeping the original width in Op1.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension, unless this is a VP_*_EXTEND.
+ if (NVT == Res.getValueType() && N->getNumOperands() == 1) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ if (N->getNumOperands() != 1) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ SDLoc dl(N);
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getMemoryVT(), N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
+ SDLoc dl(N);
+ SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
+ N->getOffset(), N->getMask(), ExtPassThru,
+ N->getMemoryVT(), N->getMemOperand(),
+ N->getAddressingMode(), ExtType,
+ N->isExpandingLoad());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ assert(NVT == ExtPassThru.getValueType() &&
+ "Gather result type and the passThru argument type should be the same");
+
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
+ N->getIndex(), N->getScale() };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand(), N->getIndexType(),
+ ExtType);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Change the return type of the boolean result while obeying
+ // getSetCCResultType.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT VT = N->getValueType(0);
+ EVT SVT = getSetCCResultType(VT);
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
+ unsigned NumOps = N->getNumOperands();
+ assert(NumOps <= 3 && "Too many operands");
+ if (NumOps == 3)
+ Ops[2] = N->getOperand(2);
+
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
+ ArrayRef(Ops, NumOps));
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ // Convert to the expected type.
+ return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
+ // If the promoted type is legal, we can convert this to:
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB|SHL]SAT
+ // 4. L/ASHR by M-N
+ // Else it is more efficient to convert this to a min and a max
+ // operation in the higher precision arithmetic.
+ SDLoc dl(N);
+ SDValue Op1 = N->getOperand(0);
+ SDValue Op2 = N->getOperand(1);
+ unsigned OldBits = Op1.getScalarValueSizeInBits();
+
+ unsigned Opcode = N->getOpcode();
+ bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
+
+ SDValue Op1Promoted, Op2Promoted;
+ if (IsShift) {
+ Op1Promoted = GetPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ Op1Promoted = ZExtPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else {
+ Op1Promoted = SExtPromotedInteger(Op1);
+ Op2Promoted = SExtPromotedInteger(Op2);
+ }
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned NewBits = PromotedType.getScalarSizeInBits();
+
+ if (Opcode == ISD::UADDSAT) {
+ APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Add =
+ DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ }
+
+ // USUBSAT can always be promoted as long as we have zero-extended the args.
+ if (Opcode == ISD::USUBSAT)
+ return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
+ Op2Promoted);
+
+ // Shift cannot use a min/max expansion; we can't detect overflow if all of
+ // the bits have been shifted out.
+ if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::SSHLSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::USHLSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition, subtraction or left shift");
+ }
+
+ unsigned SHLAmount = NewBits - OldBits;
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(SHLAmount, PromotedType, dl);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ if (!IsShift)
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ }
+
+ unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+ APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+ APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Result =
+ DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ return Result;
+}
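The min/max fallback at the end is the easiest path to check in isolation: do the arithmetic at the promoted width, then clamp to the signed range of the original type. A standalone sketch of SADDSAT on i8 carried out in i32, assuming ordinary two's-complement C++ arithmetic in place of the ISD nodes:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Illustrative helper mirroring the SMIN/SMAX clamp path above.
int8_t SAddSat8ViaI32(int8_t A, int8_t B) {
  int32_t Wide = static_cast<int32_t>(A) + static_cast<int32_t>(B); // ADD
  Wide = std::min<int32_t>(Wide, INT8_MAX); // SMIN against SatMax
  Wide = std::max<int32_t>(Wide, INT8_MIN); // SMAX against SatMin
  return static_cast<int8_t>(Wide);
}

int main() {
  assert(SAddSat8ViaI32(100, 100) == 127);    // clamps at the signed maximum
  assert(SAddSat8ViaI32(-100, -100) == -128); // clamps at the signed minimum
  assert(SAddSat8ViaI32(5, -3) == 2);         // in-range sums are untouched
  return 0;
}

The shift-based path taken when the saturating operation is legal at the promoted width avoids these two extra compares by moving the saturation point up instead.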
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
+ // Can just promote the operands and then continue with the operation.
+ SDLoc dl(N);
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed =
+ N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating =
+ N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT OldType = N->getOperand(0).getValueType();
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned DiffSize =
+ PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
+
+ if (Saturating) {
+ // Promoting the operand and result values changes the saturation width,
+ // which extends the values that we clamp to on saturation. This could be
+ // resolved by shifting one of the operands the same amount, which would
+ // also shift the result we compare against, then shifting back.
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getShiftAmountConstant(DiffSize, PromotedType, dl));
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result,
+ DAG.getShiftAmountConstant(DiffSize, PromotedType, dl));
+ }
+ return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
+ N->getOperand(2));
+}
+
+static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
+ unsigned SatW, bool Signed,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT VT = V.getValueType();
+ unsigned VTW = VT.getScalarSizeInBits();
+
+ if (!Signed) {
+ // Saturate to the unsigned maximum by getting the minimum of V and the
+ // maximum.
+ return DAG.getNode(ISD::UMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
+ dl, VT));
+ }
+
+ // Saturate to the signed maximum (the low SatW - 1 bits) by taking the
+ // signed minimum of it and V.
+ V = DAG.getNode(ISD::SMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
+ dl, VT));
+ // Saturate to the signed minimum (all but the low SatW - 1 bits set) by
+ // taking the signed maximum of it and V.
+ V = DAG.getNode(ISD::SMAX, dl, VT, V,
+ DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
+ dl, VT));
+ return V;
+}
+
+static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
+ unsigned Scale, const TargetLowering &TLI,
+ SelectionDAG &DAG, unsigned SatW = 0) {
+ EVT VT = LHS.getValueType();
+ unsigned VTSize = VT.getScalarSizeInBits();
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
+
+ SDLoc dl(N);
+ // Widen the types by a factor of two. This is guaranteed to expand, since it
+ // will always have enough high bits in the LHS to shift into.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ LHS = DAG.getExtOrTrunc(Signed, LHS, dl, WideVT);
+ RHS = DAG.getExtOrTrunc(Signed, RHS, dl, WideVT);
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
+ DAG);
+ assert(Res && "Expanding DIVFIX with wide type failed?");
+ if (Saturating) {
+ // If the caller has told us to saturate at something less, use that width
+ // instead of the type before doubling. However, it cannot be more than
+ // what we just widened!
+ assert(SatW <= VTSize &&
+ "Tried to saturate to more than the original type?");
+ Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
+ TLI, DAG);
+ }
+ return DAG.getZExtOrTrunc(Res, dl, VT);
+}
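Widening by a factor of two before calling expandFixedPointDiv works because a fixed-point division is essentially (LHS << Scale) / RHS, and doubling the width guarantees the pre-shift cannot lose bits. A standalone sketch of the unsigned case for i16 with Scale fractional bits, under the simplifying assumption that no saturation is needed (helper name is illustrative only):

#include <cassert>
#include <cstdint>

// Illustrative helper: udivfix on i16 evaluated in i32 so the shifted
// dividend always fits.
uint16_t UDivFix16ViaI32(uint16_t LHS, uint16_t RHS, unsigned Scale) {
  uint32_t Wide = (static_cast<uint32_t>(LHS) << Scale) / RHS;
  return static_cast<uint16_t>(Wide); // truncate back, as getZExtOrTrunc does
}

int main() {
  // In Q8.8 fixed point, 3.0 / 2.0 == 1.5: 0x0300 / 0x0200 -> 0x0180.
  assert(UDivFix16ViaI32(0x0300, 0x0200, 8) == 0x0180);
  return 0;
}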
+
+SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned Scale = N->getConstantOperandVal(2);
+
+ // If the type is already legal and the operation is legal in that type, we
+ // should not early expand.
+ if (TLI.isTypeLegal(PromotedType)) {
+ TargetLowering::LegalizeAction Action =
+ TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
+ if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
+ unsigned Diff = PromotedType.getScalarSizeInBits() -
+ N->getValueType(0).getScalarSizeInBits();
+ if (Saturating)
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getShiftAmountConstant(Diff, PromotedType, dl));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
+ DAG.getShiftAmountConstant(Diff, PromotedType, dl));
+ return Res;
+ }
+ }
+
+ // See if we can perform the division in this type without expanding.
+ if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
+ Op2Promoted, Scale, DAG)) {
+ if (Saturating)
+ Res = SaturateWidenedDIVFIX(Res, dl,
+ N->getValueType(0).getScalarSizeInBits(),
+ Signed, TLI, DAG);
+ return Res;
+ }
+ // If we cannot, expand it to twice the type width. If we are saturating, give
+ // it the original width as a saturating width so we don't need to emit
+ // two saturations.
+ return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
+ N->getValueType(0).getScalarSizeInBits());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ SDLoc dl(N);
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
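The overflow test here has a direct scalar analogue: compute the sum at the wider width and compare it against the sign extension of its truncation back to the original width. A standalone sketch for SADDO on i8, with plain casts standing in for SIGN_EXTEND_INREG and SETNE (two's-complement narrowing assumed, well defined since C++20):

#include <cassert>
#include <cstdint>

// Illustrative helper returning the overflow flag and the wrapped sum.
bool SAddO8ViaI32(int8_t A, int8_t B, int8_t &Sum) {
  int32_t Res = static_cast<int32_t>(A) + static_cast<int32_t>(B); // wide ADD
  int32_t Ofl = static_cast<int8_t>(Res); // SIGN_EXTEND_INREG from i8
  Sum = static_cast<int8_t>(Res);
  return Ofl != Res;                      // SETNE: overflowed iff they differ
}

int main() {
  int8_t S;
  assert(SAddO8ViaI32(100, 100, S) == true); // 200 does not fit in i8
  assert(SAddO8ViaI32(100, 27, S) == false && S == 127);
  return 0;
}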
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) {
+ SDValue Mask = N->getOperand(0);
+
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ EVT SVT = getSetCCResultType(InVT);
+
+ // If we got back a type that needs to be promoted, this likely means the
+ // input type also needs to be promoted. So get the promoted type for
+ // the input and try the query again.
+ if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) {
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ SVT = getSetCCResultType(InVT);
+ } else {
+ // Input type isn't promoted, just use the default promoted type.
+ SVT = NVT;
+ }
+ }
+
+ SDLoc dl(N);
+ assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC;
+ if (N->isStrictFPOpcode()) {
+ SDVTList VTs = DAG.getVTList({SVT, MVT::Other});
+ SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3)};
+ SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers, N->getFlags());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
+ } else
+ SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2), N->getFlags());
+
+ // Convert to the expected type.
+ return DAG.getSExtOrTrunc(SetCC, dl, NVT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Arg = N->getOperand(0);
+ SDValue Test = N->getOperand(1);
+ EVT NResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT VT = N->getValueType(0);
+
+ SDLoc dl(N);
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, NVT), N->getOperand(0));
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return Res.getValue(1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ if (N->getOpcode() != ISD::VP_SHL)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ if (N->getNumOperands() == 2)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ if (N->getNumOperands() == 2)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ if (N->getNumOperands() == 2)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
+ // It doesn't matter if we sign extend or zero extend the inputs. So do
+ // whatever is best for the target.
+ SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ if (N->getOpcode() != ISD::VP_ASHR)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ if (N->getOpcode() != ISD::VP_LSHR)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
+ // Lower the rotate to shifts and ORs which can be promoted.
+ SDValue Res = TLI.expandROT(N, true /*AllowVectorOps*/, DAG);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
+ SDValue Hi = GetPromotedInteger(N->getOperand(0));
+ SDValue Lo = GetPromotedInteger(N->getOperand(1));
+ SDValue Amt = N->getOperand(2);
+ if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+ Amt = ZExtPromotedInteger(Amt);
+ EVT AmtVT = Amt.getValueType();
+
+ SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT VT = Lo.getValueType();
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
+
+ // Amount has to be interpreted modulo the old bit width.
+ Amt = DAG.getNode(ISD::UREM, DL, AmtVT, Amt,
+ DAG.getConstant(OldBits, DL, AmtVT));
+
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
+ Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amt);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
+ return Res;
+ }
+
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
+ Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amt = DAG.getNode(ISD::ADD, DL, AmtVT, Amt, ShiftOffset);
+
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
+}
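The 'double shift' form in the first branch is easiest to check with concrete widths: concatenate the two narrow operands in the wide register, shift by the amount modulo the old width, and take the high half. A standalone sketch of FSHL on i8 evaluated in i32, following the fshl formula from the comment above (helper name is illustrative only):

#include <cassert>
#include <cstdint>

// Illustrative helper: fshl(x, y, z) on i8, as ((x:y) << (z % 8)) >> 8 in i32.
uint8_t Fshl8ViaI32(uint8_t X, uint8_t Y, unsigned Z) {
  uint32_t Concat = (static_cast<uint32_t>(X) << 8) | Y; // aext(x)<<bw | zext(y)
  return static_cast<uint8_t>((Concat << (Z % 8)) >> 8);
}

int main() {
  assert(Fshl8ViaI32(0x81, 0x40, 1) == 0x02); // (0x81 << 1) | (0x40 >> 7)
  assert(Fshl8ViaI32(0x81, 0x40, 0) == 0x81); // a shift of zero is the identity
  return 0;
}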
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector: {
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ ElementCount NumElts = InVT.getVectorElementCount();
+ assert(NumElts == NVT.getVectorElementCount() &&
+ "Dst and Src must have the same number of elements");
+ assert(isPowerOf2_32(NumElts.getKnownMinValue()) &&
+ "Promoted vector type must be a power of two");
+
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts.divideCoefficientBy(2));
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+ } else {
+ assert(N->getOpcode() == ISD::VP_TRUNCATE &&
+ "Expected VP_TRUNCATE opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo);
+ EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+ // TODO: VP_TRUNCATE also needs to handle the TypeWidenVector case for some
+ // targets.
+ case TargetLowering::TypeWidenVector: {
+ SDValue WideInOp = GetWidenedVector(InOp);
+
+ // Truncate widened InOp.
+ unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getValueType(0).getScalarType(), NumElem);
+ SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
+
+ // Zero extend so that the elements are of the same type as those of NVT.
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
+ NumElem);
+ SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
+
+ // Extract the low NVT subvector.
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
+ }
+ }
+
+ // Truncate to NVT instead of VT
+ if (N->getOpcode() == ISD::VP_TRUNCATE)
+ return DAG.getNode(ISD::VP_TRUNCATE, dl, NVT, Res, N->getOperand(1),
+ N->getOperand(2));
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ SDLoc dl(N);
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
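The unsigned variant mirrors the signed one above but checks the zero extension instead: any bit set above the original width means the narrow addition wrapped. A standalone sketch for UADDO on i8:

#include <cassert>
#include <cstdint>

// Illustrative helper returning the overflow flag and the wrapped sum.
bool UAddO8ViaI32(uint8_t A, uint8_t B, uint8_t &Sum) {
  uint32_t Res = static_cast<uint32_t>(A) + static_cast<uint32_t>(B); // wide ADD
  Sum = static_cast<uint8_t>(Res);
  return (Res & 0xFFu) != Res; // getZeroExtendInReg + SETNE
}

int main() {
  uint8_t S;
  assert(UAddO8ViaI32(200, 100, S) == true); // 300 wraps in i8
  assert(UAddO8ViaI32(200, 55, S) == false && S == 255);
  return 0;
}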
+
+// Handle promotion for the ADDE/SUBE/UADDO_CARRY/USUBO_CARRY nodes. Notice that
+// the third operand of ADDE/SUBE nodes is a carry flag, which differs from
+// the UADDO_CARRY/USUBO_CARRY nodes whose third operand is a carry Boolean.
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // We need to sign-extend the operands so the carry value computed by the
+ // wide operation will be equivalent to the carry value computed by the
+ // narrow operation.
+ // An UADDO_CARRY can generate carry only if any of the operands has its
+ // most significant bit set. Sign extension propagates the most significant
+ // bit into the higher bits which means the extra bit that the narrow
+ // addition would need (i.e. the carry) will be propagated through the higher
+ // bits of the wide addition.
+ // A USUBO_CARRY can generate borrow only if LHS < RHS and this property will
+ // be preserved by sign extension.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+
+ EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
+
+ // Do the arithmetic in the wide type.
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
+ LHS, RHS, N->getOperand(2));
+
+ // Update the users of the original carry/borrow value.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ return SDValue(Res.getNode(), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
+ assert(ResNo == 1 && "Don't know how to promote other results yet.");
+ return PromoteIntRes_Overflow(N);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If a larger ABS or SMAX isn't supported by the target, try to expand now.
+ // If we expand later we'll end up sign extending more than just the sra input
+ // in the sra+xor+sub expansion.
+ if (!OVT.isVector() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) &&
+ !TLI.isOperationLegal(ISD::SMAX, NVT)) {
+ if (SDValue Res = TLI.expandABS(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res);
+ }
+
+ SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ SDLoc DL(N);
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
+
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred if the high part is non-zero.
+ unsigned Shift = SmallVT.getScalarSizeInBits();
+ SDValue Hi =
+ DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getShiftAmountConstant(Shift, Mul.getValueType(), DL));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, DL, Hi.getValueType()),
+ ISD::SETNE);
+ } else {
+ // Signed overflow occurred if the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
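When the inputs are only half the promoted width, as in this i8-in-i32 sketch, the wide multiply itself can never overflow, so the high-half check is the whole overflow test; the extra OR with the wide multiply's own overflow bit matters only when the promoted type is less than twice the original width. Plain C++ standing in for the SRL/SETCC sequence above:

#include <cassert>
#include <cstdint>

// Illustrative helper: umulo on i8 via a full i32 product.
bool UMulO8ViaI32(uint8_t A, uint8_t B, uint8_t &Prod) {
  uint32_t Mul = static_cast<uint32_t>(A) * static_cast<uint32_t>(B);
  Prod = static_cast<uint8_t>(Mul);
  return (Mul >> 8) != 0; // SRL by the narrow width, then SETNE against zero
}

int main() {
  uint8_t P;
  assert(UMulO8ViaI32(16, 16, P) == true); // 256 does not fit in i8
  assert(UMulO8ViaI32(15, 17, P) == false && P == 255);
  return 0;
}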
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
+ EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ const APInt &MulImm = N->getConstantOperandAPInt(0);
+ return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
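The reassembly loop above is the usual zero-extend, shift and OR pattern for gluing register-sized parts back into one promoted value, with a reverse for big-endian part order. A standalone sketch for two 16-bit parts combined into a 32-bit value, assuming little-endian part order:

#include <cassert>
#include <cstdint>

// Illustrative helper combining little-endian parts, mirroring the loop above.
uint32_t AssembleParts(uint16_t Part0, uint16_t Part1) {
  uint32_t Res = Part0;                      // ZERO_EXTEND of the first part
  Res |= static_cast<uint32_t>(Part1) << 16; // SHL into position, then OR
  return Res;
}

int main() {
  assert(AssembleParts(0xBEEF, 0xDEAD) == 0xDEADBEEFu);
  return 0;
}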
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ report_fatal_error("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);
+ break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_ScalarOp(N);
+ break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::VP_SETCC:
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::VP_SIGN_EXTEND: Res = PromoteIntOp_VP_SIGN_EXTEND(N); break;
+ case ISD::VP_SINT_TO_FP:
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
+ OpNo); break;
+ case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
+ OpNo); break;
+ case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
+ OpNo); break;
+ case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
+ OpNo); break;
+ case ISD::VP_TRUNCATE:
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::BF16_TO_FP:
+ case ISD::FP16_TO_FP:
+ case ISD::VP_UINT_TO_FP:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+ case ISD::VP_ZERO_EXTEND: Res = PromoteIntOp_VP_ZERO_EXTEND(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+
+ case ISD::FSHL:
+ case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break;
+
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY:
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: Res = PromoteIntOp_ADDSUBO_CARRY(N, OpNo); break;
+
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
+
+ case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
+ case ISD::FPOWI:
+ case ISD::STRICT_FPOWI:
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: Res = PromoteIntOp_ExpOp(N); break;
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntOp_VP_REDUCE(N, OpNo);
+ break;
+
+ case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
+ case ISD::STACKMAP:
+ Res = PromoteIntOp_STACKMAP(N, OpNo);
+ break;
+ case ISD::PATCHPOINT:
+ Res = PromoteIntOp_PATCHPOINT(N, OpNo);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = PromoteIntOp_VP_STRIDED(N, OpNo);
+ break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ const bool IsStrictFp = N->isStrictFPOpcode();
+ assert(Res.getValueType() == N->getValueType(0) &&
+ N->getNumValues() == (IsStrictFp ? 2 : 1) &&
+ "Invalid operand expansion");
+ LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: ";
+ Res.dump());
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ if (IsStrictFp)
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions. For those operations where either
+ // zero or sign extension would be valid, we ask the target which extension
+ // it would prefer.
+
+ // Signed comparisons always require sign extension.
+ if (ISD::isSignedIntSetCC(CCCode)) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ return;
+ }
+
+ assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) &&
+ "Unknown integer comparison!");
+
+ SDValue OpL = GetPromotedInteger(LHS);
+ SDValue OpR = GetPromotedInteger(RHS);
+
+ if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) {
+ // The target would prefer to promote the comparison operand with sign
+ // extension. Honor that unless the promoted values are already zero
+ // extended.
+ unsigned OpLEffectiveBits =
+ DAG.computeKnownBits(OpL).countMaxActiveBits();
+ unsigned OpREffectiveBits =
+ DAG.computeKnownBits(OpR).countMaxActiveBits();
+ if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= RHS.getScalarValueSizeInBits()) {
+ LHS = OpL;
+ RHS = OpR;
+ return;
+ }
+
+ // The promoted values aren't zero extended, use a sext_inreg.
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ return;
+ }
+
+ // Prefer to promote the comparison operand with zero extension.
+
+ // If the width of OpL/OpR excluding the duplicated sign bits is no greater
+ // than the width of LHS/RHS, we can avoid inserting a zext_inreg operation
+ // that we might not be able to remove.
+ unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(OpL);
+ unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(OpR);
+ if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= RHS.getScalarValueSizeInBits()) {
+ LHS = OpL;
+ RHS = OpR;
+ return;
+ }
+
+ // Otherwise, use zext_inreg.
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+}
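The invariant this routine preserves is that both comparison operands are extended the same way; for unsigned and equality compares either extension can be made to work, but mixing them miscompares values with the sign bit set. A standalone sketch of the failure mode being avoided (two's-complement narrowing assumed):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t A = 0xFF, B = 0xFF;  // equal i8 values with the top bit set
  uint32_t ZextA = A;                                             // 0x000000ff
  uint32_t SextB = static_cast<uint32_t>(static_cast<int8_t>(B)); // 0xffffffff
  assert(A == B);         // equal in the original type
  assert(ZextA != SextB); // mixed extensions would report "not equal"
  // Consistent sign extension (or consistent zero extension) is fine.
  uint32_t SextA = static_cast<uint32_t>(static_cast<int8_t>(A));
  assert(SextA == SextB);
  return 0;
}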
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ SDLoc dl(N);
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (eg: it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+ "Legal vector of one illegal element?");
+
+ // Promote the inserted value. The type does not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
+ // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
+ // so just promote the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::VSELECT)
+ if (SDValue Res = WidenVSELECTMask(N))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ Res, N->getOperand(1), N->getOperand(2));
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
+ Cond = PromoteTargetBoolean(Cond, OpVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ if (N->getOpcode() == ISD::SETCC)
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ ZExtPromotedInteger(N->getOperand(2))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ SDLoc dl(N);
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_SIGN_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // FIXME: There is no VP_ANY_EXTEND yet.
+ Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
+ N->getOperand(2));
+ unsigned Diff =
+ VT.getScalarSizeInBits() - N->getOperand(0).getScalarValueSizeInBits();
+ SDValue ShAmt = DAG.getShiftAmountConstant(Diff, VT, dl);
+ // FIXME: There is no VP_SIGN_EXTEND_INREG so use a pair of shifts.
+ SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShAmt, N->getOperand(1),
+ N->getOperand(2));
+ return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShAmt, N->getOperand(1),
+ N->getOperand(2));
+}
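The shl/ashr pair at the end is the classic substitute for a sign-extend-in-register when no dedicated node exists. A standalone sketch of sign-extending the low 8 bits of a 32-bit register with the same two shifts (C++20 semantics assumed for the signed conversion and arithmetic right shift):

#include <cassert>
#include <cstdint>

// Illustrative helper: SIGN_EXTEND_INREG from i8 within an i32, as SHL + SRA.
int32_t SextInRegI8(uint32_t X) {
  return static_cast<int32_t>(X << 24) >> 24; // SHL by Diff, then arithmetic SRA
}

int main() {
  assert(SextInRegI8(0x000000FFu) == -1);  // 0xff is -1 once sign extended
  assert(SextInRegI8(0x0000017Fu) == 127); // bits above the low 8 are discarded
  return 0;
}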
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ if (N->getOpcode() == ISD::VP_SINT_TO_FP)
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0)),
+ N->getOperand(1), N->getOperand(2)),
+ 0);
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ SExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ SDLoc dl(N);
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr,
+ N->getMemoryVT(), N->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ SDValue DataOp = N->getValue();
+ SDValue Mask = N->getMask();
+
+ if (OpNo == 4) {
+ // The Mask. Update in place.
+ EVT DataVT = DataOp.getValueType();
+ Mask = PromoteTargetBoolean(Mask, DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[4] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+
+ return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),
+ N->getOffset(), Mask, N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(),
+ /*IsTruncating*/ true, N->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 3 && "Only know how to promote the mask!");
+ EVT DataVT = N->getValueType(0);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
+ if (Res == N)
+ return SDValue(Res, 0);
+
+ // Update triggered CSE, do our own replacement since caller can't.
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValueType(0);
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+
+ SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
+ if (Res == N)
+ return SDValue(Res, 0);
+
+ // Update triggered CSE, do our own replacement since caller can't.
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ bool TruncateStore = N->isTruncatingStore();
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValue().getValueType();
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+ } else {
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ TruncateStore = true;
+ }
+
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(),
+ SDLoc(N), NewOps, N->getMemOperand(),
+ N->getIndexType(), TruncateStore);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ if (N->getOpcode() == ISD::VP_TRUNCATE)
+ return DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op,
+ N->getOperand(1), N->getOperand(2));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ if (N->getOpcode() == ISD::VP_UINT_TO_FP)
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0)),
+ N->getOperand(1), N->getOperand(2)),
+ 0);
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // FIXME: There is no VP_ANY_EXTEND yet.
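+  // Zero-extending the any-extended promoted value may leave stale bits
+  // between the original and the promoted width, so the VP_AND below masks the
+  // result back down to the original operand width.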
+ Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
+ N->getOperand(2));
+ APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
+ N->getOperand(0).getScalarValueSizeInBits());
+ return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT),
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDLoc DL(N);
+
+ Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(
+ DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
+ // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1 && "Don't know how to promote this operand!");
+ // Promote the rw, locality, and cache type arguments to a supported integer
+ // width.
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
+ SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ Op2, Op3, Op4),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+
+ bool IsPowI =
+ N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;
+
+  // The integer operand is the last operand in FPOWI (or FLDEXP), so the result
+  // and floating-point operand are already type legalized.
+ RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
+ : RTLIB::getLDEXP(N->getValueType(0));
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ SDValue Op = SExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+ }
+
+  // We can't just promote the exponent type in FPOWI, since we want to lower
+  // the node to a libcall and, if we promote to a type larger than
+  // sizeof(int), the libcall might not match the target's ABI. Instead we
+  // rewrite to a libcall here directly, letting makeLibCall handle promotion
+  // if the target accepts it according to shouldSignExtendTypeInLibCall.
+
+ unsigned OpOffset = IsStrict ? 1 : 0;
+ // The exponent should fit in a sizeof(int) type for the libcall to be valid.
+ assert(DAG.getLibInfo().getIntSize() ==
+ N->getOperand(1 + OpOffset).getValueType().getSizeInBits() &&
+ "POWI exponent should match with sizeof(int) when doing the libcall.");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)};
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return SDValue();
+}
+
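+/// Return the extension kind that preserves the semantics of an integer vector
+/// reduction when its element type is promoted: additive and bitwise
+/// reductions only depend on the low bits (any extend), while signed and
+/// unsigned min/max reductions need sign and zero extension respectively to
+/// preserve the ordering.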
+static unsigned getExtendForIntVecReduction(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Expected integer vector reduction");
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ return ISD::ANY_EXTEND;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ return ISD::SIGN_EXTEND;
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ return ISD::ZERO_EXTEND;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) {
+ switch (getExtendForIntVecReduction(N)) {
+ default:
+ llvm_unreachable("Impossible extension kind for integer reduction");
+ case ISD::ANY_EXTEND:
+ return GetPromotedInteger(V);
+ case ISD::SIGN_EXTEND:
+ return SExtPromotedInteger(V);
+ case ISD::ZERO_EXTEND:
+ return ZExtPromotedInteger(V);
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
+
+ EVT OrigEltVT = N->getOperand(0).getValueType().getVectorElementType();
+ EVT InVT = Op.getValueType();
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ResVT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
+
+  // An i1 vecreduce_xor is equivalent to vecreduce_add; use that instead if
+  // vecreduce_xor is not legal.
+ if (Opcode == ISD::VECREDUCE_XOR && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_XOR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_ADD, InVT))
+ Opcode = ISD::VECREDUCE_ADD;
+
+  // An i1 vecreduce_or is equivalent to vecreduce_umax; use that instead if
+  // vecreduce_or is not legal.
+ else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT)) {
+ Opcode = ISD::VECREDUCE_UMAX;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
+
+  // An i1 vecreduce_and is equivalent to vecreduce_umin; use that instead if
+  // vecreduce_and is not legal.
+ else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT)) {
+ Opcode = ISD::VECREDUCE_UMIN;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
+
+ if (ResVT.bitsGE(EltVT))
+ return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
+
+ // Result size must be >= element size. If this is not the case after
+ // promotion, also promote the result type and then truncate.
+ SDValue Reduce = DAG.getNode(Opcode, dl, EltVT, Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Reduce);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(OpNo);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) { // Mask
+ // Update in place.
+ NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType());
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+
+ Op = PromoteIntOpVectorReduction(N, Op);
+
+ NewOps[OpNo] = Op;
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = Op.getValueType().getScalarType();
+
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps);
+
+ // Result size must be >= element/start-value size. If this is not the case
+ // after promotion, also promote both the start value and result type and
+ // then truncate.
+ NewOps[0] =
+ DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0));
+ SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
+ SDValue Op = ZExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Operand = N->getOperand(OpNo);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
+ NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
+ assert(OpNo >= 7);
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Operand = N->getOperand(OpNo);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
+ NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
+ assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) ||
+ (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4));
+
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
+
+ case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::LROUND:
+ case ISD::LRINT:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LLRINT:
+ case ISD::LLROUND:
+ case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue Tmp = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
+ N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ AN->getMemOperand());
+
+ // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
+ // success simply by comparing the loaded value against the ingoing
+ // comparison.
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
+ N->getOperand(2), ISD::SETEQ);
+
+ SplitInteger(Tmp, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Success);
+ ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::UMAX:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: ExpandIntRes_UADDSUBO_CARRY(N, Lo, Hi); break;
+
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ ExpandIntRes_Rotate(N, Lo, Hi);
+ break;
+
+ case ISD::FSHL:
+ case ISD::FSHR:
+ ExpandIntRes_FunnelShift(N, Lo, Hi);
+ break;
+
+ case ISD::VSCALE:
+ ExpandIntRes_VSCALE(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ AtomicOrdering order = cast<AtomicSDNode>(Node)->getMergedOrdering();
+  // Lower to an outline atomic libcall if outline atomics are enabled, or to a
+  // sync libcall otherwise.
+ RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
+ EVT RetVT = Node->getValueType(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SmallVector<SDValue, 4> Ops;
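+  // Note the operand order: the outline atomic helpers take the value
+  // operand(s) first and the pointer last, whereas the sync libcalls take the
+  // pointer first.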
+ if (TLI.getLibcallName(LC)) {
+ Ops.append(Node->op_begin() + 2, Node->op_end());
+ Ops.push_back(Node->getOperand(1));
+ } else {
+ LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected atomic op or value type!");
+ Ops.append(Node->op_begin() + 1, Node->op_end());
+ }
+ return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
+ Node->getOperand(0));
+}
+
+/// N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+  // Expand the incoming operand to be shifted, so that we have its parts.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+  // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization
+  // split a vector shift, like this: <op1, op2> SHL <0, 2>.
+ if (!Amt) {
+ Lo = InL;
+ Hi = InH;
+ return;
+ }
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
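+  // For amounts below NVTBits the result is assembled from both halves; e.g.
+  // an i64 SHL by 8 with i32 parts gives Lo = InL << 8 and
+  // Hi = (InH << 8) | (InL >> 24).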
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt.uge(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = InL;
+ } else {
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt.uge(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, DL, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt.uge(VTBits)) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know whether the amount is >= 32 or < 32, without knowing
+/// its actual value.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarSizeInBits();
+ unsigned NVTBits = NVT.getScalarSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ SDLoc dl(N);
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
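+  // For NVTBits == 32, HighBitMask covers every bit of the shift amount except
+  // the low five: if any of those bits is known to be one the amount is >= 32,
+  // and if they are all known to be zero it is < 32.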
+
+ // If we don't know anything about the high bits, exit.
+ if (((Known.Zero|Known.One) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (Known.One.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, dl, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if (HighBitMask.isSubsetOf(Known.Zero)) {
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
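+    // E.g. for NVTBits == 32 and a shift amount of 5, Amt2 == 5 ^ 31 == 26 ==
+    // 31 - 5, so the one-bit shift plus the Amt2 shift below total 32 - 5 bits.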
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+    // When shifting right, the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ SDLoc dl(N);
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+ SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, DAG.getConstant(0, dl, ShTy),
+ ISD::SETEQ);
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getSelect(dl, NVT, isZero, InH,
+ DAG.getSelect(dl, NVT, isShort, HiS, HiL));
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+    // FIXME: If Amt is zero, the SHL of InH by AmtLack below generates an
+    // undefined result on some architectures.
+    LoS = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
+ Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
+ Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+}
+
+static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
+
+ switch (Op) {
+ default: llvm_unreachable("invalid min/max opcode");
+ case ISD::SMAX:
+ return std::make_pair(ISD::SETGT, ISD::UMAX);
+ case ISD::UMAX:
+ return std::make_pair(ISD::SETUGT, ISD::UMAX);
+ case ISD::SMIN:
+ return std::make_pair(ISD::SETLT, ISD::UMIN);
+ case ISD::UMIN:
+ return std::make_pair(ISD::SETULT, ISD::UMIN);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // If the upper halves are all sign bits, then we can perform the MINMAX on
+ // the lower half and sign-extend the result to the upper half.
+ unsigned NumBits = N->getValueType(0).getScalarSizeInBits();
+ unsigned NumHalfBits = NumBits / 2;
+ if (DAG.ComputeNumSignBits(LHS) > NumHalfBits &&
+ DAG.ComputeNumSignBits(RHS) > NumHalfBits) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+
+ Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL);
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo,
+ DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL));
+ return;
+ }
+
+ // The Lo of smin(X, -1) is LHSL if X is negative. Otherwise it's -1.
+ // The Lo of smax(X, 0) is 0 if X is negative. Otherwise it's LHSL.
+ if ((N->getOpcode() == ISD::SMAX && isNullConstant(RHS)) ||
+ (N->getOpcode() == ISD::SMIN && isAllOnesConstant(RHS))) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ SDValue HiNeg =
+ DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT);
+ if (N->getOpcode() == ISD::SMIN) {
+ Lo = DAG.getSelect(DL, NVT, HiNeg, LHSL, DAG.getConstant(-1, DL, NVT));
+ } else {
+ Lo = DAG.getSelect(DL, NVT, HiNeg, DAG.getConstant(0, DL, NVT), LHSL);
+ }
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ return;
+ }
+
+ const APInt *RHSVal = nullptr;
+ if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS))
+ RHSVal = &RHSConst->getAPIntValue();
+
+  // The high half of MIN/MAX is always just the MIN/MAX of the
+  // high halves of the operands. Expand this way if it appears profitable.
+ if (RHSVal && (N->getOpcode() == ISD::UMIN || N->getOpcode() == ISD::UMAX) &&
+ (RHSVal->countLeadingOnes() >= NumHalfBits ||
+ RHSVal->countLeadingZeros() >= NumHalfBits)) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ ISD::NodeType LoOpc;
+ ISD::CondCode CondC;
+ std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+    // We need to know whether to select the Lo part that corresponds to the
+    // 'winning' Hi part, or whether the Hi parts are equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+ SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+
+ // Lo part corresponding to the 'winning' Hi part
+ SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+    // Recursed MIN/MAX of the Lo parts, used when the Hi parts are equal; the
+    // Lo halves must be compared as unsigned regardless of the original
+    // signedness.
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+
+ Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+ return;
+ }
+
+ // Expand to "a < b ? a : b" etc. Prefer ge/le if that simplifies
+ // the compare.
+ ISD::CondCode Pred;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX:
+ if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits)
+ Pred = ISD::SETGE;
+ else
+ Pred = ISD::SETGT;
+ break;
+ case ISD::SMIN:
+ if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits)
+ Pred = ISD::SETLE;
+ else
+ Pred = ISD::SETLT;
+ break;
+ case ISD::UMAX:
+ if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits)
+ Pred = ISD::SETUGE;
+ else
+ Pred = ISD::SETUGT;
+ break;
+ case ISD::UMIN:
+ if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits)
+ Pred = ISD::SETULE;
+ else
+ Pred = ISD::SETULT;
+ break;
+ }
+ EVT VT = N->getValueType(0);
+ EVT CCT = getSetCCResultType(VT);
+ SDValue Cond = DAG.getSetCC(DL, CCT, LHS, RHS, Pred);
+ SDValue Result = DAG.getSelect(DL, VT, Cond, LHS, RHS);
+ SplitInteger(Result, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ bool HasOpCarry = TLI.isOperationLegalOrCustom(
+ N->getOpcode() == ISD::ADD ? ISD::UADDO_CARRY : ISD::USUBO_CARRY,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasOpCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
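+    // If the carry/borrow out of the low half is provably zero, the high half
+    // can use a plain UADDO/USUBO instead of threading the carry through.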
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.computeKnownBits(HiOps[2]).isZero()
+ ? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2))
+ : DAG.getNode(ISD::UADDO_CARRY, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.computeKnownBits(HiOps[2]).isZero()
+ ? DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2))
+ : DAG.getNode(ISD::USUBO_CARRY, dl, VTList, HiOps);
+ }
+ return;
+ }
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+ }
+ return;
+ }
+
+ bool hasOVF =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::UADDO : ISD::USUBO,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+
+ if (hasOVF) {
+ EVT OvfVT = getSetCCResultType(NVT);
+ SDVTList VTList = DAG.getVTList(NVT, OvfVT);
+ int RevOpc;
+ if (N->getOpcode() == ISD::ADD) {
+ RevOpc = ISD::SUB;
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
+ } else {
+ RevOpc = ISD::ADD;
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
+ }
+ SDValue OVF = Lo.getValue(1);
+
+ switch (BoolType) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
+ [[fallthrough]];
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ OVF = DAG.getSExtOrTrunc(OVF, dl, NVT);
+ Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
+ SDValue Cmp;
+ // Special case: X+1 has a carry out if X+1==0. This may reduce the live
+ // range of X. We assume comparing with 0 is cheap.
+ if (isOneConstant(LoOps[1]))
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, dl, NVT), ISD::SETEQ);
+ else if (isAllOnesConstant(LoOps[1])) {
+ if (isAllOnesConstant(HiOps[1]))
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0],
+ DAG.getConstant(0, dl, NVT), ISD::SETEQ);
+ else
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0],
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+ } else
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+
+ SDValue Carry;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+
+ if (isAllOnesConstant(LoOps[1]) && isAllOnesConstant(HiOps[1]))
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps[0], Carry);
+ else
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
+ SDValue Cmp =
+ DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+
+ SDValue Borrow;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc dl(N);
+
+ SDValue Ovf;
+
+ unsigned CarryOp, NoCarryOp;
+ ISD::CondCode Cond;
+ switch(N->getOpcode()) {
+ case ISD::UADDO:
+ CarryOp = ISD::UADDO_CARRY;
+ NoCarryOp = ISD::ADD;
+ Cond = ISD::SETULT;
+ break;
+ case ISD::USUBO:
+ CarryOp = ISD::USUBO_CARRY;
+ NoCarryOp = ISD::SUB;
+ Cond = ISD::SETUGT;
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
+
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+ if (HasCarryOp) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(CarryOp, dl, VTList, HiOps);
+
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ if (N->getOpcode() == ISD::UADDO && isOneConstant(RHS)) {
+ // Special case: uaddo X, 1 overflowed if X+1 == 0. We can detect this
+ // with (Lo | Hi) == 0.
+ SDValue Or = DAG.getNode(ISD::OR, dl, Lo.getValueType(), Lo, Hi);
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Or,
+ DAG.getConstant(0, dl, Lo.getValueType()), ISD::SETEQ);
+ } else if (N->getOpcode() == ISD::UADDO && isAllOnesConstant(RHS)) {
+ // Special case: uaddo X, -1 overflows if X == 0.
+ Ovf =
+ DAG.getSetCC(dl, N->getValueType(1), LHS,
+ DAG.getConstant(0, dl, LHS.getValueType()), ISD::SETNE);
+ } else {
+ // Calculate the overflow: addition overflows iff a + b < a, and
+ // subtraction overflows iff a - b > a.
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ }
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ovf);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO_CARRY(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+
+ // We need to use an unsigned carry op for the lo part.
+ unsigned CarryOp =
+ N->getOpcode() == ISD::SADDO_CARRY ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
+ Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+    // The high part replicates the sign bit of Lo; make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+    // The high part must be zero; make it explicit.
+ Hi = DAG.getConstant(0, dl, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ // parity(HiLo) -> parity(Lo^Hi)
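+  // Folding the halves with XOR first is safe because parity is just the XOR
+  // of all the bits.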
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo =
+ DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ auto Constant = cast<ConstantSDNode>(N);
+ const APInt &Cst = Constant->getAPIntValue();
+ bool IsTarget = Constant->isTargetOpcode();
+ bool IsOpaque = Constant->isOpaque();
+ SDLoc dl(N);
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget,
+ IsOpaque);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+
+ SDValue N0 = N->getOperand(0);
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ // If the upper half is all sign bits, then we can perform the ABS on the
+ // lower half and zero-extend.
+ if (DAG.ComputeNumSignBits(N0) > NVT.getScalarSizeInBits()) {
+ Lo = DAG.getNode(ISD::ABS, dl, NVT, Lo);
+ Hi = DAG.getConstant(0, dl, NVT);
+ return;
+ }
+
+ // If we have USUBO_CARRY, use the expanded form of the sra+xor+sub sequence
+ // we use in LegalizeDAG. The SUB part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses USUBO_CARRY/USUBO after checking that
+ // USUBO_CARRY is LegalOrCustom. Each of the pieces here can be further
+ // expanded if needed. Shift expansion has a special case for filling with
+ // sign bits so that we will only end up with one SRA.
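+  // This computes abs(x) as (x ^ sign) - sign, where sign == Hi >> (bits - 1)
+  // is either all zeros or all ones; the XOR and subtraction are applied
+  // across the Lo/Hi pair with the borrow threaded through USUBO/USUBO_CARRY.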
+ bool HasSubCarry = TLI.isOperationLegalOrCustom(
+ ISD::USUBO_CARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasSubCarry) {
+ SDValue Sign = DAG.getNode(
+ ISD::SRA, dl, NVT, Hi,
+ DAG.getShiftAmountConstant(NVT.getSizeInBits() - 1, NVT, dl));
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+ Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
+ Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign);
+ Hi = DAG.getNode(ISD::USUBO_CARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
+ return;
+ }
+
+ // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
+ EVT VT = N->getValueType(0);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, dl, VT), N0);
+ SDValue NegLo, NegHi;
+ SplitInteger(Neg, NegLo, NegHi);
+
+ SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, dl, NVT), ISD::SETLT);
+ Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
+ Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
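+  // E.g. with i32 halves, ctlz of the i64 value 0xFFFF takes the Lo branch
+  // (Hi is zero) and yields 16 + 32 == 48.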
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
+
+ Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
+
+ Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+
+ Lo = DAG.getNode(ISD::GET_ROUNDING, dl, {NVT, MVT::Other}, N->getOperand(0));
+ SDValue Chain = Lo.getValue(1);
+  // The high part is the sign of Lo, as -1 is a valid value for GET_ROUNDING.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ Op = GetSoftPromotedHalf(Op);
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
+ CallOptions, dl, Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ Op = GetSoftPromotedHalf(Op);
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
+ CallOptions, dl, Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+
+ assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
+ "Input type needs to be promoted!");
+
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f16) {
+ VT = MVT::f32;
+ // Extend to f32.
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op});
+ Chain = Op.getValue(1);
+ } else {
+ Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op);
+ }
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (N->getOpcode() == ISD::LROUND ||
+ N->getOpcode() == ISD::STRICT_LROUND) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!");
+ } else if (N->getOpcode() == ISD::LRINT ||
+ N->getOpcode() == ISD::STRICT_LRINT) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!");
+ } else if (N->getOpcode() == ISD::LLROUND ||
+ N->getOpcode() == ISD::STRICT_LLROUND) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LLROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+ } else if (N->getOpcode() == ISD::LLRINT ||
+ N->getOpcode() == ISD::STRICT_LLRINT) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LLRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+ } else
+ llvm_unreachable("Unexpected opcode!");
+
+ EVT RetVT = N->getValueType(0);
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Op, CallOptions, dl,
+ Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (N->isStrictFPOpcode())
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (N->isAtomic()) {
+    // Targets typically support compare-and-swap at wider widths than plain
+    // atomic loads, so expand this load via ATOMIC_CMP_SWAP_WITH_SUCCESS.
+ SDLoc dl(N);
+ EVT VT = N->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ VT, VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, N->getMemOperand());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+ return;
+ }
+
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDLoc dl(N);
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, dl, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (DAG.getDataLayout().isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
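+ // For example (illustrative): expanding an extending load of a 48-bit
+ // memory type with NVT = i32 gives EBytes = 6, IncrementSize = 4 and
+ // ExcessBits = 16. Hi loads the 32 high-order bits at Ptr, Lo
+ // zero-extend-loads the remaining 16 bits at Ptr + 4, then the low 16 bits
+ // of Hi are OR'd into the top of Lo and Hi is shifted right by 16.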
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(
+ ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT,
+ Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ }
+ }
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+
+ if (TLI.expandMUL(N, Lo, Hi, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH))
+ return;
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // We'll expand the multiplication by brute force because we have no other
+ // options. This is a trivially-generalized version of the code from
+ // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+ // 4.3.1).
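+ //
+ // Illustrative algebra (with k = HalfBits and the NVT halves split into
+ // k-bit half-words):
+ //   LL = LLH*2^k + LLL, RL = RLH*2^k + RLL
+ //   LL*RL = LLH*RLH*2^(2k) + (LLH*RLL + LLL*RLH)*2^k + LLL*RLL
+ // T, U, V and W below accumulate these partial products so that Lo ends up
+ // as the low NVT bits of LL*RL, and Hi as the high NVT bits of LL*RL plus
+ // the cross terms RH*LL and RL*LH of the full VT*VT product.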
+ unsigned Bits = NVT.getSizeInBits();
+ unsigned HalfBits = Bits >> 1;
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
+ NVT);
+ SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
+ SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
+
+ SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
+ SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
+
+ SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
+ SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
+ SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
+ SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
+
+ SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
+ SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
+ SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
+
+ SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
+ SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
+
+ SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
+ DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
+ DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
+
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
+ DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+ return;
+ }
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
+ SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
+ Lo = R.getValue(0);
+ Hi = R.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), R.getValue(2));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandAddSubSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandShlSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
+/// This performs an expansion of the integer result for a fixed point
+/// multiplication. The default expansion performs rounding down towards
+/// negative infinity, though targets that do care about rounding should specify
+/// a target hook for rounding and provide their own expansion or lowering of
+/// fixed point multiplication to be consistent with rounding.
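+/// For a nonzero scale the expansion below computes the double-width product
+/// and funnel-shifts it right by Scale bits, discarding the low Scale
+/// fractional bits (hence rounding toward negative infinity).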
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ unsigned VTSize = VT.getScalarSizeInBits();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ uint64_t Scale = N->getConstantOperandVal(2);
+ bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
+ N->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+
+ // Handle special case when scale is equal to zero.
+ if (!Scale) {
+ SDValue Result;
+ if (!Saturating) {
+ Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else {
+ EVT BoolVT = getSetCCResultType(VT);
+ unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
+ Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ if (Signed) {
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else {
+ // For unsigned multiplication, we only need to check the max since we
+ // can't really overflow towards zero.
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
+ }
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
+ // For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will
+ // cover for unhandled cases below, while still being valid for UMULFIX[SAT].
+ assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(LHS, LL, LH);
+ GetExpandedInteger(RHS, RL, RH);
+ SmallVector<SDValue, 4> Result;
+
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH)) {
+ report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
+ return;
+ }
+
+ unsigned NVTSize = NVT.getScalarSizeInBits();
+ assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+ "the size of the current value type");
+
+ // After getting the multiplication result in 4 parts, we need to perform a
+ // shift right by the amount of the scale to get the result in that scale.
+ //
+ // Let's say we multiply 2 64 bit numbers. The resulting value can be held in
+ // 128 bits that are cut into 4 32-bit parts:
+ //
+ // HH HL LH LL
+ // |---32---|---32---|---32---|---32---|
+ // 128 96 64 32 0
+ //
+ // |------VTSize-----|
+ //
+ // |NVTSize-|
+ //
+ // The resulting Lo and Hi would normally be in LL and LH after the shift. But
+ // to avoid unnecessary shifting of all 4 parts, we can adjust the shift
+ // amount and get Lo and Hi using two funnel shifts. Or for the special case
+ // when Scale is a multiple of NVTSize we can just pick the result without
+ // shifting.
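+ //
+ // For example (illustrative), with VTSize = 64, NVTSize = 32 and Scale = 5:
+ // Part0 = 0, Lo = fshr(Result[1], Result[0], 5) and
+ // Hi = fshr(Result[2], Result[1], 5). With Scale = 32 instead: Part0 = 1,
+ // Lo = Result[1] and Hi = Result[2], with no shifting at all.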
+ uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+ if (Scale % NVTSize) {
+ SDValue ShiftAmount = DAG.getShiftAmountConstant(Scale % NVTSize, NVT, dl);
+ Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+ ShiftAmount);
+ Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+ ShiftAmount);
+ } else {
+ Lo = Result[Part0];
+ Hi = Result[Part0 + 1];
+ }
+
+ // Unless saturation is requested we are done. The result is in <Hi,Lo>.
+ if (!Saturating)
+ return;
+
+ // Can not overflow when there is no integer part.
+ if (Scale == VTSize)
+ return;
+
+ // To handle saturation we must check for overflow in the multiplication.
+ //
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
+ // aren't all zeroes.
+ //
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
+ // aren't all ones or all zeroes.
+ //
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of signed
+ // saturation.
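+ //
+ // For example (illustrative), with VTSize = 64 and Scale = 5 the result
+ // occupies bits [5, 69) of the 128-bit product: unsigned saturation
+ // triggers if any of bits 69..127 are set, and signed saturation triggers
+ // if bits 68..127 are not all zeroes or all ones.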
+
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
+ if (!Signed) {
+ if (Scale < NVTSize) {
+ // Overflow happened if ((HH | (HL >> Scale)) != 0).
+ SDValue HLAdjusted =
+ DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getShiftAmountConstant(Scale, NVT, dl));
+ SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
+ SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
+ } else if (Scale == NVTSize) {
+ // Overflow happened if (HH != 0).
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
+ } else if (Scale < VTSize) {
+ // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
+ SDValue HLAdjusted =
+ DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getShiftAmountConstant(Scale - NVTSize, NVT, dl));
+ SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
+ } else
+ llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
+ "(and saturation can't happen with Scale==VTSize).");
+
+ Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
+ return;
+ }
+
+ if (Scale < NVTSize) {
+ // The number of overflow bits we can check are VTSize - Scale + 1 (we
+ // include the sign bit). If these top bits are > 0, then we overflowed past
+ // the max value. If these top bits are < -1, then we overflowed past the
+ // min value. Otherwise, we did not overflow.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+ // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
+ // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
+ } else if (Scale == NVTSize) {
+ // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
+ // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
+ } else if (Scale < VTSize) {
+ // This is similar to the case when we saturate if Scale < NVTSize, but we
+ // only need to check HH.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ } else
+ llvm_unreachable("Illegal scale for signed fixed point mul.");
+
+ // Saturate to signed maximum.
+ APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnes(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
+ // Saturate to signed minimum.
+ APInt MinHi = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ // Try expanding in the existing type first.
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
+ N->getOperand(1),
+ N->getConstantOperandVal(2), DAG);
+
+ if (!Res)
+ Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
+ N->getConstantOperandVal(2), TLI, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ assert((Node->getOpcode() == ISD::SADDO || Node->getOpcode() == ISD::SSUBO) &&
+ "Node has unexpected Opcode");
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDLoc dl(Node);
+
+ SDValue Ovf;
+
+ bool IsAdd = Node->getOpcode() == ISD::SADDO;
+ unsigned CarryOp = IsAdd ? ISD::SADDO_CARRY : ISD::SSUBO_CARRY;
+
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+ if (HasCarryOp) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
+
+ Lo = DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, dl, VTList, {LHSL, RHSL});
+ Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS < 0
+ // RHSSign -> RHS < 0
+ // SumSign -> Sum < 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ // To get better codegen we can rewrite this by doing bitwise math on
+ // the integers and extract the final sign bit at the end. So the
+ // above becomes:
+ //
+ // Add:
+ // Overflow -> (~(LHS ^ RHS) & (LHS ^ Sum)) < 0
+ // Sub:
+ // Overflow -> ((LHS ^ RHS) & (LHS ^ Sum)) < 0
+ //
+ // NOTE: This is different from the expansion we do in expandSADDSUBO
+ // because it is more costly to determine that the RHS is > 0 for SSUBO
+ // with the integers split.
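+ //
+ // For example (illustrated with i8 values for brevity): 100 + 100 gives
+ // Sum = -56. LHS ^ RHS has a clear sign bit (the signs match), so
+ // ~(LHS ^ RHS) has its sign bit set; LHS ^ Sum also has its sign bit set,
+ // so the AND is negative and the SETLT reports overflow.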
+ EVT VT = LHS.getValueType();
+ SDValue SignsMatch = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ if (IsAdd)
+ SignsMatch = DAG.getNOT(dl, SignsMatch, VT);
+
+ SDValue SumSignNE = DAG.getNode(ISD::XOR, dl, VT, LHS, Sum);
+ Ovf = DAG.getNode(ISD::AND, dl, VT, SignsMatch, SumSignNE);
+ EVT OType = Node->getValueType(1);
+ Ovf = DAG.getSetCC(dl, OType, Ovf, DAG.getConstant(0, dl, VT), ISD::SETLT);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Ovf);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ SDValue Shiftee = N->getOperand(0);
+ EVT VT = Shiftee.getValueType();
+ SDValue ShAmt = N->getOperand(1);
+ EVT ShAmtVT = ShAmt.getValueType();
+
+ // This legalization is optimal when the shift is by a multiple of the byte
+ // width, %x * 8 <-> %x << 3, so the 3 low bits should be known zero.
+ bool ShiftByByteMultiple =
+ DAG.computeKnownBits(ShAmt).countMinTrailingZeros() >= 3;
+
+ // If we can't do it as one step, we'll have two uses of shift amount,
+ // and thus must freeze it.
+ if (!ShiftByByteMultiple)
+ ShAmt = DAG.getFreeze(ShAmt);
+
+ unsigned VTBitWidth = VT.getScalarSizeInBits();
+ assert(VTBitWidth % 8 == 0 && "Shifting a not byte multiple value?");
+ unsigned VTByteWidth = VTBitWidth / 8;
+ assert(isPowerOf2_32(VTByteWidth) &&
+ "Shiftee type size is not a power of two!");
+ unsigned StackSlotByteWidth = 2 * VTByteWidth;
+ unsigned StackSlotBitWidth = 8 * StackSlotByteWidth;
+ EVT StackSlotVT = EVT::getIntegerVT(*DAG.getContext(), StackSlotBitWidth);
+
+ // Get a temporary stack slot 2x the width of our VT.
+ // FIXME: reuse stack slots?
+ // FIXME: should we be more picky about alignment?
+ Align StackSlotAlignment(1);
+ SDValue StackPtr = DAG.CreateStackTemporary(
+ TypeSize::getFixed(StackSlotByteWidth), StackSlotAlignment);
+ EVT PtrTy = StackPtr.getValueType();
+ SDValue Ch = DAG.getEntryNode();
+
+ MachinePointerInfo StackPtrInfo = MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex());
+
+ // Extend the value being shifted to the entire stack slot's width.
+ SDValue Init;
+ if (N->getOpcode() != ISD::SHL) {
+ unsigned WideningOpc =
+ N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Init = DAG.getNode(WideningOpc, dl, StackSlotVT, Shiftee);
+ } else {
+ // For left-shifts, pad the Shiftee with zeros below its LSB to twice its width.
+ SDValue AllZeros = DAG.getConstant(0, dl, VT);
+ Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee);
+ }
+ // And spill it into the stack slot.
+ Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo, StackSlotAlignment);
+
+ // Now, compute the full-byte offset into the stack slot from which we can
+ // load. The shift amount is in bits; to get the byte offset, divide it by
+ // CHAR_BIT.
+ SDNodeFlags Flags;
+ if (ShiftByByteMultiple)
+ Flags.setExact(true);
+ SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(3, dl, ShAmtVT), Flags);
+ // And clamp it, because an out-of-bounds load is immediate UB,
+ // while a shift overflow would have *just* been poison.
+ ByteOffset = DAG.getNode(ISD::AND, dl, ShAmtVT, ByteOffset,
+ DAG.getConstant(VTByteWidth - 1, dl, ShAmtVT));
+ // We have exactly two strategies for indexing into the stack slot here:
+ // 1. upwards starting from the beginning of the slot
+ // 2. downwards starting from the middle of the slot
+ // On a little-endian machine we pick 1. for right shifts and 2. for left
+ // shifts, and vice versa on a big-endian machine.
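+ //
+ // For example (illustrative): an i128 SRL by 40 on a little-endian target
+ // zero-extends the value into a 32-byte slot, computes
+ // ByteOffset = 40 / 8 = 5 (clamped with & 15), and loads the i128 result
+ // at slot + 5; since 40 is a multiple of CHAR_BIT, no residual bit-shift
+ // is needed.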
+ bool WillIndexUpwards = N->getOpcode() != ISD::SHL;
+ if (DAG.getDataLayout().isBigEndian())
+ WillIndexUpwards = !WillIndexUpwards;
+
+ SDValue AdjStackPtr;
+ if (WillIndexUpwards) {
+ AdjStackPtr = StackPtr;
+ } else {
+ AdjStackPtr = DAG.getMemBasePlusOffset(
+ StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl);
+ ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT);
+ }
+
+ // Get the pointer somewhere into the stack slot from which we need to load.
+ ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy);
+ AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl);
+
+ // And load it! While the load is not legal, legalizing it is obvious.
+ SDValue Res = DAG.getLoad(
+ VT, dl, Ch, AdjStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), Align(1));
+ // We've now performed the shift by CHAR_BIT * floor(ShAmt / CHAR_BIT).
+
+ // If we may still have a sub-CHAR_BIT amount to shift by, do so now.
+ if (!ShiftByByteMultiple) {
+ SDValue ShAmtRem = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(7, dl, ShAmtVT));
+ Res = DAG.getNode(N->getOpcode(), dl, VT, Res, ShAmtRem);
+ }
+
+ // Finally, split the computed value.
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this SHL_PARTS operation or if it
+ // will custom expand it. Don't lower this to SHL_PARTS when we optimise for
+ // size, but create a libcall instead.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ const bool LegalOrCustom =
+ (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom;
+
+ unsigned ExpansionFactor = 1;
+ // That VT->NVT expansion is one step. But will we re-expand NVT?
+ for (EVT TmpVT = NVT;;) {
+ EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT);
+ if (NewTMPVT == TmpVT)
+ break;
+ TmpVT = NewTMPVT;
+ ++ExpansionFactor;
+ }
+
+ TargetLowering::ShiftLegalizationStrategy S =
+ TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor);
+
+ if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack)
+ return ExpandIntRes_ShiftThroughStack(N, Lo, Hi);
+
+ if (LegalOrCustom &&
+ S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand is coming from a vector legalization it may
+ // have an illegal type. Fix that first by casting the operand, otherwise
+ // the new SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ EVT ShAmtTy =
+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
+ SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);
+ SDValue Ops[2] = {N->getOperand(0), ShAmt};
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(
+ ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (N->getOpcode() == ISD::UMULO) {
+ // This section expands the operation into the following sequence of
+ // instructions. `iNh` here refers to a type which has half the bit width of
+ // the type the original operation operated on.
+ //
+ // %0 = %LHS.HI != 0 && %RHS.HI != 0
+ // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+ // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+ // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
+ // %4 = add iNh %1.0, %2.0 as iN
+ // %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH)
+ //
+ // %lo = %3.LO
+ // %hi = %5.0
+ // %ovf = %0 || %1.1 || %2.1 || %5.1
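+ //
+ // Illustrative algebra behind this (with h = the half bit width):
+ //   LHS*RHS = LHS.HI*RHS.HI*2^(2h)
+ //           + (LHS.HI*RHS.LO + LHS.LO*RHS.HI)*2^h
+ //           + LHS.LO*RHS.LO
+ // The 2^(2h) term overflows iN whenever both high halves are nonzero (%0),
+ // each cross product overflows if it does not fit in h bits (%1.1, %2.1),
+ // and adding the cross products to the high half of LHS.LO*RHS.LO may
+ // carry out of the result (%5.1).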
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+ GetExpandedInteger(LHS, LHSLow, LHSHigh);
+ GetExpandedInteger(RHS, RHSLow, RHSHigh);
+ EVT HalfVT = LHSLow.getValueType();
+ EVT BitVT = N->getValueType(1);
+ SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT);
+
+ SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+ SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+ DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+ DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
+
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+
+ SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two);
+
+ // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+ // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn't this
+ // operation recursively legalized?).
+ //
+ // Many backends understand this pattern and will convert into LOHI
+ // themselves, if applicable.
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+ SplitInteger(Three, Lo, Hi);
+
+ Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+
+ // If we don't have the libcall or if the function we are compiling is the
+ // implementation of the expected libcall (avoid inf-loop), expand inline.
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
+ TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
+ // FIXME: This is not an optimal expansion, but better than crashing.
+ EVT WideVT =
+ EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ SDValue MulLo, MulHi;
+ SplitInteger(Mul, MulLo, MulHi);
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VT, MulLo,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+ SDValue Overflow =
+ DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);
+ SplitInteger(MulLo, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain =
+ DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
+ MachinePointerInfo());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : N->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setSExtResult();
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 =
+ DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo());
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, dl, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
+
+ // Try to expand UDIV by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
+
+ // Try to expand UREM by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand());
+
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
+ // both halves independently.
+ SDValue Res = TLI.expandVecReduce(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Delegate to funnel-shift expansion.
+ SDLoc DL(N);
+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;
+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),
+ N->getOperand(0), N->getOperand(1));
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+
+ SDLoc DL(N);
+ unsigned Opc = N->getOpcode();
+ SDValue ShAmt = N->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);
+
+ // If the shift amount is at least half the bitwidth, swap the inputs.
+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();
+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,
+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));
+ SDValue Cond =
+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),
+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);
+
+ // Expand to a pair of funnel shifts.
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);
+
+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);
+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);
+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);
+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);
+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT HalfVT =
+ EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2);
+ SDLoc dl(N);
+
+ // We assume VSCALE(1) fits into a legal integer.
+ APInt One(HalfVT.getSizeInBits(), 1);
+ SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);
+ VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);
+ SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));
+ SplitInteger(Res, Lo, Hi);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ report_fatal_error("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ case ISD::STACKMAP:
+ Res = ExpandIntOp_STACKMAP(N, OpNo);
+ break;
+ case ISD::PATCHPOINT:
+ Res = ExpandIntOp_PATCHPOINT(N, OpNo);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = ExpandIntOp_VP_STRIDED(N, OpNo);
+ break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnes()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, x < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison
+ // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
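+ //
+ // For example (illustrative), a signed i64 '<' split into i32 halves
+ // becomes: (LHSHi == RHSHi) ? (LHSLo setult RHSLo) : (LHSHi setlt RHSHi).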
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
+ nullptr);
+ SDValue LoCmp, HiCmp;
+ if (TLI.isTypeLegal(LHSLo.getValueType()) &&
+ TLI.isTypeLegal(RHSLo.getValueType()))
+ LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!LoCmp.getNode())
+ LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC);
+ if (TLI.isTypeLegal(LHSHi.getValueType()) &&
+ TLI.isTypeLegal(RHSHi.getValueType()))
+ HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!HiCmp.getNode())
+ HiCmp =
+ DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
+ ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
+
+ bool EqAllowed = ISD::isTrueWhenEqual(CCCode);
+
+ // FIXME: Is the HiCmpC->isOne() here correct for
+ // ZeroOrNegativeOneBooleanContent?
+ if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) ||
+ (!EqAllowed &&
+ ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {
+ // For LE / GE: if the high part is known false, ignore the low part.
+ // For LT / GT: if the low part is known false, return the high part;
+ // if the high part is known true, ignore the low part.
+ NewLHS = HiCmp;
+ NewRHS = SDValue();
+ return;
+ }
+
+ if (LHSHi == RHSHi) {
+ // Comparing the low bits is enough.
+ NewLHS = LoCmp;
+ NewRHS = SDValue();
+ return;
+ }
+
+ // Lower with SETCCCARRY if the target supports it.
+ EVT HiVT = LHSHi.getValueType();
+ EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
+ bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
+
+ // FIXME: Make all targets support this, then remove the other lowering.
+ if (HasSETCCCARRY) {
+ // SETCCCARRY can detect < and >= directly. For > and <=, flip
+ // operands and condition code.
+ bool FlipOperands = false;
+ switch (CCCode) {
+ case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
+ case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
+ case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
+ case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
+ default: break;
+ }
+ if (FlipOperands) {
+ std::swap(LHSLo, RHSLo);
+ std::swap(LHSHi, RHSHi);
+ }
+ // Perform a wide subtraction, feeding the carry from the low part into
+ // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high
+ // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is
+ // zero or positive iff LHS >= RHS.
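+ //
+ // For example (illustrative), an unsigned i64 '<' split into i32 halves:
+ // USUBO of the low halves yields the borrow, and SETCCCARRY of the high
+ // halves with that borrow reports whether the full 64-bit subtraction
+ // borrows, i.e. whether LHS < RHS.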
+ EVT LoVT = LHSLo.getValueType();
+ SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT));
+ SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo);
+ SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT),
+ LHSHi, RHSHi, LowCmp.getValue(1),
+ DAG.getCondCode(CCCode));
+ NewLHS = Res;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ,
+ false, DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS =
+ DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+ // Expand to a USUBO_CARRY for the low part and a SETCCCARRY for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
+ SDValue LowCmp =
+ DAG.getNode(ISD::USUBO_CARRY, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Split the operand and replace with SPLAT_VECTOR_PARTS.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, SDLoc(N), N->getValueType(0), Lo,
+ Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtins is a 32-bit constant,
+  // which is wider than the legal integer type on 8/16-bit targets. Just
+  // truncate the constant to a valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
+
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (N->isAtomic()) {
+    // Targets typically support atomic swap/CAS at widths larger than their
+    // plain atomic store instructions, so lower the wide store to ATOMIC_SWAP.
+ SDLoc dl(N);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ N->getMemoryVT(),
+ N->getOperand(0), N->getOperand(2),
+ N->getOperand(1),
+ N->getMemOperand());
+ return Swap.getValue(1);
+ }
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDLoc dl(N);
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), N->getOriginalAlign(), MMOFlags,
+ AAInfo);
+ }
+
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT,
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
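+
+// Worked example (illustrative, not upstream code): a truncating store with
+// memory type i48 whose value was expanded into two i32 halves (NVT = i32) on
+// a big-endian target gives EBytes = 6, IncrementSize = 4, ExcessBits = 16 and
+// HiVT = i32. Hi is shifted up by 16 and the top 16 bits of Lo are merged in
+// below it, that word is stored at the base address, and the remaining low 16
+// bits of Lo are stored as a truncating i16 store at offset 4.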
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
+
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand());
+ return Swap.getValue(1);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
+ assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) ||
+ (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4));
+
+  SDValue Hi; // The upper half is dropped.
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
+ GetExpandedInteger(NewOps[OpNo], NewOps[OpNo], Hi);
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT ResVT = V0.getValueType();
+ SDValue Res = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(ResVT, ResVT), V0, V1);
+ SetPromotedInteger(SDValue(N, 0), Res.getValue(0));
+ SetPromotedInteger(SDValue(N, 1), Res.getValue(1));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue BaseIdx = N->getOperand(1);
+
+ // TODO: We may be able to use this for types other than scalable
+ // vectors and fix those tests that expect BUILD_VECTOR to be used
+ if (OutVT.isScalableVector()) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ // Try and extract from a smaller type so that it eventually falls
+ // into the promotion code below.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||
+ getTypeAction(InVT) == TargetLowering::TypeLegal) {
+ EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NElts = NInVT.getVectorMinNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();
+
+ SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,
+ DAG.getConstant(alignDown(IdxVal, NElts), dl,
+ BaseIdx.getValueType()));
+ SDValue Step2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,
+ DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);
+ }
+
+ // Try and extract from a widened type.
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx};
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+
+    // Promote the operands and see if this is handled by target lowering;
+    // otherwise, use the BUILD_VECTOR approach below.
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ // Collect the (promoted) operands
+ SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx };
+
+ EVT PromEltVT = Ops[0].getValueType().getVectorElementType();
+ assert(PromEltVT.bitsLE(NOutVTElem) &&
+ "Promoted operand has an element type greater than result");
+
+ EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+ }
+
+ if (OutVT.isScalableVector())
+ report_fatal_error("Unable to promote scalable types using BUILD_VECTOR");
+
+ SDValue InOp0 = N->getOperand(0);
+ if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
+ InOp0 = GetPromotedInteger(N->getOperand(0));
+
+ EVT InVT = InOp0.getValueType();
+
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getBuildVector(NOutVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ SDLoc dl(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+
+ EVT SubVecVT = SubVec.getValueType();
+ EVT NSubVT =
+ EVT::getVectorVT(*DAG.getContext(), NOutVT.getVectorElementType(),
+ SubVecVT.getVectorElementCount());
+
+ Vec = GetPromotedInteger(Vec);
+ SubVec = DAG.getNode(ISD::ANY_EXTEND, dl, NSubVT, SubVec);
+
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NOutVT, Vec, SubVec, Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+ TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT);
+ unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType);
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = N->getOperand(i);
+ EVT OpVT = Op.getValueType();
+ // BUILD_VECTOR integer operand types are allowed to be larger than the
+ // result's element type. This may still be true after the promotion. For
+ // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to
+ // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
+ if (OpVT.bitsLT(NOutVTElem)) {
+ unsigned ExtOpc = ISD::ANY_EXTEND;
+ // Attempt to extend constant bool vectors to match target's BooleanContent.
+ // While not necessary, this improves chances of the constant correctly
+ // folding with compare results (e.g. for NOT patterns).
+ if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant)
+ ExtOpc = NOutExtOpc;
+ Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op);
+ }
+ Ops.push_back(Op);
+ }
+
+ return DAG.getBuildVector(NOutVT, dl, Ops);
+}
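+
+// Illustrative example (not upstream code): on a hypothetical target where
+// v4i1 promotes to v4i8 and whose BooleanContent is ZeroOrNegativeOne, a
+// BUILD_VECTOR whose operands are i1 constants sign-extends a true operand to
+// 0xFF rather than any-extending it, so the constant matches the all-ones
+// form that compares produce and folds with them (e.g. in NOT patterns).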
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
+
+ SDLoc dl(N);
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutElemVT = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
+
+ return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isScalableVector() &&
+ "Type must be promoted to a scalable vector type");
+ const APInt &StepVal = N->getConstantOperandAPInt(0);
+ return DAG.getStepVector(dl, NOutVT,
+ StepVal.sext(NOutVT.getScalarSizeInBits()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ SDLoc dl(N);
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ unsigned NumOperands = N->getNumOperands();
+ unsigned NumOutElem = NOutVT.getVectorMinNumElements();
+ EVT OutElemTy = NOutVT.getVectorElementType();
+ if (OutVT.isScalableVector()) {
+ // Find the largest promoted element type for each of the operands.
+ SDUse *MaxSizedValue = std::max_element(
+ N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) {
+ EVT AVT = A.getValueType().getVectorElementType();
+ EVT BVT = B.getValueType().getVectorElementType();
+ return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits();
+ });
+ EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType();
+
+ // Then promote all vectors to the largest element type.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ SDValue Op = N->getOperand(I);
+ EVT OpVT = Op.getValueType();
+ if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ else
+ assert(getTypeAction(OpVT) == TargetLowering::TypeLegal &&
+ "Unhandled legalization type");
+
+ if (OpVT.getVectorElementType().getScalarSizeInBits() <
+ MaxElementVT.getScalarSizeInBits())
+ Op = DAG.getAnyExtOrTrunc(Op, dl,
+ OpVT.changeVectorElementType(MaxElementVT));
+ Ops.push_back(Op);
+ }
+
+ // Do the CONCAT on the promoted type and finally truncate to (the promoted)
+ // NOutVT.
+ return DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ OutVT.changeVectorElementType(MaxElementVT), Ops),
+ dl, NOutVT);
+ }
+
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ EVT SclrTy = Op.getValueType().getVectorElementType();
+ assert(NumElem == Op.getValueType().getVectorNumElements() &&
+ "Unexpected number of elements");
+
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
+ DAG.getVectorIdxConstant(j, dl));
+ Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
+ }
+ }
+
+ return DAG.getBuildVector(NOutVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.isVector() && "This type must be promoted to a vector type");
+
+ SDLoc dl(N);
+
+  // For operands whose TypeAction is to promote, re-extend the promoted
+  // operand to match this node's extension kind (SIGN_EXTEND, ZERO_EXTEND or
+  // ANY_EXTEND) and then build a new *_EXTEND_VECTOR_INREG node of the
+  // promote-to type.
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Promoted;
+
+ switch(N->getOpcode()) {
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Promoted = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Promoted = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Promoted = GetPromotedInteger(N->getOperand(0));
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
+ }
+
+ // Directly extend to the appropriate transform-to type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
+ // The VECREDUCE result size may be larger than the element size, so
+ // we can simply change the result type.
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {
+ // The VP_REDUCE result size may be larger than the element size, so we can
+ // simply change the result type. However the start value and result must be
+ // the same.
+ SDLoc DL(N);
+ SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start,
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+  // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+  // element types. If this is the case then we need to extend the outgoing
+  // value and not truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // The result type is equal to the first input operand's type, so the
+ // type that needs promoting must be the second source vector.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ SDValue Idx = N->getOperand(2);
+ EVT PromVT = EVT::getVectorVT(*DAG.getContext(),
+ V1.getValueType().getVectorElementType(),
+ V0.getValueType().getVectorElementCount());
+ V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT);
+ SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx);
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ MVT InVT = V0.getValueType().getSimpleVT();
+ MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+ SDLoc dl(N);
+
+ EVT ResVT = N->getValueType(0);
+ unsigned NumElems = N->getNumOperands();
+
+ if (ResVT.isScalableVector()) {
+ SDValue ResVec = DAG.getUNDEF(ResVT);
+
+ for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
+ SDValue Op = N->getOperand(OpIdx);
+ unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
+ ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
+ DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
+ }
+
+ return ResVec;
+ }
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+ for (unsigned i=0; i<NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1);
+ SDValue Op = N->getOperand(OpNo);
+
+ // FIXME: Non-constant operands are not yet handled:
+ // - https://github.com/llvm/llvm-project/issues/26431
+ // - https://github.com/llvm/llvm-project/issues/55957
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op);
+ if (!CN)
+ return SDValue();
+
+ // Copy operands before the one being expanded.
+ SmallVector<SDValue> NewOps;
+ for (unsigned I = 0; I < OpNo; I++)
+ NewOps.push_back(N->getOperand(I));
+
+ EVT Ty = Op.getValueType();
+ SDLoc DL = SDLoc(N);
+ if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+ NewOps.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
+ } else {
+ // FIXME: https://github.com/llvm/llvm-project/issues/55609
+ return SDValue();
+ }
+
+ // Copy remaining operands.
+ for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++)
+ NewOps.push_back(N->getOperand(I));
+
+ SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we have replaced the node already.
+}
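+
+// Encoding note (illustrative, not upstream code): an expanded-integer
+// stackmap operand that is a small constant, e.g. an i128 with value 7, is
+// rewritten into the operand pair (StackMaps::ConstantOp, 7) instead of being
+// split into halves; constants with 64 or more active bits and non-constant
+// operands are not yet handled (see the FIXMEs above).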
+
+SDValue DAGTypeLegalizer::ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
+ assert(OpNo >= 7);
+ SDValue Op = N->getOperand(OpNo);
+
+ // FIXME: Non-constant operands are not yet handled:
+ // - https://github.com/llvm/llvm-project/issues/26431
+ // - https://github.com/llvm/llvm-project/issues/55957
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op);
+ if (!CN)
+ return SDValue();
+
+ // Copy operands before the one being expanded.
+ SmallVector<SDValue> NewOps;
+ for (unsigned I = 0; I < OpNo; I++)
+ NewOps.push_back(N->getOperand(I));
+
+ EVT Ty = Op.getValueType();
+ SDLoc DL = SDLoc(N);
+ if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+ NewOps.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
+ } else {
+ // FIXME: https://github.com/llvm/llvm-project/issues/55609
+ return SDValue();
+ }
+
+ // Copy remaining operands.
+ for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++)
+ NewOps.push_back(N->getOperand(I));
+
+ SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we have replaced the node already.
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 000000000000..328939e44dcb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1060 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// Do extensive, expensive, basic correctness checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SDNode &Node : DAG.allnodes()) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (Node.getNodeId() == NewNode)
+ NewNodes.push_back(&Node);
+
+ for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
+ SDValue Res(&Node, i);
+ bool Failed = false;
+ // Don't create a value in map.
+ auto ResId = ValueToIdMap.lookup(Res);
+
+ unsigned Mapped = 0;
+ if (ResId) {
+ auto I = ReplacedValues.find(ResId);
+ if (I != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ auto NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
+ while (I != ReplacedValues.end()) {
+ NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
+ }
+ SDValue NewVal = getSDValue(NewValId);
+ (void)NewVal;
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.count(ResId))
+ Mapped |= 2;
+ if (SoftenedFloats.count(ResId))
+ Mapped |= 4;
+ if (ScalarizedVectors.count(ResId))
+ Mapped |= 8;
+ if (ExpandedIntegers.count(ResId))
+ Mapped |= 16;
+ if (ExpandedFloats.count(ResId))
+ Mapped |= 32;
+ if (SplitVectors.count(ResId))
+ Mapped |= 64;
+ if (WidenedVectors.count(ResId))
+ Mapped |= 128;
+ if (PromotedFloats.count(ResId))
+ Mapped |= 256;
+ if (SoftPromotedHalfs.count(ResId))
+ Mapped |= 512;
+ }
+
+ if (Node.getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+ (Node.getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ SDValue NodeById = IdToValueMap.lookup(ResId);
+ // It is possible the node has been remapped to another node and had
+ // its Id updated in the Value to Id table. The node it remapped to
+ // may not have been processed yet. Look up the Id in the Id to Value
+ // table and re-check the Processed state. If the node hasn't been
+ // remapped we'll get the same state as we got earlier.
+ if (NodeById->getNodeId() == Processed) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ }
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ if (Mapped & 256)
+ dbgs() << " PromotedFloats";
+ if (Mapped & 512)
+ dbgs() << " SoftPromoteHalfs";
+ dbgs() << "\n";
+ llvm_unreachable(nullptr);
+ }
+ }
+ }
+
+#ifndef NDEBUG
+  // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode *U : N->uses())
+ assert(U->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+#endif
+}
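+
+// Illustrative example (not upstream code): a processed value with an illegal
+// type that is recorded in both PromotedIntegers (bit value 2) and
+// ExpandedIntegers (bit value 16) produces Mapped == 18; since more than one
+// bit is set, the "Value in multiple maps!" diagnostic above fires and the
+// names of the offending maps are printed.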
+
+/// This is the main entry point for the type legalizer. This does a top-down
+/// traversal of the dag, legalizing types as it goes. Returns "true" if it made
+/// any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SDNode &Node : DAG.allnodes()) {
+ if (Node.getNumOperands() == 0) {
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
+ } else {
+ Node.setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef EXPENSIVE_CHECKS
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.pop_back_val();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
+ if (IgnoreNodeResults(N)) {
+ LLVM_DEBUG(dbgs() << "Ignoring node results\n");
+ goto ScanOperands;
+ }
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT << "\n");
+ switch (getTypeAction(ResultVT)) {
+ case TargetLowering::TypeLegal:
+ LLVM_DEBUG(dbgs() << "Legal result type\n");
+ break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error(
+ "Scalarization of scalable vectors is not supported.");
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypePromoteFloat:
+ PromoteFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftPromoteHalf:
+ SoftPromoteHalfResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ const auto &Op = N->getOperand(i);
+ LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
+ EVT OpVT = Op.getValueType();
+ switch (getTypeAction(OpVT)) {
+ case TargetLowering::TypeLegal:
+ LLVM_DEBUG(dbgs() << "Legal operand\n");
+ continue;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error(
+ "Scalarization of scalable vectors is not supported.");
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypePromoteFloat:
+ NeedsReanalyzing = PromoteFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftPromoteHalf:
+ NeedsReanalyzing = SoftPromoteHalfOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG);
+ dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode *User : N->uses()) {
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+      // Otherwise, this node is new: this is the first of its operands to
+      // become ready. Its new NodeId is the number of operands it has minus 1
+      // (since the operand that was just processed no longer counts).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef EXPENSIVE_CHECKS
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SDNode &Node : DAG.allnodes()) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(&Node))
+ for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(Node.getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal: ";
+ Node.dump(&DAG);
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
+ !isTypeLegal(Node.getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal: ";
+ Node.getOperand(i).dump(&DAG);
+ Failed = true;
+ }
+
+ if (Node.getNodeId() != Processed) {
+ if (Node.getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (Node.getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (Node.getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (Node.getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ Node.dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(nullptr);
+ }
+ }
+#endif
+
+ return Changed;
+}
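+
+// Scheduling sketch (illustrative, not upstream code): a node's positive
+// NodeId counts how many of its operands are still unprocessed. For a node
+// %c = (add i64 %a, %b), the id starts as Unanalyzed; when %a is processed it
+// becomes 1, and when %b is processed it drops to 0 (ReadyToProcess) and %c is
+// pushed onto the worklist.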
+
+/// The specified node is the root of a subtree of potentially new nodes.
+/// Correct any processed operands (this may change the node) and calculate the
+/// NodeId. If the node itself changes to a processed node, it is not remapped -
+/// the caller needs to take care of this. Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ std::vector<SDValue> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, NewOps);
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help basic correctness
+ // checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &V) {
+ auto Id = getTableId(V);
+ V = getSDValue(Id);
+}
+
+void DAGTypeLegalizer::RemapId(TableId &Id) {
+ auto I = ReplacedValues.find(Id);
+ if (I != ReplacedValues.end()) {
+ assert(Id != I->second && "Id is mapped to itself.");
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapId(I->second);
+ Id = I->second;
+
+    // Note that for N = IdToValueMap[Id] it is possible to have
+    // N.getNode()->getNodeId() == NewNode at this point, because a node may be
+    // put in the map before it is processed.
+ }
+}
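+
+// Path-compression sketch (illustrative, not upstream code): if ReplacedValues
+// contains the chain A -> B and B -> C, the first RemapId(A) recurses through
+// B to reach C and rewrites both entries to point directly at C, so later
+// lookups of either id resolve in a single step.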
+
+namespace {
+ /// This class is a DAGUpdateListener that listens for updates to nodes and
+ /// recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ void NodeUpdated(SDNode *N) override {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in which
+      // case this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// The specified value was legalized to the specified other value.
+/// Update the DAG and NodeIds replacing any uses of From to use To instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ AnalyzeNewValue(To);
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+
+ // The old node may be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ auto FromId = getTableId(From);
+ auto ToId = getTableId(To);
+
+ if (FromId != ToId)
+ ReplacedValues[FromId] = ToId;
+ DAG.ReplaceAllUsesOfValueWith(From, To);
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.pop_back_val();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ auto OldValId = getTableId(OldVal);
+ auto NewValId = getTableId(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ if (OldValId != NewValId)
+ ReplacedValues[OldValId] = NewValId;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+    // When recursively updating nodes with new nodes, it is possible to have
+    // new uses of From due to CSE. If this happens, replace the new uses of
+    // From with To.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+
+ DAG.transferDbgValues(Op, Result);
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+#ifndef NDEBUG
+ EVT VT = Result.getValueType();
+ LLVMContext &Ctx = *DAG.getContext();
+ assert((VT == EVT::getIntegerVT(Ctx, 80) ||
+ VT == TLI.getTypeToTransformTo(Ctx, Op.getValueType())) &&
+ "Invalid type for softened float");
+#endif
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already converted to integer!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted float");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = PromotedFloats[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetSoftPromotedHalf(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == MVT::i16 &&
+ "Invalid type for soft-promoted half");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = SoftPromotedHalfs[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ // Note that in some cases vector operation operands may be greater than
+ // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
+ // a constant i8 operand.
+
+ // We don't currently support the scalarization of scalable vector types.
+ assert(Result.getValueSizeInBits().getFixedValue() >=
+ Op.getScalarValueSizeInBits() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = ScalarizedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already scalarized!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Transfer debug values. Don't invalidate the source debug value until it's
+ // been transferred to the high and low bits.
+ if (DAG.getDataLayout().isBigEndian()) {
+ DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(),
+ Lo.getValueSizeInBits());
+ } else {
+ DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(),
+ Hi.getValueSizeInBits());
+ }
+
+ // Remember that this is the result of the node.
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
+}
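+
+// Debug-transfer note (illustrative, not upstream code): for an i64 value
+// expanded into two i32 halves, the dbg.value fragment at bit offset 0 is
+// attached to Lo and the fragment at offset 32 to Hi on a little-endian
+// target; on a big-endian target Hi takes offset 0 and Lo offset 32. In both
+// cases the source debug value is invalidated only by the second transfer,
+// once both halves carry their fragments.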
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+  assert(Lo.getNode() && "Operand isn't split");
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ Lo.getValueType().getVectorElementCount() * 2 ==
+ Op.getValueType().getVectorElementCount() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already split");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = WidenedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node already widened!");
+ OpIdEntry = getTableId(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueSizeInBits();
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// Convert to a vector of integers of the same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getScalarValueSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ auto EltCnt = Op.getValueType().getVectorElementCount();
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ SDLoc dl(Op);
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align DestAlign = DAG.getReducedAlign(DestVT, /*UseABI=*/false);
+ Align OpAlign = DAG.getReducedAlign(Op.getValueType(), /*UseABI=*/false);
+ Align Align = std::max(DestAlign, OpAlign);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(Op.getValueType().getStoreSize(), Align);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), Align);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), Align);
+}
+
+/// Replace the node's results with custom code provided by the target and
+/// return "true", or do nothing and return "false".
+/// If LegalizeResult is false, we are dealing with a node whose result types
+/// are legal but which has an illegal operand, and VT is the type of that
+/// illegal operand.
+/// If LegalizeResult is true, we are dealing with a node with illegal result
+/// types, and VT is the type of the illegal result.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
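+ // Illegal result types are custom-lowered via ReplaceNodeResults; illegal
+ // operand types via LowerOperationWrapper.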
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ }
+ return true;
+}
+
+
+/// Widen the node's results with custom code provided by the target and return
+/// "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ // If this is a chain output or already widened just replace it.
+ bool WasWidened = SDValue(N, i).getValueType() != Results[i].getValueType();
+ if (WasWidened)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ else
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ }
+ return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N->getOperand(ResNo));
+}
+
+/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+/// given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(Pair);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ std::tie(Lo, Hi) = DAG.SplitScalar(Pair, dl, NVT, NVT);
+}
+
+/// Build an integer with low bits Lo and high bits Hi.
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result SDLoc
+ SDLoc dlHi(Hi);
+ SDLoc dlLo(Lo);
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
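+ // E.g. joining two i32 halves produces an i64:
+ //   Result = zext(Lo) | (anyext(Hi) << 32)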
+ EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// Promote the given target boolean to a target boolean of the given type.
+/// A target boolean is an integer value, not necessarily of type i1, the bits
+/// of which conform to getBooleanContents.
+///
+/// ValVT is the type of values that produced the boolean.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
+ return TLI.promoteTargetBoolean(DAG, Bool, ValVT);
+}
+
+/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(Op);
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
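+ // The target's preferred shift-amount type may be too narrow to represent
+ // the shift when splitting very wide integers; if so, use the next
+ // power-of-2 sized integer type that can hold it.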
+ unsigned ReqShiftAmountInBits =
+ Log2_32_Ceil(Op.getValueType().getSizeInBits());
+ MVT ShiftAmountTy =
+ TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType());
+ if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits())
+ ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits));
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// Return the lower and upper halves of Op's bits in a value type half the
+/// size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT =
+ EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// This transforms the SelectionDAG into a SelectionDAG that only uses types
+/// natively supported by the target. Returns "true" if it made any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 000000000000..db8f61eee606
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,1137 @@
+//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared by the code that implements the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and hacks on it until only
+/// value types the target machine can handle are left. This involves promoting
+/// small sizes to large sizes or splitting up large values into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ /// This pass uses the NodeId on the SDNodes to hold information about the
+ /// state of the node. This enum lists the possible states.
+ enum NodeIdFlags {
+ /// All operands have been processed, so this node is ready to be handled.
+ ReadyToProcess = 0,
+
+ /// This is a new node, not before seen, that was created in the process of
+ /// legalizing some other node.
+ NewNode = -1,
+
+ /// This node's ID needs to be set to the number of its unprocessed
+ /// operands.
+ Unanalyzed = -2,
+
+ /// This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
+private:
+
+ /// This is a bitvector that contains two bits for each simple value type,
+ /// where the two bits correspond to the LegalizeAction enum from
+ /// TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// Return true if this is a simple legal type.
+ bool isSimpleLegalType(EVT VT) const {
+ return VT.isSimple() && TLI.isTypeLegal(VT);
+ }
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ /// Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant ||
+ N->getOpcode() == ISD::Register;
+ }
+
+ // Bijection from SDValue to a unique id. As each created node gets a
+ // new id we do not need to worry about expunging ids for reuse. Should we
+ // run out of ids, we can do a one-time expensive compactification.
+ typedef unsigned TableId;
+
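+ // Ids start at 1; 0 is reserved to mean "no entry", since the maps below
+ // default-construct missing values to 0.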
+ TableId NextValueId = 1;
+
+ SmallDenseMap<SDValue, TableId, 8> ValueToIdMap;
+ SmallDenseMap<TableId, SDValue, 8> IdToValueMap;
+
+ /// For integer nodes that are below legal width, this map indicates what
+ /// promoted value to use.
+ SmallDenseMap<TableId, TableId, 8> PromotedIntegers;
+
+ /// For integer nodes that need to be expanded this map indicates which
+ /// operands are the expanded version of the input.
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers;
+
+ /// For floating-point nodes converted to integers of the same size, this map
+ /// indicates the converted value to use.
+ SmallDenseMap<TableId, TableId, 8> SoftenedFloats;
+
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates what promoted value to use.
+ SmallDenseMap<TableId, TableId, 8> PromotedFloats;
+
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates the converted value to use.
+ SmallDenseMap<TableId, TableId, 8> SoftPromotedHalfs;
+
+ /// For float nodes that need to be expanded this map indicates which operands
+ /// are the expanded version of the input.
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
+
+ /// For nodes that are <1 x ty>, this map indicates the scalar value of type
+ /// 'ty' to use.
+ SmallDenseMap<TableId, TableId, 8> ScalarizedVectors;
+
+ /// For nodes that need to be split this map indicates which operands are the
+ /// expanded version of the input.
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors;
+
+ /// For vector nodes that need to be widened, indicates the widened value to
+ /// use.
+ SmallDenseMap<TableId, TableId, 8> WidenedVectors;
+
+ /// For values that have been replaced with another, indicates the replacement
+ /// value to use.
+ SmallDenseMap<TableId, TableId, 8> ReplacedValues;
+
+ /// This defines a worklist of nodes to process. In order to be pushed onto
+ /// this worklist, all operands of a node must have already been processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+ TableId getTableId(SDValue V) {
+ assert(V.getNode() && "Getting TableId on SDValue()");
+
+ auto I = ValueToIdMap.find(V);
+ if (I != ValueToIdMap.end()) {
+ // Remap the id in case the value has been replaced.
+ RemapId(I->second);
+ assert(I->second && "All Ids should be nonzero");
+ return I->second;
+ }
+ // Add if it's not there.
+ ValueToIdMap.insert(std::make_pair(V, NextValueId));
+ IdToValueMap.insert(std::make_pair(NextValueId, V));
+ ++NextValueId;
+ assert(NextValueId != 0 &&
+ "Ran out of Ids. Increase id type size or add compactification");
+ return NextValueId - 1;
+ }
+
+ const SDValue &getSDValue(TableId &Id) {
+ RemapId(Id);
+ assert(Id && "TableId should be non-zero");
+ auto I = IdToValueMap.find(Id);
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
+ return I->second;
+ }
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
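+ /// Record that node Old has been replaced by node New: map each of Old's
+ /// values to the corresponding value of New and drop Old's entries from the
+ /// per-kind maps.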
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ assert(Old != New && "node replaced with self");
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ TableId NewId = getTableId(SDValue(New, i));
+ TableId OldId = getTableId(SDValue(Old, i));
+
+ if (OldId != NewId) {
+ ReplacedValues[OldId] = NewId;
+
+ // Delete Node from tables. We cannot do this when OldId == NewId,
+ // because NewId can still have table references to it in
+ // ReplacedValues.
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ SoftPromotedHalfs.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
+
+ ValueToIdMap.erase(SDValue(Old, i));
+ }
+ }
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void PerformExpensiveChecks();
+ void RemapId(TableId &Id);
+ void RemapValue(SDValue &V);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
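+ /// Create a store of Op to a new stack temporary, then reload the slot as
+ /// type DestVT.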
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// Replace each result of the given MERGE_VALUES node with the corresponding
+ /// input operand, except for the result 'ResNo', for which the corresponding
+ /// input operand is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was promoted to a larger integer type,
+ /// this returns the promoted value. The low bits of the promoted value
+ /// corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ TableId &PromotedId = PromotedIntegers[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// Get a promoted operand and sign extend it to the final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// Get a promoted operand and zero extend it to the final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ }
+
+ // Get a promoted operand and sign or zero extend it to the final size
+ // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given
+ // subtarget and type, the choice of sign or zero-extension will be
+ // consistent.
+ SDValue SExtOrZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = GetPromotedInteger(Op);
+ if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ return DAG.getZeroExtendInReg(Op, DL, OldVT);
+ }
+
+ // Promote the given operand V (vector or scalar) according to N's specific
+ // reduction kind, using the extension (ISD::(ANY|ZERO|SIGN)_EXTEND)
+ // appropriate for that kind. N must be an integer VECREDUCE_* or
+ // VP_REDUCE_*. Returns the promoted value.
+ SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V);
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_ScalarOp(SDNode *N);
+ SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BITREVERSE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
+ SDValue PromoteIntRes_FREEZE(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
+ SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_FFREXP(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_Select(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_UMINUMAX(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UADDSUBO_CARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_VSCALE(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
+ SDValue PromoteIntRes_MULFIX(SDNode *N);
+ SDValue PromoteIntRes_DIVFIX(SDNode *N);
+ SDValue PromoteIntRes_GET_ROUNDING(SDNode *N);
+ SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
+ SDValue PromoteIntRes_ABS(SDNode *N);
+ SDValue PromoteIntRes_Rotate(SDNode *N);
+ SDValue PromoteIntRes_FunnelShift(SDNode *N);
+ SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_ScalarOp(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_FunnelShift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_VP_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
+ SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_FIX(SDNode *N);
+ SDValue PromoteIntOp_ExpOp(SDNode *N);
+ SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+ SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
+ SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was expanded into two integers of half
+ /// the size, this returns the two halves. The low bits of Op are exactly
+ /// equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ShiftThroughStack (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
+
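+ // Helpers for expanding shifts of integers that have been expanded into
+ // Lo/Hi halves.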
+ void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
+ SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, const SDLoc &dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was converted to an integer of the
+ /// same size, this returns the integer. The integer contains exactly the
+ /// same bits as Op - only the type changed. For example, if Op is an f32
+ /// which was softened to an i32, then this method returns an i32, the bits
+ /// of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ TableId Id = getTableId(Op);
+ auto Iter = SoftenedFloats.find(Id);
+ if (Iter == SoftenedFloats.end()) {
+ assert(isSimpleLegalType(Op.getValueType()) &&
+ "Operand wasn't converted to integer?");
+ return Op;
+ }
+ SDValue SoftenedOp = getSDValue(Iter->second);
+ assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Convert Float Results to Integer.
+ void SoftenFloatResult(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FMINNUM(SDNode *N);
+ SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCBRT(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_BF16_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_ExpOp(SDNode *N);
+ SDValue SoftenFloatRes_FFREXP(SDNode *N);
+ SDValue SoftenFloatRes_FREEZE(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FROUND(SDNode *N);
+ SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_VECREDUCE(SDNode *N);
+ SDValue SoftenFloatRes_VECREDUCE_SEQ(SDNode *N);
+
+ // Convert Float Operand to Integer.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N);
+ SDValue SoftenFloatOp_LROUND(SDNode *N);
+ SDValue SoftenFloatOp_LLROUND(SDNode *N);
+ SDValue SoftenFloatOp_LRINT(SDNode *N);
+ SDValue SoftenFloatOp_LLRINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was expanded into two floating-point
+ /// values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLDEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue ExpandFloatOp_LROUND(SDNode *N);
+ SDValue ExpandFloatOp_LLROUND(SDNode *N);
+ SDValue ExpandFloatOp_LRINT(SDNode *N);
+ SDValue ExpandFloatOp_LLRINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, const SDLoc &dl,
+ SDValue &Chain, bool IsSignaling = false);
+
+ //===--------------------------------------------------------------------===//
+ // Float promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetPromotedFloat(SDValue Op) {
+ TableId &PromotedId = PromotedFloats[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedFloat(SDValue Op, SDValue Result);
+
+ void PromoteFloatResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteFloatRes_BITCAST(SDNode *N);
+ SDValue PromoteFloatRes_BinOp(SDNode *N);
+ SDValue PromoteFloatRes_ConstantFP(SDNode *N);
+ SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue PromoteFloatRes_FMAD(SDNode *N);
+ SDValue PromoteFloatRes_ExpOp(SDNode *N);
+ SDValue PromoteFloatRes_FFREXP(SDNode *N);
+ SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+ SDValue PromoteFloatRes_LOAD(SDNode *N);
+ SDValue PromoteFloatRes_SELECT(SDNode *N);
+ SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
+ SDValue PromoteFloatRes_UnaryOp(SDNode *N);
+ SDValue PromoteFloatRes_UNDEF(SDNode *N);
+ SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
+ SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+ SDValue PromoteFloatRes_VECREDUCE(SDNode *N);
+ SDValue PromoteFloatRes_VECREDUCE_SEQ(SDNode *N);
+
+ bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Half soft promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetSoftPromotedHalf(SDValue Op) {
+ TableId &PromotedId = SoftPromotedHalfs[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetSoftPromotedHalf(SDValue Op, SDValue Result);
+
+ void SoftPromoteHalfResult(SDNode *N, unsigned ResNo);
+ SDValue SoftPromoteHalfRes_BinOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_BITCAST(SDNode *N);
+ SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N);
+ SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
+ SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
+ SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
+ SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
+ SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N);
+ SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N);
+
+ bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
+ SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
+ SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
+ SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
+ SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed one-element vector Op which was scalarized to its
+ /// element type, this returns the element. For example, if Op is a v1i32,
+ /// Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)];
+ SDValue ScalarizedOp = getSDValue(ScalarizedId);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
+ SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+ SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_ExpOp(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
+
+ SDValue ScalarizeVecRes_FIX(SDNode *N);
+ SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+ SDValue ScalarizeVecOp_VSETCC(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
+ SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed vector Op which was split into vectors of half the size,
+ /// this method returns the halves. The first elements of Op coincide with the
+ /// elements of Lo; the remaining elements of Op coincide with the elements of
+ /// Hi: Op is what you would get by concatenating Lo and Hi.
+ /// For example, if Op is a v8i32 that was split into two v4i32's, then this
+ /// method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ /// Split the mask operand of a VP intrinsic.
+ std::pair<SDValue, SDValue> SplitMask(SDValue Mask);
+
+ /// Split the mask operand of a VP intrinsic at the given location.
+ std::pair<SDValue, SDValue> SplitMask(SDValue Mask, const SDLoc &DL);
+
+ // Helper function for incrementing the pointer when splitting
+ // memory operations.
+ void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
+ SDValue &Ptr, uint64_t *ScaledOffset = nullptr);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned ResNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
+ SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
+ bool SplitSETCC = false);
+ void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+ void SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N);
+ void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
+ void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N);
+ SDValue SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
+ SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed vector Op which was widened into a larger vector, this
+ /// method returns the larger vector. The elements of the returned vector
+ /// consist of the elements of Op followed by elements containing rubbish.
+ /// For example, if Op is a v2i32 that was widened to a v4i32, then this
+ /// method returns a v4i32 for which the first two elements are the same as
+ /// those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ TableId &WidenedId = WidenedVectors[getTableId(Op)];
+ SDValue WidenedOp = getSDValue(WidenedId);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ /// Given a mask Mask, returns the larger vector into which Mask was widened.
+ SDValue GetWidenedMask(SDValue Mask, ElementCount EC) {
+ // For VP operations, we must also widen the mask. Note that the mask type
+ // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle
+ // the case where the mask needs widening to an identically-sized type as
+ // the vector inputs.
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() == EC &&
+ "Unable to widen binary VP op");
+ return Mask;
+ }
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_AssertZext(SDNode* N);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+ SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
+ SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
+ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
+ SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
+ SDValue WidenVecRes_ScalarOp(SDNode* N);
+ SDValue WidenVecRes_Select(SDNode *N);
+ SDValue WidenVSELECTMask(SDNode *N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VECTOR_REVERSE(SDNode *N);
+
+ SDValue WidenVecRes_Ternary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
+ SDValue WidenVecRes_StrictFP(SDNode *N);
+ SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
+ SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
+ SDValue WidenVecRes_ExpOp(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTEND(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
+ SDValue WidenVecOp_VSELECT(SDNode *N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
+ SDValue WidenVecOp_UnrollVectorOp(SDNode *N);
+ SDValue WidenVecOp_IS_FPCLASS(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
+ SDValue WidenVecOp_VP_REDUCE(SDNode *N);
+ SDValue WidenVecOp_ExpOp(SDNode *N);
+
+ /// Helper function to generate a set of operations to perform
+ /// a vector operation for a wider type.
+ SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Helper function to generate a set of loads to load a vector with a
+ /// resulting wider type. It takes:
+ /// LdChain: list of chains for the loads to be generated.
+ /// Ld: the load to widen.
+ SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD);
+
+  /// Helper function to generate a set of extension loads to load a vector
+  /// with a resulting wider type. It takes:
+  ///   LdChain: list of chains for the loads that are generated.
+  ///   LD:      the load to widen.
+  ///   ExtType: extension element type.
+ SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// Helper function to generate a set of stores to store a widened vector
+  /// into non-widened memory. Returns true if successful, false otherwise.
+  ///   StChain: list of chains for the stores we have generated
+  ///   ST:      store of a widened value
+ bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+
+  /// Modifies a vector input (widens or narrows) to a vector of NVT. The
+  /// input vector must have the same element type as NVT.
+  /// When FillWithZeroes is "on", the vector will be widened with zeroes.
+  /// By default, the vector will be widened with undefined values.
+ SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
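+  // Illustrative note (not part of the upstream sources): for example, with
+  // NVT = v4i32,
+  //   ModifyToType(<2 x i32> <a,b>, v4i32)       --> <a, b, undef, undef>
+  //   ModifyToType(<2 x i32> <a,b>, v4i32, true) --> <a, b, 0, 0>
+  //   ModifyToType(<8 x i32> <a,...,h>, v4i32)   --> <a, b, c, d>
+  // i.e. widening pads the tail with undef (or zeroes), while narrowing keeps
+  // the leading elements.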
+
+ /// Return a mask of vector type MaskVT to replace InMask. Also adjust
+ /// MaskVT to ToMaskVT if needed with vector extension or truncation.
+ SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only rely on the fact that the illegal type is
+  // split into two not necessarily identical types. As such they can be used
+  // for splitting vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+ /// given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only rely on the fact that the illegal type is
+  // split into two identical types of half the size, and that the Lo/Hi part
+  // is stored first in memory on little/big-endian machines, followed by the
+  // Hi/Lo part. As such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+
+ /// This function will split the integer \p Op into \p NumElements
+ /// operations of type \p EltVT and store them in \p Ops.
+ void IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops, EVT EltVT);
+
+ // Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 000000000000..296242c00401
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,601 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
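+// Illustrative sketch (not part of the upstream sources): on a 32-bit
+// little-endian target, expanding an illegal i64 value X yields two i32
+// halves with a fixed memory layout,
+//   Lo = (i32)(X & 0xffffffff);  // stored at the lower address
+//   Hi = (i32)(X >> 32);         // stored at the higher address
+// whereas splitting an illegal vector such as v8i32 simply yields two v4i32
+// halves, with no requirement on how those halves are laid out in memory.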
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
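+//
+// Illustrative example (not part of the upstream sources): expanding the i64
+// constant 0x0011223344556677 gives Lo = 0x44556677 and Hi = 0x00112233; a
+// little-endian machine stores Lo first and a big-endian machine stores Hi
+// first. A v2i32, by contrast, always stores element 0 first, which is why
+// these routines must not be used directly on vectors.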
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ SDLoc dl(N);
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
+    llvm_unreachable("Bitcast of a promotion-needing float should never need "
+                     "expansion");
+ case TargetLowering::TypeSoftenFloat:
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat: {
+ auto &DL = DAG.getDataLayout();
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(InVT, DL) !=
+ TLI.hasBigEndianPartOrdering(OutVT, DL))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+ // If the element size is smaller than byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getVectorIdxConstant(i, dl)));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(
+ ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align InAlign = DAG.getReducedAlign(InVT, /*UseABI=*/false);
+ Align NOutAlign = DAG.getReducedAlign(NOutVT, /*UseABI=*/false);
+ Align Align = std::max(InAlign, NOutAlign);
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Align);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, NOutAlign);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), NOutAlign);
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = N->getConstantOperandVal(1) ? Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ ElementCount OldEltCount = OldVec.getValueType().getVectorElementCount();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+ SDLoc dl(N);
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+    assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldEltCount);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
+ SDValue NewVec = DAG.getNode(
+ ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(), NewVT, OldEltCount * 2), OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, dl, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ SDLoc dl(N);
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ assert(!LD->isAtomic() && "Atomics can not be split");
+ EVT ValueVT = LD->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ AAInfo);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Hi = DAG.getLoad(
+ NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDLoc dl(N);
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+ Chain = Hi.getValue(1);
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops,
+ EVT EltVT) {
+ assert(Op.getValueType().isInteger());
+ SDLoc DL(Op);
+ SDValue Parts[2];
+
+ if (NumElements > 1) {
+ NumElements >>= 1;
+ SplitInteger(Op, Parts[0], Parts[1]);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+ IntegerToVector(Parts[0], NumElements, Ops, EltVT);
+ IntegerToVector(Parts[1], NumElements, Ops, EltVT);
+ } else {
+ Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op));
+ }
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ SDLoc dl(N);
+ if (N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isInteger()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ //
+ // FIXME: I'm not sure why we are first trying to split the input into
+ // a 2 element vector, so I'm leaving it here to maintain the current
+ // behavior.
+ unsigned NumElts = 2;
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ NumElts);
+ if (!isTypeLegal(NVT)) {
+ // If we can't find a legal type by splitting the integer in half,
+ // then we can use the node's value type.
+ NumElts = N->getValueType(0).getVectorNumElements();
+ NVT = N->getValueType(0);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
+
+ SDValue Vec = DAG.getBuildVector(NVT, dl, ArrayRef(Ops.data(), NumElts));
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ SDLoc dl(N);
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ SmallVector<SDValue, 16> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size());
+ SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return N->getConstantOperandVal(1) ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx,
+ DAG.getConstant(1, dl, Idx.getValueType()));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ SDLoc dl(N);
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ assert(!St->isAtomic() && "Atomics can not be split");
+ EVT ValueVT = St->getValue().getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ AAMDNodes AAInfo = St->getAAInfo();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(),
+ AAInfo);
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Hi = DAG.getStore(
+ Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue LL, LH, RL, RH, CL, CH;
+ SDLoc dl(N);
+ unsigned Opcode = N->getOpcode();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ if (SDValue Res = WidenVSELECTMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res, dl);
+    // Check if there are already split versions of the vector available and
+    // use those instead of splitting the mask operand again.
+ else if (getTypeAction(Cond.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Cond, CL, CH);
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ }
+
+ if (Opcode != ISD::VP_SELECT && Opcode != ISD::VP_MERGE) {
+ Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH);
+ return;
+ }
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL, EVLLo);
+ Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_AssertZext(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::AssertZext, dl, L.getValueType(), L, N->getOperand(1));
+ Hi = DAG.getNode(ISD::AssertZext, dl, H.getValueType(), H, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue L, H;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
+}
+
+void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc DL(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 000000000000..3862fd241897
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,1771 @@
+//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
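+//
+// Illustrative sketch (not part of the upstream sources): unrolling an
+// ISD::SDIV of type v2i64 conceptually produces, for operands A and B,
+//   a0 = EXTRACT_VECTOR_ELT A, 0     a1 = EXTRACT_VECTOR_ELT A, 1
+//   b0 = EXTRACT_VECTOR_ELT B, 0     b1 = EXTRACT_VECTOR_ELT B, 1
+//   r  = BUILD_VECTOR (SDIV a0, b0), (SDIV a1, b1)
+// and it is these scalar i64 SDIV nodes that the type legalizer must later
+// expand on a 32-bit target.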
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "legalizevectorops"
+
+namespace {
+
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed = false; // Keep track of whether anything changed
+
+ /// For nodes that are of legal width, and that have more than one use, this
+ /// map indicates what regularized operand to use. This allows us to avoid
+ /// legalizing the same thing more than once.
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+ /// Adds a node to the translation cache.
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ /// Legalizes the given node.
+ SDValue LegalizeOp(SDValue Op);
+
+ /// Assuming the node is legal, "legalize" the results.
+ SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
+
+ /// Make sure Results are legal and update the translation cache.
+ SDValue RecursivelyLegalizeResults(SDValue Op,
+ MutableArrayRef<SDValue> Results);
+
+ /// Wrapper to interface LowerOperation with a vector of Results.
+ /// Returns false if the target wants to use default expansion. Otherwise
+ /// returns true. If return is true and the Results are empty, then the
+ /// target wants to keep the input node as is.
+ bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDNode *Node);
+
+ /// Implement expand-based legalization of vector operations.
+ ///
+ /// This is just a high-level routine to dispatch to specific code paths for
+ /// operations to legalize them.
+ void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
+ /// FP_TO_SINT isn't legal.
+ void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  /// SINT_TO_FLOAT and SHR on vectors aren't legal.
+ void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ SDValue ExpandSEXTINREG(SDNode *Node);
+
+ /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+ /// type. The contents of the bits in the extended part of each element are
+ /// undef.
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
+
+ /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+ /// type, then shifts left and arithmetic shifts right to introduce a sign
+ /// extension.
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
+
+ /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and blends zeros into
+ /// the remaining lanes, finally bitcasting to the proper type.
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
+
+ /// Expand bswap of vectors into a shuffle if legal.
+ SDValue ExpandBSWAP(SDNode *Node);
+
+ /// Implement vselect in terms of XOR, AND, OR when blend is not
+ /// supported by the target.
+ SDValue ExpandVSELECT(SDNode *Node);
+ SDValue ExpandVP_SELECT(SDNode *Node);
+ SDValue ExpandVP_MERGE(SDNode *Node);
+ SDValue ExpandVP_REM(SDNode *Node);
+ SDValue ExpandSELECT(SDNode *Node);
+ std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
+ SDValue ExpandStore(SDNode *N);
+ SDValue ExpandFNEG(SDNode *Node);
+ void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements vector promotion.
+ ///
+ /// This is essentially just bitcasting the operands to a different type and
+ /// bitcasting the result back to the original type.
+ void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements [SU]INT_TO_FP vector promotion.
+ ///
+ /// This is a [zs]ext of the input operand to a larger integer type.
+ void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements FP_TO_[SU]INT vector promotion of the result type.
+ ///
+ /// It is promoted to a larger integer type. The result is then
+ /// truncated back to the original type.
+ void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+public:
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
+  /// Begin legalizing the vector operations in the DAG.
+ bool Run();
+};
+
+} // end anonymous namespace
+
+bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
+ // Check if the values of the nodes contain vectors. We don't need to check
+ // the operands because we are going to check their values at some point.
+ HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+ // If this basic block has no vectors then no need to legalize vectors.
+ if (!HasVectors)
+ return false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
+ LegalizeOp(SDValue(&*I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
+ assert(Op->getNumValues() == Result->getNumValues() &&
+ "Unexpected number of results");
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
+ return SDValue(Result, Op.getResNo());
+}
+
+SDValue
+VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
+ MutableArrayRef<SDValue> Results) {
+ assert(Results.size() == Op->getNumValues() &&
+ "Unexpected number of results");
+ // Make sure that the generated code is itself legal.
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ Results[i] = LegalizeOp(Results[i]);
+ AddLegalizedOperand(Op.getValue(i), Results[i]);
+ }
+
+ return Results[Op.getResNo()];
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (const SDValue &Oper : Op->op_values())
+ Ops.push_back(LegalizeOp(Oper));
+
+ SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
+
+ bool HasVectorValueOrOp =
+ llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
+ llvm::any_of(Node->op_values(),
+ [](SDValue O) { return O.getValueType().isVector(); });
+ if (!HasVectorValueOrOp)
+ return TranslateLegalizeResults(Op, Node);
+
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ EVT ValVT;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Node);
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ EVT LoadedVT = LD->getMemoryVT();
+ if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
+ Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ Action = TLI.getTruncStoreAction(ValVT, StVT);
+ break;
+ }
+ case ISD::MERGE_VALUES:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // This operation lies about being legal: when it claims to be legal,
+ // it should actually be expanded.
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ ValVT = Node->getValueType(0);
+ if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ValVT = Node->getOperand(1).getValueType();
+ if (Op.getOpcode() == ISD::STRICT_FSETCC ||
+ Op.getOpcode() == ISD::STRICT_FSETCCS) {
+ MVT OpVT = Node->getOperand(1).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
+ }
+ // If we're asked to expand a strict vector floating-point operation,
+ // by default we're going to simply unroll it. That is usually the
+ // best approach, except in the case where the resulting strict (scalar)
+ // operations would themselves use the fallback mutation to non-strict.
+ // In that specific case, just do the fallback on the vector op.
+ if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
+ TargetLowering::Legal) {
+ EVT EltVT = ValVT.getVectorElementType();
+ if (TLI.getOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Legal)
+ Action = TargetLowering::Legal;
+ }
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::ABS:
+ case ISD::BSWAP:
+ case ISD::BITREVERSE:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::SELECT_CC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::FCOPYSIGN:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FLDEXP:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ case ISD::FMA:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FCANONICALIZE:
+ case ISD::FFREXP:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::MGATHER:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
+ case ISD::SETCC: {
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ break;
+ }
+
+#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
+ case ISD::VPID: { \
+ EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
+ : Node->getOperand(LEGALPOS).getValueType(); \
+ if (ISD::VPID == ISD::VP_SETCC) { \
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
+ Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
+ if (Action != TargetLowering::Legal) \
+ break; \
+ } \
+ Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
+ } break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+
+ LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+
+ SmallVector<SDValue, 8> ResultVals;
+ switch (Action) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Promote:
+ assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
+ "This action is not supported yet!");
+ LLVM_DEBUG(dbgs() << "Promoting\n");
+ Promote(Node, ResultVals);
+ assert(!ResultVals.empty() && "No results for promotion?");
+ break;
+ case TargetLowering::Legal:
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
+ break;
+ case TargetLowering::Custom:
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
+ if (LowerOperationWrapper(Node, ResultVals))
+ break;
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
+ [[fallthrough]];
+ case TargetLowering::Expand:
+ LLVM_DEBUG(dbgs() << "Expanding\n");
+ Expand(Node, ResultVals);
+ break;
+ }
+
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
+ Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
+}
+
+// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
+// merge them somehow?
+bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+
+ if (!Res.getNode())
+ return false;
+
+ if (Res == SDValue(Node, 0))
+ return true;
+
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (Node->getNumValues() == 1) {
+ Results.push_back(Res);
+ return true;
+ }
+
+ // If the original node has multiple results, then the return node should
+ // have the same number of results.
+ assert((Node->getNumValues() == Res->getNumValues()) &&
+ "Lowering returned the wrong number of results!");
+
+  // Place the new result values based on the result number of N.
+ for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+
+ return true;
+}
+
+void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ // For a few operations there is a specific concept for promotion based on
+ // the operand's type.
+ switch (Node->getOpcode()) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ PromoteINT_TO_FP(Node, Results);
+ return;
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ // Promote the operation by extending the operand.
+ PromoteFP_TO_INT(Node, Results);
+ return;
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ // These operations are used to do promotion so they can't be promoted
+ // themselves.
+ llvm_unreachable("Don't know how to promote this operation!");
+ }
+
+ // There are currently two cases of vector promotion:
+ // 1) Bitcasting a vector of integers to a different type to a vector of the
+ // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
+ // 2) Extending a vector of floats to a vector of the same number of larger
+ // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
+ assert(Node->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT VT = Node->getSimpleValueType(0);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ SDLoc dl(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+ if (Node->getOperand(j).getValueType().isVector())
+ if (Node->getOperand(j)
+ .getValueType()
+ .getVectorElementType()
+ .isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+ else
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
+ else
+ Operands[j] = Node->getOperand(j);
+ }
+
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
+
+ if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
+ Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ else
+ Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+
+ Results.push_back(Res);
+}
+
+void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ bool IsStrict = Node->isStrictFPOpcode();
+ MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
+
+ SDLoc dl(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+ unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+ if (Node->getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
+ else
+ Operands[j] = Node->getOperand(j);
+ }
+
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(Node->getOpcode(), dl,
+ {Node->getValueType(0), MVT::Other}, Operands);
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
+ Results.push_back(Res);
+}
+
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result. This is different from the default
+// PromoteVector, which uses bitcast to promote and thus assumes that the
+// promoted vector type has the same overall size.
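+//
+// Illustrative example (not part of the upstream sources): FP_TO_SINT from
+// v4f32 to an illegal v4i8 with NVT = v4i32 becomes
+//   t = FP_TO_SINT v4f32 -> v4i32
+//   t = AssertSext t, i8
+//   r = TRUNCATE t -> v4i8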
+void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VT = Node->getSimpleValueType(0);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ bool IsStrict = Node->isStrictFPOpcode();
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
+
+ unsigned NewOpc = Node->getOpcode();
+ // Change FP_TO_UINT to FP_TO_SINT if possible.
+ // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
+ if (NewOpc == ISD::FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ if (NewOpc == ISD::STRICT_FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDLoc dl(Node);
+ SDValue Promoted, Chain;
+ if (IsStrict) {
+ Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Chain = Promoted.getValue(1);
+ } else
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ if (Node->getOpcode() == ISD::FP_TO_UINT ||
+ Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
+ NewOpc = ISD::AssertZext;
+ else
+ NewOpc = ISD::AssertSext;
+
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
+ DAG.getValueType(VT.getScalarType()));
+ Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ Results.push_back(Promoted);
+ if (IsStrict)
+ Results.push_back(Chain);
+}
+
+std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return TLI.scalarizeVectorLoad(LD, DAG);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
+ return TF;
+}
+
+void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ switch (Node->getOpcode()) {
+ case ISD::LOAD: {
+ std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ return;
+ }
+ case ISD::STORE:
+ Results.push_back(ExpandStore(Node));
+ return;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(Node->getOperand(i));
+ return;
+ case ISD::SIGN_EXTEND_INREG:
+ Results.push_back(ExpandSEXTINREG(Node));
+ return;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
+ return;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
+ return;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
+ return;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node));
+ return;
+ case ISD::VP_BSWAP:
+ Results.push_back(TLI.expandVPBSWAP(Node, DAG));
+ return;
+ case ISD::VSELECT:
+ Results.push_back(ExpandVSELECT(Node));
+ return;
+ case ISD::VP_SELECT:
+ Results.push_back(ExpandVP_SELECT(Node));
+ return;
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ if (SDValue Expanded = ExpandVP_REM(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::SELECT:
+ Results.push_back(ExpandSELECT(Node));
+ return;
+ case ISD::SELECT_CC: {
+ if (Node->getValueType(0).isScalableVector()) {
+ EVT CondVT = TLI.getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+ SDValue SetCC =
+ DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(4));
+ Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
+ Node->getOperand(2),
+ Node->getOperand(3)));
+ return;
+ }
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ ExpandFP_TO_UINT(Node, Results);
+ return;
+ case ISD::UINT_TO_FP:
+ ExpandUINT_TO_FLOAT(Node, Results);
+ return;
+ case ISD::FNEG:
+ Results.push_back(ExpandFNEG(Node));
+ return;
+ case ISD::FSUB:
+ ExpandFSUB(Node, Results);
+ return;
+ case ISD::SETCC:
+ case ISD::VP_SETCC:
+ ExpandSETCC(Node, Results);
+ return;
+ case ISD::ABS:
+ if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::BITREVERSE:
+ ExpandBITREVERSE(Node, Results);
+ return;
+ case ISD::VP_BITREVERSE:
+ if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::CTPOP:
+ if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::VP_CTPOP:
+ if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::FSHL:
+ case ISD::VP_FSHL:
+ case ISD::FSHR:
+ case ISD::VP_FSHR:
+ if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::UADDO:
+ case ISD::USUBO:
+ ExpandUADDSUBO(Node, Results);
+ return;
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ ExpandSADDSUBO(Node, Results);
+ return;
+ case ISD::UMULO:
+ case ISD::SMULO:
+ ExpandMULO(Node, Results);
+ return;
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::USHLSAT:
+ case ISD::SSHLSAT:
+ if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+    // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from
+    // unrolling below.
+ if (Node->getValueType(0).isScalableVector()) {
+ if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ }
+ break;
+ case ISD::SMULFIX:
+ case ISD::UMULFIX:
+ if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIXSAT:
+    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet; it is not clear
+    // exactly why. Maybe it results in worse codegen than the unroll for some
+    // targets? This should be investigated, and if we still prefer to unroll,
+    // an explanation would be helpful.
+ break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ ExpandFixedPointDiv(Node, Results);
+ return;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIXSAT:
+ break;
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ ExpandStrictFPOp(Node, Results);
+ return;
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ return;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
+ return;
+ case ISD::SREM:
+ case ISD::UREM:
+ ExpandREM(Node, Results);
+ return;
+ case ISD::VP_MERGE:
+ Results.push_back(ExpandVP_MERGE(Node));
+ return;
+ }
+
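+  // No dedicated expansion matched (or the expansion above declined), so
+  // fall back to unrolling the operation into scalar pieces.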
+ SDValue Unrolled = DAG.UnrollVectorOp(Node);
+ for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
+ Results.push_back(Unrolled.getValue(I));
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
+  // Lower a select instruction where the condition is a scalar and the
+  // operands are vectors. Lower this select to VSELECT and implement it
+  // using XOR, AND and OR. The selector bit is broadcast to every lane.
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
+
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+
+ assert(VT.isVector() && !Mask.getValueType().isVector()
+ && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcasted' to another type that is then handled.
+ // Also, we need to be able to construct a splat vector using either
+ // BUILD_VECTOR or SPLAT_VECTOR.
+ // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
+ // BUILD_VECTOR?
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
+ : ISD::SPLAT_VECTOR,
+ VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Node);
+
+ // Generate a mask operand.
+ EVT MaskTy = VT.changeVectorElementTypeToInteger();
+
+  // The scalar type of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
+ DAG.getConstant(0, DL, BitTy));
+
+ // Broadcast the mask so that the entire vector is all one or all zero.
+ Mask = DAG.getSplat(MaskTy, DL, Mask);
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
+}
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Node);
+
+ SDLoc DL(Node);
+ EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
+
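+  // Shift the value left so the sign bit of the narrow type lands in the
+  // element's sign bit, then arithmetic-shift right to replicate it.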
+ SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ANY_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
+ Src, DAG.getVectorIdxConstant(0, DL));
+ }
+
+ // Build a base mask of undef shuffles.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.resize(NumSrcElements, -1);
+
+ // Place the extended lanes into the correct locations.
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // First build an any-extend node which can be legalized above when we
+ // recurse through it.
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
+
+  // Now we need to sign extend. Do this by shifting the elements. Even if
+  // these aren't legal operations, they have a better chance of being
+  // legalized without full scalarization than the sign extension does.
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
+ SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
+ return DAG.getNode(ISD::SRA, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+ ShiftAmount);
+}
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
+ Src, DAG.getVectorIdxConstant(0, DL));
+ }
+
+ // Build up a zero vector to blend into this one.
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
+
+ // Shuffle the incoming lanes into the correct position, and pull all other
+ // lanes from the zero vector.
+ auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
+
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
+
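+// Build a byte-level shuffle mask that reverses the bytes within each vector
+// element, i.e. BSWAP expressed as a shuffle of an i8 vector.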
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
+ for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
+ for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
+ ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}
+
+SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+
+ // Scalable vectors can't use shuffle expansion.
+ if (VT.isScalableVector())
+ return TLI.expandBSWAP(Node, DAG);
+
+ // Generate a byte wise shuffle mask for the BSWAP.
+ SmallVector<int, 16> ShuffleMask;
+ createBSWAPShuffleMask(VT, ShuffleMask);
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
+
+ // Only emit a shuffle if the mask is legal.
+ if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
+    Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+                              ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return TLI.expandBSWAP(Node, DAG);
+
+ // Otherwise unroll.
+ return DAG.UnrollVectorOp(Node);
+}
+
+void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT VT = Node->getValueType(0);
+
+ // We can't unroll or use shuffles for scalable vectors.
+ if (VT.isScalableVector()) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
+ return;
+ }
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
+ return;
+ }
+
+  // If the vector element width is a whole number of bytes, test if it's
+  // legal to BSWAP shuffle the bytes and then perform the BITREVERSE on the
+  // byte vector. This greatly reduces the number of bit shifts necessary.
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+ SmallVector<int, 16> BSWAPMask;
+ createBSWAPShuffleMask(VT, BSWAPMask);
+
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+ if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+ (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+ (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+ BSWAPMask);
+ Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ Results.push_back(Op);
+ return;
+ }
+ }
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
+ return;
+ }
+
+ // Otherwise unroll.
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
+ // Implement VSELECT in terms of XOR, AND, OR
+ // on platforms which do not support blend natively.
+ SDLoc DL(Node);
+
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+
+ EVT VT = Mask.getValueType();
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcasted' to another type that is then handled.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Node);
+
+ // This operation also isn't safe with AND, OR, XOR when the boolean type is
+ // 0/1 and the select operands aren't also booleans, as we need an all-ones
+ // vector constant to mask with.
+ // FIXME: Sign extend 1 to all ones if that's legal on the target.
+ auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
+ if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
+ !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
+ Op1.getValueType().getVectorElementType() == MVT::i1))
+ return DAG.UnrollVectorOp(Node);
+
+ // If the mask and the type are different sizes, unroll the vector op. This
+ // can occur when getSetCCResultType returns something that is different in
+ // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
+ if (VT.getSizeInBits() != Op1.getValueSizeInBits())
+ return DAG.UnrollVectorOp(Node);
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue NotMask = DAG.getNOT(DL, Mask, VT);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
+}
+
+SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
+ // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
+ // do not support it natively.
+ SDLoc DL(Node);
+
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ EVT VT = Mask.getValueType();
+
+ // If we can't even use the basic vector operations of
+ // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
+ if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Node);
+
+ // This operation also isn't safe when the operands aren't also booleans.
+ if (Op1.getValueType().getVectorElementType() != MVT::i1)
+ return DAG.UnrollVectorOp(Node);
+
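+  // NOT(Mask) is Mask XOR all-ones; the trailing Ones and EVL operands are
+  // the VP mask and explicit vector length.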
+ SDValue Ones = DAG.getAllOnesConstant(DL, VT);
+ SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
+
+ Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
+ Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
+ return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
+}
+
+SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
+ // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
+ // indices less than the EVL/pivot are true. Combine that with the original
+ // mask for a full-length mask. Use a full-length VSELECT to select between
+ // the true and false values.
+ SDLoc DL(Node);
+
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ EVT MaskVT = Mask.getValueType();
+ bool IsFixedLen = MaskVT.isFixedLengthVector();
+
+ EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
+ MaskVT.getVectorElementCount());
+
+ // If we can't construct the EVL mask efficiently, it's better to unroll.
+ if ((IsFixedLen &&
+ !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
+ (!IsFixedLen &&
+ (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
+ return DAG.UnrollVectorOp(Node);
+
+ // If using a SETCC would result in a different type than the mask type,
+ // unroll.
+ if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ EVLVecVT) != MaskVT)
+ return DAG.UnrollVectorOp(Node);
+
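+  // Compare a step vector against the splatted EVL: lanes whose index is
+  // below EVL become true.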
+ SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
+ SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
+ SDValue EVLMask =
+ DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
+
+ SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
+ return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
+}
+
+SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
+ // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
+ EVT VT = Node->getValueType(0);
+
+  unsigned DivOpc =
+      Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
+
+ if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
+ return SDValue();
+
+ SDLoc DL(Node);
+
+ SDValue Dividend = Node->getOperand(0);
+ SDValue Divisor = Node->getOperand(1);
+ SDValue Mask = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ // X % Y -> X-X/Y*Y
+ SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
+ SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
+ return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
+}
+
+void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result, Chain;
+ if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
+ Results.push_back(Result);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Otherwise go ahead and unroll.
+ if (Node->isStrictFPOpcode()) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
+}
+
+void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = Node->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+ EVT VT = Src.getValueType();
+ SDLoc DL(Node);
+
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ SDValue Chain;
+ if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
+ Results.push_back(Result);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
+ TargetLowering::Expand) ||
+ (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
+ TargetLowering::Expand)) ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
+ if (IsStrict) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
+ return;
+ }
+
+ unsigned BW = VT.getScalarSizeInBits();
+ assert((BW == 64 || BW == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+ uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW =
+ DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
+
+  // Shift down the upper half to form HI and mask it away to form LO.
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
+
+ if (IsStrict) {
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
+ {Node->getValueType(0), MVT::Other},
+ {Node->getOperand(0), HI});
+ fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
+ {fHI.getValue(1), fHI, TWOHW});
+ SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
+ {Node->getValueType(0), MVT::Other},
+ {Node->getOperand(0), LO});
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
+ fLO.getValue(1));
+
+ // Add the two halves
+ SDValue Result =
+ DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
+ {TF, fHI, fLO});
+
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ return;
+ }
+
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
+
+ // Add the two halves
+ Results.push_back(
+ DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
+ SDLoc DL(Node);
+ SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
+ return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
+ Node->getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Node);
+}
+
+void VectorLegalizer::ExpandFSUB(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+  // For floating-point values, (a-b) is the same as a+(-b). If FNEG and FADD
+  // are legal, we can defer this to operation legalization, where it will be
+  // lowered as a+(-b).
+ EVT VT = Node->getValueType(0);
+ if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FADD, VT))
+ return; // Defer to LegalizeDAG
+
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
+}
+
+void VectorLegalizer::ExpandSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool NeedInvert = false;
+ bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
+ bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS;
+ bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
+ unsigned Offset = IsStrict ? 1 : 0;
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ SDValue LHS = Node->getOperand(0 + Offset);
+ SDValue RHS = Node->getOperand(1 + Offset);
+ SDValue CC = Node->getOperand(2 + Offset);
+
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+
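+  // Only condition codes marked Expand are legalized below; otherwise fall
+  // back to unrolling the compare.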
+ if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
+ if (IsStrict) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+ Results.push_back(UnrollVSETCC(Node));
+ return;
+ }
+
+ SDValue Mask, EVL;
+ if (IsVP) {
+ Mask = Node->getOperand(3 + Offset);
+ EVL = Node->getOperand(4 + Offset);
+ }
+
+ SDLoc dl(Node);
+ bool Legalized =
+ TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
+ EVL, NeedInvert, dl, Chain, IsSignaling);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (CC.getNode()) {
+ if (IsStrict) {
+ LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
+ {Chain, LHS, RHS, CC}, Node->getFlags());
+ Chain = LHS.getValue(1);
+ } else if (IsVP) {
+ LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
+ {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
+ } else {
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+ }
+ }
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert) {
+ if (!IsVP)
+ LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ else
+ LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
+ }
+ } else {
+ assert(!IsStrict && "Don't know how to expand for strict nodes.");
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ LHS =
+ DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
+ DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
+ DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
+ LHS->setFlags(Node->getFlags());
+ }
+
+ Results.push_back(LHS);
+ if (IsStrict)
+ Results.push_back(Chain);
+}
+
+void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+}
+
+void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+}
+
+void VectorLegalizer::ExpandMULO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Result, Overflow;
+ if (!TLI.expandMULO(Node, Result, Overflow, DAG))
+ std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
+
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+}
+
+void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDNode *N = Node;
+ if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
+ N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
+ Results.push_back(Expanded);
+}
+
+void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
+ ExpandUINT_TO_FLOAT(Node, Results);
+ return;
+ }
+ if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
+ ExpandFP_TO_UINT(Node, Results);
+ return;
+ }
+
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS) {
+ ExpandSETCC(Node, Results);
+ return;
+ }
+
+ UnrollStrictFPOp(Node, Results);
+}
+
+void VectorLegalizer::ExpandREM(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
+ "Expected REM node");
+
+ SDValue Result;
+ if (!TLI.expandREM(Node, Result, DAG))
+ Result = DAG.UnrollVectorOp(Node);
+ Results.push_back(Result);
+}
+
+void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumOpers = Node->getNumOperands();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ EVT TmpEltVT = EltVT;
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT);
+
+ EVT ValueVTs[] = {TmpEltVT, MVT::Other};
+ SDValue Chain = Node->getOperand(0);
+ SDLoc dl(Node);
+
+ SmallVector<SDValue, 32> OpValues;
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SmallVector<SDValue, 4> Opers;
+ SDValue Idx = DAG.getVectorIdxConstant(i, dl);
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned j = 1; j < NumOpers; ++j) {
+ SDValue Oper = Node->getOperand(j);
+ EVT OperVT = Oper.getValueType();
+
+ if (OperVT.isVector())
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperVT.getVectorElementType(), Oper, Idx);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
+ SDValue ScalarResult = ScalarOp.getValue(0);
+ SDValue ScalarChain = ScalarOp.getValue(1);
+
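+    // Strict FSETCC yields a boolean; convert it to the element type's
+    // 0 / all-ones representation before rebuilding the vector.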
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
+ DAG.getAllOnesConstant(dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
+
+ OpValues.push_back(ScalarResult);
+ OpChains.push_back(ScalarChain);
+ }
+
+ SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+ Results.push_back(Result);
+ Results.push_back(NewChain);
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue CC = Node->getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ SDLoc dl(Node);
+ SmallVector<SDValue, 8> Ops(NumElems);
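+  // Compare each pair of elements with a scalar SETCC, then materialize the
+  // boolean as 0 / all-ones in the result element type.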
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
+ }
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 000000000000..8c117c1c74dc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,7262 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ R = ScalarizeVecRes_VecInregOp(N);
+ break;
+ case ISD::ABS:
+ case ISD::ANY_EXTEND:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FREEZE:
+ case ISD::ARITH_FENCE:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ case ISD::FCANONICALIZE:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+ case ISD::FFREXP:
+ R = ScalarizeVecRes_FFREXP(N, ResNo);
+ break;
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::FLDEXP:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
+
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ R = ScalarizeVecRes_StrictFPOp(N);
+ break;
+
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ R = ScalarizeVecRes_FP_TO_XINT_SAT(N);
+ break;
+
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ R = ScalarizeVecRes_OverflowOp(N, ResNo);
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
+ R = ScalarizeVecRes_FIX(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+ SDLoc dl(N);
+
+ SDNode *ScalarNode =
+ DAG.getNode(N->getOpcode(), dl,
+ {VT0.getScalarType(), VT1.getScalarType()}, Elt)
+ .getNode();
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT,
+ SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
+ EVT VT = N->getValueType(0).getVectorElementType();
+ unsigned NumOpers = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT ValueVTs[] = {VT, MVT::Other};
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 4> Opers(NumOpers);
+
+ // The Chain is the first operand.
+ Opers[0] = Chain;
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+ EVT OperVT = Oper.getValueType();
+
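+    // Use the scalarized form of a vector operand if one exists; otherwise
+    // extract element 0.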
+ if (OperVT.isVector()) {
+ if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector)
+ Oper = GetScalarizedVector(Oper);
+ else
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperVT.getVectorElementType(), Oper,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+
+ Opers[i] = Oper;
+ }
+
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs),
+ Opers, N->getFlags());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
+ unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+
+ SDValue ScalarLHS, ScalarRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
+ ScalarLHS = GetScalarizedVector(N->getOperand(0));
+ ScalarRHS = GetScalarizedVector(N->getOperand(1));
+ } else {
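+    // The result type was not scalarized, so extract the first element of
+    // each operand directly.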
+ SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
+ DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
+ DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
+ ScalarLHS = ElemsLHS[0];
+ ScalarRHS = ElemsRHS[0];
+ }
+
+ SDVTList ScalarVTs = DAG.getVTList(
+ ResVT.getVectorElementType(), OvVT.getVectorElementType());
+ SDNode *ScalarNode = DAG.getNode(
+ N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+ ScalarNode->setFlags(N->getFlags());
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ SDValue Op = N->getOperand(0);
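+  // If the source is itself an illegal single-element vector, scalarize it
+  // first so the bitcast is done from its scalar element.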
+ if (Op.getValueType().isVector()
+ && Op.getValueType().getVectorNumElements() == 1
+ && !isSimpleLegalType(Op.getValueType()))
+ Op = GetScalarizedVector(Op);
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ NewVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ // The result needs scalarizing, but it's not a given that the source does.
+ // See similar logic in ScalarizeVecRes_UnaryOp.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+ return DAG.getNode(ISD::FP_ROUND, DL,
+ N->getValueType(0).getVectorElementType(), Op,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(
+ ISD::UNINDEXED, N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
+ N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
+ N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ // This is a workaround for targets where it's impossible to scalarize the
+ // result of a conversion, because the source type is legal.
+  // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+  // are widened to v8i8, v4i16, and v2i32, which are legal, because v1i64 is
+  // legal and was not scalarized.
+  // See the similar logic in ScalarizeVecRes_SETCC.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+ return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+
+ EVT OpVT = Op.getValueType();
+ EVT OpEltVT = OpVT.getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ switch (N->getOpcode()) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
+ }
+
+ llvm_unreachable("Illegal extend_vector_inreg opcode");
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ EVT OpVT = Cond.getValueType();
+ SDLoc DL(N);
+  // The vselect result and true/false operands need scalarizing, but it's
+  // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
+  // See the similar logic in ScalarizeVecRes_SETCC.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Cond = GetScalarizedVector(Cond);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Cond = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool =
+ TLI.getBooleanContents(false, false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+ // If integer and float booleans have different contents then we can't
+ // reliably optimize in all cases. There is a full explanation for this in
+ // DAGCombiner::visitSELECT() where the same issue affects folding
+ // (select C, 0, 1) to (xor C, 1).
+ if (TLI.getBooleanContents(false, false) !=
+ TLI.getBooleanContents(false, true)) {
+ // At least try the common case where the boolean is generated by a
+ // comparison.
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT OpVT = Cond->getOperand(0).getValueType();
+ ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+ VecBool = TLI.getBooleanContents(OpVT);
+ } else
+ ScalarBool = TargetLowering::UndefinedBooleanContent;
+ }
+
+ EVT CondVT = Cond.getValueType();
+ if (ScalarBool != VecBool) {
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+      // The vector boolean is all ones for true, but the scalar expects a
+      // single 1, so mask with 1.
+ Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
+ Cond, DAG.getConstant(1, SDLoc(N), CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+      // The vector boolean is a 0/1 value, but the scalar expects all ones
+      // for true, so sign extend from the low bit.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+
+ // Truncate the condition if needed
+ auto BoolVT = getSetCCResultType(CondVT);
+ if (BoolVT.bitsLT(CondVT))
+ Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
+
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.isUndef())
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
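+  // A shuffle index of zero selects the first operand; any other index
+  // selects the second.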
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ SDLoc dl(N);
+
+ // Handle case where result is scalarized but operand is not
+ if (getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector)
+ Src = GetScalarizedVector(Src);
+ else
+ Src = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SrcVT.getVectorElementType(), Src,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EVT DstVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT OpVT = LHS.getValueType();
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ SDLoc DL(N);
+
+ // The result needs scalarizing, but it's not a given that the source does.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ LHS = GetScalarizedVector(LHS);
+ RHS = GetScalarizedVector(RHS);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getVectorIdxConstant(0, DL));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+  // Vectors may have different boolean contents than scalars. Promote the
+  // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Arg = N->getOperand(0);
+ SDValue Test = N->getOperand(1);
+ EVT ArgVT = Arg.getValueType();
+ EVT ResultVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) {
+ Arg = GetScalarizedVector(Arg);
+ } else {
+ EVT VT = ArgVT.getVectorElementType();
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Arg,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ SDValue Res =
+ DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags());
+  // Vectors may have different boolean contents than scalars. Promote the
+  // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT));
+ return DAG.getNode(ExtendCode, DL, ResultVT, Res);
+}
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize this operator's "
+ "operand!\n");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Res = ScalarizeVecOp_UnaryOp(N);
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::VSELECT:
+ Res = ScalarizeVecOp_VSELECT(N);
+ break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+ break;
+ case ISD::FP_ROUND:
+ Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+ break;
+ case ISD::STRICT_FP_EXTEND:
+ Res = ScalarizeVecOp_STRICT_FP_EXTEND(N);
+ break;
+ case ISD::FP_EXTEND:
+ Res = ScalarizeVecOp_FP_EXTEND(N);
+ break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ Res = ScalarizeVecOp_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
+ break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// If the value to convert is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ N->getValueType(0), Elt);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the strict FP operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+ { N->getValueType(0).getScalarType(), MVT::Other },
+ { N->getOperand(0), Elt });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ if (Res.getValueType() != VT)
+ Res = VT.isFloatingPoint()
+ ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
+ : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
+ return Res;
+}
+
+/// If the input condition is a vector that needs to be scalarized, it must be
+/// <1 x i1>, so just convert to a normal ISD::SELECT
+/// (still with vector output type since that was acceptable if we got here).
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
+ SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
+ N->getOperand(2));
+}
+
+/// If the operand is a vector that needs to be scalarized then the result
+/// must be v1i1, so just convert to a scalar SETCC and wrap it with a
+/// SCALAR_TO_VECTOR, since the result type is legal if we got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ EVT NVT = VT.getVectorElementType();
+ SDLoc DL(N);
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+
+ // Vectors may have different boolean contents than scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+ Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
+}
+
+/// If the value to store is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ SDLoc dl(N);
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(
+ N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(),
+ N->getMemOperand()->getFlags(), N->getAAInfo());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getOriginalAlign(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
+}
+
+/// If the value to round is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Wrong operand for scalarization!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt,
+ N->getOperand(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
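+/// Strict variant of ScalarizeVecOp_FP_ROUND: round the single element of the
+/// <1 x ty> operand, thread the chain through the new node, and re-wrap the
+/// result with SCALAR_TO_VECTOR.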
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Wrong operand for scalarization!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { N->getValueType(0).getVectorElementType(),
+ MVT::Other },
+ { N->getOperand(0), Elt, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+/// If the value to extend is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+/// If the value to extend is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
+ {N->getValueType(0).getVectorElementType(), MVT::Other},
+ {N->getOperand(0), Elt});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
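+/// The operand is a <1 x ty> vector, so the reduction result is simply that
+/// single element, extended if the result type is wider than the element type.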
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ // Result type may be wider than element type.
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
+ return Res;
+}
+
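+/// A sequential reduction over a <1 x ty> vector degenerates to one scalar
+/// operation between the accumulator and the vector's only element.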
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
+
+ SDValue Op = GetScalarizedVector(VecOp);
+ return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0),
+ AccOp, Op, N->getFlags());
+}
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// This method is called when the specified result of the specified node is
+/// found to need vector splitting. At this point, the node may also have
+/// invalid operands or may have other results that need legalization; we just
+/// know that (at least) one result needs vector splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break;
+ case ISD::VSELECT:
+ case ISD::SELECT:
+ case ISD::VP_MERGE:
+ case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
+ case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ SplitVecRes_ScalarOp(N, Lo, Hi);
+ break;
+ case ISD::STEP_VECTOR:
+ SplitVecRes_STEP_VECTOR(N, Lo, Hi);
+ break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::VP_LOAD:
+ SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::MLOAD:
+ SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::MGATHER:
+ case ISD::VP_GATHER:
+ SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
+ break;
+ case ISD::SETCC:
+ case ISD::VP_SETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_REVERSE:
+ SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+ case ISD::VECTOR_SPLICE:
+ SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_DEINTERLEAVE:
+ SplitVecRes_VECTOR_DEINTERLEAVE(N);
+ return;
+ case ISD::VECTOR_INTERLEAVE:
+ SplitVecRes_VECTOR_INTERLEAVE(N);
+ return;
+ case ISD::VAARG:
+ SplitVecRes_VAARG(N, Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+ break;
+
+ case ISD::ABS:
+ case ISD::VP_ABS:
+ case ISD::BITREVERSE:
+ case ISD::VP_BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::VP_BSWAP:
+ case ISD::CTLZ:
+ case ISD::VP_CTLZ:
+ case ISD::CTTZ:
+ case ISD::VP_CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::VP_CTPOP:
+ case ISD::FABS: case ISD::VP_FABS:
+ case ISD::FCEIL:
+ case ISD::VP_FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::VP_FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::VP_FNEARBYINT:
+ case ISD::FNEG: case ISD::VP_FNEG:
+ case ISD::FREEZE:
+ case ISD::ARITH_FENCE:
+ case ISD::FP_EXTEND:
+ case ISD::VP_FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::VP_FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::VP_FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::VP_FRINT:
+ case ISD::FROUND:
+ case ISD::VP_FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::VP_FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT: case ISD::VP_SQRT:
+ case ISD::FTRUNC:
+ case ISD::VP_FROUNDTOZERO:
+ case ISD::SINT_TO_FP:
+ case ISD::VP_SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::VP_TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::VP_UINT_TO_FP:
+ case ISD::FCANONICALIZE:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+ case ISD::FFREXP:
+ SplitVecRes_FFREXP(N, ResNo, Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ SplitVecRes_ExtendOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD: case ISD::VP_ADD:
+ case ISD::SUB: case ISD::VP_SUB:
+ case ISD::MUL: case ISD::VP_MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::FADD: case ISD::VP_FADD:
+ case ISD::FSUB: case ISD::VP_FSUB:
+ case ISD::FMUL: case ISD::VP_FMUL:
+ case ISD::FMINNUM: case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::SDIV: case ISD::VP_SDIV:
+ case ISD::UDIV: case ISD::VP_UDIV:
+ case ISD::FDIV: case ISD::VP_FDIV:
+ case ISD::FPOW:
+ case ISD::AND: case ISD::VP_AND:
+ case ISD::OR: case ISD::VP_OR:
+ case ISD::XOR: case ISD::VP_XOR:
+ case ISD::SHL: case ISD::VP_SHL:
+ case ISD::SRA: case ISD::VP_ASHR:
+ case ISD::SRL: case ISD::VP_LSHR:
+ case ISD::UREM: case ISD::VP_UREM:
+ case ISD::SREM: case ISD::VP_SREM:
+ case ISD::FREM: case ISD::VP_FREM:
+ case ISD::SMIN: case ISD::VP_SMIN:
+ case ISD::SMAX: case ISD::VP_SMAX:
+ case ISD::UMIN: case ISD::VP_UMIN:
+ case ISD::UMAX: case ISD::VP_UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::VP_FCOPYSIGN:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ case ISD::FMA: case ISD::VP_FMA:
+ case ISD::FSHL:
+ case ISD::VP_FSHL:
+ case ISD::FSHR:
+ case ISD::VP_FSHR:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
+
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ SplitVecRes_StrictFPOp(N, Lo, Hi);
+ break;
+
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi);
+ break;
+
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
+ SplitVecRes_FIX(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
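+/// Advance \p Ptr past the low half of a split memory access and update
+/// \p MPI to match. For scalable memory types the increment is scaled by
+/// vscale; for fixed types it is a plain byte offset.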
+void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
+ MachinePointerInfo &MPI, SDValue &Ptr,
+ uint64_t *ScaledOffset) {
+ SDLoc DL(N);
+ unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8;
+
+ if (MemVT.isScalableVector()) {
+ SDNodeFlags Flags;
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize));
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ Flags.setNoUnsignedWrap(true);
+ if (ScaledOffset)
+ *ScaledOffset += IncrementSize;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement,
+ Flags);
+ } else {
+ MPI = N->getPointerInfo().getWithOffset(IncrementSize);
+ // Increment the pointer to the other half.
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize));
+ }
+}
+
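+/// Split a mask operand into low and high halves, reusing an existing split
+/// if the mask's type is itself being split by the legalizer.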
+std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
+ return SplitMask(Mask, SDLoc(Mask));
+}
+
+std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
+ const SDLoc &DL) {
+ SDValue MaskLo, MaskHi;
+ EVT MaskVT = Mask.getValueType();
+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ return std::make_pair(MaskLo, MaskHi);
+}
+
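+/// Split a binary operation. VP opcodes carry two extra operands (mask and
+/// EVL), which are split as well.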
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() == 2) {
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ return;
+ }
+
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
+ {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
+ {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
+}
+
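+/// Split a ternary operation such as FMA. VP opcodes additionally split the
+/// trailing mask and EVL operands.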
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ SDLoc dl(N);
+
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() == 3) {
+ Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags);
+ Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags);
+ return;
+ }
+
+ assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(),
+ {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(),
+ {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags);
+}
+
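+/// Split a fixed-point operation. The scale operand is a scalar and is passed
+/// unchanged to both halves.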
+void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+ SDValue Op2 = N->getOperand(2);
+
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2,
+ N->getFlags());
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2,
+ N->getFlags());
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getBuildVector(LoVT, dl, LoOps);
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getBuildVector(HiVT, dl, HiOps);
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ SDLoc dl(N);
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ EVT VecVT = Vec.getValueType();
+ EVT LoVT = Lo.getValueType();
+ EVT SubVecVT = SubVec.getValueType();
+ unsigned VecElems = VecVT.getVectorMinNumElements();
+ unsigned SubElems = SubVecVT.getVectorMinNumElements();
+ unsigned LoElems = LoVT.getVectorMinNumElements();
+
+ // If we know the index is in the first half, and we know the subvector
+ // doesn't cross the boundary between the halves, we can avoid spilling the
+ // vector, and insert into the lower half of the split vector directly.
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal + SubElems <= LoElems) {
+ Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+ return;
+ }
+ // Similarly, insert into the high half if the subvector lies entirely within
+ // it. Note that we can't tell whether a fixed-length subvector is fully
+ // within the high half of a scalable vector.
+ if (VecVT.isScalableVector() == SubVecVT.isScalableVector() &&
+ IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
+ Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
+ DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Store the new subvector into the specified index.
+ SDValue SubVecPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr,
+ MachinePointerInfo::getUnknownStack(MF));
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Increment the pointer to the other part.
+ auto *Load = cast<LoadSDNode>(Lo);
+ MachinePointerInfo MPI = Load->getPointerInfo();
+ IncrementPointer(Load, LoVT, MPI, StackPtr);
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
+}
+
+// Handle splitting an FP operation where the second operand does not match
+// the first operand's type. The second operand may be a scalar, or a vector
+// that has exactly as many elements as the first operand.
+void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (RHSVT.isVector()) {
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS);
+ Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS);
+ }
+}
+
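+/// Split IS_FPCLASS by splitting the floating-point operand and testing each
+/// half against the same class mask.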
+void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ SDValue ArgLo, ArgHi;
+ SDValue Test = N->getOperand(1);
+ SDValue FpValue = N->getOperand(0);
+ if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(FpValue, ArgLo, ArgHi);
+ else
+ std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue));
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags());
+ Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags());
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc dl(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) =
+ DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+
+ SDLoc dl(N);
+ SDValue InLo, InHi;
+
+ if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N0, InLo, InHi);
+ else
+ std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
+
+ EVT InLoVT = InLo.getValueType();
+ unsigned InNumElements = InLoVT.getVectorNumElements();
+
+ EVT OutLoVT, OutHiVT;
+ std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned OutNumElements = OutLoVT.getVectorNumElements();
+ assert((2 * OutNumElements) <= InNumElements &&
+ "Illegal extend vector in reg split");
+
+ // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
+ // input vector (i.e. we only use InLo):
+ // OutLo will extend the first OutNumElements from InLo.
+ // OutHi will extend the next OutNumElements from InLo.
+
+ // Shuffle the elements from InLo for OutHi into the bottom elements to
+ // create a 'fake' InHi.
+ SmallVector<int, 8> SplitHi(InNumElements, -1);
+ for (unsigned i = 0; i != OutNumElements; ++i)
+ SplitHi[i] = i + OutNumElements;
+ InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
+
+ Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
+ Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
+}
+
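+/// Split a constrained FP operation. Each half receives the original chain,
+/// and a TokenFactor joins the two new chains afterwards.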
+void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned NumOps = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 4> OpsLo(NumOps);
+ SmallVector<SDValue, 4> OpsHi(NumOps);
+
+ // The Chain is the first operand.
+ OpsLo[0] = Chain;
+ OpsHi[0] = Chain;
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDValue Op = N->getOperand(i);
+ SDValue OpLo = Op;
+ SDValue OpHi = Op;
+
+ EVT InVT = Op.getValueType();
+ if (InVT.isVector()) {
+ // If the input also splits, handle it directly for a
+ // compile time speedup. Otherwise split it by hand.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Op, OpLo, OpHi);
+ else
+ std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
+ }
+
+ OpsLo[i] = OpLo;
+ OpsHi[i] = OpHi;
+ }
+
+ EVT LoValueVTs[] = {LoVT, MVT::Other};
+ EVT HiValueVTs[] = {HiVT, MVT::Other};
+ Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo,
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi,
+ N->getFlags());
+
+ // Build a factor node to remember that this Op is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
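+/// Unroll a strict FP vector operation into scalar operations, padding the
+/// result with UNDEF up to \p ResNE elements and rejoining the per-element
+/// chains with a TokenFactor.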
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ // The results of each unrolled operation, including the chain.
+ EVT ChainVTs[] = {EltVT, MVT::Other};
+ SmallVector<SDValue, 8> Chains;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ Operands[0] = Chain;
+ for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, DAG.getVectorIdxConstant(i, dl));
+ } else {
+ Operands[j] = Operand;
+ }
+ }
+ SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+ Scalar.getNode()->setFlags(N->getFlags());
+
+ // Add in the scalar as well as its chain value to the
+ // result vectors.
+ Scalars.push_back(Scalar);
+ Chains.push_back(Scalar.getValue(1));
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(DAG.getUNDEF(EltVT));
+
+ // Build a new factor node to connect the chain back together.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ // Create a new BUILD_VECTOR node
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+ return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
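+/// Split an overflow-producing operation. The vector result not being split
+/// here is either split as well or reassembled with CONCAT_VECTORS.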
+void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
+ std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
+ std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
+
+ SDValue LoLHS, HiLHS, LoRHS, HiRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
+ GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
+ } else {
+ std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
+ std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
+ }
+
+ unsigned Opcode = N->getOpcode();
+ SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
+ SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
+ SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
+ SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+ LoNode->setFlags(N->getFlags());
+ HiNode->setFlags(N->getFlags());
+
+ Lo = SDValue(LoNode, ResNo);
+ Hi = SDValue(HiNode, ResNo);
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo),
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, OtherVT,
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
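+/// Split INSERT_VECTOR_ELT. A constant index known to land in one half
+/// inserts directly into that half; otherwise the vector is spilled to a
+/// stack slot, the element is stored into it, and the two halves are
+/// reloaded.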
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements();
+ if (IdxVal < LoNumElts) {
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ return;
+ } else if (!Vec.getValueType().isScalableVector()) {
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));
+ return;
+ }
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return;
+
+ // Make the vector elements byte-addressable if they aren't already.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ if (VecVT.getScalarSizeInBits() < 8) {
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorElementCount());
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ // Extend the element type to match if needed.
+ if (EltVT.bitsGT(Elt.getValueType()))
+ Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
+ }
+
+ // Spill the vector to the stack.
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+ Store = DAG.getTruncStore(
+ Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
+ commonAlignment(SmallestAlign,
+ EltVT.getFixedSizeInBits() / 8));
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);
+
+ // Increment the pointer to the other part.
+ auto Load = cast<LoadSDNode>(Lo);
+ MachinePointerInfo MPI = Load->getPointerInfo();
+ IncrementPointer(Load, LoVT, MPI, StackPtr);
+
+ Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign);
+
+ // If we adjusted the original type, we need to truncate the results.
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ if (LoVT != Lo.getValueType())
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
+ if (HiVT != Hi.getValueType())
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
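+/// Split STEP_VECTOR: the low half keeps the original step, and the high half
+/// is the same step vector offset by Step * (number of elements in the low
+/// half).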
+void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ assert(N->getValueType(0).isScalableVector() &&
+ "Only scalable vectors are supported for STEP_VECTOR");
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDValue Step = N->getOperand(0);
+
+ Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
+
+ // Hi = Lo + (EltCnt * Step)
+ EVT EltVT = Step.getValueType();
+ APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
+ SDValue StartOfHi =
+ DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
+ StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
+ StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
+
+ Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
+ Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
+}
+
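+/// Split SCALAR_TO_VECTOR or SPLAT_VECTOR. The low half holds the scalar;
+/// the high half is UNDEF for SCALAR_TO_VECTOR and a repeat of the splat for
+/// SPLAT_VECTOR.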
+void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0));
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ Hi = DAG.getUNDEF(HiVT);
+ } else {
+ assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode");
+ Hi = Lo;
+ }
+}
+
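+/// Split an ordinary (unindexed) load into two loads of the half-sized types,
+/// joined by a TokenFactor. Halves that are not byte-sized fall back to
+/// scalarizing the whole load.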
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
+ SDValue Value, NewChain;
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+ std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
+ ReplaceValueWith(SDValue(LD, 1), NewChain);
+ return;
+ }
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
+
+ MachinePointerInfo MPI;
+ IncrementPointer(LD, LoMemVT, MPI, Ptr);
+
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
+ HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(LD->isUnindexed() && "Indexed VP load during type legalization!");
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed variable-length load offset");
+ Align Alignment = LD->getOriginalAlign();
+ SDValue Mask = LD->getMask();
+ SDValue EVL = LD->getVectorLength();
+ EVT MemoryVT = LD->getMemoryVT();
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ // Split EVL operand
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ LD->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, LD->getAAInfo(), LD->getRanges());
+
+ Lo =
+ DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset,
+ MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad());
+
+ if (HiIsEmpty) {
+ // The hi vp_load has zero storage size. We therefore simply set it to
+ // the low vp_load and rely on subsequent removal from the chain.
+ Hi = Lo;
+ } else {
+ // Generate hi vp_load.
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ LD->isExpandingLoad());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector())
+ MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
+ else
+ MPI = LD->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedValue());
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ LD->getAAInfo(), LD->getRanges());
+
+ Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr,
+ Offset, MaskHi, EVLHi, HiMemVT, MMO,
+ LD->isExpandingLoad());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
+ SDValue &Lo, SDValue &Hi) {
+ assert(SLD->isUnindexed() &&
+ "Indexed VP strided load during type legalization!");
+ assert(SLD->getOffset().isUndef() &&
+ "Unexpected indexed variable-length load offset");
+
+ SDLoc DL(SLD);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0));
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty);
+
+ SDValue Mask = SLD->getMask();
+ SDValue LoMask, HiMask;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, LoMask, HiMask);
+ else
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+ }
+
+ SDValue LoEVL, HiEVL;
+ std::tie(LoEVL, HiEVL) =
+ DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL);
+
+ // Generate the low vp_strided_load
+ Lo = DAG.getStridedLoadVP(
+ SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL,
+ SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(),
+ LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad());
+
+ if (HiIsEmpty) {
+ // The high vp_strided_load has zero storage size. We therefore simply set
+ // it to the low vp_strided_load and rely on subsequent removal from the
+ // chain.
+ Hi = Lo;
+ } else {
+ // Generate the high vp_strided_load.
+ // To calculate the high base address, add to the low base address the
+ // stride multiplied by the number of elements already loaded by the low
+ // part, that is: Ptr = Ptr + (LoEVL * Stride)
+ EVT PtrVT = SLD->getBasePtr().getValueType();
+ SDValue Increment =
+ DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
+ DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT));
+ SDValue Ptr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment);
+
+ Align Alignment = SLD->getOriginalAlign();
+ if (LoMemVT.isScalableVector())
+ Alignment = commonAlignment(
+ Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()),
+ MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ SLD->getAAInfo(), SLD->getRanges());
+
+ Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(),
+ HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(),
+ SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO,
+ SLD->isExpandingLoad());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(SLD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
+ SDValue &Lo, SDValue &Hi) {
+ assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
+ EVT LoVT, HiVT;
+ SDLoc dl(MLD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Ch = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ SDValue Offset = MLD->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed masked load offset");
+ SDValue Mask = MLD->getMask();
+ SDValue PassThru = MLD->getPassThru();
+ Align Alignment = MLD->getOriginalAlign();
+ ISD::LoadExtType ExtType = MLD->getExtensionType();
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ EVT MemoryVT = MLD->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
+
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ else
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
+ MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
+
+ if (HiIsEmpty) {
+ // The hi masked load has zero storage size. We therefore simply set it to
+ // the low masked load and rely on subsequent removal from the chain.
+ Hi = Lo;
+ } else {
+ // Generate hi masked load.
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ MLD->isExpandingLoad());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector())
+ MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
+ else
+ MPI = MLD->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedValue());
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
+ HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MLD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
+ SDValue &Hi, bool SplitSETCC) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ struct Operands {
+ SDValue Mask;
+ SDValue Index;
+ SDValue Scale;
+ } Ops = [&]() -> Operands {
+ if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
+ return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
+ }
+ auto *VPSC = cast<VPGatherSDNode>(N);
+ return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()};
+ }();
+
+ EVT MemoryVT = N->getMemoryVT();
+ Align Alignment = N->getOriginalAlign();
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
+ }
+
+ EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Ops.Index.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Ops.Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+
+ if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
+ SDValue PassThru = MGT->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ else
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
+ ISD::MemIndexType IndexTy = MGT->getIndexType();
+
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
+ OpsLo, MMO, IndexTy, ExtType);
+
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
+ OpsHi, MMO, IndexTy, ExtType);
+ } else {
+ auto *VPGT = cast<VPGatherSDNode>(N);
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);
+
+ SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+ Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
+ MMO, VPGT->getIndexType());
+
+ SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+ Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
+ MMO, VPGT->getIndexType());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ EVT LoVT, HiVT;
+ SDLoc DL(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // If the input also splits, handle it directly. Otherwise split it by hand.
+ SDValue LL, LH, RL, RH;
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), LL, LH);
+ else
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+
+ if (getTypeAction(N->getOperand(1).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(1), RL, RH);
+ else
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ if (N->getOpcode() == ISD::SETCC) {
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+ } else {
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo,
+ EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi,
+ EVLHi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() <= 2) {
+ if (Opcode == ISD::FP_ROUND) {
+ Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
+ } else {
+ Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags);
+ }
+ return;
+ }
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
+}
+
+void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
+ auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi);
+ Lo->setFlags(N->getFlags());
+ Hi->setFlags(N->getFlags());
+
+ SDNode *HiNode = Hi.getNode();
+ SDNode *LoNode = Lo.getNode();
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo),
+ SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo),
+ SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DestVT = N->getValueType(0);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+ // We can do better than a generic split operation if the extend is doing
+ // more than just doubling the width of the elements and the following are
+ // true:
+ // - The number of vector elements is even,
+ // - the source type is legal,
+ // - the type of a split source is illegal,
+ // - the type of an extended (by doubling element size) source is legal, and
+ // - the type of that extended source when split is legal.
+ //
+ // This won't necessarily completely legalize the operation, but it will
+ // more effectively move in the right direction and prevent falling down
+ // to scalarization in many cases due to the input vector being split too
+ // far.
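+ //
+ // Illustrative example (hypothetical type legality): suppose v8i8 and v8i16
+ // are legal, v4i8 is not, and v4i16 is. Then for
+ //   %res = v8i64 sign_extend v8i8 %src
+ // we extend one step and split the intermediate vector:
+ //   %wide = v8i16 sign_extend v8i8 %src
+ //   %lo   = v4i16 extract_subvector %wide, 0
+ //   %hi   = v4i16 extract_subvector %wide, 4
+ //   Lo    = v4i64 sign_extend v4i16 %lo
+ //   Hi    = v4i64 sign_extend v4i16 %hi
+ // rather than splitting %src directly down to the illegal v4i8.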
+ if (SrcVT.getVectorElementCount().isKnownEven() &&
+ SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
+ EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
+
+ EVT SplitLoVT, SplitHiVT;
+ std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+ TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+ LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
+ if (!N->isVPOpcode()) {
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Get the low and high halves of the new one-step-extended vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+ // Get the low and high halves of the new one-step-extended vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo});
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi});
+ return;
+ }
+ }
+ // Fall back to the generic unary operator splitting otherwise.
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ auto &&IsConstant = [](const SDValue &N) {
+ APInt SplatValue;
+ return N.getResNo() == 0 &&
+ (ISD::isConstantSplatVector(N.getNode(), SplatValue) ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+ };
+ auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1,
+ SDValue &Input2,
+ ArrayRef<int> Mask) {
+ assert(Input1->getOpcode() == ISD::BUILD_VECTOR &&
+ Input2->getOpcode() == ISD::BUILD_VECTOR &&
+ "Expected build vector node.");
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT));
+ for (unsigned I = 0; I < NewElts; ++I) {
+ if (Mask[I] == PoisonMaskElem)
+ continue;
+ unsigned Idx = Mask[I];
+ if (Idx >= NewElts)
+ Ops[I] = Input2.getOperand(Idx - NewElts);
+ else
+ Ops[I] = Input1.getOperand(Idx);
+ // Truncate any wider elements down to the vector's element type.
+ if (Ops[I].getValueType().bitsGT(EltVT))
+ Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]);
+ }
+ return DAG.getBuildVector(NewVT, DL, Ops);
+ };
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
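+ //
+ // For illustration (types and mask chosen arbitrarily): when splitting
+ //   %r = v8i32 vector_shuffle v8i32 %a, v8i32 %b, <0, 9, 1, 10, 4, 13, 5, 14>
+ // the four inputs are %a.lo, %a.hi, %b.lo and %b.hi (each v4i32). The Lo
+ // half of %r only needs %a.lo and %b.lo, so it becomes a two-input shuffle
+ // of those; a half that pulled elements from three or four of the inputs
+ // would instead be assembled element by element with a BUILD_VECTOR.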
+ SmallVector<int> OrigMask(N->getMask());
+ // Try to pack incoming shuffles/inputs.
+ auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
+ &DL](SmallVectorImpl<int> &Mask) {
+ // Check if all inputs are shuffles of the same operands or non-shuffles.
+ MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
+ for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) {
+ SDValue Input = Inputs[Idx];
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
+ if (!Shuffle ||
+ Input.getOperand(0).getValueType() != Input.getValueType())
+ continue;
+ ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))]
+ .push_back(Idx);
+ ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))]
+ .push_back(Idx);
+ }
+ for (auto &P : ShufflesIdxs) {
+ if (P.second.size() < 2)
+ continue;
+ // Use the shuffle operands instead of the shuffles themselves.
+ // 1. Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = PoisonMaskElem;
+ continue;
+ }
+ auto *Shuffle =
+ dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode());
+ if (!Shuffle || !is_contained(P.second, SrcRegIdx))
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
+ continue;
+ }
+ Idx = MaskElt % NewElts +
+ P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first
+ ? 0
+ : 1] *
+ NewElts;
+ }
+ // 2. Update inputs.
+ Inputs[P.second[0]] = P.first.first;
+ Inputs[P.second[1]] = P.first.second;
+ // Clear the pair data.
+ P.second.clear();
+ ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
+ }
+ // Check if any concat_vectors can be simplified.
+ SmallBitVector UsedSubVector(2 * std::size(Inputs));
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = PoisonMaskElem;
+ continue;
+ }
+ TargetLowering::LegalizeTypeAction TypeAction =
+ getTypeAction(Inputs[SrcRegIdx].getValueType());
+ if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS &&
+ Inputs[SrcRegIdx].getNumOperands() == 2 &&
+ !Inputs[SrcRegIdx].getOperand(1).isUndef() &&
+ (TypeAction == TargetLowering::TypeLegal ||
+ TypeAction == TargetLowering::TypeWidenVector))
+ UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2));
+ }
+ if (UsedSubVector.count() > 1) {
+ SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs;
+ for (unsigned I = 0; I < std::size(Inputs); ++I) {
+ if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
+ continue;
+ if (Pairs.empty() || Pairs.back().size() == 2)
+ Pairs.emplace_back();
+ if (UsedSubVector.test(2 * I)) {
+ Pairs.back().emplace_back(I, 0);
+ } else {
+ assert(UsedSubVector.test(2 * I + 1) &&
+ "Expected to be used one of the subvectors.");
+ Pairs.back().emplace_back(I, 1);
+ }
+ }
+ if (!Pairs.empty() && Pairs.front().size() > 1) {
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ auto *It = find_if(
+ Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) {
+ return Idxs.front().first == SrcRegIdx ||
+ Idxs.back().first == SrcRegIdx;
+ });
+ if (It == Pairs.end())
+ continue;
+ Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) +
+ (SrcRegIdx == It->front().first ? 0 : (NewElts / 2));
+ }
+ // Adjust inputs.
+ for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) {
+ Inputs[Idxs.front().first] = DAG.getNode(
+ ISD::CONCAT_VECTORS, DL,
+ Inputs[Idxs.front().first].getValueType(),
+ Inputs[Idxs.front().first].getOperand(Idxs.front().second),
+ Inputs[Idxs.back().first].getOperand(Idxs.back().second));
+ }
+ }
+ }
+ bool Changed;
+ do {
+ // Try to remove extra shuffles (except broadcasts) and shuffles with
+ // reused operands.
+ Changed = false;
+ for (unsigned I = 0; I < std::size(Inputs); ++I) {
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
+ if (!Shuffle)
+ continue;
+ if (Shuffle->getOperand(0).getValueType() != NewVT)
+ continue;
+ int Op = -1;
+ if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() &&
+ !Shuffle->isSplat()) {
+ Op = 0;
+ } else if (!Inputs[I].hasOneUse() &&
+ !Shuffle->getOperand(1).isUndef()) {
+ // Find the only used operand, if possible.
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
+ continue;
+ }
+ int OpIdx = MaskElt / NewElts;
+ if (Op == -1) {
+ Op = OpIdx;
+ continue;
+ }
+ if (Op != OpIdx) {
+ Op = -1;
+ break;
+ }
+ }
+ }
+ if (Op < 0) {
+ // Try to check if one of the shuffle operands is used already.
+ for (int OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ if (Shuffle->getOperand(OpIdx).isUndef())
+ continue;
+ auto *It = find(Inputs, Shuffle->getOperand(OpIdx));
+ if (It == std::end(Inputs))
+ continue;
+ int FoundOp = std::distance(std::begin(Inputs), It);
+ // Found that operand is used already.
+ // 1. Fix the mask for the reused operand.
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
+ continue;
+ }
+ int MaskIdx = MaskElt / NewElts;
+ if (OpIdx == MaskIdx)
+ Idx = MaskElt % NewElts + FoundOp * NewElts;
+ }
+ // 2. Set Op to the unused OpIdx.
+ Op = (OpIdx + 1) % 2;
+ break;
+ }
+ }
+ if (Op >= 0) {
+ Changed = true;
+ Inputs[I] = Shuffle->getOperand(Op);
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ int OpIdx = MaskElt / NewElts;
+ if (OpIdx != Op)
+ continue;
+ Idx = MaskElt % NewElts + SrcRegIdx * NewElts;
+ }
+ }
+ }
+ } while (Changed);
+ };
+ TryPeekThroughShufflesInputs(OrigMask);
+ // Process unique inputs.
+ auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
+ NewElts](SmallVectorImpl<int> &Mask) {
+ SetVector<SDValue> UniqueInputs;
+ SetVector<SDValue> UniqueConstantInputs;
+ for (const auto &I : Inputs) {
+ if (IsConstant(I))
+ UniqueConstantInputs.insert(I);
+ else if (!I.isUndef())
+ UniqueInputs.insert(I);
+ }
+ // Adjust the mask in case of reused inputs. Constant inputs must also be
+ // inserted first, otherwise the final outcome is affected.
+ if (UniqueInputs.size() != std::size(Inputs)) {
+ auto &&UniqueVec = UniqueInputs.takeVector();
+ auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
+ unsigned ConstNum = UniqueConstantVec.size();
+ for (int &Idx : Mask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = PoisonMaskElem;
+ continue;
+ }
+ const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
+ if (It != UniqueConstantVec.end()) {
+ Idx = (Idx % NewElts) +
+ NewElts * std::distance(UniqueConstantVec.begin(), It);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ continue;
+ }
+ const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
+ assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
+ Idx = (Idx % NewElts) +
+ NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ }
+ copy(UniqueConstantVec, std::begin(Inputs));
+ copy(UniqueVec, std::next(std::begin(Inputs), ConstNum));
+ }
+ };
+ MakeUniqueInputs(OrigMask);
+ SDValue OrigInputs[4];
+ copy(Inputs, std::begin(OrigInputs));
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands.
+ unsigned FirstMaskIdx = High * NewElts;
+ SmallVector<int> Mask(NewElts * std::size(Inputs), PoisonMaskElem);
+ copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ assert(!Output && "Expected default initialized initial value.");
+ TryPeekThroughShufflesInputs(Mask);
+ MakeUniqueInputs(Mask);
+ SDValue TmpInputs[4];
+ copy(Inputs, std::begin(TmpInputs));
+ // Track changes in the output registers.
+ int UsedIdx = -1;
+ bool SecondIteration = false;
+ auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) {
+ if (UsedIdx < 0) {
+ UsedIdx = Idx;
+ return false;
+ }
+ if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx)
+ SecondIteration = true;
+ return SecondIteration;
+ };
+ processShuffleMasks(
+ Mask, std::size(Inputs), std::size(Inputs),
+ /*NumOfUsedRegs=*/1,
+ [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
+ [&Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
+ if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx],
+ DAG.getUNDEF(NewVT), Mask);
+ Inputs[Idx] = Output;
+ },
+ [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &TmpInputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx1, unsigned Idx2) {
+ if (AccumulateResults(Idx1)) {
+ if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1],
+ Inputs[Idx2], Mask);
+ } else {
+ if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1],
+ TmpInputs[Idx2], Mask);
+ }
+ Inputs[Idx1] = Output;
+ });
+ copy(OrigInputs, std::begin(Inputs));
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDValue SV = N->getOperand(2);
+ SDLoc dl(N);
+
+ const Align Alignment =
+ DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext()));
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value());
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value());
+ Chain = Hi.getValue(1);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT DstVTLo, DstVTHi;
+ std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue SrcLo, SrcHi;
+ EVT SrcVT = N->getOperand(0).getValueType();
+ if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
+ else
+ std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue InLo, InHi;
+ GetSplitVector(N->getOperand(0), InLo, InHi);
+ SDLoc DL(N);
+
+ Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+ Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Expanded = TLI.expandVectorSplice(N, DAG);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
+ SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ EVT VT = Op0Lo.getValueType();
+ SDLoc DL(N);
+ SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Lo, Op0Hi);
+ SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op1Lo, Op1Hi);
+
+ SetSplitVector(SDValue(N, 0), ResLo.getValue(0), ResHi.getValue(0));
+ SetSplitVector(SDValue(N, 1), ResLo.getValue(1), ResHi.getValue(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
+ SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ EVT VT = Op0Lo.getValueType();
+ SDLoc DL(N);
+ SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Lo, Op1Lo),
+ DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Hi, Op1Hi)};
+
+ SetSplitVector(SDValue(N, 0), Res[0].getValue(0), Res[0].getValue(1));
+ SetSplitVector(SDValue(N, 1), Res[1].getValue(0), Res[1].getValue(1));
+}
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// This method is called when the specified operand of the specified node is
+/// found to need vector splitting. At this point, all of the result types of
+/// the node are known to be legal, but other operands of the node may need
+/// legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom split this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::VP_SETCC:
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::VP_TRUNCATE:
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::VP_FP_ROUND:
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::VP_STORE:
+ Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSCATTER:
+ case ISD::VP_SCATTER:
+ Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
+ break;
+ case ISD::MGATHER:
+ case ISD::VP_GATHER:
+ Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::VP_SINT_TO_FP:
+ case ISD::VP_UINT_TO_FP:
+ if (N->getValueType(0).bitsLT(
+ N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
+ Res = SplitVecOp_TruncateHelper(N);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SplitVecOp_FP_TO_XINT_SAT(N);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::FTRUNC:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::FLDEXP:
+ Res = SplitVecOp_FPOpDifferentTypes(N);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ Res = SplitVecOp_VECREDUCE(N, OpNo);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = SplitVecOp_VECREDUCE_SEQ(N);
+ break;
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ Res = SplitVecOp_VP_REDUCE(N, OpNo);
+ break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+ // The only possibility for an illegal operand is the mask, since result type
+ // legalization would have handled this node already otherwise.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT Src0VT = Src0.getValueType();
+ SDLoc DL(N);
+ assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Lo and Hi have differing types");
+
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+ assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
+
+ SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
+ std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+ std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ // Apply the base (binary) form of the reduction opcode to the two split
+ // halves, then reduce the resulting half-width vector.
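+ //
+ // For example (illustrative types): a VECREDUCE_ADD of v8i32 whose operand
+ // was split into two v4i32 halves becomes
+ //   %partial = v4i32 add %lo, %hi
+ //   %res     = i32 vecreduce_add %partial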
+ unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+ SDNodeFlags Flags = N->getFlags();
+
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ // Reduce low half.
+ SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags);
+
+ // Reduce high half, using low half result as initial value.
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ assert(OpNo == 1 && "Can only split reduce vector operand");
+
+ unsigned Opc = N->getOpcode();
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl);
+
+ const SDNodeFlags Flags = N->getFlags();
+
+ SDValue ResLo =
+ DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags);
+ return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorElementCount());
+
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi });
+
+ // Build a factor node to remember that this operation is independent
+ // of the other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+ } else if (N->getNumOperands() == 3) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
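+ //
+ // Sketch of the effect for the example above: the v4i16 is split into two
+ // v2i16 halves, each half is bitcast to an i32, the two i32 values are
+ // joined into an i64 (after swapping halves on big-endian targets), and the
+ // result is bitcast to the original destination type.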
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
+ // We know that the result type is legal.
+ EVT ResVT = N->getValueType(0);
+
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+
+ SDValue Lo, Hi;
+ GetSplitVector(SubVec, Lo, Hi);
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+
+ SDValue FirstInsertion =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
+ SDValue SecondInsertion =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
+ DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
+
+ return SecondInsertion;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+ SDValue Lo, Hi;
+
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoEltsMin) {
+ assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else if (SubVT.isScalableVector() ==
+ N->getOperand(0).getValueType().isScalableVector())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getVectorIdxConstant(IdxVal - LoEltsMin, dl));
+
+ // After this point the DAG node only permits extracting fixed-width
+ // subvectors from scalable vectors.
+ assert(SubVT.isFixedLengthVector() &&
+ "Extracting scalable subvector from fixed-width unsupported");
+
+ // If the element type is i1 and we're not promoting the result, then we may
+ // end up loading the wrong data since the bits are packed tightly into
+ // bytes. For example, if we extract a v4i1 (legal) from a nxv4i1 (legal)
+ // type at index 4, then we will load a byte starting at index 0.
+ if (SubVT.getScalarType() == MVT::i1)
+ report_fatal_error("Don't know how to extract fixed-width predicate "
+ "subvector from a scalable predicate vector");
+
+ // Spill the vector to the stack. We should use the alignment for
+ // the smallest part.
+ SDValue Vec = N->getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Extract the subvector by loading the correct part.
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx);
+
+ return DAG.getLoad(
+ SubVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = Index->getZExtValue();
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ else if (!Vec.getValueType().isScalableVector())
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts, SDLoc(N),
+ Idx.getValueType())), 0);
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return SDValue();
+
+ // Make the vector elements byte-addressable if they aren't already.
+ SDLoc dl(N);
+ EVT EltVT = VecVT.getVectorElementType();
+ if (VecVT.getScalarSizeInBits() < 8) {
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorElementCount());
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ }
+
+ // Store the vector to the stack.
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Load back the required element.
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ // FIXME: This is to handle i1 vectors with elements promoted to i8.
+ // i1 vector handling needs general improvement.
+ if (N->getValueType(0).bitsLT(EltVT)) {
+ SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
+ }
+
+ return DAG.getExtLoad(
+ ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
+ commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
+ SDValue Lo, Hi;
+
+ // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so
+ // splitting the result has the same effect as splitting the input operand.
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
+ (void)OpNo;
+ SDValue Lo, Hi;
+ SplitVecRes_Gather(N, Lo, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed vp_store of vector?");
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Offset = N->getOffset();
+ assert(Offset.isUndef() && "Unexpected VP store offset");
+ SDValue Mask = N->getMask();
+ SDValue EVL = N->getVectorLength();
+ SDValue Data = N->getValue();
+ Align Alignment = N->getOriginalAlign();
+ SDLoc DL(N);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
+
+ EVT MemoryVT = N->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
+
+ // Split EVL
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL);
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+
+ Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // If the hi vp_store has zero storage size, only the lo vp_store is needed.
+ if (HiIsEmpty)
+ return Lo;
+
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector()) {
+ Alignment = commonAlignment(Alignment,
+ LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ } else
+ MPI = N->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedValue());
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
+
+ Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N,
+ unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?");
+ assert(N->getOffset().isUndef() && "Unexpected VP strided store offset");
+
+ SDLoc DL(N);
+
+ SDValue Data = N->getValue();
+ SDValue LoData, HiData;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Data, LoData, HiData);
+ else
+ std::tie(LoData, HiData) = DAG.SplitVector(Data, DL);
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs(
+ N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty);
+
+ SDValue Mask = N->getMask();
+ SDValue LoMask, HiMask;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC)
+ SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
+ else if (getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, LoMask, HiMask);
+ else
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+
+ SDValue LoEVL, HiEVL;
+ std::tie(LoEVL, HiEVL) =
+ DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), DL);
+
+ // Generate the low vp_strided_store
+ SDValue Lo = DAG.getStridedStoreVP(
+ N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(),
+ N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(),
+ N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore());
+
+ // If the high vp_strided_store has zero storage size, only the low
+ // vp_strided_store is needed.
+ if (HiIsEmpty)
+ return Lo;
+
+ // Generate the high vp_strided_store.
+ // To compute the high base address, advance the low base address by the
+ // stride multiplied by the number of elements already stored by the low
+ // part, that is: Ptr = Ptr + (LoEVL * Stride)
+ EVT PtrVT = N->getBasePtr().getValueType();
+ SDValue Increment =
+ DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
+ DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT));
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment);
+
+ Align Alignment = N->getOriginalAlign();
+ if (LoMemVT.isScalableVector())
+ Alignment = commonAlignment(Alignment,
+ LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
+ MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
+
+ SDValue Hi = DAG.getStridedStoreVP(
+ N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
+ HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed masked store of vector?");
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Offset = N->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed masked store offset");
+ SDValue Mask = N->getMask();
+ SDValue Data = N->getValue();
+ Align Alignment = N->getOriginalAlign();
+ SDLoc DL(N);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
+
+ EVT MemoryVT = N->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
+
+ SDValue Lo, Hi, Res;
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ if (HiIsEmpty) {
+ // The hi masked store has zero storage size.
+ // Only the lo masked store is needed.
+ Res = Lo;
+ } else {
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector()) {
+ Alignment = commonAlignment(
+ Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ } else
+ MPI = N->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedValue());
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
+
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ }
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ Align Alignment = N->getOriginalAlign();
+ SDLoc DL(N);
+ struct Operands {
+ SDValue Mask;
+ SDValue Index;
+ SDValue Scale;
+ SDValue Data;
+ } Ops = [&]() -> Operands {
+ if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+ return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
+ MSC->getValue()};
+ }
+ auto *VPSC = cast<VPScatterSDNode>(N);
+ return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
+ VPSC->getValue()};
+ }();
+ // Split all operands
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Ops.Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
+ }
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Ops.Index.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Ops.Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
+
+ SDValue Lo;
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+
+ if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+ Lo =
+ DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+ MSC->getIndexType(), MSC->isTruncatingStore());
+
+ // The order of the scatter operations after the split is well defined: the
+ // "Hi" part comes after the "Lo" part, so the two operations are chained
+ // one after the other.
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
+ MMO, MSC->getIndexType(),
+ MSC->isTruncatingStore());
+ }
+ auto *VPSC = cast<VPScatterSDNode>(N);
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
+
+ SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+ Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+ VPSC->getIndexType());
+
+ // The order of the scatter operations after the split is well defined: the
+ // "Hi" part comes after the "Lo" part, so the two operations are chained
+ // one after the other.
+ SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
+ VPSC->getIndexType());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ SDLoc DL(N);
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ Align Alignment = N->getOriginalAlign();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ // Scalarize if the split halves are not byte-sized.
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
+ return TLI.scalarizeVectorStore(N, DAG);
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
+ Alignment, MMOFlags, AAInfo);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ MachinePointerInfo MPI;
+ IncrementPointer(N, LoMemVT, MPI, Ptr);
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI,
+ HiMemVT, Alignment, MMOFlags, AAInfo);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ SDLoc DL(N);
+
+ // The input operands all must have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (const SDValue &Op : N->op_values()) {
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
+ DAG.getVectorIdxConstant(i, DL)));
+ }
+ }
+
+ return DAG.getBuildVector(N->getValueType(0), DL, Elts);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
+ // The result type is legal, but the input type is illegal. If splitting
+ // ends up with the result type of each half still being legal, just
+ // do that. If, however, that would result in an illegal result type,
+ // we can try to get more clever with power-two vectors. Specifically,
+ // split the input type, but also widen the result element size, then
+ // concatenate the halves and truncate again. For example, consider a target
+ // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+ // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+ // %inlo = v4i32 extract_subvector %in, 0
+ // %inhi = v4i32 extract_subvector %in, 4
+ // %lo16 = v4i16 trunc v4i32 %inlo
+ // %hi16 = v4i16 trunc v4i32 %inhi
+ // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+ // %res = v8i8 trunc v8i16 %in16
+ //
+ // Without this transform, the original truncate would end up being
+ // scalarized, which is pretty much always a last resort.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ SDValue InVec = N->getOperand(OpNo);
+ EVT InVT = InVec->getValueType(0);
+ EVT OutVT = N->getValueType(0);
+ ElementCount NumElements = OutVT.getVectorElementCount();
+ bool IsFloat = OutVT.isFloatingPoint();
+
+ unsigned InElementSize = InVT.getScalarSizeInBits();
+ unsigned OutElementSize = OutVT.getScalarSizeInBits();
+
+ // Determine the split output VT. If it's legal we can just split directly.
+ EVT LoOutVT, HiOutVT;
+ std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+ assert(LoOutVT == HiOutVT && "Unequal split?");
+
+ // If the input elements are only 1/2 the width of the result elements,
+ // just use the normal splitting. Our trick only works if there's room
+ // to split more than once.
+ if (isTypeLegal(LoOutVT) ||
+ InElementSize <= OutElementSize * 2)
+ return SplitVecOp_UnaryOp(N);
+ SDLoc DL(N);
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = InVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+ if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
+ return SplitVecOp_UnaryOp(N);
+
+ // Get the split input vector.
+ SDValue InLoVec, InHiVec;
+ GetSplitVector(InVec, InLoVec, InHiVec);
+
+ // Truncate them to 1/2 the element size.
+ //
+ // This assumes the number of elements is a power of two; any vector that
+ // isn't should be widened, not split.
+ EVT HalfElementVT = IsFloat ?
+ EVT::getFloatingPointVT(InElementSize/2) :
+ EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+ NumElements.divideCoefficientBy(2));
+
+ SDValue HalfLo;
+ SDValue HalfHi;
+ SDValue Chain;
+ if (N->isStrictFPOpcode()) {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+ {N->getOperand(0), InLoVec});
+ HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+ {N->getOperand(0), InHiVec});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
+ HalfHi.getValue(1));
+ } else {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+ }
+
+ // Concatenate them to get the full intermediate truncation result.
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+ SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+ HalfHi);
+ // Now finish up by truncating all the way down to the original result
+ // type. This should normally be something that ends up being legal directly,
+ // but in theory if a target has very wide vectors and an annoyingly
+ // restricted set of legal types, this split can chain to build things up.
+
+ if (N->isStrictFPOpcode()) {
+ SDValue Res = DAG.getNode(
+ ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
+ {Chain, InterVec,
+ DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
+ // Relink the chain
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+ return Res;
+ }
+
+ return IsFloat
+ ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
+ DAG.getTargetConstant(
+ 0, DL, TLI.getPointerTy(DAG.getDataLayout())))
+ : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
+
+ LLVMContext &Context = *DAG.getContext();
+ EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
+ EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
+
+ if (N->getOpcode() == ISD::SETCC) {
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ } else {
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+ LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1,
+ N->getOperand(2), MaskLo, EVLLo);
+ HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
+ N->getOperand(2), MaskHi, EVLHi);
+ }
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorElementCount());
+
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else if (N->getOpcode() == ISD::VP_FP_ROUND) {
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL);
+ Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
+ Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
+ } else {
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+// Split a vector type in an FP binary operation where the second operand has a
+// different type from the first.
+//
+// The result (and the first input) has a legal vector type, but the second
+// input needs splitting.
+SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
+ SDLoc DL(N);
+
+ EVT LHSLoVT, LHSHiVT;
+ std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT))
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+
+ SDValue LHSLo, LHSHi;
+ std::tie(LHSLo, LHSHi) =
+ DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT);
+
+ SDValue RHSLo, RHSHi;
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
+
+ SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo);
+ SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT NewResVT =
+ EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorElementCount());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+
+ auto unrollExpandedOp = [&]() {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements.
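+ // For example (illustrative): an FSIN on v3f32 that would be widened to
+ // v4f32 could otherwise end up as four sinf libcalls, one of them on an
+ // undef lane; unrolling here emits only the three scalar ops that are
+ // actually needed.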
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ return true;
+ }
+ return false;
+ };
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to widen the result of this operator!");
+
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = WidenVecRes_INSERT_SUBVECTOR(N);
+ break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::STEP_VECTOR:
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ Res = WidenVecRes_ScalarOp(N);
+ break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT:
+ case ISD::VP_SELECT:
+ case ISD::VP_MERGE:
+ Res = WidenVecRes_Select(N);
+ break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::VP_SETCC:
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::VP_LOAD:
+ Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
+ break;
+ case ISD::MLOAD:
+ Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER:
+ Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
+ case ISD::VP_GATHER:
+ Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
+ break;
+ case ISD::VECTOR_REVERSE:
+ Res = WidenVecRes_VECTOR_REVERSE(N);
+ break;
+
+ case ISD::ADD: case ISD::VP_ADD:
+ case ISD::AND: case ISD::VP_AND:
+ case ISD::MUL: case ISD::VP_MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR: case ISD::VP_OR:
+ case ISD::SUB: case ISD::VP_SUB:
+ case ISD::XOR: case ISD::VP_XOR:
+ case ISD::SHL: case ISD::VP_SHL:
+ case ISD::SRA: case ISD::VP_ASHR:
+ case ISD::SRL: case ISD::VP_LSHR:
+ case ISD::FMINNUM: case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::SMIN: case ISD::VP_SMIN:
+ case ISD::SMAX: case ISD::VP_SMAX:
+ case ISD::UMIN: case ISD::VP_UMIN:
+ case ISD::UMAX: case ISD::VP_UMAX:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ // Vector-predicated binary op widening. Note that -- unlike the
+ // unpredicated versions -- we don't have to worry about trapping on
+ // operations like UDIV, FADD, etc., as we pass on the original vector
+ // length parameter. This means the widened elements containing garbage
+ // aren't active.
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ case ISD::VP_FCOPYSIGN:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FPOW:
+ case ISD::FREM:
+ if (unrollExpandedOp())
+ break;
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those
+ // like any other binary ops.
+ [[fallthrough]];
+
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = WidenVecRes_BinaryCanTrap(N);
+ break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ // These are binary operations, but with an extra operand that shouldn't
+ // be widened (the scale).
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N);
+ break;
+
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ Res = WidenVecRes_StrictFP(N);
+ break;
+
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ Res = WidenVecRes_OverflowOp(N, ResNo);
+ break;
+
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
+ case ISD::IS_FPCLASS:
+ Res = WidenVecRes_IS_FPCLASS(N);
+ break;
+
+ case ISD::FLDEXP:
+ case ISD::FPOWI:
+ if (!unrollExpandedOp())
+ Res = WidenVecRes_ExpOp(N);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::VP_FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::VP_FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::VP_FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::VP_SINT_TO_FP:
+ case ISD::VP_TRUNCATE:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::VP_UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = WidenVecRes_FP_TO_XINT_SAT(N);
+ break;
+
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ if (unrollExpandedOp())
+ break;
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those
+ // like any other unary ops.
+ [[fallthrough]];
+
+ case ISD::ABS:
+ case ISD::VP_ABS:
+ case ISD::BITREVERSE:
+ case ISD::VP_BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::VP_BSWAP:
+ case ISD::CTLZ:
+ case ISD::VP_CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::VP_CTPOP:
+ case ISD::CTTZ:
+ case ISD::VP_CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ case ISD::FNEG: case ISD::VP_FNEG:
+ case ISD::VP_FABS:
+ case ISD::VP_SQRT:
+ case ISD::VP_FCEIL:
+ case ISD::VP_FFLOOR:
+ case ISD::VP_FRINT:
+ case ISD::VP_FNEARBYINT:
+ case ISD::VP_FROUND:
+ case ISD::VP_FROUNDEVEN:
+ case ISD::VP_FROUNDTOZERO:
+ case ISD::FREEZE:
+ case ISD::ARITH_FENCE:
+ case ISD::FCANONICALIZE:
+ Res = WidenVecRes_Unary(N);
+ break;
+ case ISD::FMA: case ISD::VP_FMA:
+ case ISD::FSHL:
+ case ISD::VP_FSHL:
+ case ISD::FSHR:
+ case ISD::VP_FSHR:
+ Res = WidenVecRes_Ternary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ if (N->getNumOperands() == 3)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+
+ assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, InOp3, Mask, N->getOperand(4)});
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ if (N->getNumOperands() == 2)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
+ N->getFlags());
+
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
+ // Binary op widening, but with an extra operand that shouldn't be widened.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
+ N->getFlags());
+}
+
+// Given the pieces that a vector operation has been broken up into while
+// widening, see if we can collect them together into the next widest legal
+// VT. This implementation is trap-safe.
+static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
+ SmallVectorImpl<SDValue> &ConcatOps,
+ unsigned ConcatEnd, EVT VT, EVT MaxVT,
+ EVT WidenVT) {
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ SDLoc dl(ConcatOps[0]);
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
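+ // Illustrative example (assuming v2f32 and v4f32 are legal, MaxVT == v2f32
+ // and WidenVT == v4f32): ConcatOps == [v2f32, f32, f32] first has its two
+ // trailing scalars inserted into an undef v2f32, giving [v2f32, v2f32],
+ // which is then concatenated into the final v4f32 result.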
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ int Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getVectorIdxConstant(i, dl));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ ArrayRef(ConcatOps.data(), NumOps));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+ // Binary op widening for operations that can trap.
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorMinNumElements();
+ const SDNodeFlags Flags = N->getFlags();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
+ }
+
+ // FIXME: Improve support for scalable vectors.
+ assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
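+ // Illustrative example: widening an SDIV on v3i32 to v4i32 directly could
+ // trap on the garbage fourth lane, so (assuming v2i32 is also legal)
+ // elements 0-1 are divided as a v2i32, element 2 as a scalar, and the
+ // pieces are reassembled by CollectOpsToWiden.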
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getVectorIdxConstant(Idx, dl));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getVectorIdxConstant(Idx, dl));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getVectorIdxConstant(Idx, dl));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getVectorIdxConstant(Idx, dl));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2, Flags);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return WidenVecRes_STRICT_FSETCC(N);
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ return WidenVecRes_Convert_StrictFP(N);
+ default:
+ break;
+ }
+
+ // StrictFP op widening for operations that can trap.
+ unsigned NumOpers = N->getNumOperands();
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SmallVector<SDValue, 4> InOps;
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ SmallVector<SDValue, 16> Chains;
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // The Chain is the first operand.
+ InOps.push_back(N->getOperand(0));
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ EVT OpVT = Oper.getValueType();
+ if (OpVT.isVector()) {
+ if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector)
+ Oper = GetWidenedVector(Oper);
+ else {
+ EVT WideOpVT =
+ EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
+ WidenVT.getVectorElementCount());
+ Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ DAG.getUNDEF(WideOpVT), Oper,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ }
+
+ InOps.push_back(Oper);
+ }
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ EVT OpVT = Op.getValueType();
+ if (OpVT.isVector()) {
+ EVT OpExtractVT =
+ EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
+ VT.getVectorElementCount());
+ Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpExtractVT, Op,
+ DAG.getVectorIdxConstant(Idx, dl));
+ }
+
+ EOps.push_back(Op);
+ }
+
+ EVT OperVT[] = {VT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ EVT OpVT = Op.getValueType();
+ if (OpVT.isVector())
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OpVT.getVectorElementType(), Op,
+ DAG.getVectorIdxConstant(Idx, dl));
+
+ EOps.push_back(Op);
+ }
+
+ EVT WidenVT[] = {WidenEltVT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Build a factor node to remember all the Ops that have been created.
+ SDValue NewChain;
+ if (Chains.size() == 1)
+ NewChain = Chains[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT WideResVT, WideOvVT;
+ SDValue WideLHS, WideRHS;
+
+ // TODO: This might result in a widen/split loop.
+ if (ResNo == 0) {
+ WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT);
+ WideOvVT = EVT::getVectorVT(
+ *DAG.getContext(), OvVT.getVectorElementType(),
+ WideResVT.getVectorNumElements());
+
+ WideLHS = GetWidenedVector(N->getOperand(0));
+ WideRHS = GetWidenedVector(N->getOperand(1));
+ } else {
+ WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT);
+ WideResVT = EVT::getVectorVT(
+ *DAG.getContext(), ResVT.getVectorElementType(),
+ WideOvVT.getVectorNumElements());
+
+ SDValue Zero = DAG.getVectorIdxConstant(0, DL);
+ WideLHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(0), Zero);
+ WideRHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(1), Zero);
+ }
+
+ SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT);
+ SDNode *WideNode = DAG.getNode(
+ N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode();
+
+ // Replace the other vector result not being explicitly widened here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
+ SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
+ } else {
+ SDValue Zero = DAG.getVectorIdxConstant(0, DL);
+ SDValue OtherVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(WideNode, ResNo);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
+
+ EVT InVT = InOp.getValueType();
+
+ unsigned Opcode = N->getOpcode();
+ const SDNodeFlags Flags = N->getFlags();
+
+ // Handle the case of ZERO_EXTEND where the promoted InVT element size does
+ // not equal that of WidenVT.
+ if (N->getOpcode() == ISD::ZERO_EXTEND &&
+ getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
+ TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
+ WidenVT.getScalarSizeInBits()) {
+ InOp = ZExtPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
+ Opcode = ISD::TRUNCATE;
+ }
+
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
+ ElementCount InVTEC = InVT.getVectorElementCount();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTEC = InVT.getVectorElementCount();
+ if (InVTEC == WidenEC) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ if (N->getNumOperands() == 3) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2));
+ }
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
+ }
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+ // If both the input and result vector types are of the same width, extend
+ // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
+ // accept fewer elements in the result than in the input.
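+ // For example, sign-extending v2i16 to v2i32 on a target where the input
+ // widens to v8i16 and the result widens to v4i32 (both 128 bits) becomes a
+ // single SIGN_EXTEND_VECTOR_INREG of the widened v8i16.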
+ if (Opcode == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
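+ // For example (illustrative, on a target where v2i32 is legal but v2f32
+ // widens to v4f32): a SINT_TO_FP from v2i32 to v2f32 concatenates the input
+ // with one undef v2i32 and emits a single v4i32 -> v4f32 convert.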
+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat =
+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
+ }
+
+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getVectorIdxConstant(0, DL));
+ // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
+ }
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Also widen the input.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+ Src = GetWidenedVector(Src);
+ SrcVT = Src.getValueType();
+ }
+
+ // Input and output not widened to the same size, give up.
+ if (WidenNumElts != SrcVT.getVectorElementCount())
+ return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
+ SDValue InOp = N->getOperand(1);
+ SDLoc DL(N);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+
+ // FIXME: Optimizations need to be implemented here.
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 32> OpChains;
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenSVT = WidenVT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ }
+ }
+ }
+
+ // Unroll, extend the scalars and rebuild the vector.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
+ break;
+ default:
+ llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
+ }
+ Ops.push_back(Val);
+ }
+
+ while (Ops.size() != WidenNumElts)
+ Ops.push_back(DAG.getUNDEF(WidenSVT));
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+ // If this is an FCOPYSIGN with same input types, we can treat it as a
+ // normal (can trap) binary op.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+ SDValue FpValue = N->getOperand(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+ SDValue Arg = GetWidenedVector(FpValue);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
+ N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags());
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT,
+ {InOp, Mask, N->getOperand(2)});
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
+ case TargetLowering::TypePromoteInteger: {
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ SDValue NInOp = GetPromotedInteger(InOp);
+ EVT NInVT = NInOp.getValueType();
+ if (WidenVT.bitsEq(NInVT)) {
+ // For big endian targets we need to shift the input integer or the
+ // interesting bits will end up at the wrong place.
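+ // For example (illustrative): bitcasting i16 to v2i8, where i16 promotes to
+ // i32 and v2i8 widens to v4i8, needs a left shift by 16 so that the
+ // original bytes land in elements 0 and 1 of the widened big-endian vector.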
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout());
+ assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!");
+ NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp,
+ DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp);
+ }
+ InOp = NInOp;
+ InVT = NInVT;
+ break;
+ }
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ unsigned InScalarSize = InVT.getScalarSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) {
+ // Determine the new input vector type. The new input vector type will use
+ // the same element type (if it's a vector) or use the input type itself as
+ // the element type. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumParts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ // For big endian systems, using the promoted input scalar type
+ // to produce the scalar_to_vector would put the desired bits into
+ // the least significant byte(s) of the wider element zero. This
+ // will mean that the users of the result vector are using incorrect
+ // bits. Use the original input type instead. Although either input
+ // type can be used on little endian systems, for consistency we
+ // use the original type there as well.
+ EVT OrigInVT = N->getOperand(0).getValueType();
+ NewNumParts = WidenSize / OrigInVT.getSizeInBits();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ SDValue NewVec;
+ if (InVT.isVector()) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might
+ // make it an illegal type that might lead to repeatedly splitting the
+ // input and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenSize % InSize == 0) {
+ SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ } else {
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(InOp, Ops);
+ Ops.append(WidenSize / InScalarSize - Ops.size(),
+ DAG.getUNDEF(InVT.getVectorElementType()));
+
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ }
+ } else {
+ NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // Build a vector with undefined for the new nodes.
+ EVT VT = N->getValueType(0);
+
+ // Integer BUILD_VECTOR operands may be larger than the node's vector element
+ // type. The UNDEFs need to have the same type as the existing operands.
+ EVT EltVT = N->getOperand(0).getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
+ NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
+
+ return DAG.getBuildVector(WidenVT, dl, NewOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned NumInElts = InVT.getVectorMinNumElements();
+ if (WidenNumElts % NumInElts == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenNumElts / NumInElts;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ assert(!WidenVT.isScalableVector() &&
+ "Cannot use vector shuffles to widen CONCAT_VECTOR result");
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ MaskOps);
+ }
+ }
+ }
+
+ assert(!WidenVT.isScalableVector() &&
+ "Cannot use build vectors to widen CONCAT_VECTOR result");
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j = 0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(j, dl));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getBuildVector(WidenVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+
+ auto InOpTypeAction = getTypeAction(InOp.getValueType());
+ if (InOpTypeAction == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned InNumElts = InVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ assert(IdxVal % VTNumElts == 0 &&
+ "Expected Idx to be a multiple of subvector minimum vector length");
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+ if (VT.isScalableVector()) {
+ // Try to split the operation up into smaller extracts and concat the
+ // results together, e.g.
+ // nxv6i64 extract_subvector(nxv12i64, 6)
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv16i64, 6)
+ // nxv2i64 extract_subvector(nxv16i64, 8)
+ // nxv2i64 extract_subvector(nxv16i64, 10)
+ // undef)
+ unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ // Avoid recursion around e.g. nxv1i8.
+ if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
+ SmallVector<SDValue> Parts;
+ unsigned I = 0;
+ for (; I < VTNumElts / GCD; ++I)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
+ for (; I < WidenNumElts / GCD; ++I)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ report_fatal_error("Don't know how to widen the result of "
+ "EXTRACT_SUBVECTOR for scalable vectors");
+ }
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned i;
+ for (i = 0; i < VTNumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + i, dl));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getBuildVector(WidenVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_AssertZext(SDNode *N) {
+ SDValue InOp = ModifyToType(
+ N->getOperand(0),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), true);
+ return DAG.getNode(ISD::AssertZext, SDLoc(N), InOp.getValueType(), InOp,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!LD->getMemoryVT().isByteSized()) {
+ SDValue Value, NewChain;
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+ ReplaceValueWith(SDValue(LD, 0), Value);
+ ReplaceValueWith(SDValue(LD, 1), NewChain);
+ return SDValue();
+ }
+
+ // Generate a vector-predicated load if it is custom/legal on the target. To
+ // avoid possible recursion, only do this if the widened mask type is legal.
+ // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ EVT LdVT = LD->getMemoryVT();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (ExtType == ISD::NON_EXTLOAD &&
+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ SDLoc DL(N);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ LdVT.getVectorElementCount());
+ const auto *MMO = LD->getMemOperand();
+ SDValue NewLoad =
+ DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
+ MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),
+ MMO->getAAInfo());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
+ report_fatal_error("Unable to widen vector load");
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue EVL = N->getVectorLength();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
+ .getVectorElementCount() &&
+ "Unable to widen vector load");
+
+ SDValue Res =
+ DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(),
+ N->getBasePtr(), N->getOffset(), Mask, EVL,
+ N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
+ SDLoc DL(N);
+
+ // The mask should be widened as well
+ SDValue Mask = N->getMask();
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided load");
+ Mask = GetWidenedVector(Mask);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Data and mask vectors should have the same number of elements");
+
+ SDValue Res = DAG.getStridedLoadVP(
+ N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
+ N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
+ N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
+ N->isExpandingLoad());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ SDValue Res = DAG.getMaskedLoad(
+ WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ ExtType, N->isExpandingLoad());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
+ SDValue Scale = N->getScale();
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ // Widen the Index operand
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
+ Scale };
+
+ // Widen the MemoryType
+ EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getMemoryVT().getScalarType(), NumElts);
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ WideMemVT, dl, Ops, N->getMemOperand(),
+ N->getIndexType(), N->getExtensionType());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue Scale = N->getScale();
+ ElementCount WideEC = WideVT.getVectorElementCount();
+ SDLoc dl(N);
+
+ SDValue Index = GetWidenedVector(N->getIndex());
+ EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getMemoryVT().getScalarType(), WideEC);
+ Mask = GetWidenedMask(Mask, WideEC);
+
+ SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale,
+ Mask, N->getVectorLength()};
+ SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT,
+ dl, Ops, N->getMemOperand(), N->getIndexType());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
+}
+
+// Return true if this is a SETCC node or a strict version of it.
+static inline bool isSETCCOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return true;
+ }
+ return false;
+}
+
+// Return true if this is a node that could have two SETCCs as operands.
+static inline bool isLogicalMaskOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return true;
+ }
+ return false;
+}
+
+// If N is a SETCC or a strict variant of it, return the type
+// of the compare operands.
+static inline EVT getSETCCOperandType(SDValue N) {
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ return N->getOperand(OpNo).getValueType();
+}
+
+// This is used just for the assert in convertMask(). Check that this is
+// either a SETCC or a SETCC already handled by convertMask().
+#ifndef NDEBUG
+static inline bool isSETCCorConvertedSETCC(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned i = 1; i < N->getNumOperands(); ++i)
+ if (!N->getOperand(i)->isUndef())
+ return false;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::SIGN_EXTEND)
+ N = N.getOperand(0);
+
+ if (isLogicalMaskOp(N.getOpcode()))
+ return isSETCCorConvertedSETCC(N.getOperand(0)) &&
+ isSETCCorConvertedSETCC(N.getOperand(1));
+
+ return (isSETCCOp(N.getOpcode()) ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+}
+#endif
+
+// Return a mask of vector type MaskVT to replace InMask. Also adjust the new
+// mask to ToMaskVT if needed with vector extension or truncation.
+SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
+ EVT ToMaskVT) {
+ // Currently only a SETCC or an AND/OR/XOR with two SETCCs is handled.
+ // FIXME: This code seems to be too restrictive, we might consider
+ // generalizing it or dropping it.
+ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
+
+ // Make a new Mask node, with a legal result VT.
+ SDValue Mask;
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
+ Ops.push_back(InMask->getOperand(i));
+ if (InMask->isStrictFPOpcode()) {
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
+ { MaskVT, MVT::Other }, Ops);
+ ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
+ }
+ else
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+
+ // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
+ // extend or truncate is needed.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
+ unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
+ if (MaskScalarBits < ToMaskScalBits) {
+ EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
+ } else if (MaskScalarBits > ToMaskScalBits) {
+ EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
+ }
+
+ assert(Mask->getValueType(0).getScalarSizeInBits() ==
+ ToMaskVT.getScalarSizeInBits() &&
+ "Mask should have the right element size by now.");
+
+ // Adjust Mask to the right number of elements.
+ unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
+ if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(Mask));
+ Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
+ ZeroIdx);
+ } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
+ unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
+ EVT SubVT = Mask->getValueType(0);
+ SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+ SubOps[0] = Mask;
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
+ }
+
+ assert((Mask->getValueType(0) == ToMaskVT) &&
+ "A mask of ToMaskVT should have been produced by now.");
+
+ return Mask;
+}
+
+// This method tries to handle some special cases for the vselect mask,
+// adjusting the mask vector type to match that of the VSELECT if needed.
+// Without it, many cases end up scalarizing the SETCC, with many
+// unnecessary instructions.
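+// For example (illustrative): on a target where vector compares produce
+// integer element masks, a VSELECT of v3f32 values being widened to v4f32
+// with a SETCC condition gets a v4i32 mask here instead of having the
+// SETCC scalarized.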
+SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Cond = N->getOperand(0);
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
+ return SDValue();
+
+ // If this is a split VSELECT that was previously handled, do nothing.
+ EVT CondVT = Cond->getValueType(0);
+ if (CondVT.getScalarSizeInBits() != 1)
+ return SDValue();
+
+ EVT VSelVT = N->getValueType(0);
+
+ // This method can't handle scalable vector types.
+ // FIXME: This support could be added in the future.
+ if (VSelVT.isScalableVector())
+ return SDValue();
+
+ // Only handle vector types whose size in bits is a power of 2.
+ if (!isPowerOf2_64(VSelVT.getSizeInBits()))
+ return SDValue();
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = VSelVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);
+
+ if (FinalVT.getVectorNumElements() == 1)
+ return SDValue();
+
+ // If there is support for an i1 vector mask, don't touch.
+ if (isSETCCOp(Cond.getOpcode())) {
+ EVT SetCCOpVT = getSETCCOperandType(Cond);
+ while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
+ SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
+ EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
+ if (SetCCResVT.getScalarSizeInBits() == 1)
+ return SDValue();
+ } else if (CondVT.getScalarType() == MVT::i1) {
+ // If there is support for an i1 vector mask (or only scalar i1 conditions),
+ // don't touch.
+ while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+ CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+ if (CondVT.getScalarType() == MVT::i1)
+ return SDValue();
+ }
+
+ // Widen the vselect result type if needed.
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
+ VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
+
+ // The mask of the VSELECT should have integer elements.
+ EVT ToMaskVT = VSelVT;
+ if (!ToMaskVT.getScalarType().isInteger())
+ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
+
+ SDValue Mask;
+ if (isSETCCOp(Cond->getOpcode())) {
+ EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond));
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else if (isLogicalMaskOp(Cond->getOpcode()) &&
+ isSETCCOp(Cond->getOperand(0).getOpcode()) &&
+ isSETCCOp(Cond->getOperand(1).getOpcode())) {
+ // Cond is (AND/OR/XOR (SETCC, SETCC))
+ SDValue SETCC0 = Cond->getOperand(0);
+ SDValue SETCC1 = Cond->getOperand(1);
+ EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0));
+ EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1));
+ unsigned ScalarBits0 = VT0.getScalarSizeInBits();
+ unsigned ScalarBits1 = VT1.getScalarSizeInBits();
+ unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
+ EVT MaskVT;
+ // If the two SETCCs have different VTs, either extend/truncate one of
+ // them to the other "towards" ToMaskVT, or truncate one and extend the
+ // other to ToMaskVT.
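+ // For example (illustrative): with VT0 = v4i16, VT1 = v4i32 and
+ // ToMaskVT = v4i64, MaskVT becomes the wider v4i32; the v4i16 SETCC is
+ // sign-extended to v4i32, the logical op is done at v4i32, and the result
+ // is then extended to v4i64.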
+ if (ScalarBits0 != ScalarBits1) {
+ EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
+ EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
+ if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
+ MaskVT = WideVT;
+ else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
+ MaskVT = NarrowVT;
+ else
+ MaskVT = ToMaskVT;
+ } else
+ // If the two SETCCs have the same VT, don't change it.
+ MaskVT = VT0;
+
+ // Make new SETCCs and logical nodes.
+ SETCC0 = convertMask(SETCC0, VT0, MaskVT);
+ SETCC1 = convertMask(SETCC1, VT1, MaskVT);
+ Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
+
+ // Convert the logical op for VSELECT if needed.
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else
+ return SDValue();
+
+ return Mask;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ unsigned Opcode = N->getOpcode();
+ if (CondVT.isVector()) {
+ if (SDValue WideCond = WidenVSELECTMask(N)) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, WideCond, InOp1, InOp2);
+ }
+
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ // If we have to split the condition, there is no point in widening the
+ // select. This would result in a cycle of widening the select ->
+ // widening the condition operand -> splitting the condition operand ->
+ // splitting the select -> widening the select. Instead split this select
+ // further and widen the resulting type.
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+ SDValue Res = ModifyToType(SplitSelect, WidenVT);
+ return Res;
+ }
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue OpValue = GetWidenedVector(N->getOperand(0));
+ assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type");
+
+ SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue);
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ unsigned IdxVal = WidenNumElts - VTNumElts;
+
+ if (VT.isScalableVector()) {
+ // Try to split the 'Widen ReverseVal' into smaller extracts and concat the
+ // results together, e.g. (nxv6i64 -> nxv8i64)
+ // nxv8i64 vector_reverse
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv8i64, 2)
+ // nxv2i64 extract_subvector(nxv8i64, 4)
+ // nxv2i64 extract_subvector(nxv8i64, 6)
+ // nxv2i64 undef)
+
+ unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ SmallVector<SDValue> Parts;
+ unsigned i = 0;
+ for (; i < VTNumElts / GCD; ++i)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, ReverseVal,
+ DAG.getVectorIdxConstant(IdxVal + i * GCD, dl)));
+ for (; i < WidenNumElts / GCD; ++i)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ // For fixed-length vectors, use a VECTOR_SHUFFLE to build the result from
+ // 'ReverseVal'.
+ SmallVector<int, 16> Mask;
+ for (unsigned i = 0; i != VTNumElts; ++i) {
+ Mask.push_back(IdxVal + i);
+ }
+ for (unsigned i = VTNumElts; i != WidenNumElts; ++i)
+ Mask.push_back(-1);
+
+ return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT),
+ Mask);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non-vector type");
+ EVT WidenInVT =
+ EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+ // In this case, we need to split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
+ // If the inputs also widen, handle them directly. Otherwise widen by hand.
+ SDValue InOp2 = N->getOperand(1);
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp1 = GetWidenedVector(InOp1);
+ InOp2 = GetWidenedVector(InOp2);
+ } else {
+ InOp1 = DAG.WidenVector(InOp1, SDLoc(N));
+ InOp2 = DAG.WidenVector(InOp2, SDLoc(N));
+ }
+
+ // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ (void)WidenInVT;
+ if (N->getOpcode() == ISD::VP_SETCC) {
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+ return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+ N->getOperand(2), Mask, N->getOperand(4));
+ }
+ return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+ N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(1).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ SDValue CC = N->getOperand(3);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+
+ // Fully unroll and reassemble.
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Chains(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, dl, Scalars);
+}
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
+ break;
+ case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
+ case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
+ case ISD::VP_SCATTER: Res = WidenVecOp_VP_SCATTER(N, OpNo); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
+ case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break;
+ case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecOp_EXTEND(N);
+ break;
+
+ case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::TRUNCATE:
+ Res = WidenVecOp_Convert(N);
+ break;
+
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = WidenVecOp_FP_TO_XINT_SAT(N);
+ break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
+ Res = WidenVecOp_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = WidenVecOp_VECREDUCE_SEQ(N);
+ break;
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ Res = WidenVecOp_VP_REDUCE(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue InOp = N->getOperand(0);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ assert(VT.getVectorNumElements() <
+ InOp.getValueType().getVectorNumElements() &&
+ "Input wasn't widened!");
+
+ // We may need to further widen the operand until it has the same total
+ // vector size as the result.
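+ // For example (illustrative, assuming v16i8 is legal on the target): for a
+ // v4i32 result extended from v4i8 where the operand was widened to v8i8
+ // (64 bits), a 128-bit v16i8 is found, the v8i8 is inserted into it, and
+ // the low lanes are then extended with *_EXTEND_VECTOR_INREG below.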
+ EVT InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ for (EVT FixedVT : MVT::vector_valuetypes()) {
+ EVT FixedEltVT = FixedVT.getVectorElementType();
+ if (TLI.isTypeLegal(FixedVT) &&
+ FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+ FixedEltVT == InEltVT) {
+ assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+ "Not enough elements in the fixed type for the operand!");
+ assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+ "We can't have the same type as we started with!");
+ if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+ InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+ DAG.getUNDEF(FixedVT), InOp,
+ DAG.getVectorIdxConstant(0, DL));
+ else
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getVectorIdxConstant(0, DL));
+ break;
+ }
+ }
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits())
+ // We couldn't find a legal vector type that was a widening of the input
+ // and could be extended in-register to the result type, so we have to
+ // scalarize.
+ return WidenVecOp_Convert(N);
+ }
+
+ // Use special DAG nodes to represent the operation of extending the
+ // low lanes.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Extend legalization on extend operation!");
+ case ISD::ANY_EXTEND:
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ case ISD::ZERO_EXTEND:
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ }
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ EVT ResultVT = N->getValueType(0);
+ SDValue Test = N->getOperand(1);
+ SDValue WideArg = GetWidenedVector(N->getOperand(0));
+
+ // Process this node similarly to SETCC.
+ EVT WideResultVT = getSetCCResultType(WideArg.getValueType());
+ if (ResultVT.getScalarType() == MVT::i1)
+ WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideResultVT.getVectorNumElements());
+
+ SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT,
+ {WideArg, Test}, N->getFlags());
+
+ // Extract the needed results from the result vector.
+ EVT ResVT =
+ EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(),
+ ResultVT.getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideNode,
+ DAG.getVectorIdxConstant(0, DL));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, ResultVT, CC);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // The result type is legal, but the input type needs to be widened.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+ SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // See if a widened result type would be legal, if so widen the node.
+ // FIXME: This isn't safe for StrictFP. Other optimization here is needed.
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ InVT.getVectorElementCount());
+ if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ if (Opcode == ISD::STRICT_FP_ROUND)
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp, N->getOperand(2) });
+ else
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else {
+ if (Opcode == ISD::FP_ROUND)
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
+ else
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ }
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+
+ EVT InEltVT = InVT.getVectorElementType();
+
+ // Unroll the convert into some scalar code and create a nasty build vector.
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ if (N->isStrictFPOpcode()) {
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i=0; i < NumElts; ++i) {
+ NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, dl));
+ Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT,
+ InOp, DAG.getVectorIdxConstant(i, dl)));
+ }
+
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT DstVT = N->getValueType(0);
+ SDValue Src = GetWidenedVector(N->getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ ElementCount WideNumElts = SrcVT.getVectorElementCount();
+ SDLoc dl(N);
+
+ // See if a widened result type would be legal, if so widen the node.
+ EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(),
+ DstVT.getVectorElementType(), WideNumElts);
+ if (TLI.isTypeLegal(WideDstVT)) {
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Give up and unroll.
+ return DAG.UnrollVectorOp(N);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ SDLoc dl(N);
+
+ // Check if we can convert between two legal vector types and extract.
+ TypeSize InWidenSize = InWidenVT.getSizeInBits();
+ TypeSize Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (!VT.isVector() && VT != MVT::x86mmx &&
+ InWidenSize.hasKnownScalarFactor(Size)) {
+ unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size);
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ }
+
+ // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
+ // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
+ // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
+ // having to copy via memory.
+ if (VT.isVector()) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getFixedSizeInBits();
+ if (InWidenSize.isKnownMultipleOf(EltSize)) {
+ ElementCount NewNumElts =
+ (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits())
+ .divideCoefficientBy(EltSize);
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT InVT = N->getOperand(0).getValueType();
+ SDLoc dl(N);
+
+ // If the widen width for this operand is the same as the width of the concat
+ // and all but the first operand are undef, just use the widened operand.
+ unsigned NumOperands = N->getNumOperands();
+ if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ unsigned i;
+ for (i = 1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ return GetWidenedVector(N->getOperand(0));
+ }
+
+ // Otherwise, fall back to a nasty build vector.
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j = 0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(j, dl));
+ }
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue InVec = N->getOperand(0);
+
+ if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
+ SubVec = GetWidenedVector(SubVec);
+
+ if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
+ N->getConstantOperandVal(2) == 0)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
+ N->getOperand(2));
+
+ report_fatal_error("Don't know how to widen the operands for "
+ "INSERT_SUBVECTOR");
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value, but we want only to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ if (!ST->getMemoryVT().getScalarType().isByteSized())
+ return TLI.scalarizeVectorStore(ST, DAG);
+
+ if (ST->isTruncatingStore())
+ return TLI.scalarizeVectorStore(ST, DAG);
+
+ // Generate a vector-predicated store if it is custom/legal on the target.
+ // To avoid possible recursion, only do this if the widened mask type is
+ // legal.
+ // FIXME: Not all targets may support EVL in VP_STORE. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ SDValue StVal = ST->getValue();
+ EVT StVT = StVal.getValueType();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+
+ if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ // Widen the value.
+ SDLoc DL(N);
+ StVal = GetWidenedVector(StVal);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ StVT.getVectorElementCount());
+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
+ DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
+ EVL, StVT, ST->getMemOperand(),
+ ST->getAddressingMode());
+ }
+
+ SmallVector<SDValue, 16> StChain;
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
+
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
+ report_fatal_error("Unable to widen vector store");
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 3) &&
+ "Can widen only data or mask operand of vp_store");
+ VPStoreSDNode *ST = cast<VPStoreSDNode>(N);
+ SDValue Mask = ST->getMask();
+ SDValue StVal = ST->getValue();
+ SDLoc dl(N);
+
+ if (OpNo == 1) {
+ // Widen the value.
+ StVal = GetWidenedVector(StVal);
+
+ // We only handle the case where the mask needs widening to an
+ // identically-sized type as the vector inputs.
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP store");
+ Mask = GetWidenedVector(Mask);
+ } else {
+ Mask = GetWidenedVector(Mask);
+
+ // We only handle the case where the stored value needs widening to an
+ // identically-sized type as the mask.
+ assert(getTypeAction(StVal.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP store");
+ StVal = GetWidenedVector(StVal);
+ }
+
+ assert(Mask.getValueType().getVectorElementCount() ==
+ StVal.getValueType().getVectorElementCount() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(),
+ ST->getOffset(), Mask, ST->getVectorLength(),
+ ST->getMemoryVT(), ST->getMemOperand(),
+ ST->getAddressingMode(), ST->isTruncatingStore(),
+ ST->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
+ unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 4) &&
+ "Can widen only data or mask operand of vp_strided_store");
+ VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+ SDValue Mask = SST->getMask();
+ SDValue StVal = SST->getValue();
+ SDLoc DL(N);
+
+ if (OpNo == 1)
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided store");
+ else
+ assert(getTypeAction(StVal.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided store");
+
+ StVal = GetWidenedVector(StVal);
+ Mask = GetWidenedVector(Mask);
+
+ assert(StVal.getValueType().getVectorElementCount() ==
+ Mask.getValueType().getVectorElementCount() &&
+ "Data and mask vectors should have the same number of elements");
+
+ return DAG.getStridedStoreVP(
+ SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
+ SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
+ SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
+ SST->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 3) &&
+ "Can widen only data or mask operand of mstore");
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue StVal = MST->getValue();
+ SDLoc dl(N);
+
+ if (OpNo == 1) {
+ // Widen the value.
+ StVal = GetWidenedVector(StVal);
+
+ // The mask should be widened as well.
+ EVT WideVT = StVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else {
+ // Widen the mask.
+ EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ EVT ValueVT = StVal.getValueType();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
+ }
+
+ assert(Mask.getValueType().getVectorNumElements() ==
+ StVal.getValueType().getVectorNumElements() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
+ MST->getOffset(), Mask, MST->getMemoryVT(),
+ MST->getMemOperand(), MST->getAddressingMode(),
+ false, MST->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 4 && "Can widen only the index of mgather");
+ auto *MG = cast<MaskedGatherSDNode>(N);
+ SDValue DataOp = MG->getPassThru();
+ SDValue Mask = MG->getMask();
+ SDValue Scale = MG->getScale();
+
+ // Just widen the index. It's allowed to have extra elements.
+ SDValue Index = GetWidenedVector(MG->getIndex());
+
+ SDLoc dl(N);
+ SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
+ Scale};
+ SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
+ MG->getMemOperand(), MG->getIndexType(),
+ MG->getExtensionType());
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue DataOp = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ EVT WideMemVT = MSC->getMemoryVT();
+
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ unsigned NumElts = DataOp.getValueType().getVectorNumElements();
+
+ // Widen index.
+ EVT IndexVT = Index.getValueType();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getVectorElementType(), NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ // The mask should be widened as well.
+ EVT MaskVT = Mask.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ // Widen the MemoryType
+ WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ MSC->getMemoryVT().getScalarType(), NumElts);
+ } else if (OpNo == 4) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+ llvm_unreachable("Can't widen this operand of mscatter");
+
+ SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
+ Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N),
+ Ops, MSC->getMemOperand(), MSC->getIndexType(),
+ MSC->isTruncatingStore());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
+ VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N);
+ SDValue DataOp = VPSC->getValue();
+ SDValue Mask = VPSC->getMask();
+ SDValue Index = VPSC->getIndex();
+ SDValue Scale = VPSC->getScale();
+ EVT WideMemVT = VPSC->getMemoryVT();
+
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ Index = GetWidenedVector(Index);
+ const auto WideEC = DataOp.getValueType().getVectorElementCount();
+ Mask = GetWidenedMask(Mask, WideEC);
+ WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ VPSC->getMemoryVT().getScalarType(), WideEC);
+ } else if (OpNo == 3) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+ llvm_unreachable("Can't widen this operand of VP_SCATTER");
+
+ SDValue Ops[] = {
+ VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
+ VPSC->getVectorLength()};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops,
+ VPSC->getMemOperand(), VPSC->getIndexType());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+ // This garbage may contain denormal floats which may be slow. Is this a real
+ // concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = getSetCCResultType(InOp0.getValueType());
+ // The result type is legal; if it is vXi1, keep vXi1 for the new SETCC.
+ if (VT.getScalarType() == MVT::i1)
+ SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ SVT.getVectorElementCount());
+
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ VT.getVectorElementCount());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+ DAG.getVectorIdxConstant(0, dl));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, dl, VT, CC);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = GetWidenedVector(N->getOperand(1));
+ SDValue RHS = GetWidenedVector(N->getOperand(2));
+ SDValue CC = N->getOperand(3);
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Unroll into a build vector.
+ SmallVector<SDValue, 8> Scalars(NumElts);
+ SmallVector<SDValue, 8> Chains(NumElts);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(VT, dl, Scalars);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(0));
+ EVT OrigVT = N->getOperand(0).getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+ SDNodeFlags Flags = N->getFlags();
+
+ unsigned Opc = N->getOpcode();
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
+ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
+ assert(NeutralElem && "Neutral element must exist");
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = std::gcd(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+ }
+
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getVectorIdxConstant(Idx, dl));
+
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
+ SDLoc dl(N);
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+ SDValue Op = GetWidenedVector(VecOp);
+
+ EVT OrigVT = VecOp.getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+ SDNodeFlags Flags = N->getFlags();
+
+ unsigned Opc = N->getOpcode();
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
+ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = std::gcd(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
+ }
+
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getVectorIdxConstant(Idx, dl));
+
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(1));
+ SDValue Mask = GetWidenedMask(N->getOperand(2),
+ Op.getValueType().getVectorElementCount());
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0),
+ {N->getOperand(0), Op, Mask, N->getOperand(3)},
+ N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
+ // This only gets called in the case that the left and right inputs and
+ // result are of a legal odd vector type, and the condition is illegal i1 of
+ // the same odd width that needs widening.
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
+
+ SDValue Cond = GetWidenedVector(N->getOperand(0));
+ SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
+ SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
+ SDLoc DL(N);
+
+ SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
+ LeftIn, RightIn);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+ DAG.getVectorIdxConstant(0, DL));
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for load/store.
+// TLI: Target lowering used to determine legal types.
+// Width: Width left to load/store.
+// WidenVT: The widened vector type to load to/store from.
+// Align: If 0, don't allow use of a wider type.
+// WidenEx: If Align is not 0, the additional amount we can load/store from.
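+// For example (illustrative): asked for the 96 bits of a v3i32 widened to
+// v4i32, this may return a 64-bit type (a legal i64 or v2i32, depending on
+// the target); the caller then asks again for the remaining 32 bits.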
+
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+ const TargetLowering &TLI, unsigned Width,
+ EVT WidenVT, unsigned Align = 0,
+ unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ const bool Scalable = WidenVT.isScalableVector();
+ unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (!Scalable && Width == WidenEltWidth)
+ return RetVT;
+
+ // Don't bother looking for an integer type if the vector is scalable; skip
+ // to vector types.
+ if (!Scalable) {
+ // See if there is a larger legal integer than the element type to load/store.
+ for (EVT MemVT : reverse(MVT::integer_valuetypes())) {
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (MemVTWidth == WidenWidth)
+ return MemVT;
+ RetVT = MemVT;
+ break;
+ }
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same vector
+ // element type and whose width evenly divides that of WidenVT.
+ for (EVT MemVT : reverse(MVT::vector_valuetypes())) {
+ // Skip vector MVTs which don't match the scalable property of WidenVT.
+ if (Scalable != MemVT.isScalableVector())
+ continue;
+ unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue();
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ // Using element-wise loads and stores for widening operations is not
+ // supported for scalable vectors.
+ if (Scalable)
+ return std::nullopt;
+
+ return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Scalar loads used to build the vector.
+// [Start,End): The range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVectorImpl<SDValue> &LdOps,
+ unsigned Start, unsigned End) {
+ SDLoc dl(LdOps[Start]);
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust the insert position based on the new load type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getVectorIdxConstant(Idx++, dl));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the same
+ // element type, or scalar loads, and then recombines them into the widened
+ // vector type.
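+ // For example (illustrative): a v7i16 load widened to v8i16 might become a
+ // 64-bit load, a 32-bit load and a 16-bit load (112 bits total), which are
+ // then recombined into the widened vector below.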
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ TypeSize LdWidth = LdVT.getSizeInBits();
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ TypeSize WidthDiff = WidenWidth - LdWidth;
+ // Allow wider loads if they are sufficiently aligned to avoid memory faults
+ // and if the original load is simple.
+ unsigned LdAlign =
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
+
+ // Find the vector type that can load from.
+ std::optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinValue());
+
+ if (!FirstVT)
+ return SDValue();
+
+ SmallVector<EVT, 8> MemVTs;
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ // Unless we're able to load in one instruction we must work out how to load
+ // the remainder.
+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
+ std::optional<EVT> NewVT = FirstVT;
+ TypeSize RemainingWidth = LdWidth;
+ TypeSize NewVTWidth = FirstVTWidth;
+ do {
+ RemainingWidth -= NewVTWidth;
+ if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(),
+ WidenVT, LdAlign, WidthDiff.getKnownMinValue());
+ if (!NewVT)
+ return SDValue();
+ NewVTWidth = NewVT->getSizeInBits();
+ }
+ MemVTs.push_back(*NewVT);
+ } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
+ }
+
+ SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction.
+ if (MemVTs.empty()) {
+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+ if (!FirstVT->isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (FirstVT == WidenVT)
+ return LdOp;
+
+ // TODO: We don't currently have any tests that exercise this code path.
+ assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+ unsigned NumConcat =
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+ }
+
+ // Load vector by using multiple loads from largest vector to scalar.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ uint64_t ScaledOffset = 0;
+ MachinePointerInfo MPI = LD->getPointerInfo();
+
+ // First increment past the first load.
+ IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
+ &ScaledOffset);
+
+ for (EVT MemVT : MemVTs) {
+ Align NewAlign = ScaledOffset == 0
+ ? LD->getOriginalAlign()
+ : commonAlignment(LD->getAlign(), ScaledOffset);
+ SDValue L =
+ DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
+
+ LdOps.push_back(L);
+ LdChain.push_back(L.getValue(1));
+ IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the loads contain vectors, build the result using CONCAT_VECTORS.
+ // All of the vector loads have power-of-two widths, and the scalar loads
+ // can be combined to make a power-of-two vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First, combine the scalar loads to a vector.
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
+ }
+
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector.
+ TypeSize LdTySize = LdTy.getSizeInBits();
+ TypeSize NewLdTySize = NewLdTy.getSizeInBits();
+ assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
+ NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue()));
+ unsigned NumOps =
+ NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ unsigned j = 0;
+ for (; j != End-Idx; ++j)
+ WidenOps[j] = ConcatOps[Idx+j];
+ for (; j != NumOps; ++j)
+ WidenOps[j] = DAG.getUNDEF(LdTy);
+
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ WidenOps);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ ArrayRef(&ConcatOps[Idx], End - Idx));
+
+ // We need to fill the rest with undefs to build the vector.
+ unsigned NumOps =
+ WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
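+ // For example (illustrative): a sextload of v3i8 to v3i32, widened to
+ // v4i32, becomes three scalar extending loads of i8 plus an undef element.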
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ if (LdVT.isScalableVector())
+ report_fatal_error("Generating widen scalable extending vector loads is "
+ "not yet supported");
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen.
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] =
+ DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
+ LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr =
+ DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs.
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getBuildVector(WidenVT, dl, Ops);
+}
+
+bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the same
+ // element type or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDLoc dl(ST);
+
+ EVT StVT = ST->getMemoryVT();
+ TypeSize StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ TypeSize ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getFixedSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+ assert(StVT.isScalableVector() == ValVT.isScalableVector() &&
+ "Mismatch between store and value types");
+
+ int Idx = 0; // current index to store
+
+ MachinePointerInfo MPI = ST->getPointerInfo();
+ uint64_t ScaledOffset = 0;
+
+ // A breakdown of how to widen this vector store. Each element of the vector
+ // is a memory VT together with the number of times it is to be stored,
+ // e.g., v5i32 -> {{v2i32,2},{i32,1}}
+ SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;
+
+ while (StWidth.isNonZero()) {
+ // Find the largest vector type we can store with.
+ std::optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT);
+ if (!NewVT)
+ return false;
+ MemVTs.push_back({*NewVT, 0});
+ TypeSize NewVTWidth = NewVT->getSizeInBits();
+
+ do {
+ StWidth -= NewVTWidth;
+ MemVTs.back().second++;
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ }
+
+ for (const auto &Pair : MemVTs) {
+ EVT NewVT = Pair.first;
+ unsigned Count = Pair.second;
+ TypeSize NewVTWidth = NewVT.getSizeInBits();
+
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorMinNumElements();
+ do {
+ Align NewAlign = ScaledOffset == 0
+ ? ST->getOriginalAlign()
+ : commonAlignment(ST->getAlign(), ScaledOffset);
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getVectorIdxConstant(Idx, dl));
+ SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign,
+ MMOFlags, AAInfo);
+ StChain.push_back(PartStore);
+
+ Idx += NumVTElts;
+ IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
+ &ScaledOffset);
+ } while (--Count);
+ } else {
+ // Bitcast the vector to a vector of the scalar type we can store.
+ unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type.
+ Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getVectorIdxConstant(Idx++, dl));
+ SDValue PartStore =
+ DAG.getStore(Chain, dl, EOp, BasePtr, MPI, ST->getOriginalAlign(),
+ MMOFlags, AAInfo);
+ StChain.push_back(PartStore);
+
+ IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
+ } while (--Count);
+ // Restore the index so it is relative to the original widened element type.
+ Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth;
+ }
+ }
+
+ return true;
+}
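+
+// Editorial sketch (not part of the original file): a stand-alone C++ toy,
+// using only the standard library, that mimics the MemVTs breakdown above.
+// The fixed table of "legal" widths stands in for findMemType; for a v5i32
+// store (160 bits) it prints the {{v2i32,2},{i32,1}} split mentioned in the
+// comment inside GenWidenVectorStores.
+//
+//   #include <cstdio>
+//   #include <utility>
+//   #include <vector>
+//
+//   int main() {
+//     unsigned StWidth = 5 * 32;                         // v5i32 in bits
+//     std::vector<std::pair<unsigned, unsigned>> MemVTs; // {width, count}
+//     while (StWidth != 0) {
+//       // Largest "legal" store width that still fits (stand-in for
+//       // findMemType).
+//       unsigned NewVTWidth = StWidth >= 64 ? 64 : 32;
+//       MemVTs.push_back({NewVTWidth, 0});
+//       do {
+//         StWidth -= NewVTWidth;
+//         MemVTs.back().second++;
+//       } while (StWidth != 0 && StWidth >= NewVTWidth);
+//     }
+//     for (const auto &P : MemVTs)         // prints "64 x 2" then "32 x 1"
+//       std::printf("%u x %u\n", P.first, P.second);
+//     return 0;
+//   }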
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+ bool FillWithZeroes) {
+ // Note that InOp might have been widened so it might already have
+ // the right width or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ assert(InVT.isScalableVector() == NVT.isScalableVector() &&
+ "cannot modify scalable vectors in this way");
+ SDLoc dl(InOp);
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ ElementCount InEC = InVT.getVectorElementCount();
+ ElementCount WidenEC = NVT.getVectorElementCount();
+ if (WidenEC.hasKnownScalarFactor(InEC)) {
+ unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
+ DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = FillVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
+ }
+
+ if (InEC.hasKnownScalarFactor(WidenEC))
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getVectorIdxConstant(0, dl));
+
+ assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ "Scalable vectors should have been handled already.");
+
+ unsigned InNumElts = InEC.getFixedValue();
+ unsigned WidenNumElts = WidenEC.getFixedValue();
+
+ // Fall back to extract and build (+ mask, if padding with zeros).
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(Idx, dl));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+
+ SDValue Widened = DAG.getBuildVector(NVT, dl, Ops);
+ if (!FillWithZeroes)
+ return Widened;
+
+ assert(NVT.isInteger() &&
+ "We expect to never want to FillWithZeroes for non-integral types.");
+
+ SmallVector<SDValue, 16> MaskOps;
+ MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT));
+ MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT));
+
+ return DAG.getNode(ISD::AND, dl, NVT, Widened,
+ DAG.getBuildVector(NVT, dl, MaskOps));
+}
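+
+// Illustrative note (editorial): ModifyToType(v3i32 input, NVT = v8i32,
+// FillWithZeroes = true) takes the extract-and-build path above, since 8 is
+// not a multiple of 3: the three input elements are extracted, five undef
+// elements are appended, and the result is ANDed with the build_vector mask
+// <-1,-1,-1,0,0,0,0,0> so the padding lanes read as zero.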
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..e0e8d503ca92
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,624 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// DFA is queried as a state machine to model "packets/bundles" during
+// schedule. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scheduler"
+
+static cl::opt<bool>
+ DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<int> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
+ : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TLI = IS->TLI;
+ TII = STI.getInstrInfo();
+ ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+
+ SUnit *PredSU = Pred.getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ continue;
+
+ SUnit *SuccSU = Succ.getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (const SDep &Succ : SU->Succs)
+ if (Succ.isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SDep &Pred : SU->Preds)
+ if (Pred.isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (SUnit &SU : *SUnits) {
+ initNumRegDefsLeft(&SU);
+ SU.NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = nullptr;
+ for (const SDep &Pred : SU->Preds) {
+ SUnit &PredSU = *Pred.getSUnit();
+ if (!PredSU.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
+ return nullptr;
+ OnlyAvailablePred = &PredSU;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled predecessor for.
+ unsigned NumNodesBlocking = 0;
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now make sure there are no other dependencies
+ // on instructions already in the packet.
+ for (const SUnit *S : Packet)
+ for (const SDep &Succ : S->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (Succ.isCtrl())
+ continue;
+
+ if (Succ.getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
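+
+// Editorial sketch (not part of the original file): a toy stand-in for the
+// packet-formation pattern used by isResourceAvailable/reserveResources
+// above. ToyDFA is hypothetical and only models a fixed two-slot issue
+// width, not LLVM's DFAPacketizer; the point is the try-reserve /
+// clear-on-failure shape of the loop.
+//
+//   #include <cstdio>
+//
+//   struct ToyDFA {
+//     unsigned Used = 0;
+//     bool canReserveResources() const { return Used < 2; }
+//     void reserveResources() { ++Used; }
+//     void clearResources() { Used = 0; }
+//   };
+//
+//   int main() {
+//     ToyDFA Model;
+//     for (unsigned Instr = 0; Instr != 5; ++Instr) {
+//       if (!Model.canReserveResources()) { // packet is full: start fresh
+//         std::puts("---- end of packet ----");
+//         Model.clearResources();
+//       }
+//       Model.reserveResources();
+//       std::printf("issue instruction %u\n", Instr);
+//     }
+//     return 0;
+//   }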
+
+int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ int RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ MVT VT = SU->getNode()->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ int RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ else {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
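+
+// Illustrative note (editorial): with RawPressure == false, a class only
+// contributes when it ends up at or over its limit after this SU. For
+// example, assuming RegLimit = 8 and RegPressure = 7 for some class, a raw
+// delta of +2 contributes +2 (7 + 2 >= 8), while a raw delta of -1
+// contributes nothing (7 - 1 < 8).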
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 15;
+static const unsigned PriorityFour = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns a single number reflecting the benefit of scheduling SU
+/// in the current cycle.
+int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling: a small, but very parallel,
+ // region where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic: greedy and
+ // critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform-specific things.
+ // They will need to go into the back end
+ // and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFour;
+ break;
+
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ ResCount += PriorityThree;
+ break;
+ }
+ }
+ return ResCount;
+}
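+
+// Worked example (editorial): for an unscheduled SU on the default path with
+// isScheduleHigh == false, height 3, NumNodesSolelyBlocking == 2, resources
+// available, and regPressureDelta(SU) == 1, the cost above is
+//   ResCount = 1 + 3*ScaleTwo + 2*ScaleTwo = 51,
+//   ResCount <<= FactorOne        -> 51 << 2 = 204,
+//   ResCount -= 1*ScaleTwo        -> 194.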
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --Pred.getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // The heuristic is simple: a node with no data successors reduces
+ // the number of live ranges; all others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (const SDep &Succ : SU->Succs) {
+ adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
+ if (!Succ.isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return nullptr;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ int BestCost = SUSchedulingCost(*Best);
+ for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != std::prev(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 000000000000..c31b971e7fc3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,264 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DataTypes.h"
+#include <utility>
+
+namespace llvm {
+
+class DIVariable;
+class DIExpression;
+class SDNode;
+class Value;
+class raw_ostream;
+
+/// Holds the information for a single machine location through SDISel; either
+/// an SDNode, a constant, a stack location, or a virtual register.
+class SDDbgOperand {
+public:
+ enum Kind {
+ SDNODE = 0, ///< Value is the result of an expression.
+ CONST = 1, ///< Value is a constant.
+ FRAMEIX = 2, ///< Value is contents of a stack location.
+ VREG = 3 ///< Value is a virtual register.
+ };
+ Kind getKind() const { return kind; }
+
+ /// Returns the SDNode* for a register ref
+ SDNode *getSDNode() const {
+ assert(kind == SDNODE);
+ return u.s.Node;
+ }
+
+ /// Returns the ResNo for a register ref
+ unsigned getResNo() const {
+ assert(kind == SDNODE);
+ return u.s.ResNo;
+ }
+
+ /// Returns the Value* for a constant
+ const Value *getConst() const {
+ assert(kind == CONST);
+ return u.Const;
+ }
+
+ /// Returns the FrameIx for a stack object
+ unsigned getFrameIx() const {
+ assert(kind == FRAMEIX);
+ return u.FrameIx;
+ }
+
+ /// Returns the Virtual Register for a VReg
+ unsigned getVReg() const {
+ assert(kind == VREG);
+ return u.VReg;
+ }
+
+ static SDDbgOperand fromNode(SDNode *Node, unsigned ResNo) {
+ return SDDbgOperand(Node, ResNo);
+ }
+ static SDDbgOperand fromFrameIdx(unsigned FrameIdx) {
+ return SDDbgOperand(FrameIdx, FRAMEIX);
+ }
+ static SDDbgOperand fromVReg(unsigned VReg) {
+ return SDDbgOperand(VReg, VREG);
+ }
+ static SDDbgOperand fromConst(const Value *Const) {
+ return SDDbgOperand(Const);
+ }
+
+ bool operator!=(const SDDbgOperand &Other) const { return !(*this == Other); }
+ bool operator==(const SDDbgOperand &Other) const {
+ if (kind != Other.kind)
+ return false;
+ switch (kind) {
+ case SDNODE:
+ return getSDNode() == Other.getSDNode() && getResNo() == Other.getResNo();
+ case CONST:
+ return getConst() == Other.getConst();
+ case VREG:
+ return getVReg() == Other.getVReg();
+ case FRAMEIX:
+ return getFrameIx() == Other.getFrameIx();
+ }
+ return false;
+ }
+
+private:
+ Kind kind;
+ union {
+ struct {
+ SDNode *Node; ///< Valid for expressions.
+ unsigned ResNo; ///< Valid for expressions.
+ } s;
+ const Value *Const; ///< Valid for constants.
+ unsigned FrameIx; ///< Valid for stack objects.
+ unsigned VReg; ///< Valid for registers.
+ } u;
+
+ /// Constructor for non-constants.
+ SDDbgOperand(SDNode *N, unsigned R) : kind(SDNODE) {
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+ /// Constructor for constants.
+ SDDbgOperand(const Value *C) : kind(CONST) { u.Const = C; }
+ /// Constructor for virtual registers and frame indices.
+ SDDbgOperand(unsigned VRegOrFrameIdx, Kind Kind) : kind(Kind) {
+ assert((Kind == VREG || Kind == FRAMEIX) &&
+ "Invalid SDDbgValue constructor");
+ if (kind == VREG)
+ u.VReg = VRegOrFrameIdx;
+ else
+ u.FrameIx = VRegOrFrameIdx;
+ }
+};
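+
+// Usage sketch (editorial): clients are expected to build operands through
+// the factory functions above rather than the private constructors, e.g.
+//
+//   SDDbgOperand Loc = SDDbgOperand::fromFrameIdx(FI);    // stack slot FI
+//   SDDbgOperand Val = SDDbgOperand::fromNode(N, ResNo);  // result ResNo of N
+//   if (Loc != Val) { /* ... */ }
+//
+// where FI, N and ResNo are assumed to come from the surrounding SDISel code.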
+
+/// Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgValue {
+public:
+
+private:
+ // SDDbgValues are allocated by a BumpPtrAllocator, which means the destructor
+ // may not be called; therefore all member arrays must also be allocated by
+ // that BumpPtrAllocator, to ensure that they are correctly freed.
+ size_t NumLocationOps;
+ SDDbgOperand *LocationOps;
+ // SDNode dependencies will be calculated as SDNodes that appear in
+ // LocationOps plus these AdditionalDependencies.
+ size_t NumAdditionalDependencies;
+ SDNode **AdditionalDependencies;
+ DIVariable *Var;
+ DIExpression *Expr;
+ DebugLoc DL;
+ unsigned Order;
+ bool IsIndirect;
+ bool IsVariadic;
+ bool Invalid = false;
+ bool Emitted = false;
+
+public:
+ SDDbgValue(BumpPtrAllocator &Alloc, DIVariable *Var, DIExpression *Expr,
+ ArrayRef<SDDbgOperand> L, ArrayRef<SDNode *> Dependencies,
+ bool IsIndirect, DebugLoc DL, unsigned O, bool IsVariadic)
+ : NumLocationOps(L.size()),
+ LocationOps(Alloc.Allocate<SDDbgOperand>(L.size())),
+ NumAdditionalDependencies(Dependencies.size()),
+ AdditionalDependencies(Alloc.Allocate<SDNode *>(Dependencies.size())),
+ Var(Var), Expr(Expr), DL(DL), Order(O), IsIndirect(IsIndirect),
+ IsVariadic(IsVariadic) {
+ assert(IsVariadic || L.size() == 1);
+ assert(!(IsVariadic && IsIndirect));
+ std::copy(L.begin(), L.end(), LocationOps);
+ std::copy(Dependencies.begin(), Dependencies.end(), AdditionalDependencies);
+ }
+
+ // We allocate arrays with the BumpPtrAllocator and never free or copy them,
+ // for LocationOps and AdditionalDependencies, as we never expect to copy or
+ // destroy an SDDbgValue. If we ever start copying or destroying instances, we
+ // should manage the allocated memory appropriately.
+ SDDbgValue(const SDDbgValue &Other) = delete;
+ SDDbgValue &operator=(const SDDbgValue &Other) = delete;
+ ~SDDbgValue() = delete;
+
+ /// Returns the DIVariable pointer for the variable.
+ DIVariable *getVariable() const { return Var; }
+
+ /// Returns the DIExpression pointer for the expression.
+ DIExpression *getExpression() const { return Expr; }
+
+ ArrayRef<SDDbgOperand> getLocationOps() const {
+ return ArrayRef<SDDbgOperand>(LocationOps, NumLocationOps);
+ }
+
+ SmallVector<SDDbgOperand> copyLocationOps() const {
+ return SmallVector<SDDbgOperand>(LocationOps, LocationOps + NumLocationOps);
+ }
+
+ // Returns the SDNodes which this SDDbgValue depends on.
+ SmallVector<SDNode *> getSDNodes() const {
+ SmallVector<SDNode *> Dependencies;
+ for (const SDDbgOperand &DbgOp : getLocationOps())
+ if (DbgOp.getKind() == SDDbgOperand::SDNODE)
+ Dependencies.push_back(DbgOp.getSDNode());
+ for (SDNode *Node : getAdditionalDependencies())
+ Dependencies.push_back(Node);
+ return Dependencies;
+ }
+
+ ArrayRef<SDNode *> getAdditionalDependencies() const {
+ return ArrayRef<SDNode *>(AdditionalDependencies,
+ NumAdditionalDependencies);
+ }
+
+ /// Returns whether this is an indirect value.
+ bool isIndirect() const { return IsIndirect; }
+
+ bool isVariadic() const { return IsVariadic; }
+
+ /// Returns the DebugLoc.
+ const DebugLoc &getDebugLoc() const { return DL; }
+
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
+ unsigned getOrder() const { return Order; }
+
+ /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ /// property. A SDDbgValue is invalid if the SDNode that produces the value is
+ /// deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() const { return Invalid; }
+
+ /// setIsEmitted / isEmitted - Getter/Setter for flag indicating that this
+ /// SDDbgValue has been emitted to an MBB.
+ void setIsEmitted() { Emitted = true; }
+ bool isEmitted() const { return Emitted; }
+
+ /// clearIsEmitted - Reset Emitted flag, for certain special cases where
+ /// SDDbgValue is emitted twice. DBG_INSTR_REF depends on this behaviour.
+ void clearIsEmitted() { Emitted = false; }
+
+ LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
+};
+
+/// Holds the information from a dbg_label node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgLabel {
+ MDNode *Label;
+ DebugLoc DL;
+ unsigned Order;
+
+public:
+ SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O)
+ : Label(Label), DL(std::move(dl)), Order(O) {}
+
+ /// Returns the MDNode pointer for the label.
+ MDNode *getLabel() const { return Label; }
+
+ /// Returns the DebugLoc.
+ const DebugLoc &getDebugLoc() const { return DL; }
+
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
+ unsigned getOrder() const { return Order; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 000000000000..5b01743d23e0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,819 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return nullptr;
+ return Queue.pop_back_val();
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+ /// LiveRegDefs - A set of physical registers and their definitions
+ /// that are "live". These nodes must be scheduled before any other nodes that
+ /// modify the registers can be scheduled.
+ unsigned NumLiveRegs = 0u;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule() override;
+
+ /// AddPred - adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+ /// RemovePred - removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVectorImpl<SUnit*>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+ void ListScheduleBottomUp();
+
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const override { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(nullptr);
+
+ LLVM_DEBUG(dump());
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*PredSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SDep &Pred : SU->Preds) {
+ ReleasePred(SU, &Pred);
+ if (Pred.isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[Pred.getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+ LiveRegCycles[Pred.getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isAssignedRegDep()) {
+ if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[Succ.getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[Succ.getReg()] = nullptr;
+ LiveRegCycles[Succ.getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return nullptr;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return nullptr;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue)
+ return nullptr;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (const SDValue &Op : N->op_values()) {
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return nullptr;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = newSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = newSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPred = Pred;
+ else if (Pred.getSUnit()->getNode() &&
+ Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPred(NewSU, Pred);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVectorImpl<SUnit*> &Copies) {
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, Succ));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(!MCID.implicit_defs().empty() &&
+ "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (MCPhysReg ImpDef : MCID.implicit_defs()) {
+ if (Reg == ImpDef)
+ break;
+ ++NumRes;
+ }
+ }
+ return N->getSimpleValueType(NumRes);
+}
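+
+// Illustrative note (editorial, hypothetical instruction): for a machine node
+// with two explicit defs whose implicit-def list is [EFLAGS, EAX], a query
+// for EAX walks past EFLAGS once, so NumRes becomes 2 + 1 = 3 and the type of
+// result number 3 is returned.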
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit *> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs,
+ const TargetRegisterInfo *TRI,
+ const SDNode *Node = nullptr) {
+ bool Added = false;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ // Check if Ref is live.
+ if (!LiveRegDefs[*AI])
+ continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AI] == SU)
+ continue;
+
+ // Allow multiple uses of the same def.
+ if (Node && LiveRegDefs[*AI]->getNode() == Node)
+ continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AI).second) {
+ LRegs.push_back(*AI);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVectorImpl<unsigned> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isAssignedRegDep()) {
+ CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (Register::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
+ }
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ for (MCPhysReg Reg : MCID.implicit_defs())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+ // Try duplicating the nodes that produce these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it's
+ // possible to copy the value but it requires cross register class copies
+ // and it is expensive.
+ // If the cross copy register class is null, then it's not possible to copy
+ // the value at all.
+ SUnit *NewDef = nullptr;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - No-scheduling scheduler; it simply linearizes the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule() override;
+
+ MachineBasicBlock *
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(nullptr);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
+ LLVM_DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = nullptr;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
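+
+// Editorial sketch (not part of the original file): a tiny stand-alone model
+// of the same degree-counting walk, on a toy graph instead of SDNodes. Each
+// node's counter starts at its number of users and it is emitted once its
+// last user has been emitted; reversing the emission order afterwards yields
+// a topological order, as EmitSchedule below does for the real Sequence.
+//
+//   #include <cstdio>
+//   #include <vector>
+//
+//   struct ToyNode { std::vector<int> Ops; int Degree = 0; };
+//
+//   static void scheduleNode(std::vector<ToyNode> &G, int N,
+//                            std::vector<int> &Seq) {
+//     Seq.push_back(N);                // all users of N are already emitted
+//     for (int Op : G[N].Ops)
+//       if (--G[Op].Degree == 0)       // N was the last unemitted user of Op
+//         scheduleNode(G, Op, Seq);
+//   }
+//
+//   int main() {
+//     // Node 2 is the root; it uses nodes 1 and 0, and node 1 uses node 0.
+//     std::vector<ToyNode> G(3);
+//     G[1].Ops = {0};
+//     G[2].Ops = {1, 0};
+//     for (const ToyNode &N : G)
+//       for (int Op : N.Ops)
+//         ++G[Op].Degree;              // use counts play the role of node ids
+//     std::vector<int> Seq;
+//     scheduleNode(G, 2, Seq);
+//     for (int I = (int)Seq.size() - 1; I >= 0; --I)
+//       std::printf("%d ", Seq[I]);    // prints "0 1 2"
+//     std::puts("");
+//     return 0;
+//   }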
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SDNode &Node : DAG->allnodes()) {
+ SDNode *N = &Node;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (const SDNode *U : Glue->uses())
+ if (U == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
+ DenseMap<SDValue, Register> VRBaseMap;
+
+ LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
+
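+  // The sequence was built bottom-up, so emit it in reverse order.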
+ unsigned NumNodes = Sequence.size();
+ MachineBasicBlock *BB = Emitter.getBlock();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ LLVM_DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+
+ // Emit any debug values associated with the node.
+ if (N->getHasDebugValue()) {
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ for (auto *DV : DAG->GetDbgValues(N)) {
+ if (!DV->isEmitted())
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 000000000000..458f50c54824
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,3210 @@
+//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
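+// In rough outline (see ListScheduleBottomUp below):
+//
+//   while (available or interfering nodes remain) {
+//     SU = PickNodeToScheduleBottomUp(); // ready, hazard-free, may backtrack
+//     AdvancePastStalls(SU);
+//     ScheduleNodeBottomUp(SU);          // releases SU's predecessors
+//   }
+//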
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit *> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle = 0;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle = ~0u;
+
+  /// IssueCount - Count of instructions issued in this cycle.
+  /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount = 0u;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes that
+  /// modify the registers can be scheduled.
+ unsigned NumLiveRegs = 0u;
+ std::unique_ptr<SUnit*[]> LiveRegDefs;
+ std::unique_ptr<SUnit*[]> LiveRegGens;
+
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+
+ using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
+
+ LRegsMapT LRegsMap;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf),
+ NeedLatency(needlatency), AvailableQueue(availqueue),
+ Topo(SUnits, nullptr) {
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+ }
+
+ ~ScheduleDAGRRList() override {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule() override;
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
+  /// Does *NOT* update the topological ordering! It just queues an update.
+ void AddPredQueued(SUnit *SU, const SDep &D) {
+ Topo.AddPredQueued(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *TryUnfoldSU(SUnit *);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVectorImpl<SUnit*>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+
+ void releaseInterferences(unsigned Reg = 0);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = newSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.AddSUnitWithoutPredecessors(NewNode);
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.AddSUnitWithoutPredecessors(NewNode);
+ return NewNode;
+ }
+
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool forceUnitLatencies() const override {
+ return !NeedLatency;
+ }
+};
+
+} // end anonymous namespace
+
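+/// Unit cost charged for a REG_SEQUENCE def in GetCostForDef below.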
+static constexpr unsigned RegSequenceCost = 1;
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
+ MVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::Untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = RegSequenceCost;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ assert(RC && "Not a valid register class");
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle =
+ DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
+ NumLiveRegs = 0;
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(nullptr);
+
+ LLVM_DEBUG(dump());
+ Topo.MarkDirty();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+
+ AvailableQueue->releaseState();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*PredSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!forceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+ // CapturePred and others may have left the node in the pending queue, avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ while (true) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : N->op_values())
+ if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other) {
+ N = Op.getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ while (true) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = nullptr;
+ unsigned BestMaxNest = MaxNest;
+ for (const SDValue &Op : N->op_values()) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(Op.getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other) {
+ N = Op.getNode();
+ goto found_chain_operand;
+ }
+ return nullptr;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return nullptr;
+ }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SDep &Pred : SU->Preds) {
+ ReleasePred(SU, &Pred);
+ if (Pred.isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+ if (!LiveRegGens[Pred.getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[Pred.getReg()] = SU;
+ }
+ }
+ }
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+ assert(N && "Must find call sequence start");
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ HazardRec->RecedeCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+  // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = SU->getHeight();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, -Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->scheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SDep &Succ : SU->Succs) {
+    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
+ if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[Succ.getReg()] = nullptr;
+ LiveRegGens[Succ.getReg()] = nullptr;
+ releaseInterferences(Succ.getReg());
+ }
+ }
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ for (SDep &Pred : SU->Preds) {
+ CapturePred(&Pred);
+ if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[Pred.getReg()] = nullptr;
+ LiveRegGens[Pred.getReg()] = nullptr;
+ releaseInterferences(Pred.getReg());
+ }
+ }
+
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ SUnit *SeqEnd = CallSeqEndForStart[SU];
+ assert(SeqEnd && "Call sequence start/end must be known");
+ assert(!LiveRegDefs[CallResource]);
+ assert(!LiveRegGens[CallResource]);
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = SeqEnd;
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[CallResource]);
+ assert(LiveRegGens[CallResource]);
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ for (auto &Succ : SU->Succs) {
+ if (Succ.isAssignedRegDep()) {
+ auto Reg = Succ.getReg();
+ if (!LiveRegDefs[Reg])
+ ++NumLiveRegs;
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[Reg] = SU;
+
+      // Update LiveRegGens only if it was empty before this unscheduling,
+      // to avoid incorrectly overwriting an entry set in a previous run.
+ if (!LiveRegGens[Reg]) {
+ // Find the successor with the lowest height.
+ LiveRegGens[Reg] = Succ.getSUnit();
+ for (auto &Succ2 : SU->Succs) {
+ if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
+ Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
+ LiveRegGens[Reg] = Succ2.getSUnit();
+ }
+ }
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
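+  // Replay the last LookAhead scheduled nodes into the freshly reset hazard
+  // recognizer, receding its cycle as needed, so its lookahead window again
+  // matches the tail of the shortened Sequence.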
+ std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (auto E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+/// BacktrackBottomUp - Unschedule nodes back to and including BtSU so that a
+/// specific node (SU) can be scheduled.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
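+/// isOperandOf - Return true if SU's node, or any node glued to it, is an
+/// operand of N.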
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// TryUnfoldSU - Attempt to unfold SU's node into a separate load and use.
+/// Returns the SUnit to schedule on success, or null if unfolding is not
+/// possible.
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ // Use while over if to ease fall through.
+ SmallVector<SDNode *, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+  // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
+  // which can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return nullptr;
+
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+
+ // LoadNode may already exist. This can happen when there is another
+  // load from the same location that produces the same type of value,
+  // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ // If LoadSU has already been scheduled, we should clone it but
+ // this would negate the benefit to unfolding so just return SU.
+ if (LoadSU->isScheduled)
+ return SU;
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ bool isNewN = true;
+ SUnit *NewSU;
+ // This can only happen when isNewLoad is false.
+ if (N->getNodeId() != -1) {
+ NewSU = &SUnits[N->getNodeId()];
+ // If NewSU has already been scheduled, we need to clone it, but this
+ // negates the benefit to unfolding so just return SU.
+ if (NewSU->isScheduled) {
+ return SU;
+ }
+ isNewN = false;
+ } else {
+ NewSU = CreateNewSUnit(N);
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+ }
+
+ LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+
+ // Now that we are committed to unfolding replace DAG Uses.
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
+ SDValue(LoadNode, 1));
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (const SDep &Pred : ChainPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPredQueued(LoadSU, Pred);
+ }
+ for (const SDep &Pred : LoadPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPredQueued(LoadSU, Pred);
+ }
+ for (const SDep &Pred : NodePreds) {
+ RemovePred(SU, Pred);
+ AddPredQueued(NewSU, Pred);
+ }
+ for (SDep &D : NodeSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPredQueued(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
+ !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (SDep &D : ChainSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPredQueued(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPredQueued(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ if (isNewN)
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0)
+ NewSU->isAvailable = true;
+
+ return NewSU;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ if (N->getGluedNode() &&
+ !TII->canCopyGluedNodeDuringSchedule(N)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Giving up because it has incoming glue and the target does not "
+ "want to copy it\n");
+ return nullptr;
+ }
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue) {
+ LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
+ return nullptr;
+ } else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (const SDValue &Op : N->op_values()) {
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
+ LLVM_DEBUG(
+ dbgs() << "Giving up because it one of the operands is glue and "
+ "the target does not want to copy it\n");
+ return nullptr;
+ }
+ }
+
+ // If possible unfold instruction.
+ if (TryUnfold) {
+ SUnit *UnfoldSU = TryUnfoldSU(SU);
+ if (!UnfoldSU)
+ return nullptr;
+ SU = UnfoldSU;
+ N = SU->getNode();
+ // If this can be scheduled don't bother duplicating and just return
+ if (SU->NumSuccsLeft == 0)
+ return SU;
+ }
+
+ LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPredQueued(NewSU, Pred);
+
+ // Make sure the clone comes after the original. (InstrEmitter assumes
+ // this ordering.)
+ AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(NewSU);
+ AddPredQueued(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.emplace_back(SuccSU, D);
+ }
+ }
+ for (const auto &[DelSU, DelD] : DelDeps)
+ RemovePred(DelSU, DelD);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVectorImpl<SUnit*> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(nullptr);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(nullptr);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(CopyToSU);
+ AddPredQueued(SuccSU, D);
+ DelDeps.emplace_back(SuccSU, Succ);
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise,
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ }
+ }
+ for (const auto &[DelSU, DelD] : DelDeps)
+ RemovePred(DelSU, DelD);
+
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPredQueued(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPredQueued(CopyToSU, ToDep);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(!MCID.implicit_defs().empty() &&
+ "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (MCPhysReg ImpDef : MCID.implicit_defs()) {
+ if (Reg == ImpDef)
+ break;
+ ++NumRes;
+ }
+ }
+ return N->getSimpleValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Record in RegAdded/LRegs any "live" registers that the
+/// specified register def of the specified SUnit would clobber.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs,
+ const TargetRegisterInfo *TRI,
+ const SDNode *Node = nullptr) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+ // Check if Ref is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+    // Also allow the use when the live def comes from the given Node.
+ if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+ continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI).second) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
+
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ ArrayRef<SUnit*> LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i).second)
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (const SDValue &Op : N->op_values())
+ if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
+ return RegOp->getRegMask();
+ return nullptr;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
+ CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
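+      // Walk the operand groups: each group starts with a flag word that
+      // encodes its kind and the number of register operands that follow.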
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (Reg.isPhysical())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+ SrcNode);
+ }
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) &&
+ RegAdded.insert(CallResource).second)
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask,
+ ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ RegAdded, LRegs);
+
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (MCID.hasOptionalDef()) {
+ // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
+ // This operand can be either a def of CPSR, if the S bit is set; or a use
+ // of %noreg. When the OptionalDef is set to a valid register, we need to
+ // handle it in the same way as an ImplicitDef.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
+ if (MCID.operands()[i].isOptionalDef()) {
+ const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
+ Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ }
+ for (MCPhysReg Reg : MCID.implicit_defs())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
+ if (!is_contained(LRegs, Reg))
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
+ auto FindAvailableNode = [&]() {
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LLVM_DEBUG(dbgs() << " Interfering reg ";
+ if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
+ else dbgs() << printReg(LRegs[0], TRI);
+ dbgs() << " SU #" << CurSU->NodeNum << '\n');
+ auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
+ if (LRegsInserted) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsIter->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
+ }
+ };
+ FindAvailableNode();
+ if (CurSU)
+ return CurSU;
+
+ // We query the topological order in the loop body, so make sure outstanding
+ // updates are applied before entering it (we only enter the loop if there
+ // are some interferences). If we make changes to the ordering, we exit
+ // the loop.
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (SUnit *TrySU : Interferences) {
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = nullptr;
+ unsigned LiveCycle = std::numeric_limits<unsigned>::max();
+ for (unsigned Reg : LRegs) {
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
+ << ") to SU(" << TrySU->NodeNum << ")\n");
+ AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available.
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
+ LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
+ CurSU = AvailableQueue->pop();
+ } else {
+ LLVM_DEBUG(dbgs() << "TrySU available\n");
+ // Available and in AvailableQueue
+ AvailableQueue->remove(TrySU);
+ CurSU = TrySU;
+ }
+ FindAvailableNode();
+ // Interferences has been mutated. We must break.
+ break;
+ }
+ }
+
+ if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try to
+    // duplicate the nodes that produce these "too expensive to copy"
+    // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is the same as RC, then it must be
+    // possible to copy the value directly. Do not try to duplicate the def.
+    // If the cross copy register class is not the same as RC, then it's
+    // possible to copy the value but it requires cross register class copies
+    // and is expensive.
+ // If cross copy register class is null, then it's not possible to copy
+ // the value at all.
+ SUnit *NewDef = nullptr;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
+ LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
+ "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order since this is bottom-up scheduling.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
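+
+// Editor's illustrative sketch for ListScheduleBottomUp above (not part of
+// the original source): for a tiny DAG
+//   a = load; b = load; c = add a, b; d = store c
+// the bottom-up pass schedules d first, then c, then a and b (order chosen
+// by the priority queue), e.g. Sequence = [d, c, b, a]; the std::reverse at
+// the end turns that into the emitted order [a, b, c, d].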
+
+namespace {
+
+class RegReductionPQBase;
+
+struct queue_sort {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+/// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit *> Queue;
+ unsigned CurQueueId = 0;
+ bool TracksRegPressure;
+ bool SrcOrder;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits = nullptr;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetLowering *TLI = nullptr;
+ ScheduleDAGRRList *scheduleDAG = nullptr;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
+ SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits) override;
+
+ void addNode(const SUnit *SU) override;
+
+ void updateNode(const SUnit *SU) override;
+
+ void releaseState() override {
+ SUnits = nullptr;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return SU->getNode()->getIROrder();
+ }
+
+ bool empty() const override { return Queue.empty(); }
+
+ void push(SUnit *U) override {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) override {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const override { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void scheduledNode(SUnit *SU) override;
+
+ void unscheduledNode(SUnit *SU) override;
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
+ unsigned BestIdx = 0;
+ // Only compute the cost for the first 1000 items in the queue, to avoid
+ // excessive compile-times for very large queues.
+ for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
+ I++)
+ if (Picker(Q[BestIdx], Q[I]))
+ BestIdx = I;
+ SUnit *V = Q[BestIdx];
+ if (BestIdx + 1 != Q.size())
+ std::swap(Q[BestIdx], Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const override { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const override {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() override {
+ if (Queue.empty()) return nullptr;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit *> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ DAG->dumpNode(*SU);
+ }
+ }
+#endif
+};
+
+using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
+using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
+using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
+using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
+/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
+/// A smaller number means a higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ if (SUNumbers[SU->NodeNum] != 0)
+ return SUNumbers[SU->NodeNum];
+
+ // Use WorkList to avoid stack overflow on excessively large IRs.
+ struct WorkState {
+ WorkState(const SUnit *SU) : SU(SU) {}
+ const SUnit *SU;
+ unsigned PredsProcessed = 0;
+ };
+
+ SmallVector<WorkState, 16> WorkList;
+ WorkList.push_back(SU);
+ while (!WorkList.empty()) {
+ auto &Temp = WorkList.back();
+ auto *TempSU = Temp.SU;
+ bool AllPredsKnown = true;
+ // Try to find a non-evaluated pred and push it into the processing stack.
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
+ auto &Pred = TempSU->Preds[P];
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (SUNumbers[PredSU->NodeNum] == 0) {
+#ifndef NDEBUG
+ // In debug mode, check that we don't have such element in the stack.
+ for (auto It : WorkList)
+ assert(It.SU != PredSU && "Trying to push an element twice?");
+#endif
+ // Next time start processing this one starting from the next pred.
+ Temp.PredsProcessed = P + 1;
+ WorkList.push_back(PredSU);
+ AllPredsKnown = false;
+ break;
+ }
+ }
+
+ if (!AllPredsKnown)
+ continue;
+
+ // Once all preds are known, we can calculate the answer for this one.
+ unsigned SethiUllmanNumber = 0;
+ unsigned Extra = 0;
+ for (const SDep &Pred : TempSU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
+ assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+ SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
+ WorkList.pop_back();
+ }
+
+ assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
+ return SUNumbers[SU->NodeNum];
+}
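+
+// Editor's worked example for CalcNodeSethiUllmanNumber above (illustrative,
+// not from the original source): leaves with no non-chain predecessors are
+// clamped to 1. For c = add a, b with leaf operands a and b, both preds tie
+// at 1, so Extra becomes 1 and c gets 1 + 1 = 2. For d = add c, e with a
+// leaf e, only c holds the maximum, so d keeps 2. These numbers feed
+// RegReductionPQBase::getNodePriority below.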
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (const SUnit &SU : *SUnits)
+ CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors, so that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
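+
+// Editor's summary of getNodePriority above (not from the original source):
+// TokenFactor, CopyToReg, and the subreg pseudo-ops return 0 so they stay
+// next to their uses; a chain-terminating node with data predecessors but no
+// data successors (e.g. a store) returns 0xffff; a node with data successors
+// but no data predecessors returns 0; everything else returns its
+// Sethi-Ullman number.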
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
+ }
+}
+#endif
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
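+
+// Editor's note on HighRegPressure above (illustrative, not from the original
+// source): it returns true as soon as any not-yet-live def of a data
+// predecessor would push its register class to or past RegLimit; pressure is
+// checked per class, never summed across classes.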
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
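+
+// Editor's note on MayReduceRegPressure above (illustrative, not from the
+// original source): it returns true if any used result of this machine node
+// belongs to a register class that is already at or above its limit,
+// presumably because scheduling SU in the bottom-up order retires those defs
+// and relieves that pressure.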
+
+// Compute the register pressure contribution of this instruction by counting
+// up for uses that are not live and down for defs. Only count register
+// classes that are already under high pressure. As a side effect, compute
+// the number of uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ MVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
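+
+// Editor's note on RegPressureDiff above (illustrative, not from the original
+// source): the sign convention is roughly "positive means scheduling SU is
+// expected to raise pressure": each use that would become live in an
+// already-pressured class counts +1, each def SU provides in such a class
+// counts -1, and LiveUses separately counts uses whose defs are already
+// fully live.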
+
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
+ << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ LLVM_DEBUG(dumpRegPressure());
+}
+
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ if (POpc == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx =
+ cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned RCId = RC->getID();
+ // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
+ // here. Instead use the same constant as in GetCostForDef.
+ RegPressure[RCId] += RegSequenceCost;
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = PN->getSimpleValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ LLVM_DEBUG(dumpRegPressure());
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue; // ignore chain succs
+ unsigned Height = Succ.getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (Succ.getSUnit()->getNode() &&
+ Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(Succ.getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
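+
+// Editor's illustrative example for closestSucc above (not from the original
+// source): if SU's only data successor is a CopyToReg whose own closest
+// successor sits at height 5, closestSucc(SU) reports 6, so a stack of
+// copies is treated as one position next to the real consumer.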
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ const SUnit *PredSU = Pred.getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ Register Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue;
+ const SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ Register Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live-in operands and live-out uses.
+// Also set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ Pred.getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ Pred.getSUnit()->isVRegCycle = false;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ if (Pred.getSUnit()->isVRegCycle &&
+ Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling both of them will cause pipeline stalls, sort them
+ // according to their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
+ // If neither instruction stalls (!LStall && !RStall) and the
+ // HazardRecognizer is enabled (grouping instructions by cycle), then
+ // height is already covered, so only depth matters. We also reach this
+ // point if both stall but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ #ifndef NDEBUG
+ static const char *const PhysRegMsg[] = { " has no physreg",
+ " defines a physreg" };
+ #endif
+ LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
+ << ") " << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+ // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+ // the same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+ // Try schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+ // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
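+
+// Editor's summary of the BURRSort tie-break ladder above (not from the
+// original source), roughly: physreg defs first, then call-adjusted
+// Sethi-Ullman priority, then source order when calls are involved, then
+// distance to the closest successor, then scratch-register count, then
+// latency (only when neither node is a call), and finally NodeQueueId as a
+// deterministic fallback.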
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+ }
+ else if (!LHigh && RHigh) {
+ LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
+ << "): " << LPDiff << " != SU(" << right->NodeNum
+ << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses
+ << "\n");
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall)
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum
+ << "): " << right->getDepth() << "\n");
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow)
+ return left->getHeight() > right->getHeight();
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure && !SrcOrder)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
+ for (SUnit &SU : sunits)
+ initVRegCycle(&SU);
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successor's explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ ArrayRef<MCPhysReg> ImpDefs =
+ TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if (ImpDefs.empty() && !RegMask)
+ return false;
+
+ for (const SDep &Succ : SU->Succs) {
+ SUnit *SuccSU = Succ.getSUnit();
+ for (const SDep &SuccPred : SuccSU->Preds) {
+ if (!SuccPred.isAssignedRegDep())
+ continue;
+
+ if (RegMask &&
+ MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+
+ for (MCPhysReg ImpDef : ImpDefs) {
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
+ assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ ArrayRef<MCPhysReg> SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).implicit_defs();
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (SUImpDefs.empty() && !SURegMask)
+ continue;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ MCPhysReg Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ for (MCPhysReg SUReg : SUImpDefs) {
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (SUnit &SU : *SUnits) {
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU.NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU.NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU.getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
+ continue;
+
+ SDNode *PredFrameSetup = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (Pred.isCtrl() && Pred.getSUnit()) {
+ // Find a predecessor that is not a data dependence.
+ SDNode *PredND = Pred.getSUnit()->getNode();
+
+ // If PredND is FrameSetup, we should not pre-schedule the node.
+ // Otherwise, when scheduling bottom-up, ADJCALLSTACKDOWN and
+ // ADJCALLSTACKUP may hold the CallResource too long and prevent
+ // other calls from being scheduled. If there's no other available
+ // node to schedule, the scheduler will try to rename the register
+ // by creating a copy to avoid the conflict, which will fail because
+ // CallResource is not a real physical register.
+ if (PredND && PredND->isMachineOpcode() &&
+ (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+ PredFrameSetup = PredND;
+ break;
+ }
+ }
+ // Skip nodes that have a FrameSetup predecessor.
+ if (PredFrameSetup != nullptr)
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (!Pred.isCtrl()) {
+ PredSU = Pred.getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU.getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (const SDep &PredSucc : PredSU->Succs) {
+ SUnit *PredSuccSU = PredSucc.getSUnit();
+ if (PredSuccSU == &SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(&SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ LLVM_DEBUG(
+ dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
+ << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != &SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPredQueued(&SU, Edge);
+ Edge.setSUnit(&SU);
+ scheduleDAG->AddPredQueued(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (SUnit &SU : *SUnits) {
+ if (!SU.isTwoAddress)
+ continue;
+
+ SDNode *Node = SU.getNode();
+ if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(&SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU.getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU)
+ continue;
+ for (const SDep &Succ : DUSU->Succs) {
+ if (Succ.isCtrl())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU == &SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU.getHeight() &&
+ (SU.getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+ // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU.isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, &SU)) {
+ LLVM_DEBUG(dbgs()
+ << " Adding a pseudo-two-addr edge from SU #"
+ << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
+ }
+ }
+ }
+ }
+}
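+
+// Editor's illustrative example for AddPseudoTwoAddrDeps above (not from the
+// original source): given a two-address node "t2 = add t1, t3" whose result
+// is tied to t1, any other reader of t1 is made an artificial predecessor of
+// the add, so in the emitted order that reader consumes t1 before the add
+// overwrites it, avoiding an extra copy of t1.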
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
+
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 000000000000..0579c1664d5c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,1086 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency'"
+ "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
+ DAG = dag;
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
+}
+
+/// NewSUnit - Creates a new SUnit and returns a pointer to it.
+///
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = nullptr;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.emplace_back(N, (unsigned)SUnits.size());
+ assert((Addr == nullptr || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = newSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const TargetLowering &TLI,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
+ return;
+
+ if (Register::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
+ PhysReg = Reg;
+ } else if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg))
+ PhysReg = Reg;
+ }
+
+ if (PhysReg != 0) {
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+}
+
+// Helper for AddGlue to clone node operands.
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
+ SDValue ExtraOper = SDValue()) {
+ SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
+ if (ExtraOper.getNode())
+ Ops.push_back(ExtraOper);
+
+ SDVTList VTList = DAG->getVTList(VTs);
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ if (MN)
+ MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
+
+ // Reset the memory references
+ if (MN)
+ DAG->setNodeMemRefs(MN, MMOs);
+}
+
+static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return false;
+
+ // Don't add a glue operand to something that already uses glue.
+ if (GlueDestNode &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ return false;
+ }
+ // Don't add glue to something that already has a glue value.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+ SmallVector<EVT, 4> VTs(N->values());
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ CloneNodeWithValues(N, DAG, VTs, Glue);
+
+ return true;
+}
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+ assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+ !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+ "expected an unused glue value");
+
+ CloneNodeWithValues(N, DAG,
+ ArrayRef(N->value_begin(), N->getNumValues() - 1));
+}
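+
+// The two invariants AddGlue() relies on, restated over a simplified node
+// shape: a node may consume glue only through its last operand and produce
+// glue only through its last result. VT, SimpleNode and canTakeGlueOperand
+// are illustrative stand-ins, not LLVM types; a minimal sketch kept out of
+// the build.
+#if 0
+#include <vector>
+
+enum class VT { Normal, Other, Glue };
+
+struct SimpleNode {
+  std::vector<VT> OperandTypes; // last slot is where a glue input would go
+  std::vector<VT> ResultTypes;  // last slot is where a glue output would go
+};
+
+static bool canTakeGlueOperand(const SimpleNode &N) {
+  // Mirrors the early-outs in AddGlue(): refuse nodes that already consume
+  // glue (last operand) or already produce glue (last result).
+  return (N.OperandTypes.empty() || N.OperandTypes.back() != VT::Glue) &&
+         (N.ResultTypes.empty() || N.ResultTypes.back() != VT::Glue);
+}
+#endif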
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDValue Chain;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1);
+ if (!Chain)
+ return;
+
+ // Skip any load instruction that has a tied input. There may be an additional
+ // dependency requiring a different order than by increasing offsets, and the
+ // added glue may introduce a cycle.
+ auto hasTiedInput = [this](const SDNode *N) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+ if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+ return true;
+ }
+
+ return false;
+ };
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+
+ if (hasTiedInput(Base))
+ return;
+
+ // This algorithm requires a reasonably low use count before finding a match
+ // to avoid uselessly blowing up compile time in large blocks.
+ unsigned UseCount = 0;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E && UseCount < 100; ++I, ++UseCount) {
+ if (I.getUse().getResNo() != Chain.getResNo())
+ continue;
+
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User).second)
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2 ||
+ hasTiedInput(User)) {
+      // FIXME: Should be ok if their addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ }
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ // Reset UseCount to allow more matches.
+ UseCount = 0;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ llvm::sort(Offsets);
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ SDValue InGlue;
+ if (AddGlue(Lead, InGlue, true, DAG))
+ InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+    // If AddGlue fails, we could leave an unused glue value dangling on
+    // InGlue. This should not cause any problems; such a value is cleaned up
+    // below by RemoveUnusedGlue.
+ if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+ else if (!OutGlue && InGlue.getNode())
+ RemoveUnusedGlue(InGlue.getNode(), DAG);
+ }
+}
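+
+// A self-contained sketch of the clustering policy above, assuming plain
+// integer offsets and a fixed byte window standing in for the target hook
+// shouldScheduleLoadsNear(); nearOffsets() is an illustrative helper, not an
+// LLVM API, and the block is kept out of the build.
+#if 0
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+// Return the offsets considered "near" the smallest offset, in increasing
+// order, mirroring the BaseOff walk in ClusterNeighboringLoads().
+static std::vector<int64_t> nearOffsets(std::vector<int64_t> Offsets,
+                                        int64_t WindowBytes) {
+  std::vector<int64_t> Near;
+  if (Offsets.empty())
+    return Near;
+  std::sort(Offsets.begin(), Offsets.end());
+  int64_t Base = Offsets.front();
+  for (int64_t Off : Offsets) {
+    if (Off - Base > WindowBytes)
+      break; // Stop right here; ignore offsets that are further away.
+    Near.push_back(Off);
+  }
+  return Near;
+}
+#endif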
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SDNode &NI : DAG->allnodes()) {
+ SDNode *Node = &NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SDNode &NI : DAG->allnodes()) {
+ NI.setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 32> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (const SDValue &Op : NI->op_values())
+ if (Visited.insert(Op.getNode()).second)
+ Worklist.push_back(Op.getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = newSUnit(NI);
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode *U : N->uses())
+ if (GlueVal.isOperandOf(U)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = U;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ computeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = forceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (SUnit &SU : SUnits) {
+ SDNode *MainNode = SU.getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU.isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU.isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ !TII->get(N->getMachineOpcode()).implicit_defs().empty()) {
+ SU.hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU.hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ unsigned DefIdx = N->getOperand(i).getResNo();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == &SU)
+ continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependency as physical register
+ // dependency. This may change in the future though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if(isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
+ if (!isChain && !UnitLatencies) {
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep);
+ }
+
+ if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the SelectionDAG we are
+/// given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ if (POpc == TargetOpcode::PATCHPOINT &&
+ Node->getValueType(0) == MVT::Other) {
+ // PATCHPOINT is defined to have one result, but it might really have none
+ // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
+ // real definition.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getSimpleValueType(DefIdx);
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (!Node) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
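+
+// RegDefIter and InitNumRegDefsLeft() in one picture: walk every node glued
+// into the SUnit and count each defined value that actually has a use. A
+// flattened sketch over plain containers; FakeNode and countUsedDefs are
+// illustrative names, not LLVM types, and the block is kept out of the build.
+#if 0
+#include <vector>
+
+struct FakeNode {
+  std::vector<bool> ValueHasUse; // one flag per register-defining value
+  FakeNode *Glued = nullptr;     // next node glued below this one
+};
+
+static unsigned countUsedDefs(const FakeNode *N) {
+  unsigned Count = 0;
+  for (; N; N = N->Glued)
+    for (bool Used : N->ValueHasUse)
+      if (Used)
+        ++Count; // Corresponds to one ++SU->NumRegDefsLeft above.
+  return Count;
+}
+#endif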
+
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
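+
+// The latency policy above, restated as a small pure function over flags.
+// latencyFor() is an illustrative helper, not an LLVM API; the constant 10 is
+// the default of the sched-high-latency-cycles option. Kept out of the build.
+#if 0
+static unsigned latencyFor(bool IsTokenFactor, bool ForceUnitLatency,
+                           bool HasItinerary, bool IsHighLatencyDef,
+                           unsigned SumOfGluedItineraryLatencies) {
+  if (IsTokenFactor)
+    return 0; // TokenFactor is always zero latency.
+  if (ForceUnitLatency)
+    return 1; // The scheduler ignores latencies.
+  if (!HasItinerary)
+    return IsHighLatencyDef ? 10 : 1; // HighLatencyCycles fallback.
+  return SumOfGluedItineraryLatencies; // Sum over all glued machine nodes.
+}
+#endif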
+
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so as not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = Latency - 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dumpNodeName(SU);
+ dbgs() << ": ";
+
+ if (!SU.getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU.getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+#endif
+}
+
+void ScheduleDAGSDNodes::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getNode() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getNode() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (const SUnit *SU : Sequence) {
+ if (SU)
+ dumpNode(*SU);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = llvm::count(Sequence, nullptr);
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void
+ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+ SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
+ DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ /// Returns true if \p DV has any VReg operand locations which don't exist in
+ /// VRBaseMap.
+ auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
+ for (const SDDbgOperand &L : DV->getLocationOps()) {
+ if (L.getKind() == SDDbgOperand::SDNODE &&
+ VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
+ return true;
+ }
+ return false;
+ };
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with the same
+ // source order number as N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ for (auto *DV : DAG->GetDbgValues(N)) {
+ if (DV->isEmitted())
+ continue;
+ unsigned DVOrder = DV->getOrder();
+ if (Order != 0 && DVOrder != Order)
+ continue;
+ // If DV has any VReg location operands which haven't been mapped then
+ // either that node is no longer available or we just haven't visited the
+ // node yet. In the former case we should emit an undef dbg_value, but we
+ // can do it later. And for the latter we'll want to wait until all
+ // dependent nodes have been visited.
+ if (!DV->isInvalidated() && HasUnknownVReg(DV))
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
+ if (!DbgMI)
+ continue;
+ Orders.push_back({DVOrder, DbgMI});
+ BB->insert(InsertPos, DbgMI);
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void
+ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+ DenseMap<SDValue, Register> &VRBaseMap,
+ SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
+ SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) {
+ unsigned Order = N->getIROrder();
+ if (!Order || Seen.count(Order)) {
+ // Process any valid SDDbgValues even if node does not have any order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ // If a new instruction was generated for this Order number, record it.
+ // Otherwise, leave this order number unseen: we will either find later
+ // instructions for it, or leave it unseen if there were no instructions at
+ // all.
+ if (NewInsn) {
+ Seen.insert(Order);
+ Orders.push_back({Order, NewInsn});
+ }
+
+ // Even if no instruction was generated, a Value may have become defined via
+ // earlier nodes. Try to process them now.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue; // ignore chain preds
+ if (Pred.getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit *, Register>::iterator VRI =
+ VRBaseMap.find(Pred.getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ Register Reg;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+          continue; // ignore chain succs
+ if (Succ.getReg()) {
+ Reg = Succ.getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(Pred.getReg() && "Unknown physical register!");
+ Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(Pred.getReg());
+ }
+ break;
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
+ DenseMap<SDValue, Register> VRBaseMap;
+ DenseMap<SUnit*, Register> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<Register, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // Emit a node, and determine where its first instruction is for debuginfo.
+ // Zero, one, or multiple instructions can be created when emitting a node.
+ auto EmitNode =
+ [&](SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * {
+    // Fetch instruction prior to this, or end() if nonexistent.
+ auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
+ if (I == BB->begin())
+ return BB->end();
+ else
+ return std::prev(Emitter.getInsertPos());
+ };
+
+ MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
+ Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
+ MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
+
+ // If the iterator did not change, no instructions were inserted.
+ if (Before == After)
+ return nullptr;
+
+ MachineInstr *MI;
+ if (Before == BB->end()) {
+ // There were no prior instructions; the new ones must start at the
+ // beginning of the block.
+ MI = &Emitter.getBlock()->instr_front();
+ } else {
+ // Return first instruction after the pre-existing instructions.
+ MI = &*std::next(Before);
+ }
+
+ if (MI->isCandidateForCallSiteEntry() &&
+ DAG->getTarget().Options.EmitCallSiteInfo)
+ MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node));
+
+ if (DAG->getNoMergeSiteInfo(Node)) {
+ MI->setFlag(MachineInstr::MIFlag::NoMerge);
+ }
+
+ if (MDNode *MD = DAG->getPCSections(Node))
+ MI->setPCSections(MF, MD);
+
+ return MI;
+ };
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI) {
+ BB->insert(InsertPos, DbgMI);
+ // We re-emit the dbg_value closer to its use, too, after instructions
+ // are emitted to the BB.
+ (*PDI)->clearIsEmitted();
+ }
+ }
+ }
+
+ for (SUnit *SU : Sequence) {
+ if (!SU) {
+ // Null SUnit* is a noop.
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(N))
+ if (NewInsn && NewInsn->isCall())
+ NewInsn->setHeapAllocMarker(MF, MD);
+
+ GluedNodes.pop_back();
+ }
+ auto NewInsn =
+ EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
+ NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+ if (NewInsn && NewInsn->isCall())
+ NewInsn->setHeapAllocMarker(MF, MD);
+ }
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
+    // regardless of the host's implementation of std::sort.
+ llvm::stable_sort(Orders, less_first());
+ std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+ [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+ return LHS->getOrder() < RHS->getOrder();
+ });
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ assert(MI);
+ for (; DI != DE; ++DI) {
+ if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+ break;
+ if ((*DI)->isEmitted())
+ continue;
+
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
+ for (; DI != DE; ++DI) {
+ if ((*DI)->isEmitted())
+ continue;
+ assert((*DI)->getOrder() >= LastOrder &&
+ "emitting DBG_VALUE out of order");
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
+ }
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+
+ SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
+ SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
+ // Now emit the rest according to source order.
+ LastOrder = 0;
+ for (const auto &InstrOrder : Orders) {
+ unsigned Order = InstrOrder.first;
+ MachineInstr *MI = InstrOrder.second;
+ if (!MI)
+ continue;
+
+ // Insert all SDDbgLabel's whose order(s) are before "Order".
+ for (; DLI != DLE &&
+ (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
+ ++DLI) {
+ MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ if (DLI == DLE)
+ break;
+
+ LastOrder = Order;
+ }
+ }
+
+ InsertPos = Emitter.getInsertPos();
+ // In some cases, DBG_VALUEs might be inserted after the first terminator,
+ // which results in an invalid MBB. If that happens, move the DBG_VALUEs
+ // before the first terminator.
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ auto FirstTerm = InsertBB->getFirstTerminator();
+ if (FirstTerm != InsertBB->end()) {
+ assert(!FirstTerm->isDebugValue() &&
+ "first terminator cannot be a debug value");
+ for (MachineInstr &MI : make_early_inc_range(
+ make_range(std::next(FirstTerm), InsertBB->end()))) {
+ // Only scan up to insertion point.
+ if (&MI == InsertPos)
+ break;
+
+ if (!MI.isDebugValue())
+ continue;
+
+ // The DBG_VALUE was referencing a value produced by a terminator. By
+ // moving the DBG_VALUE, the referenced value also needs invalidating.
+ MI.getOperand(0).ChangeToRegister(0, false);
+ MI.moveBefore(&*FirstTerm);
+ }
+ }
+ return InsertBB;
+}
+
+/// Return the DAG name, derived from the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 000000000000..439ccfdc3275
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,193 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class AAResults;
+class InstrItineraryData;
+
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+  /// SDNodes with MVT::Glue operands are grouped along with the glued
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ MachineBasicBlock *BB = nullptr;
+ SelectionDAG *DAG = nullptr; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ ~ScheduleDAGSDNodes() override = default;
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<MCSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *newSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *Old);
+
+    /// BuildSchedGraph - Build the SUnit graph from the SelectionDAG we are
+    /// given as input. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// glued together nodes with a single SUnit.
+ void BuildSchedGraph(AAResults *AA);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// computeLatency - Compute node latency.
+ ///
+ virtual void computeLatency(SUnit *SU);
+
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
+ void dumpSchedule() const;
+
+ std::string getGraphNodeLabel(const SUnit *SU) const override;
+
+ std::string getDAGName() const override;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx = 0;
+ unsigned NodeNumDefs = 0;
+ MVT ValueType;
+
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != nullptr; }
+
+ MVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+
+ private:
+ void InitNodeNumDefs();
+ };
+
+ protected:
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 000000000000..1ba1fd65b8c9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,271 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AA - AAResults for making memory reference queries.
+ AAResults *AA;
+
+public:
+ ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+ }
+
+ ~ScheduleDAGVLIW() override {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule() override;
+
+private:
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*SuccSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ PendingQueue.push_back(SuccSU);
+ }
+}
+
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SDep &Succ : SU->Succs) {
+ assert(!Succ.isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ releaseSucc(SU, Succ);
+ }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->scheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+  // Add all leaves to the AvailableQueue.
+ for (SUnit &SU : SUnits) {
+ // It is available if it has no predecessors.
+ if (SU.Preds.empty()) {
+ AvailableQueue->push(&SU);
+ SU.isAvailable = true;
+ }
+ }
+
+ // While AvailableQueue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(nullptr);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = nullptr;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ LLVM_DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(nullptr); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/false);
+#endif
+}
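+
+// The per-iteration decision in listScheduleTopDown(), summarized as a tiny
+// state function. CycleAction and decide() are illustrative names, not LLVM
+// APIs; note that even in the Schedule case the cycle only advances when the
+// issued node has nonzero latency. Kept out of the build.
+#if 0
+enum class CycleAction { Schedule, AdvanceCycle, EmitNoop };
+
+static CycleAction decide(bool FoundReadySUnit, bool HasNoopHazards) {
+  if (FoundReadySUnit)
+    return CycleAction::Schedule;     // Issue the node this cycle.
+  if (!HasNoopHazards)
+    return CycleAction::AdvanceCycle; // Plain pipeline stall.
+  return CycleAction::EmitNoop;       // Fill the slot with an explicit noop.
+}
+#endif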
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 000000000000..30d202494320
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,12710 @@
+//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
+
+void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+void SelectionDAG::DAGNodeInsertedListener::anchor() {}
+
+#define DEBUG_TYPE "selectiondag"
+
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+ cl::Hidden, cl::init(true),
+ cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+ cl::desc("Number limit for gluing ld/st of memcpy."),
+ cl::Hidden, cl::init(0));
+
+static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
+ LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
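+
+// Example of the losesInfo check above, specialized to a single -> half
+// conversion: 1.0f converts exactly, while 1.0e5f exceeds half's range and
+// loses information. fitsInHalf() is an illustrative helper, not an LLVM API;
+// kept out of the build.
+#if 0
+static bool fitsInHalf(float F) {
+  APFloat V(F);
+  bool LosesInfo = false;
+  V.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
+  return !LosesInfo; // fitsInHalf(1.0f) == true, fitsInHalf(1.0e5f) == false
+}
+#endif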
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ unsigned EltSize =
+ N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ SplatVal = Op0->getAPIntValue().trunc(EltSize);
+ return true;
+ }
+ if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
+ SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize);
+ return true;
+ }
+ }
+
+ auto *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasUndefs;
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+ EltSize) &&
+ EltSize == SplatBitSize;
+}
+
+// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
+// specializations of the more general isConstantSplatVector()?
+
+bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
+ APInt SplatVal;
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).isUndef())
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
+ SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countr_one() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countr_one() < EltSize)
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
+ return false;
+ return true;
+}
+
+bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
+ APInt SplatVal;
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ bool IsAllUndef = true;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ IsAllUndef = false;
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target
+ // and a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all zeros, not whether the individual
+ // constants are.
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
+ if (CN->getAPIntValue().countr_zero() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countr_zero() < EltSize)
+ return false;
+ } else
+ return false;
+ }
+
+ // Do not accept an all-undef vector.
+ if (IsAllUndef)
+ return false;
+ return true;
+}
+
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true);
+}
+
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true);
+}
+
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize,
+ bool Signed) {
+ assert(N->getValueType(0).isVector() && "Expected a vector!");
+
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (EltSize <= NewEltSize)
+ return false;
+
+ if (N->getOpcode() == ISD::ZERO_EXTEND) {
+ return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
+ NewEltSize) &&
+ !Signed;
+ }
+ if (N->getOpcode() == ISD::SIGN_EXTEND) {
+ return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
+ NewEltSize) &&
+ Signed;
+ }
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize);
+ if (Signed && C.trunc(NewEltSize).sext(EltSize) != C)
+ return false;
+ if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C)
+ return false;
+ }
+
+ return true;
+}
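+
+// Example (illustrative): a v4i32 BUILD_VECTOR of the constants 1, 2, 3 and 4
+// is shrinkable to NewEltSize == 8 for both signed and unsigned queries, since
+// every element round-trips through an 8-bit truncation; replacing one element
+// with 300 blocks the shrink, as 300 does not survive truncation to 8 bits.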
+
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+ return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
+}
+
+bool ISD::isFreezeUndef(const SDNode *N) {
+ return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
+}
+
+bool ISD::matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs) {
+ // FIXME: Add support for scalar UNDEF cases?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ // FIXME: Add support for vector UNDEF cases?
+ if (ISD::BUILD_VECTOR != Op.getOpcode() &&
+ ISD::SPLAT_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (AllowUndefs && Op.getOperand(i).isUndef()) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
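+
+// Example (illustrative sketch; Op is assumed to be an SDValue in scope):
+//   bool AllLowByteMasks = ISD::matchUnaryPredicate(
+//       Op, [](ConstantSDNode *C) { return C->getAPIntValue().isMask(8); });
+// This returns true for a scalar i32 constant 0xFF as well as for a
+// BUILD_VECTOR or SPLAT_VECTOR whose elements are all 0xFF.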
+
+bool ISD::matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs, bool AllowTypeMismatch) {
+ if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ // TODO: Add support for scalar UNDEF cases?
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ // TODO: Add support for vector UNDEF cases?
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ (LHS.getOpcode() != ISD::BUILD_VECTOR &&
+ LHS.getOpcode() != ISD::SPLAT_VECTOR))
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ bool LHSUndef = AllowUndefs && LHSOp.isUndef();
+ bool RHSUndef = AllowUndefs && RHSOp.isUndef();
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
+ if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
+ return false;
+ if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType()))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
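+
+// Example (illustrative sketch; LHS and RHS are assumed to be SDValues in
+// scope):
+//   bool LHSAlwaysSmaller = ISD::matchBinaryPredicate(
+//       LHS, RHS, [](ConstantSDNode *L, ConstantSDNode *R) {
+//         return L->getAPIntValue().ult(R->getAPIntValue());
+//       });
+// The predicate is applied to a pair of scalar constants, or element-wise to
+// two BUILD_VECTOR/SPLAT_VECTOR nodes of the same opcode and type.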
+
+ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
+ switch (VecReduceOpcode) {
+ default:
+ llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ return ISD::FADD;
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ return ISD::FMUL;
+ case ISD::VECREDUCE_ADD:
+ case ISD::VP_REDUCE_ADD:
+ return ISD::ADD;
+ case ISD::VECREDUCE_MUL:
+ case ISD::VP_REDUCE_MUL:
+ return ISD::MUL;
+ case ISD::VECREDUCE_AND:
+ case ISD::VP_REDUCE_AND:
+ return ISD::AND;
+ case ISD::VECREDUCE_OR:
+ case ISD::VP_REDUCE_OR:
+ return ISD::OR;
+ case ISD::VECREDUCE_XOR:
+ case ISD::VP_REDUCE_XOR:
+ return ISD::XOR;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VP_REDUCE_SMAX:
+ return ISD::SMAX;
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VP_REDUCE_SMIN:
+ return ISD::SMIN;
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VP_REDUCE_UMAX:
+ return ISD::UMAX;
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VP_REDUCE_UMIN:
+ return ISD::UMIN;
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VP_REDUCE_FMAX:
+ return ISD::FMAXNUM;
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VP_REDUCE_FMIN:
+ return ISD::FMINNUM;
+ case ISD::VECREDUCE_FMAXIMUM:
+ return ISD::FMAXIMUM;
+ case ISD::VECREDUCE_FMINIMUM:
+ return ISD::FMINIMUM;
+ }
+}
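+
+// Example (illustrative): getVecReduceBaseOpcode(ISD::VECREDUCE_ADD) and
+// getVecReduceBaseOpcode(ISD::VP_REDUCE_ADD) both return ISD::ADD, the scalar
+// operation that the reduction applies across the vector lanes.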
+
+bool ISD::isVPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \
+ case ISD::VPSD: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+bool ISD::isVPBinaryOp(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_BINARYOP return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return false;
+}
+
+bool ISD::isVPReduction(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return false;
+}
+
+/// The operand position of the vector mask.
+std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return std::nullopt;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \
+ case ISD::VPSD: \
+ return MASKPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+/// The operand position of the explicit vector length parameter.
+std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return std::nullopt;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
+ case ISD::VPSD: \
+ return EVLPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
+ bool hasFPExcept) {
+ // FIXME: Return strict opcodes in case of fp exceptions.
+ switch (VPOpcode) {
+ default:
+ return std::nullopt;
+#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) case ISD::VPOPC:
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return ISD::SDOPC;
+#define END_REGISTER_VP_SDNODE(VPOPC) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return std::nullopt;
+}
+
+unsigned ISD::getVPForBaseOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("can not translate this Opcode to VP.");
+#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
+#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
+ case ISD::SEXTLOAD:
+ return ISD::SIGN_EXTEND;
+ case ISD::ZEXTLOAD:
+ return ISD::ZERO_EXTEND;
+ default:
+ break;
+ }
+
+ llvm_unreachable("Invalid LoadExtType");
+}
+
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
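+
+// Example (illustrative): swapping the operands of (setcc a, b, SETLT) gives
+// (setcc b, a, SETGT), so getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT;
+// likewise SETULE maps to SETUGE, while SETEQ and SETNE map to themselves.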
+
+static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) {
+ unsigned Operation = Op;
+ if (isIntegerLike)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) {
+ return getSetCCInverseImpl(Op, Type.isInteger());
+}
+
+ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op,
+ bool isIntegerLike) {
+ return getSetCCInverseImpl(Op, isIntegerLike);
+}
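+
+// Example (illustrative): for integer types getSetCCInverse turns ISD::SETEQ
+// into ISD::SETNE and ISD::SETLT into ISD::SETGE; for floating-point types the
+// orderedness also flips, so ISD::SETOLT inverts to ISD::SETUGE.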
+
+/// For an integer comparison, return 1 if the comparison is a signed operation
+/// and 2 if it is an unsigned comparison. Return zero if the operation does
+/// not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ EVT Type) {
+ bool IsInteger = Type.isInteger();
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set, then the resultant comparison DOES suddenly
+ // care about orderedness, and it is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ EVT Type) {
+ bool IsInteger = Type.isInteger();
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (IsInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
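+
+// Example (illustrative; VT names some integer EVT in scope):
+// getSetCCOrOperation(ISD::SETUGT, ISD::SETULT, VT) yields ISD::SETNE, and
+// getSetCCAndOperation(ISD::SETLE, ISD::SETGE, VT) yields ISD::SETEQ;
+// combining a signed with an unsigned comparison returns ISD::SETCC_INVALID
+// in either routine.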
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ ArrayRef<SDValue> Ops) {
+ for (const auto &Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ ArrayRef<SDUse> Ops) {
+ for (const auto &Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC,
+ SDVTList VTList, ArrayRef<SDValue> OpList) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList);
+}
+
+/// If this is an SDNode with special info, add this info to the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ case ISD::MCSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant: {
+ const ConstantSDNode *C = cast<ConstantSDNode>(N);
+ ID.AddPointer(C->getConstantIntValue());
+ ID.AddBoolean(C->isOpaque());
+ break;
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP:
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ if (cast<LifetimeSDNode>(N)->hasOffset()) {
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
+ }
+ break;
+ case ISD::PSEUDO_PROBE:
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlign().value());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(LD->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::VP_LOAD: {
+ const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
+ ID.AddInteger(ELD->getMemoryVT().getRawBits());
+ ID.AddInteger(ELD->getRawSubclassData());
+ ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ELD->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::VP_STORE: {
+ const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
+ ID.AddInteger(EST->getMemoryVT().getRawBits());
+ ID.AddInteger(EST->getRawSubclassData());
+ ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(EST->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
+ const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N);
+ ID.AddInteger(SLD->getMemoryVT().getRawBits());
+ ID.AddInteger(SLD->getRawSubclassData());
+ ID.AddInteger(SLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE: {
+ const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+ ID.AddInteger(SST->getMemoryVT().getRawBits());
+ ID.AddInteger(SST->getRawSubclassData());
+ ID.AddInteger(SST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_GATHER: {
+ const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
+ ID.AddInteger(EG->getMemoryVT().getRawBits());
+ ID.AddInteger(EG->getRawSubclassData());
+ ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(EG->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::VP_SCATTER: {
+ const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
+ ID.AddInteger(ES->getMemoryVT().getRawBits());
+ ID.AddInteger(ES->getRawSubclassData());
+ ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ES->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::MLOAD: {
+ const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ ID.AddInteger(MLD->getMemoryVT().getRawBits());
+ ID.AddInteger(MLD->getRawSubclassData());
+ ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MLD->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::MSTORE: {
+ const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ ID.AddInteger(MST->getMemoryVT().getRawBits());
+ ID.AddInteger(MST->getRawSubclassData());
+ ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MST->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::MGATHER: {
+ const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
+ ID.AddInteger(MG->getMemoryVT().getRawBits());
+ ID.AddInteger(MG->getRawSubclassData());
+ ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MG->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::MSCATTER: {
+ const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
+ ID.AddInteger(MS->getMemoryVT().getRawBits());
+ ID.AddInteger(MS->getRawSubclassData());
+ ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MS->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ ID.AddInteger(AT->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
+ break;
+ }
+ case ISD::AssertAlign:
+ ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
+ break;
+ case ISD::PREFETCH:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ // Handled by MemIntrinsicSDNode check after the switch.
+ break;
+ } // end switch (N->getOpcode())
+
+ // MemIntrinsic nodes could also have subclass data, address spaces, and flags
+ // to check.
+ if (auto *MN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ ID.AddInteger(MN->getRawSubclassData());
+ ID.AddInteger(MN->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MN->getMemOperand()->getFlags());
+ ID.AddInteger(MN->getMemoryVT().getRawBits());
+ }
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->ops());
+
+ // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (SDNode &Node : allnodes())
+ if (Node.use_empty())
+ DeadNodes.push_back(&Node);
+
+ RemoveDeadNodes(DeadNodes);
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+ // Skip to the next node if we've already managed to delete this one. This
+ // could happen if replacing a node causes a node previously added to the
+ // worklist to be deleted.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, nullptr);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N) {
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
+ RemoveDeadNodes(DeadNodes);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N->getIterator() != AllNodes.begin() &&
+ "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SDDbgInfo::add(SDDbgValue *V, bool isParameter) {
+ assert(!(V->isVariadic() && isParameter));
+ if (isParameter)
+ ByvalParmDbgValues.push_back(V);
+ else
+ DbgValues.push_back(V);
+ for (const SDNode *Node : V->getSDNodes())
+ if (Node)
+ DbgValMap[Node].push_back(V);
+}
+
+void SDDbgInfo::erase(const SDNode *Node) {
+ DbgValMapType::iterator I = DbgValMap.find(Node);
+ if (I == DbgValMap.end())
+ return;
+ for (auto &Val: I->second)
+ Val->setIsInvalidated();
+ DbgValMap.erase(I);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ // If we have operands, deallocate them.
+ removeOperands(N);
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ // FIXME: There are places in SDag that have grown a dependency on the opcode
+ // value in the released node.
+ __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
+ N->NodeType = ISD::DELETED_NODE;
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate
+ // them and forget about that node.
+ DbgInfo->erase(N);
+
+ // Invalidate extra info.
+ SDEI.erase(N);
+}
+
+#ifndef NDEBUG
+/// VerifySDNode - Check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (const SDUse &Op : N->ops()) {
+ assert((Op.getValueType() == EltVT ||
+ (EltVT.isInteger() && Op.getValueType().isInteger() &&
+ EltVT.bitsLE(Op.getValueType()))) &&
+ "Wrong operand type!");
+ assert(Op.getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+#endif // NDEBUG
+
+/// Insert a newly allocated node into the DAG.
+///
+/// Handles insertion into the all nodes list and CSE map, as well as
+/// verification and other common operations when a new node is allocated.
+void SelectionDAG::InsertNode(SDNode *N) {
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ N->PersistentId = NextPersistentId++;
+ VerifySDNode(N);
+#endif
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeInserted(N);
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>(
+ ESN->getSymbol(), ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::MCSymbol: {
+ auto *MCSN = cast<MCSymbolSDNode>(N);
+ Erased = MCSymbols.erase(MCSN->getMCSymbol());
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing);
+
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ Node->intersectFlagsWith(N->getFlags());
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ Node->intersectFlagsWith(N->getFlags());
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ Node->intersectFlagsWith(N->getFlags());
+ return Node;
+}
+
+Align SelectionDAG::getEVTAlign(EVT VT) const {
+ Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return getDataLayout().getABITypeAlign(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
+ : TM(tm), OptLevel(OL),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
+ Root(getEntryNode()) {
+ InsertNode(&EntryNode);
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &NewMF,
+ OptimizationRemarkEmitter &NewORE, Pass *PassPtr,
+ const TargetLibraryInfo *LibraryInfo,
+ UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
+ BlockFrequencyInfo *BFIin,
+ FunctionVarLocs const *VarLocs) {
+ MF = &NewMF;
+ SDAGISelPass = PassPtr;
+ ORE = &NewORE;
+ TLI = getSubtarget().getTargetLowering();
+ TSI = getSubtarget().getSelectionDAGInfo();
+ LibInfo = LibraryInfo;
+ Context = &MF->getFunction().getContext();
+ UA = NewUA;
+ PSI = PSIin;
+ BFI = BFIin;
+ FnVarLocs = VarLocs;
+}
+
+SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
+ allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
+ delete DbgInfo;
+}
+
+bool SelectionDAG::shouldOptForSize() const {
+ return MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI);
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(&AllNodes.front());
+#ifndef NDEBUG
+ NextPersistentId = 0;
+#endif
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
+ "debug location. Use another overload.");
+ }
+ }
+ return N;
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ const SDLoc &DL, void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ // Erase debug location from the node if the node is used at several
+ // different places. Do not propagate one location to all uses as it
+ // will cause a worse single stepping debugging experience.
+ if (N->getDebugLoc() != DL.getDebugLoc())
+ N->setDebugLoc(DebugLoc());
+ break;
+ default:
+ // When the node's point of use is located earlier in the instruction
+ // sequence than its prior point of use, update its debug info to the
+ // earlier location.
+ if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
+ N->setDebugLoc(DL.getDebugLoc());
+ break;
+ }
+ }
+ return N;
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ MCSymbols.clear();
+ SDEI.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(nullptr));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(nullptr));
+
+ EntryNode.UseList = nullptr;
+ InsertNode(&EntryNode);
+ Root = getEntryNode();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::FP_EXTEND, DL, VT, Op)
+ : getNode(ISD::FP_ROUND, DL, VT, Op,
+ getIntPtrConstant(0, DL, /*isTarget=*/true));
+}
+
+std::pair<SDValue, SDValue>
+SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
+ const SDLoc &DL, EVT VT) {
+ assert(!VT.bitsEq(Op.getValueType()) &&
+ "Strict no-op FP extend/round not allowed.");
+ SDValue Res =
+ VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
+ : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+ {Chain, Op, getIntPtrConstant(0, DL)});
+
+ return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
+}
+
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
+ EVT OpVT) {
+ if (VT.bitsLE(Op.getValueType()))
+ return getNode(ISD::TRUNCATE, SL, VT, Op);
+
+ TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
+ return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ EVT OpVT = Op.getValueType();
+ assert(VT.isInteger() && OpVT.isInteger() &&
+ "Cannot getZeroExtendInReg FP types");
+ assert(VT.isVector() == OpVT.isVector() &&
+ "getZeroExtendInReg type should be vector iff the operand "
+ "type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == OpVT.getVectorElementCount()) &&
+ "Vector element counts must match in getZeroExtendInReg");
+ assert(VT.bitsLE(OpVT) && "Not extending!");
+ if (OpVT == VT)
+ return Op;
+ APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
+ return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
+}
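+
+// Example (illustrative; DL is assumed to be an SDLoc in scope): for an i32
+// operand Op, getZeroExtendInReg(Op, DL, MVT::i8) emits (and Op, 0xFF),
+// clearing every bit above the low 8 while keeping the i32 result type.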
+
+SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZExtOrTrunc(Op, DL, VT);
+}
+
+SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZeroExtendInReg(Op, DL, VT);
+}
+
+SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) {
+ return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val);
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
+ return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
+}
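+
+// Example (illustrative; DL and X assumed in scope): getNOT(DL, X, MVT::i32)
+// produces (xor X, 0xFFFFFFFF), the bitwise complement of X, since the
+// all-ones constant has every bit of the destination type set.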
+
+SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
+ return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+}
+
+SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
+ SDValue Mask, SDValue EVL, EVT VT) {
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
+ return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
+}
+
+SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
+ SDValue Mask, SDValue EVL) {
+ return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL);
+}
+
+SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
+ SDValue Mask, SDValue EVL) {
+ if (VT.bitsGT(Op.getValueType()))
+ return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL);
+ if (VT.bitsLT(Op.getValueType()))
+ return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL);
+ return Op;
+}
+
+SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
+ EVT OpVT) {
+ if (!V)
+ return getConstant(0, DL, VT);
+
+ switch (TLI->getBooleanContents(OpVT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ return getConstant(1, DL, VT);
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return getAllOnesConstant(DL, VT);
+ }
+ llvm_unreachable("Unexpected boolean content enum!");
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
+ return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
+ EVT VT, bool isT, bool isO) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ const ConstantInt *Elt = &Val;
+
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+ // In other cases the element type is illegal and needs to be expanded, for
+ // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+ // the value into n parts and use a vector type with n-times the elements.
+ // Then bitcast to the type requested.
+ // Legalizing constants too early makes the DAGCombiner's job harder so we
+ // only legalize if the DAG tells us we must produce legal types.
+ else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+ TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ const APInt &NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+
+ // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
+ if (VT.isScalableVector()) {
+ assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
+ "Can only handle an even split!");
+ unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
+
+ SmallVector<SDValue, 2> ScalarParts;
+ for (unsigned i = 0; i != Parts; ++i)
+ ScalarParts.push_back(getConstant(
+ NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
+
+ return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
+ }
+
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+ // Check the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i)
+ EltParts.push_back(getConstant(
+ NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
+
+ // EltParts is currently in little endian order. If we actually want
+ // big-endian order then reverse it now.
+ if (getDataLayout().isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
+
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ llvm::append_range(Ops, EltParts);
+
+ SDValue V =
+ getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ return V;
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
+ ID.AddPointer(Elt);
+ ID.AddBoolean(isO);
+ void *IP = nullptr;
+ SDNode *N = nullptr;
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector())
+ Result = getSplat(VT, DL, Result);
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
+ return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
+}
+
+SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
+ const SDLoc &DL, bool LegalTypes) {
+ assert(VT.isInteger() && "Shift amount is not an integer type!");
+ EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
+ return getConstant(Val, DL, ShiftVT);
+}
+
+SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
+ return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
+ bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
+ EVT VT, bool isTarget) {
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
+ ID.AddPointer(&V);
+ void *IP = nullptr;
+ SDNode *N = nullptr;
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector())
+ Result = getSplat(VT, DL, Result);
+ NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
+ bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT == MVT::f32)
+ return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
+ if (EltVT == MVT::f64)
+ return getConstantFP(APFloat(Val), DL, VT, isTarget);
+ if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
+ EltVT == MVT::f16 || EltVT == MVT::bf16) {
+ bool Ignored;
+ APFloat APF = APFloat(Val);
+ APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &Ignored);
+ return getConstantFP(APF, DL, VT, isTarget);
+ }
+ llvm_unreachable("Unsupported type in getConstantFP");
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
+ EVT VT, int64_t Offset, bool isTargetGA,
+ unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ if (BitWidth < 64)
+ Offset = SignExtend64(Offset, BitWidth);
+
+ unsigned Opc;
+ if (GV->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<GlobalAddressSDNode>(
+ Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ ID.AddInteger(FI);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ MaybeAlign Alignment, int Offset,
+ bool isTarget, unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (!Alignment)
+ Alignment = shouldOptForSize()
+ ? getDataLayout().getABITypeAlign(C->getType())
+ : getDataLayout().getPrefTypeAlign(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ ID.AddInteger(Alignment->value());
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new constant pool: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ MaybeAlign Alignment, int Offset,
+ bool isTarget, unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (!Alignment)
+ Alignment = getDataLayout().getPrefTypeAlign(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ ID.AddInteger(Alignment->value());
+ ID.AddInteger(Offset);
+ C->addSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
+ ID.AddPointer(MBB);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<BasicBlockSDNode>(MBB);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = newSDNode<VTSDNode>(VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
+ SDNode *&N = MCSymbols[Sym];
+ if (N)
+ return SDValue(N, 0);
+ N = newSDNode<MCSymbolSDNode>(Sym, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (!CondCodeNodes[Cond]) {
+ auto *N = newSDNode<CondCodeSDNode>(Cond);
+ CondCodeNodes[Cond] = N;
+ InsertNode(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+ bool ConstantFold) {
+ assert(MulImm.getBitWidth() == VT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ if (MulImm == 0)
+ return getConstant(0, DL, VT);
+
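+ // If the function's vscale_range attribute pins vscale to a single value,
+ // fold vscale * MulImm down to a plain constant.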
+ if (ConstantFold) {
+ const MachineFunction &MF = getMachineFunction();
+ auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
+ if (Attr.isValid()) {
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
+ if (*VScaleMax == VScaleMin)
+ return getConstant(MulImm * VScaleMin, DL, VT);
+ }
+ }
+
+ return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+}
+
+SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
+ bool ConstantFold) {
+ if (EC.isScalable())
+ return getVScale(DL, VT,
+ APInt(VT.getSizeInBits(), EC.getKnownMinValue()));
+
+ return getConstant(EC.getKnownMinValue(), DL, VT);
+}
+
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
+ APInt One(ResVT.getScalarSizeInBits(), 1);
+ return getStepVector(DL, ResVT, One);
+}
+
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal) {
+ assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth());
+ if (ResVT.isScalableVector())
+ return getNode(
+ ISD::STEP_VECTOR, DL, ResVT,
+ getTargetConstant(StepVal, DL, ResVT.getVectorElementType()));
+
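+ // For fixed-length vectors, materialize the sequence <0, Step, 2*Step, ...>
+ // directly as a BUILD_VECTOR of constants.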
+ SmallVector<SDValue, 16> OpsStepConstants;
+ for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
+ OpsStepConstants.push_back(
+ getConstant(StepVal * i, DL, ResVT.getVectorElementType()));
+ return getBuildVector(ResVT, DL, OpsStepConstants);
+}
+
+/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
+/// point at N1 to point at N2 and indices that point at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
+ std::swap(N1, N2);
+ ShuffleVectorSDNode::commuteMask(M);
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
+ SDValue N2, ArrayRef<int> Mask) {
+ assert(VT.getVectorNumElements() == Mask.size() &&
+ "Must have the same number of vector elements as mask elements!");
+ assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+ "Invalid VECTOR_SHUFFLE");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ int NElts = Mask.size();
+ assert(llvm::all_of(Mask,
+ [&](int M) { return M < (NElts * 2) && M >= -1; }) &&
+ "Index out of range");
+
+ // Copy the mask so we can do any needed cleanup.
+ SmallVector<int, 8> MaskVec(Mask);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (int i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.isUndef())
+ commuteShuffle(N1, N2, MaskVec);
+
+ if (TLI->hasVectorBlend()) {
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+ }
+
+ // Canonicalize all indices into lhs -> shuffle lhs, undef
+ // Canonicalize all indices into rhs -> shuffle rhs, undef
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.isUndef();
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+ // Reset our undef status after accounting for the mask.
+ N2Undef = N2.isUndef();
+ // Re-check whether both sides ended up undef.
+ if (N1.isUndef() && N2Undef)
+ return getUNDEF(VT);
+
+ // If this is an identity shuffle, return that node.
+ bool Identity = true, AllSame = true;
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
+ }
+ if (Identity && NElts)
+ return N1;
+
+ // Shuffling a constant splat doesn't change the result.
+ if (N2Undef) {
+ SDValue V = N1;
+
+ // Look through any bitcasts. We check that these don't change the number
+ // (and size) of elements and just change their types.
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ // A splat should always show up as a build vector node.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ // If this is a splat of an undef, shuffling it is also undef.
+ if (Splat && Splat.isUndef())
+ return getUNDEF(VT);
+
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
+ // We only have a splat which can skip shuffles if there is a splatted
+ // value and no undef lanes rearranged by the shuffle.
+ if (Splat && UndefElements.none()) {
+ // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+ // number of elements matches or the splatted value is a zero constant.
+ if (SameNumElts)
+ return N1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+ if (C->isZero())
+ return N1;
+ }
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ EVT BuildVT = BV->getValueType(0);
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
+ for (int i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the NodeAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ llvm::copy(MaskVec, MaskAlloc);
+
+ auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
+ dl.getDebugLoc(), MaskAlloc);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
+ EVT VT = SV.getValueType(0);
+ SmallVector<int, 8> MaskVec(SV.getMask());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
+
+ SDValue Op0 = SV.getOperand(0);
+ SDValue Op1 = SV.getOperand(1);
+ return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt);
+ ID.AddInteger(RegNo);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
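+ // Ask the target whether this register is a source of divergence before the
+ // node is inserted.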
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), std::nullopt);
+ ID.AddPointer(RegMask);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
+ MCSymbol *Label) {
+ return getLabelNode(ISD::EH_LABEL, dl, Root, Label);
+}
+
+SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
+ SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops);
+ ID.AddPointer(Label);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N =
+ newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset, bool isTarget,
+ unsigned TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ ID.AddPointer(BA);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), std::nullopt);
+ ID.AddPointer(V);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<SrcValueSDNode>(V);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), std::nullopt);
+ ID.AddPointer(MD);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<MDNodeSDNode>(MD);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
+ if (VT == V.getValueType())
+ return V;
+
+ return getNode(ISD::BITCAST, SDLoc(V), VT, V);
+}
+
+SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
+ unsigned SrcAS, unsigned DestAS) {
+ SDValue Ops[] = {Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
+ ID.AddInteger(SrcAS);
+ ID.AddInteger(DestAS);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VT, SrcAS, DestAS);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFreeze(SDValue V) {
+ return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
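+ // Vector shift amounts are used as-is; scalar amounts are zero-extended or
+ // truncated to the target's shift amount type.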
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
+}
+
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ const MaybeAlign MA(Node->getConstantOperandVal(3));
+
+ SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
+ Tmp2, MachinePointerInfo(V));
+ SDValue VAList = VAListLoad;
+
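+ // If the argument needs more than the minimum stack argument alignment,
+ // round the va_list pointer up: add (align - 1) and mask off the low bits.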
+ if (MA && *MA > TLI.getMinStackArgumentAlignment()) {
+ VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(MA->value() - 1, dl, VAList.getValueType()));
+
+ VAList =
+ getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)MA->value(), dl, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*getContext())),
+ dl, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp1 =
+ getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
+ // Load the actual argument out of the pointer VAList
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
+}
+
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ SDValue Tmp1 =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS));
+ return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD));
+}
+
+Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
+ const DataLayout &DL = getDataLayout();
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+
+ if (TLI->isTypeLegal(VT) || !VT.isVector())
+ return RedAlign;
+
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ const Align StackAlign = TFI->getStackAlign();
+
+ // See if we can choose a smaller ABI alignment in cases where it's an
+ // illegal vector type that will get broken down.
+ if (RedAlign > StackAlign) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ Ty = IntermediateVT.getTypeForEVT(*getContext());
+ Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+ if (RedAlign2 < RedAlign)
+ RedAlign = RedAlign2;
+ }
+
+ return RedAlign;
+}
+
+SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int StackID = 0;
+ if (Bytes.isScalable())
+ StackID = TFI->getStackIDForScalableVectors();
+ // The stack id gives an indication of whether the object is scalable or
+ // not, so it's safe to pass in the minimum size here.
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment,
+ false, nullptr, StackID);
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
+}
+
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ Align StackAlign =
+ std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));
+ return CreateStackTemporary(VT.getStoreSize(), StackAlign);
+}
+
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ TypeSize VT1Size = VT1.getStoreSize();
+ TypeSize VT2Size = VT2.getStoreSize();
+ assert(VT1Size.isScalable() == VT2Size.isScalable() &&
+ "Don't know how to choose the maximum size when creating a stack "
+ "temporary");
+ TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue()
+ ? VT1Size
+ : VT2Size;
+
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const DataLayout &DL = getDataLayout();
+ Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2));
+ return CreateStackTemporary(Bytes, Align);
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
+ ISD::CondCode Cond, const SDLoc &dl) {
+ EVT OpVT = N1.getValueType();
+
+ auto GetUndefBooleanConstant = [&]() {
+ if (VT.getScalarType() == MVT::i1 ||
+ TLI->getBooleanContents(OpVT) ==
+ TargetLowering::UndefinedBooleanContent)
+ return getUNDEF(VT);
+ // ZeroOrOne / ZeroOrNegative require specific values for the high bits,
+ // so we cannot use getUNDEF(). Return zero instead.
+ return getConstant(0, dl, VT);
+ };
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!OpVT.isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (OpVT.isInteger()) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ // icmp eq/ne X, undef -> undef.
+ if ((N1.isUndef() || N2.isUndef()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE))
+ return GetUndefBooleanConstant();
+
+ // If both operands are undef, we can return undef for int comparison.
+ // icmp undef, undef -> undef.
+ if (N1.isUndef() && N2.isUndef())
+ return GetUndefBooleanConstant();
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if (N1 == N2)
+ return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)),
+ dl, VT, OpVT);
+ }
+ }
+
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ if (N1CFP && N2CFP) {
+ APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return GetUndefBooleanConstant();
+ [[fallthrough]];
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return GetUndefBooleanConstant();
+ [[fallthrough]];
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return GetUndefBooleanConstant();
+ [[fallthrough]];
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return GetUndefBooleanConstant();
+ [[fallthrough]];
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return GetUndefBooleanConstant();
+ [[fallthrough]];
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return GetUndefBooleanConstant();
+ [[fallthrough]];
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ }
+ } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
+ // Ensure that the constant occurs on the RHS.
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
+ return SDValue();
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
+ } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
+ (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
+ // If an operand is known to be a nan (or undef that could be a nan), we can
+ // fold it.
+ // Choosing NaN for the undef will always make unordered comparisons succeed
+ // and ordered comparisons fail.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default:
+ llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return getBoolConstant(false, dl, VT, OpVT);
+ case 1: // Known true.
+ return getBoolConstant(true, dl, VT, OpVT);
+ case 2: // Undefined.
+ return GetUndefBooleanConstant();
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+/// Mask is known to be zero for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
+/// MaskedVectorIsZero - Return true if 'Op' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts,
+ unsigned Depth /* = 0 */) const {
+ return computeKnownBits(V, DemandedElts, Depth).isZero();
+}
+
+/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
+bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
+}
+
+APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask.");
+
+ APInt KnownZeroElements = APInt::getZero(NumElts);
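+ // Test each demanded element in isolation; a lane is known zero only if all
+ // of its bits are known zero.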
+ for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
+ if (!DemandedElts[EltIdx])
+ continue; // Don't query elements that are not demanded.
+ APInt Mask = APInt::getOneBitSet(NumElts, EltIdx);
+ if (MaskedVectorIsZero(Op, Mask, Depth))
+ KnownZeroElements.setBit(EltIdx);
+ }
+ return KnownZeroElements;
+}
+
+/// isSplatValue - Return true if the vector V has the same value
+/// across all DemandedElts. For scalable vectors, we don't know the
+/// number of lanes at compile time. Instead, we use a 1 bit APInt
+/// to represent a conservative value for all lanes; that is, that
+/// one bit value is implicitly splatted across all lanes.
+bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
+ APInt &UndefElts, unsigned Depth) const {
+ unsigned Opcode = V.getOpcode();
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+ assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) &&
+ "scalable demanded bits are ignored");
+
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ // Deal with some common cases here that work for both fixed and scalable
+ // vector types.
+ switch (Opcode) {
+ case ISD::SPLAT_VECTOR:
+ UndefElts = V.getOperand(0).isUndef()
+ ? APInt::getAllOnes(DemandedElts.getBitWidth())
+ : APInt(DemandedElts.getBitWidth(), 0);
+ return true;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ return false;
+ }
+ case ISD::ABS:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1);
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this,
+ Depth);
+ break;
+ }
+
+ // We don't support other cases than those above for scalable vectors at
+ // the moment.
+ if (VT.isScalableVector())
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
+ UndefElts = APInt::getZero(NumElts);
+
+ switch (Opcode) {
+ case ISD::BUILD_VECTOR: {
+ SDValue Scl;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = V.getOperand(i);
+ if (Op.isUndef()) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (Scl && Scl != Op)
+ return false;
+ Scl = Op;
+ }
+ return true;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat or a shuffle of a splat.
+ APInt DemandedLHS = APInt::getZero(NumElts);
+ APInt DemandedRHS = APInt::getZero(NumElts);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ // If we aren't demanding either op, assume there's no splat.
+ // If we are demanding both ops, assume there's no splat.
+ if ((DemandedLHS.isZero() && DemandedRHS.isZero()) ||
+ (!DemandedLHS.isZero() && !DemandedRHS.isZero()))
+ return false;
+
+ // See if the demanded elts of the source op form a splat, or we only demand
+ // one element, which should always be a splat.
+ // TODO: Handle source ops splats with undefs.
+ auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
+ APInt SrcUndefs;
+ return (SrcElts.popcount() == 1) ||
+ (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
+ (SrcElts & SrcUndefs).isZero());
+ };
+ if (!DemandedLHS.isZero())
+ return CheckSplatSrc(V.getOperand(0), DemandedLHS);
+ return CheckSplatSrc(V.getOperand(1), DemandedRHS);
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
+ SDValue Src = V.getOperand(0);
+ // We don't support scalable vectors at the moment.
+ if (Src.getValueType().isScalableVector())
+ return false;
+ uint64_t Idx = V.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt UndefSrcElts;
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
+ return true;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ // Widen the demanded elts by the src element count.
+ SDValue Src = V.getOperand(0);
+ // We don't support scalable vectors at the moment.
+ if (Src.getValueType().isScalableVector())
+ return false;
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt UndefSrcElts;
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
+ UndefElts = UndefSrcElts.trunc(NumElts);
+ return true;
+ }
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = V.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ // TODO: Add fp support?
+ if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger())
+ break;
+
+ // Bitcast 'small element' vector to 'large element' vector.
+ if ((BitWidth % SrcBitWidth) == 0) {
+ // See if each sub element is a splat.
+ unsigned Scale = BitWidth / SrcBitWidth;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt ScaledDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ for (unsigned I = 0; I != Scale; ++I) {
+ APInt SubUndefElts;
+ APInt SubDemandedElt = APInt::getOneBitSet(Scale, I);
+ APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt);
+ SubDemandedElts &= ScaledDemandedElts;
+ if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
+ return false;
+ // TODO: Add support for merging sub undef elements.
+ if (!SubUndefElts.isZero())
+ return false;
+ }
+ return true;
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Helper wrapper for the main isSplatValue function.
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const {
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+
+ APInt UndefElts;
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts
+ = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements());
+ return isSplatValue(V, DemandedElts, UndefElts) &&
+ (AllowUndefs || !UndefElts);
+}
+
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+ V = peekThroughExtractSubvectors(V);
+
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts
+ = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements());
+
+ if (isSplatValue(V, DemandedElts, UndefElts)) {
+ if (VT.isScalableVector()) {
+ // DemandedElts and UndefElts are ignored for scalable vectors, since
+ // the only supported cases are SPLAT_VECTOR nodes.
+ SplatIdx = 0;
+ } else {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countr_one();
+ }
+ return V;
+ }
+ break;
+ }
+ case ISD::SPLAT_VECTOR:
+ SplatIdx = 0;
+ return V;
+ case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) {
+ int SplatIdx;
+ if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) {
+ EVT SVT = SrcVector.getValueType().getScalarType();
+ EVT LegalSVT = SVT;
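+ // If a legal type is required, only integer scalars can be promoted; give up
+ // if the transformed type would be narrower than the splat element.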
+ if (LegalTypes && !TLI->isTypeLegal(SVT)) {
+ if (!SVT.isInteger())
+ return SDValue();
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
+ return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), LegalSVT, SrcVector,
+ getVectorIdxConstant(SplatIdx, SDLoc(V)));
+ }
+ return SDValue();
+}
+
+const APInt *
+SelectionDAG::getValidShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.ult(BitWidth))
+ return &ShAmt;
+ }
+ return nullptr;
+}
+
+const APInt *SelectionDAG::getValidMinimumShiftAmountConstant(
+ SDValue V, const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+ return ValidAmt;
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MinShAmt = nullptr;
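+ // No constant splat: scan the per-lane shift amounts and keep the smallest
+ // in-range constant, bailing out on any non-constant or out-of-range lane.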
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MinShAmt && MinShAmt->ule(ShAmt))
+ continue;
+ MinShAmt = &ShAmt;
+ }
+ return MinShAmt;
+}
+
+const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
+ SDValue V, const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+ return ValidAmt;
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MaxShAmt = nullptr;
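+ // As above, but track the largest in-range per-lane shift amount.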
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MaxShAmt && MaxShAmt->uge(ShAmt))
+ continue;
+ MaxShAmt = &ShAmt;
+ }
+ return MaxShAmt;
+}
+
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in Known. For vectors, the known bits are those that are shared by
+/// every vector element.
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return computeKnownBits(Op, DemandedElts, Depth);
+}
+
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in Known. The DemandedElts argument allows us to only collect the known
+/// bits that are shared by the requested vector elements.
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+
+ KnownBits Known(BitWidth); // Don't know anything.
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ // We know all of the bits for a constant!
+ return KnownBits::makeConstant(C->getAPIntValue());
+ }
+ if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ // We know all of the bits for a constant fp!
+ return KnownBits::makeConstant(C->getValueAPF().bitcastToAPInt());
+ }
+
+ if (Depth >= MaxRecursionDepth)
+ return Known; // Limit search depth.
+
+ KnownBits Known2;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ assert((!Op.getValueType().isFixedLengthVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ if (!DemandedElts)
+ return Known; // No demanded elts, better to assume we don't know anything.
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::MERGE_VALUES:
+ return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts,
+ Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ SDValue SrcOp = Op.getOperand(0);
+ assert(SrcOp.getValueSizeInBits() >= BitWidth &&
+ "Expected SPLAT_VECTOR implicit truncation");
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ assert(!Op.getValueType().isScalableVector());
+ // Collect the known bits that are shared by every demanded vector element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ Known2 = computeKnownBits(SrcOp, Depth + 1);
+
+ // BUILD_VECTOR can implicitly truncate sources; we must handle this.
+ if (SrcOp.getValueSizeInBits() != BitWidth) {
+ assert(SrcOp.getValueSizeInBits() > BitWidth &&
+ "Expected BUILD_VECTOR implicit truncation");
+ Known2 = Known2.trunc(BitWidth);
+ }
+
+ // Known bits are the values that are shared by every demanded element.
+ Known = Known.intersectWith(Known2);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ assert(!Op.getValueType().isScalableVector());
+ // Collect the known bits that are shared by every vector element referenced
+ // by the shuffle.
+ APInt DemandedLHS, DemandedRHS;
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
+ DemandedLHS, DemandedRHS))
+ break;
+
+ // Known bits are the values that are shared by every demanded element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ SDValue LHS = Op.getOperand(0);
+ Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ if (!!DemandedRHS) {
+ SDValue RHS = Op.getOperand(1);
+ Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ break;
+ }
+ case ISD::VSCALE: {
+ const Function &F = getMachineFunction().getFunction();
+ const APInt &Multiplier = Op.getConstantOperandAPInt(0);
+ Known = getVScaleRange(&F, BitWidth).multiply(Multiplier).toKnownBits();
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ // Split DemandedElts and test each of the demanded subvectors.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; i != NumSubVectors; ++i) {
+ APInt DemandedSub =
+ DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts);
+ if (!!DemandedSub) {
+ SDValue Sub = Op.getOperand(i);
+ Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ // Demand any elements from the subvector and the remainder from the src it's
+ // inserted into.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
+
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ if (!!DemandedSubElts) {
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ }
+ if (!!DemandedSrcElts) {
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
+ SDValue Src = Op.getOperand(0);
+ // Bail until we can represent demanded elements for scalable vectors.
+ if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ // We know as much about scalar_to_vector as we know about its source, which
+ // becomes the first element of an otherwise unknown vector.
+ if (DemandedElts != 1)
+ break;
+
+ SDValue N0 = Op.getOperand(0);
+ Known = computeKnownBits(N0, Depth + 1);
+ if (N0.getValueSizeInBits() != BitWidth)
+ Known = Known.trunc(BitWidth);
+
+ break;
+ }
+ case ISD::BITCAST: {
+ if (Op.getValueType().isScalableVector())
+ break;
+
+ SDValue N0 = Op.getOperand(0);
+ EVT SubVT = N0.getValueType();
+ unsigned SubBitWidth = SubVT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (BitWidth == SubBitWidth) {
+ Known = computeKnownBits(N0, DemandedElts, Depth + 1);
+ break;
+ }
+
+ bool IsLE = getDataLayout().isLittleEndian();
+
+ // Bitcast 'small element' vector to 'large element' scalar/vector.
+ if ((BitWidth % SubBitWidth) == 0) {
+ assert(N0.getValueType().isVector() && "Expected bitcast from vector");
+
+ // Collect known bits for the (larger) output by collecting the known
+ // bits from each set of sub elements and shift these into place.
+ // We need to separately call computeKnownBits for each set of
+ // sub elements as the knownbits for each is likely to be different.
+ unsigned SubScale = BitWidth / SubBitWidth;
+ APInt SubDemandedElts(NumElts * SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i * SubScale);
+
+ for (unsigned i = 0; i != SubScale; ++i) {
+ Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
+ Depth + 1);
+ unsigned Shifts = IsLE ? i : SubScale - 1 - i;
+ Known.insertBits(Known2, SubBitWidth * Shifts);
+ }
+ }
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ if ((SubBitWidth % BitWidth) == 0) {
+ assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+
+ // Collect known bits for the (smaller) output by collecting the known
+ // bits from the overlapping larger input elements and extracting the
+ // sub sections we actually care about.
+ unsigned SubScale = SubBitWidth / BitWidth;
+ APInt SubDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
+ Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
+
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+ unsigned Offset = (Shifts % SubScale) * BitWidth;
+ Known = Known.intersectWith(Known2.extractBits(BitWidth, Offset));
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ }
+ break;
+ }
+ case ISD::AND:
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ Known &= Known2;
+ break;
+ case ISD::OR:
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ Known |= Known2;
+ break;
+ case ISD::XOR:
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ Known ^= Known2;
+ break;
+ case ISD::MUL: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ // TODO: SelfMultiply can be poison, but not undef.
+ if (SelfMultiply)
+ SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
+ Op.getOperand(0), DemandedElts, false, Depth + 1);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
+
+ // If the multiplication is known not to overflow, the product of a number
+ // with itself is non-negative. Only do this if we didn't already compute
+ // the opposite value for the sign bit.
+ if (Op->getFlags().hasNoSignedWrap() &&
+ Op.getOperand(0) == Op.getOperand(1) &&
+ !Known.isNegative())
+ Known.makeNonNegative();
+ break;
+ }
+ case ISD::MULHU: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::mulhu(Known, Known2);
+ break;
+ }
+ case ISD::MULHS: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::mulhs(Known, Known2);
+ break;
+ }
+ case ISD::UMUL_LOHI: {
+ assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ if (Op.getResNo() == 0)
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
+ else
+ Known = KnownBits::mulhu(Known, Known2);
+ break;
+ }
+ case ISD::SMUL_LOHI: {
+ assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ if (Op.getResNo() == 0)
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
+ else
+ Known = KnownBits::mulhs(Known, Known2);
+ break;
+ }
+ case ISD::AVGCEILU: {
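+ // avgceilu(X, Y) = (X + Y + 1) >> 1: widen both operands by one bit, add
+ // with a carry-in of one, then drop the low bit of the result.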
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth + 1);
+ Known2 = Known2.zext(BitWidth + 1);
+ KnownBits One = KnownBits::makeConstant(APInt(1, 1));
+ Known = KnownBits::computeForAddCarry(Known, Known2, One);
+ Known = Known.extractBits(BitWidth, 1);
+ break;
+ }
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
+
+ // Only known if known in both the LHS and RHS.
+ Known = Known.intersectWith(Known2);
+ break;
+ case ISD::SELECT_CC:
+ Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
+
+ // Only known if known in both the LHS and RHS.
+ Known = Known.intersectWith(Known2);
+ break;
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integers.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ case ISD::SETCC:
+ case ISD::SETCCCARRY:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case ISD::SHL:
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::shl(Known, Known2);
+
+ // Minimum shift low bits are known zero.
+ if (const APInt *ShMinAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Known.Zero.setLowBits(ShMinAmt->getZExtValue());
+ break;
+ case ISD::SRL:
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::lshr(Known, Known2);
+
+ // Minimum shift high bits are known zero.
+ if (const APInt *ShMinAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
+ break;
+ case ISD::SRA:
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::ashr(Known, Known2);
+ break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+ DemandedElts, Depth + 1);
+ break;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Opcode == ISD::FSHL) {
+ Known.One <<= Amt;
+ Known.Zero <<= Amt;
+ Known2.One.lshrInPlace(BitWidth - Amt);
+ Known2.Zero.lshrInPlace(BitWidth - Amt);
+ } else {
+ Known.One <<= BitWidth - Amt;
+ Known.Zero <<= BitWidth - Amt;
+ Known2.One.lshrInPlace(Amt);
+ Known2.Zero.lshrInPlace(Amt);
+ }
+ Known = Known.unionWith(Known2);
+ }
+ break;
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: {
+ assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+
+ // Collect lo/hi source values and concatenate.
+ unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits();
+ unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = Known2.concat(Known);
+
+ // Collect shift amount.
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+ if (Opcode == ISD::SHL_PARTS)
+ Known = KnownBits::shl(Known, Known2);
+ else if (Opcode == ISD::SRA_PARTS)
+ Known = KnownBits::ashr(Known, Known2);
+ else // if (Opcode == ISD::SRL_PARTS)
+ Known = KnownBits::lshr(Known, Known2);
+
+ // TODO: Minimum shift low/high bits are known zero.
+
+ if (Op.getResNo() == 0)
+ Known = Known.extractBits(LoBits, 0);
+ else
+ Known = Known.extractBits(HiBits, LoBits);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ Known = Known.sextInReg(EVT.getScalarSizeInBits());
+ break;
+ }
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleTZ = Known2.countMaxTrailingZeros();
+ unsigned LowBits = llvm::bit_width(PossibleTZ);
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleLZ = Known2.countMaxLeadingZeros();
+ unsigned LowBits = llvm::bit_width(PossibleLZ);
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
+ case ISD::CTPOP: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If we know some of the bits are zero, they can't be one.
+ unsigned PossibleOnes = Known2.countMaxPopulation();
+ Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes));
+ break;
+ }
+ case ISD::PARITY: {
+ // Parity returns 0 everywhere but the LSB.
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
+ if (ISD::isNON_EXTLoad(LD) && Cst) {
+ // Determine any common known bits from the loaded constant pool value.
+ Type *CstTy = Cst->getType();
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() &&
+ !Op.getValueType().isScalableVector()) {
+ // If it's a vector splat, then we can (quickly) reuse the scalar path.
+ // NOTE: We assume all elements match and none are UNDEF.
+ if (CstTy->isVectorTy()) {
+ if (const Constant *Splat = Cst->getSplatValue()) {
+ Cst = Splat;
+ CstTy = Cst->getType();
+ }
+ }
+ // TODO - do we need to handle different bitwidths?
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
+ // Iterate across all vector elements finding common known bits.
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ }
+ Known.One.clearAllBits();
+ Known.Zero.clearAllBits();
+ break;
+ }
+ } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+ Known = KnownBits::makeConstant(CInt->getValue());
+ } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+ Known =
+ KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt());
+ }
+ }
+ }
+ } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarSizeInBits();
+ Known.Zero.setBitsFrom(MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ EVT VT = LD->getValueType(0);
+
+ // TODO: Handle extending loads
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (VT.isVector()) {
+ // Handle truncation to the first demanded element.
+ // TODO: Figure out which demanded elements are covered
+ if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
+ break;
+
+ // Handle the case where a load has a vector type, but scalar memory
+ // with an attached range.
+ EVT MemVT = LD->getMemoryVT();
+ KnownBits KnownFull(MemVT.getSizeInBits());
+
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownFull);
+ Known = KnownFull.trunc(BitWidth);
+ } else
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth);
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth);
+ break;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ Known = Known.anyext(BitWidth);
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.anyext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
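+ // Bits above the asserted width are known zero; drop any One bits that
+ // would now conflict with them.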
+ Known.Zero |= (~InMask);
+ Known.One &= (~Known.Zero);
+ break;
+ }
+ case ISD::AssertAlign: {
+ unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
+ assert(LogOfAlign != 0);
+
+ // TODO: Should use maximum with source
+ // If a node is guaranteed to be aligned, set low zero bits accordingly as
+ // well as clearing one bits.
+ Known.Zero.setLowBits(LogOfAlign);
+ Known.One.clearLowBits(LogOfAlign);
+ break;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ Known.Zero.setBitsFrom(1);
+ break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
+ Flags.hasNoSignedWrap(), Known, Known2);
+ break;
+ }
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::USUBO_CARRY:
+ case ISD::SSUBO_CARRY:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ [[fallthrough]];
+ case ISD::SUBC: {
+ assert(Op.getResNo() == 0 &&
+ "We only compute knownbits for the difference here.");
+
+ // TODO: Compute influence of the carry operand.
+ if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
+ break;
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
+ Known, Known2);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::UADDO_CARRY:
+ case ISD::SADDO_CARRY:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ [[fallthrough]];
+ case ISD::ADDC:
+ case ISD::ADDE: {
+ assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");
+
+ // With ADDE and UADDO_CARRY, a carry bit may be added in.
+ KnownBits Carry(1);
+ if (Opcode == ISD::ADDE)
+ // Can't track carry from glue, set carry to unknown.
+ Carry.resetAll();
+ else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
+ // TODO: Compute known bits for the carry operand. Not sure if it is worth
+ // the trouble (how often will we find a known carry bit). And I haven't
+ // tested this very much yet, but something like this might work:
+ // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry = Carry.zextOrTrunc(1, false);
+ Carry.resetAll();
+ else
+ Carry.setAllZero();
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
+ break;
+ }
+ case ISD::UDIV: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::udiv(Known, Known2, Op->getFlags().hasExact());
+ break;
+ }
+ case ISD::SDIV: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::sdiv(Known, Known2, Op->getFlags().hasExact());
+ break;
+ }
+ case ISD::SREM: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::srem(Known, Known2);
+ break;
+ }
+ case ISD::UREM: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::urem(Known, Known2);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ const unsigned Index = Op.getConstantOperandVal(1);
+ const unsigned EltBitWidth = Op.getValueSizeInBits();
+
+ // Remove low part of known bits mask
+ Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+ Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+
+ // Remove high part of known bit mask
+ Known = Known.trunc(EltBitWidth);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
+ // computeKnownBits not yet implemented for scalable vectors.
+ if (VecVT.isScalableVector())
+ break;
+ const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
+ // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+ // anything about the extended bits.
+ if (BitWidth > EltBitWidth)
+ Known = Known.trunc(EltBitWidth);
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
+ auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1);
+ if (BitWidth > EltBitWidth)
+ Known = Known.anyext(BitWidth);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ if (Op.getValueType().isScalableVector())
+ break;
+
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element, otherwise assume we need
+ // the original demanded vector elements and the value.
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+ bool DemandedVal = true;
+ APInt DemandedVecElts = DemandedElts;
+ auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ unsigned EltIdx = CEltNo->getZExtValue();
+ DemandedVal = !!DemandedElts[EltIdx];
+ DemandedVecElts.clearBit(EltIdx);
+ }
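+ // Start from the all-known state (Zero and One both all-ones), which is
+ // the identity for the intersectWith calls below.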
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ if (DemandedVal) {
+ Known2 = computeKnownBits(InVal, Depth + 1);
+ Known = Known.intersectWith(Known2.zextOrTrunc(BitWidth));
+ }
+ if (!!DemandedVecElts) {
+ Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ break;
+ }
+ case ISD::BITREVERSE: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known2.reverseBits();
+ break;
+ }
+ case ISD::BSWAP: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known2.byteSwap();
+ break;
+ }
+ case ISD::ABS: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known2.abs();
+ break;
+ }
+ case ISD::USUBSAT: {
+ // The result of usubsat will never be larger than the LHS.
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known.Zero.setHighBits(Known2.countMinLeadingZeros());
+ break;
+ }
+ case ISD::UMIN: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known, Known2);
+ break;
+ }
+ case ISD::UMAX: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known, Known2);
+ break;
+ }
+ case ISD::SMIN:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
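+ // E.g. smax(smin(X, 100), 10) clamps X to [10, 100]; with both bounds
+ // non-negative, the high zero bits common to 10 and 100 are known zero.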
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ const APInt &ValueHigh = CstHigh->getAPIntValue();
+ if (ValueLow.sle(ValueHigh)) {
+ unsigned LowSignBits = ValueLow.getNumSignBits();
+ unsigned HighSignBits = ValueHigh.getNumSignBits();
+ unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
+ if (ValueLow.isNegative() && ValueHigh.isNegative()) {
+ Known.One.setHighBits(MinSignBits);
+ break;
+ }
+ if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
+ Known.Zero.setHighBits(MinSignBits);
+ break;
+ }
+ }
+ }
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (IsMax)
+ Known = KnownBits::smax(Known, Known2);
+ else
+ Known = KnownBits::smin(Known, Known2);
+
+ // For SMAX, if CstLow is non-negative we know the result will be
+ // non-negative and thus all sign bits are 0.
+ // TODO: There's an equivalent of this for smin with negative constant for
+ // known ones.
+ if (IsMax && CstLow) {
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ if (ValueLow.isNonNegative()) {
+ unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits()));
+ }
+ }
+
+ break;
+ }
+ case ISD::FP_TO_UINT_SAT: {
+ // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ if (Op.getResNo() == 1) {
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integer.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ [[fallthrough]];
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD: {
+ unsigned MemBits =
+ cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
+ // If we are looking at the loaded value.
+ if (Op.getResNo() == 0) {
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ Known.Zero.setBitsFrom(MemBits);
+ }
+ break;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
+ Known, getMachineFunction());
+ break;
+
+ default:
+ if (Opcode < ISD::BUILTIN_OP_END)
+ break;
+ [[fallthrough]];
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (Op.getValueType().isScalableVector())
+ break;
+
+ // Allow the target to implement this method for its nodes.
+ TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
+ break;
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ return Known;
+}
+
+/// Convert ConstantRange OverflowResult into SelectionDAG::OverflowKind.
+static SelectionDAG::OverflowKind
+mapOverflowResult(ConstantRange::OverflowResult OR) {
+ switch (OR) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return SelectionDAG::OFK_Sometime;
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+ return SelectionDAG::OFK_Always;
+ case ConstantRange::OverflowResult::NeverOverflows:
+ return SelectionDAG::OFK_Never;
+ }
+ llvm_unreachable("Unknown OverflowResult");
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const {
+ // X + 0 never overflows.
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ // If both operands each have at least two sign bits, the addition
+ // cannot overflow.
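+ // (Each value then fits in a signed integer one bit narrower than the
+ // result, so their sum stays within the signed range.)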
+ if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
+ return OFK_Never;
+
+ // TODO: Add ConstantRange::signedAddMayOverflow handling.
+ return OFK_Sometime;
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const {
+ // X + 0 never overflows.
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ // mulhi + 1 never overflows.
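+ // (The high half of a full BW x BW unsigned multiply is at most 2^BW - 2,
+ // so adding a value known to be at most 1 cannot wrap.)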
+ KnownBits N1Known = computeKnownBits(N1);
+ if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
+ N1Known.getMaxValue().ult(2))
+ return OFK_Never;
+
+ KnownBits N0Known = computeKnownBits(N0);
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1 &&
+ N0Known.getMaxValue().ult(2))
+ return OFK_Never;
+
+ // Fallback to ConstantRange::unsignedAddMayOverflow handling.
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedAddMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
+ // X - 0 never overflows.
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ // If both operands each have at least two sign bits, the subtraction
+ // cannot overflow.
+ if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
+ return OFK_Never;
+
+ // TODO: Add ConstantRange::signedSubMayOverflow handling.
+ return OFK_Sometime;
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
+ // X - 0 never overflows.
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
+ return OFK_Sometime;
+}
+
+bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarSizeInBits();
+
+ // Is the constant a known power of 2?
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
+ return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL) {
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
+ if (C && C->getAPIntValue() == 1)
+ return true;
+ }
+
+ // Similarly, a logical right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL) {
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
+ if (C && C->getAPIntValue().isSignMask())
+ return true;
+ }
+
+ // Are all operands of a build vector constant powers of two?
+ if (Val.getOpcode() == ISD::BUILD_VECTOR)
+ if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ return false;
+ }))
+ return true;
+
+ // Is the operand of a splat vector a constant power of two?
+ if (Val.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))
+ if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
+ return true;
+
+ // vscale(power-of-two) is a power-of-two for some targets
+ if (Val.getOpcode() == ISD::VSCALE &&
+ getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+ return false;
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ComputeNumSignBits(Op, DemandedElts, Depth);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ const APInt &Val = C->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ if (Depth >= MaxRecursionDepth)
+ return 1; // Limit search depth.
+
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+ case ISD::MERGE_VALUES:
+ return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts,
+ Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ assert(!VT.isScalableVector());
+ Tmp = VTBits;
+ for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ // BUILD_VECTOR can implicitly truncate sources, we handle this specially
+ // for constant nodes to ensure we only look at the sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SrcOp)) {
+ APInt T = C->getAPIntValue().trunc(VTBits);
+ Tmp2 = T.getNumSignBits();
+ } else {
+ Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
+
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
+ }
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ return Tmp;
+
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ APInt DemandedLHS, DemandedRHS;
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
+ DemandedLHS, DemandedRHS))
+ return 1;
+
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ break;
+ SDValue N0 = Op.getOperand(0);
+ EVT SrcVT = N0.getValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (VTBits == SrcBits)
+ return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+
+ bool IsLE = getDataLayout().isLittleEndian();
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ if ((SrcBits % VTBits) == 0) {
+ assert(VT.isVector() && "Expected bitcast to vector");
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);
+
+ // Fast case - sign splat can be simply split across the small elements.
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
+ if (Tmp == SrcBits)
+ return VTBits;
+
+ // Slow case - determine how far the sign extends into each sub-element.
+ Tmp2 = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned SubOffset = i % Scale;
+ SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset);
+ SubOffset = SubOffset * VTBits;
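+ // SubOffset is now the bit distance from the top of the wide source
+ // element down to the top of this sub-element; only source sign bits
+ // extending past it cover this sub-element's sign bit.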
+ if (Tmp <= SubOffset)
+ return 1;
+ Tmp2 = std::min(Tmp2, Tmp - SubOffset);
+ }
+ return Tmp2;
+ }
+ break;
+ }
+
+ case ISD::FP_TO_SINT_SAT:
+ // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT.
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
+ Tmp = VTBits-Tmp+1;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ return std::max(Tmp, Tmp2);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (VT.isScalableVector())
+ break;
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+ Tmp = VTBits - SrcVT.getScalarSizeInBits();
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
+ }
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // SRA X, C -> adds C sign bits.
+ if (const APInt *ShAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
+ return Tmp;
+ case ISD::SHL:
+ if (const APInt *ShAmt =
+ getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ // shl destroys sign bits, ensure it doesn't shift out all sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (ShAmt->ult(Tmp))
+ return Tmp - ShAmt->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // computeKnownBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SELECT_CC:
+ Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SMIN:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+ if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
+ Tmp = CstLow->getAPIntValue().getNumSignBits();
+ Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
+ return std::min(Tmp, Tmp2);
+ }
+ }
+
+ // Fallback - just get the minimum number of sign bits of the operands.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
+ case ISD::UMIN:
+ case ISD::UMAX:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SADDO_CARRY:
+ case ISD::UADDO_CARRY:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SSUBO_CARRY:
+ case ISD::USUBO_CARRY:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ // If setcc returns 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integer.
+ if (TLI->getBooleanContents(VT.isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::SETCC:
+ case ISD::SETCCCARRY:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ }
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
+ if (Tmp == VTBits)
+ return VTBits;
+
+ if (ConstantSDNode *C =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
+ unsigned RotAmt = C->getAPIntValue().urem(VTBits);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Opcode == ISD::ROTR)
+ RotAmt = (VTBits - RotAmt) % VTBits;
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
+ }
+ break;
+ case ISD::ADD:
+ case ISD::ADDC:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts))
+ if (CRHS->isAllOnes()) {
+ KnownBits Known =
+ computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known.Zero | 1).isAllOnes())
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (Known.isNonNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Tmp2 == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2) - 1;
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Tmp2 == 1) return 1; // Early out.
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS =
+ isConstOrConstSplat(Op.getOperand(0), DemandedElts))
+ if (CLHS->isZero()) {
+ KnownBits Known =
+ computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known.Zero | 1).isAllOnes())
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (Known.isNonNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2) - 1;
+ case ISD::MUL: {
+ // The output of the Mul can be at most twice the valid bits in the inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (SignBitsOp0 == 1)
+ break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (SignBitsOp1 == 1)
+ break;
+ unsigned OutValidBits =
+ (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
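+ // E.g. multiplying two i32 values with 20 and 24 sign bits gives
+ // (32 - 20 + 1) + (32 - 24 + 1) = 22 valid bits, hence at least
+ // 32 - 22 + 1 = 11 sign bits in the product.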
+ return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
+ }
+ case ISD::SREM:
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero. The magnitude of the result should be less than or
+ // equal to the magnitude of the LHS. Therefore, the result should have
+ // at least as many sign bits as the left hand side.
+ return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ case ISD::TRUNCATE: {
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ if (VT.isScalableVector())
+ break;
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueSizeInBits();
+ const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
+
+ // Get reverse index (starting from 1), Op1 value indexes elements from
+ // little end. Sign starts at big end.
+ const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
+
+ // If the sign portion ends in our element the subtraction gives correct
+ // result. Otherwise it gives either negative or > bitwidth result
+ return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth);
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ break;
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element, otherwise assume we need
+ // the original demanded vector elements and the value.
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+ bool DemandedVal = true;
+ APInt DemandedVecElts = DemandedElts;
+ auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ unsigned EltIdx = CEltNo->getZExtValue();
+ DemandedVal = !!DemandedElts[EltIdx];
+ DemandedVecElts.clearBit(EltIdx);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (DemandedVal) {
+ // TODO - handle implicit truncation of inserted elements.
+ if (InVal.getScalarValueSizeInBits() != VTBits)
+ break;
+ Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ if (!!DemandedVecElts) {
+ Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ assert(!VT.isScalableVector());
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
+ // ComputeNumSignBits not yet implemented for scalable vectors.
+ if (VecVT.isScalableVector())
+ break;
+ const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
+ // If BitWidth > EltBitWidth the value is any-extended, and we do not know
+ // anything about sign bits. But if the sizes match we can derive knowledge
+ // about sign bits from the vector operand.
+ if (BitWidth != EltBitWidth)
+ break;
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
+ auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
+ SDValue Src = Op.getOperand(0);
+ // Bail until we can represent demanded elements for scalable vectors.
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ }
+ case ISD::CONCAT_VECTORS: {
+ if (VT.isScalableVector())
+ break;
+ // Determine the minimum number of sign bits across all demanded
+ // elts of the input vectors. Early out if the result is already 1.
+ Tmp = std::numeric_limits<unsigned>::max();
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
+ APInt DemandedSub =
+ DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts);
+ if (!DemandedSub)
+ continue;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ break;
+ // Demand any elements from the subvector and the remainder from the src
+ // it's inserted into.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
+
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // early-out
+ }
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (const MDNode *Ranges = LD->getRanges()) {
+ if (DemandedElts != 1)
+ break;
+
+ ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
+ if (VTBits > CR.getBitWidth()) {
+ switch (LD->getExtensionType()) {
+ case ISD::SEXTLOAD:
+ CR = CR.signExtend(VTBits);
+ break;
+ case ISD::ZEXTLOAD:
+ CR = CR.zeroExtend(VTBits);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (VTBits != CR.getBitWidth())
+ break;
+ return std::min(CR.getSignedMin().getNumSignBits(),
+ CR.getSignedMax().getNumSignBits());
+ }
+
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD: {
+ Tmp = cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
+ // If we are looking at the loaded value.
+ if (Op.getResNo() == 0) {
+ if (Tmp == VTBits)
+ return 1; // early-out
+ if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ return VTBits - Tmp + 1;
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ return VTBits - Tmp;
+ }
+ break;
+ }
+ }
+
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. The EXTLOAD case will fall through.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
+ case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp;
+ case ISD::NON_EXTLOAD:
+ if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+ // We only need to handle vectors - computeKnownBits should handle
+ // scalar cases.
+ Type *CstTy = Cst->getType();
+ if (CstTy->isVectorTy() && !VT.isScalableVector() &&
+ (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() &&
+ VTBits == CstTy->getScalarSizeInBits()) {
+ Tmp = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ }
+ // Unknown type. Conservatively assume no bits match sign bit.
+ return 1;
+ }
+ return Tmp;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ // TODO: This can probably be removed once target code is audited. This
+ // is here purely to reduce patch size and review complexity.
+ if (!VT.isScalableVector()) {
+ unsigned NumBits =
+ TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ KnownBits Known = computeKnownBits(Op, DemandedElts, Depth);
+ return std::max(FirstAnswer, Known.countMinSignBits());
+}
+
+unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op,
+ unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
+unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
+bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
+ unsigned Depth) const {
+ // Early out for FREEZE.
+ if (Op.getOpcode() == ISD::FREEZE)
+ return true;
+
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return false;
+
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
+}
+
+bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
+ const APInt &DemandedElts,
+ bool PoisonOnly,
+ unsigned Depth) const {
+ unsigned Opcode = Op.getOpcode();
+
+ // Early out for FREEZE.
+ if (Opcode == ISD::FREEZE)
+ return true;
+
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ if (isIntOrFPConstant(Op))
+ return true;
+
+ switch (Opcode) {
+ case ISD::VALUETYPE:
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ return true;
+
+ case ISD::UNDEF:
+ return PoisonOnly;
+
+ case ISD::BUILD_VECTOR:
+ // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -
+ // this shouldn't affect the result.
+ for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly,
+ Depth + 1))
+ return false;
+ }
+ return true;
+
+ // TODO: Search for noundef attributes from library functions.
+
+ // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
+
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+ Op, DemandedElts, *this, PoisonOnly, Depth);
+ break;
+ }
+
+ // If Op can't create undef/poison and none of its operands are undef/poison
+ // then Op is never undef/poison.
+ // NOTE: TargetNodes should handle this in themselves in
+ // isGuaranteedNotToBeUndefOrPoisonForTargetNode.
+ return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true,
+ Depth) &&
+ all_of(Op->ops(), [&](SDValue V) {
+ return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1);
+ });
+}
+
+bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
+ bool ConsiderFlags,
+ unsigned Depth) const {
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return true;
+
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
+ Depth);
+}
+
+bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
+ bool PoisonOnly, bool ConsiderFlags,
+ unsigned Depth) const {
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return true;
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::FREEZE:
+ case ISD::CONCAT_VECTORS:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::BITREVERSE:
+ case ISD::PARITY:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::BITCAST:
+ case ISD::BUILD_VECTOR:
+ case ISD::BUILD_PAIR:
+ return false;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ // Matches hasPoisonGeneratingFlags().
+ return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
+ Op->getFlags().hasNoUnsignedWrap());
+
+ case ISD::SHL:
+ // If the max shift amount isn't in range, then the shift can create poison.
+ if (!getValidMaximumShiftAmountConstant(Op, DemandedElts))
+ return true;
+
+ // Matches hasPoisonGeneratingFlags().
+ return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
+ Op->getFlags().hasNoUnsignedWrap());
+
+ case ISD::INSERT_VECTOR_ELT: {
+ // Ensure that the element index is in bounds.
+ EVT VecVT = Op.getOperand(0).getValueType();
+ KnownBits KnownIdx = computeKnownBits(Op.getOperand(2), Depth + 1);
+ return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
+ }
+
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->canCreateUndefOrPoisonForTargetNode(
+ Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth);
+ break;
+ }
+
+ // Be conservative and return true.
+ return true;
+}
+
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
+ return false;
+
+ return true;
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ return true;
+
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ return !C->getValueAPF().isNaN() ||
+ (SNaN && !C->getValueAPF().isSignaling());
+ }
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (SNaN)
+ return true;
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case ISD::FCANONICALIZE:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FLDEXP: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::FABS:
+ case ISD::FNEG:
+ case ISD::FCOPYSIGN: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SELECT:
+ return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return true;
+ case ISD::FSQRT: // Needs the operand known to be positive.
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FPOWI:
+ case ISD::FPOW: {
+ if (SNaN)
+ return true;
+ // TODO: Refine on operand
+ return false;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ }
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::BUILD_VECTOR: {
+ for (const SDValue &Opnd : Op->ops())
+ if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ return false;
+ return true;
+ }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ }
+
+ return false;
+ }
+}
+
+bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() &&
+ "Floating point type expected");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // Return false if we find any zero in a vector.
+ if (Op->getOpcode() == ISD::BUILD_VECTOR ||
+ Op->getOpcode() == ISD::SPLAT_VECTOR) {
+ for (const SDValue &OpVal : Op->op_values()) {
+ if (OpVal.isUndef())
+ return false;
+ if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
+ if (C->isZero())
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ assert(!Op.getValueType().isFloatingPoint() &&
+ "Floating point types unsupported - use isKnownNeverZeroFloat");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (ISD::matchUnaryPredicate(Op,
+ [](ConstantSDNode *C) { return !C->isZero(); }))
+ return true;
+
+ // TODO: Recognize more cases here. Most of the cases are also incomplete to
+ // some degree.
+ switch (Op.getOpcode()) {
+ default:
+ break;
+
+ case ISD::OR:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::VSELECT:
+ case ISD::SELECT:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(2), Depth + 1);
+
+ case ISD::SHL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ // 1 << X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+ return true;
+ break;
+
+ case ISD::UADDSAT:
+ case ISD::UMAX:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::UMIN:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::ABS:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really
+ // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+ return true;
+ break;
+
+ case ISD::UDIV:
+ case ISD::SDIV:
+ // div exact can only produce a zero if the dividend is zero.
+ // TODO: For udiv this is also true if Op1 u<= Op0
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ break;
+
+ case ISD::ADD:
+ if (Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ // TODO: There are a lot more cases we can prove for add.
+ break;
+
+ case ISD::SUB: {
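+ // 0 - X is nonzero exactly when X is nonzero; otherwise try to prove
+ // that the two operands can never be equal.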
+ if (isNullConstant(Op.getOperand(0)))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1);
+
+ std::optional<bool> ne =
+ KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1),
+ computeKnownBits(Op.getOperand(1), Depth + 1));
+ return ne && *ne;
+ }
+
+ case ISD::MUL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ break;
+
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
+
+ return computeKnownBits(Op, Depth).isNonZero();
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // For negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+// Only bits set in Mask must be negated; other bits may be arbitrary.
+SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) {
+ if (isBitwiseNot(V, AllowUndefs))
+ return V.getOperand(0);
+
+ // Handle any_extend (not (truncate X)) pattern, where Mask only sets
+ // bits in the non-extended part.
+ ConstantSDNode *MaskC = isConstOrConstSplat(Mask);
+ if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+ SDValue ExtArg = V.getOperand(0);
+ if (ExtArg.getScalarValueSizeInBits() >=
+ MaskC->getAPIntValue().getActiveBits() &&
+ isBitwiseNot(ExtArg, AllowUndefs) &&
+ ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType())
+ return ExtArg.getOperand(0).getOperand(0);
+ return SDValue();
+}
+
+static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
+ // Match masked merge pattern (X & ~M) op (Y & M)
+ // Including degenerate case (X & ~M) op M
+ auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask,
+ SDValue Other) {
+ if (SDValue NotOperand =
+ getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) {
+ if (NotOperand->getOpcode() == ISD::ZERO_EXTEND ||
+ NotOperand->getOpcode() == ISD::TRUNCATE)
+ NotOperand = NotOperand->getOperand(0);
+
+ if (Other == NotOperand)
+ return true;
+ if (Other->getOpcode() == ISD::AND)
+ return NotOperand == Other->getOperand(0) ||
+ NotOperand == Other->getOperand(1);
+ }
+ return false;
+ };
+
+ if (A->getOpcode() == ISD::ZERO_EXTEND || A->getOpcode() == ISD::TRUNCATE)
+ A = A->getOperand(0);
+
+ if (B->getOpcode() == ISD::ZERO_EXTEND || B->getOpcode() == ISD::TRUNCATE)
+ B = B->getOperand(0);
+
+ if (A->getOpcode() == ISD::AND)
+ return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) ||
+ MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B);
+ return false;
+}
+
+// FIXME: unify with llvm::haveNoCommonBitsSet.
+bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
+ assert(A.getValueType() == B.getValueType() &&
+ "Values must have the same type");
+ if (haveNoCommonBitsSetCommutative(A, B) ||
+ haveNoCommonBitsSetCommutative(B, A))
+ return true;
+ return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
+ computeKnownBits(B));
+}
+
+static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
+ SelectionDAG &DAG) {
+ if (cast<ConstantSDNode>(Step)->isZero())
+ return DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
+
+static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ int NumOps = Ops.size();
+ assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(!VT.isScalableVector() &&
+ "BUILD_VECTOR cannot be used with scalable types");
+ assert(VT.getVectorNumElements() == (unsigned)NumOps &&
+ "Incorrect element count in BUILD_VECTOR!");
+
+ // BUILD_VECTOR of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+  // A BUILD_VECTOR of sequential EXTRACT_VECTOR_ELTs from a single source
+  // vector with the same type as the result is just that source vector.
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (int i = 0; i != NumOps; ++i) {
+ if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Ops[i].getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Ops[i].getOperand(1)) ||
+ cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) {
+ IsIdentity = false;
+ break;
+ }
+ IdentitySrc = Ops[i].getOperand(0);
+ }
+ if (IsIdentity)
+ return IdentitySrc;
+
+ return SDValue();
+}
+
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
+ assert(llvm::all_of(Ops,
+ [Ops](SDValue Op) {
+ return Ops[0].getValueType() == Op.getValueType();
+ }) &&
+ "Concatenation of vectors with inconsistent value types!");
+ assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) ==
+ VT.getVectorElementCount() &&
+ "Incorrect element count in vector concatenation!");
+
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // Concat of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // Scan the operands and look for extract operations from a single source
+ // that correspond to insertion at the same location via this concatenation:
+ // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ SDValue Op = Ops[i];
+ unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements();
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op.getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+ Op.getConstantOperandVal(1) != IdentityIndex) {
+ IsIdentity = false;
+ break;
+ }
+ assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+ "Unexpected identity source vector for concat of extracts");
+ IdentitySrc = Op.getOperand(0);
+ }
+ if (IsIdentity) {
+ assert(IdentitySrc && "Failed to set source vector of extracts");
+ return IdentitySrc;
+ }
+
+ // The code below this point is only designed to work for fixed width
+ // vectors, so we bail out for now.
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
+ // simplified to one big BUILD_VECTOR.
+ // FIXME: Add support for SCALAR_TO_VECTOR as well.
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 16> Elts;
+ for (SDValue Op : Ops) {
+ EVT OpVT = Op.getValueType();
+ if (Op.isUndef())
+ Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
+ else if (Op.getOpcode() == ISD::BUILD_VECTOR)
+ Elts.append(Op->op_begin(), Op->op_end());
+ else
+ return SDValue();
+ }
+
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ for (SDValue Op : Elts)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+
+ if (SVT.bitsGT(VT.getScalarType())) {
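+    // Widen every element: undefs become undef of the wider type; other
+    // elements are zero-extended when the target considers that free and
+    // sign-extended otherwise.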
+ for (SDValue &Op : Elts) {
+ if (Op.isUndef())
+ Op = DAG.getUNDEF(SVT);
+ else
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+ }
+ }
+
+ SDValue V = DAG.getBuildVector(VT, DL, Elts);
+ NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
+ return V;
+}
+
+/// Gets or creates the specified node.
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
+  // Constant fold unary operations with an integer constant operand. Even
+  // opaque constants are folded, because folding a unary operation
+  // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ANY_EXTEND:
+      // Some targets, such as RISC-V, prefer to sign-extend some types.
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getZero(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
+ break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTPOP:
+ return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP: {
+ bool Ignored;
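+      // The integer operand may be wider than 16 bits (e.g. promoted to i32);
+      // only the low 16 bits hold the half/bfloat bit pattern.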
+ APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
+ case ISD::STEP_VECTOR: {
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
+ return V;
+ break;
+ }
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool ignored;
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ return getConstant(IntVal, DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ break;
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ }
+ }
+ }
+
+ // Constant fold unary operations with a vector integer or float operand.
+ switch (Opcode) {
+ default:
+ // FIXME: Entirely reasonable to perform folding of other unary
+ // operations here as the need arises.
+ break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::TRUNCATE:
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::ABS:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+    SDValue Ops[] = {N1};
+ if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
+ }
+ }
+
+ unsigned OpOpcode = N1.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::STEP_VECTOR:
+ assert(VT.isScalableVector() &&
+ "STEP_VECTOR can only be used with scalable types");
+ assert(OpOpcode == ISD::TargetConstant &&
+ VT.getVectorElementType() == N1.getValueType() &&
+ "Unexpected step operand");
+ break;
+ case ISD::FREEZE:
+ assert(VT == N1.getValueType() && "Unexpected VT!");
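+    // freeze(x) -> x when x is already known to be neither undef nor poison.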
+ if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
+ /*Depth*/ 1))
+ return N1;
+ break;
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return N1; // Factor, merge or concat of one node? No need.
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() &&
+ "Invalid FP cast!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!");
+ if (N1.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (N1.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N1.isUndef())
+ return getConstantFP(0.0, DL, VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "SIGN_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "ZERO_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "ANY_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = N1.getOperand(0);
+ if (OpOp.getValueType() == VT) {
+ transferDbgValues(N1, OpOp);
+ return OpOp;
+ }
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "TRUNCATE result type type should be vector iff the operand "
+ "type is vector!");
+ if (N1.getValueType() == VT) return N1; // noop truncate
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(N1.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
+ VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ if (N1.getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
+ return N1.getOperand(0);
+ }
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
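+    // (trunc (vscale C)) -> (vscale (trunc C)), but only while new nodes are
+    // not required to have legal types.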
+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+ return getVScale(DL, VT,
+ N1.getConstantOperandAPInt(0).trunc(VT.getSizeInBits()));
+ break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(N1.getValueType().bitsLE(VT) &&
+ "The input must be the same size or smaller than the result.");
+ assert(VT.getVectorMinNumElements() <
+ N1.getValueType().getVectorMinNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::BSWAP:
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BSWAP!");
+ assert((VT.getScalarSizeInBits() % 16 == 0) &&
+ "BSWAP types must be a multiple of 16 bits!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // bswap(bswap(X)) -> X.
+ if (OpOpcode == ISD::BSWAP)
+ return N1.getOperand(0);
+ break;
+ case ISD::BITREVERSE:
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BITREVERSE!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITCAST:
+ assert(VT.getSizeInBits() == N1.getValueSizeInBits() &&
+ "Cannot BITCAST between types of different sizes!");
+ if (VT == N1.getValueType()) return N1; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !N1.getValueType().isVector() &&
+ (VT.getVectorElementType() == N1.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ N1.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(N1.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getConstantOperandVal(1) == 0 &&
+ N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // Negation of an unknown bag of bits is still completely undefined.
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return N1.getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, N1.getOperand(0));
+ break;
+ case ISD::VSCALE:
+ assert(VT == N1.getValueType() && "Unexpected VT!");
+ break;
+ case ISD::CTPOP:
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return N1;
+ break;
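+  // For an i1 value, ctlz/cttz is 1 when the input is 0 and 0 when it is 1,
+  // i.e. a logical NOT.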
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNOT(DL, N1, N1.getValueType());
+ break;
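+  // For i1 vectors the reductions collapse to bit operations: ADD is XOR
+  // (parity), SMIN/UMAX are OR (any element set), and SMAX/UMIN are AND (all
+  // elements set).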
+ case ISD::VECREDUCE_ADD:
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_XOR, DL, VT, N1);
+ break;
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_OR, DL, VT, N1);
+ break;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_UMIN:
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_AND, DL, VT, N1);
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1};
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
+ switch (Opcode) {
+ case ISD::ADD: return C1 + C2;
+ case ISD::SUB: return C1 - C2;
+ case ISD::MUL: return C1 * C2;
+ case ISD::AND: return C1 & C2;
+ case ISD::OR: return C1 | C2;
+ case ISD::XOR: return C1 ^ C2;
+ case ISD::SHL: return C1 << C2;
+ case ISD::SRL: return C1.lshr(C2);
+ case ISD::SRA: return C1.ashr(C2);
+ case ISD::ROTL: return C1.rotl(C2);
+ case ISD::ROTR: return C1.rotr(C2);
+ case ISD::SMIN: return C1.sle(C2) ? C1 : C2;
+ case ISD::SMAX: return C1.sge(C2) ? C1 : C2;
+ case ISD::UMIN: return C1.ule(C2) ? C1 : C2;
+ case ISD::UMAX: return C1.uge(C2) ? C1 : C2;
+ case ISD::SADDSAT: return C1.sadd_sat(C2);
+ case ISD::UADDSAT: return C1.uadd_sat(C2);
+ case ISD::SSUBSAT: return C1.ssub_sat(C2);
+ case ISD::USUBSAT: return C1.usub_sat(C2);
+ case ISD::SSHLSAT: return C1.sshl_sat(C2);
+ case ISD::USHLSAT: return C1.ushl_sat(C2);
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.udiv(C2);
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.urem(C2);
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.sdiv(C2);
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.srem(C2);
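+  // MULHS/MULHU return the high half of the double-width product: extend both
+  // operands to twice the width, multiply, and take the upper BitWidth bits.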
+ case ISD::MULHS: {
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
+ }
+ case ISD::MULHU: {
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
+ }
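+  // The averaging ops compute (C1 + C2 [+ 1]) >> 1 in a type one bit wider so
+  // the intermediate sum cannot overflow; extractBits(BitWidth, 1) performs
+  // the final shift right by one.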
+ case ISD::AVGFLOORS: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGFLOORU: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGCEILS: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGCEILU: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::ABDS:
+ return APIntOps::smax(C1, C2) - APIntOps::smin(C1, C2);
+ case ISD::ABDU:
+ return APIntOps::umax(C1, C2) - APIntOps::umin(C1, C2);
+ }
+ return std::nullopt;
+}
+
+// Handle constant folding with UNDEF.
+// TODO: Handle more cases.
+static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1,
+ bool IsUndef1, const APInt &C2,
+ bool IsUndef2) {
+ if (!(IsUndef1 || IsUndef2))
+ return FoldValue(Opcode, C1, C2);
+
+ // Fold and(x, undef) -> 0
+ // Fold mul(x, undef) -> 0
+ if (Opcode == ISD::AND || Opcode == ISD::MUL)
+ return APInt::getZero(C1.getBitWidth());
+
+ return std::nullopt;
+}
+
+SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
+ const GlobalAddressSDNode *GA,
+ const SDNode *N2) {
+ if (GA->getOpcode() != ISD::GlobalAddress)
+ return SDValue();
+ if (!TLI->isOffsetFoldingLegal(GA))
+ return SDValue();
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C2)
+ return SDValue();
+ int64_t Offset = C2->getSExtValue();
+ switch (Opcode) {
+ case ISD::ADD: break;
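+  // Negate via uint64_t so that Offset == INT64_MIN does not invoke
+  // signed-overflow UB.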
+ case ISD::SUB: Offset = -uint64_t(Offset); break;
+ default: return SDValue();
+ }
+ return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
+ GA->getOffset() + uint64_t(Offset));
+}
+
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ // If a divisor is zero/undef or any element of a divisor vector is
+ // zero/undef, the whole op is undef.
+ assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+ SDValue Divisor = Ops[1];
+ if (Divisor.isUndef() || isNullConstant(Divisor))
+ return true;
+
+    return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+           llvm::any_of(Divisor->op_values(), [](SDValue V) {
+             return V.isUndef() || isNullConstant(V);
+           });
+ // TODO: Handle signed overflow.
+ }
+ // TODO: Handle oversized shifts.
+ default:
+ return false;
+ }
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ // We can't create a scalar CONCAT_VECTORS so skip it. It will break
+ // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by
+ // foldCONCAT_VECTORS in getNode before this is called.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
+ return SDValue();
+
+ unsigned NumOps = Ops.size();
+ if (NumOps == 0)
+ return SDValue();
+
+ if (isUndef(Opcode, Ops))
+ return getUNDEF(VT);
+
+ // Handle binops special cases.
+ if (NumOps == 2) {
+ if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+ return CFP;
+
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ std::optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(*FoldAttempt, DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
+ }
+
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+ if (TLI->isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
+ }
+
+ // This is for vector folding only from here on.
+ if (!VT.isVector())
+ return SDValue();
+
+ ElementCount NumElts = VT.getVectorElementCount();
+
+ // See if we can fold through bitcasted integer ops.
+ if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
+ Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+ Ops[0].getOpcode() == ISD::BITCAST &&
+ Ops[1].getOpcode() == ISD::BITCAST) {
+ SDValue N1 = peekThroughBitcasts(Ops[0]);
+ SDValue N2 = peekThroughBitcasts(Ops[1]);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ EVT BVVT = N1.getValueType();
+ if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+ bool IsLE = getDataLayout().isLittleEndian();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SmallVector<APInt> RawBits1, RawBits2;
+ BitVector UndefElts1, UndefElts2;
+ if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
+ SmallVector<APInt> RawBits;
+ for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
+ std::optional<APInt> Fold = FoldValueWithUndef(
+ Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
+ if (!Fold)
+ break;
+ RawBits.push_back(*Fold);
+ }
+ if (RawBits.size() == NumElts.getFixedValue()) {
+ // We have constant folded, but we need to cast this again back to
+ // the original (possibly legalized) type.
+ SmallVector<APInt> DstBits;
+ BitVector DstUndefs;
+ BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
+ DstBits, RawBits, DstUndefs,
+ BitVector(RawBits.size(), false));
+ EVT BVEltVT = BV1->getOperand(0).getValueType();
+ unsigned BVEltBits = BVEltVT.getSizeInBits();
+ SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
+ for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
+ if (DstUndefs[I])
+ continue;
+ Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT);
+ }
+ return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
+ }
+ }
+ }
+ }
+
+ // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
+ // (shl step_vector(C0), C1) -> (step_vector(C0 << C1))
+ if ((Opcode == ISD::MUL || Opcode == ISD::SHL) &&
+ Ops[0].getOpcode() == ISD::STEP_VECTOR) {
+ APInt RHSVal;
+ if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) {
+ APInt NewStep = Opcode == ISD::MUL
+ ? Ops[0].getConstantOperandAPInt(0) * RHSVal
+ : Ops[0].getConstantOperandAPInt(0) << RHSVal;
+ return getStepVector(DL, VT, NewStep);
+ }
+ }
+
+ auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
+ return !Op.getValueType().isVector() ||
+ Op.getValueType().getVectorElementCount() == NumElts;
+ };
+
+ auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
+ return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
+ Op.getOpcode() == ISD::BUILD_VECTOR ||
+ Op.getOpcode() == ISD::SPLAT_VECTOR;
+ };
+
+  // Every operand must be either a scalar (e.g. a CONDCODE) or a vector with
+  // the same element count as the result, and each operand must be UNDEF, a
+  // CONDCODE, a BUILD_VECTOR, or a SPLAT_VECTOR.
+ if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
+ !llvm::all_of(Ops, IsScalarOrSameVectorSize))
+ return SDValue();
+
+  // If we are comparing vectors, then the result needs to be an i1 boolean that
+ // is then extended back to the legal result type depending on how booleans
+ // are represented.
+ EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+ ISD::NodeType ExtendCode =
+ (Opcode == ISD::SETCC && SVT != VT.getScalarType())
+ ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT))
+ : ISD::SIGN_EXTEND;
+
+  // Find a legal integer scalar type for constant promotion and ensure that
+  // its scalar size is at least as large as the source scalar type.
+ EVT LegalSVT = VT.getScalarType();
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(VT.getScalarType()))
+ return SDValue();
+ }
+
+ // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
+ // only have one operand to check. For fixed-length vector types we may have
+ // a combination of BUILD_VECTOR and SPLAT_VECTOR.
+ unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+
+ // Constant fold each scalar lane separately.
+ SmallVector<SDValue, 4> ScalarResults;
+ for (unsigned I = 0; I != NumVectorElts; I++) {
+ SmallVector<SDValue, 4> ScalarOps;
+ for (SDValue Op : Ops) {
+ EVT InSVT = Op.getValueType().getScalarType();
+ if (Op.getOpcode() != ISD::BUILD_VECTOR &&
+ Op.getOpcode() != ISD::SPLAT_VECTOR) {
+ if (Op.isUndef())
+ ScalarOps.push_back(getUNDEF(InSVT));
+ else
+ ScalarOps.push_back(Op);
+ continue;
+ }
+
+ SDValue ScalarOp =
+ Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
+ EVT ScalarVT = ScalarOp.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) {
+ // Don't create illegally-typed nodes unless they're constants or undef
+ // - if we fail to constant fold we can't guarantee the (dead) nodes
+ // we're creating will be cleaned up before being visited for
+ // legalization.
+ if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() &&
+ !isa<ConstantSDNode>(ScalarOp) &&
+ TLI->getTypeAction(*getContext(), InSVT) !=
+ TargetLowering::TypeLegal)
+ return SDValue();
+ ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+ }
+
+ ScalarOps.push_back(ScalarOp);
+ }
+
+ // Constant fold the scalar operands.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ ScalarResults.push_back(ScalarResult);
+ }
+
+ SDValue V = NumElts.isScalable() ? getSplatVector(VT, DL, ScalarResults[0])
+ : getBuildVector(VT, DL, ScalarResults);
+ NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDValue N1, SDValue N2) {
+ // TODO: We don't do any constant folding for strict FP opcodes here, but we
+ // should. That will require dealing with a potentially non-default
+ // rounding mode, checking the "opStatus" return value from the APFloat
+ // math calculations, and possibly other variations.
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
+ ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
+ if (N1CFP && N2CFP) {
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ const APFloat &C2 = N2CFP->getValueAPF();
+ switch (Opcode) {
+ case ISD::FADD:
+ C1.add(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FSUB:
+ C1.subtract(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FMUL:
+ C1.multiply(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FDIV:
+ C1.divide(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FREM:
+ C1.mod(C2);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FCOPYSIGN:
+ C1.copySign(C2);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FMINNUM:
+ return getConstantFP(minnum(C1, C2), DL, VT);
+ case ISD::FMAXNUM:
+ return getConstantFP(maxnum(C1, C2), DL, VT);
+ case ISD::FMINIMUM:
+ return getConstantFP(minimum(C1, C2), DL, VT);
+ case ISD::FMAXIMUM:
+ return getConstantFP(maximum(C1, C2), DL, VT);
+ default: break;
+ }
+ }
+ if (N1CFP && Opcode == ISD::FP_ROUND) {
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ bool Unused;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return getConstantFP(C1, DL, VT);
+ }
+
+ switch (Opcode) {
+ case ISD::FSUB:
+ // -0.0 - undef --> undef (consistent with "fneg undef")
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
+ if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
+ return getUNDEF(VT);
+ [[fallthrough]];
+
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+    // If both operands are undef, the result is undef. If one operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+ return SDValue();
+}
+
+SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
+ assert(Val.getValueType().isInteger() && "Invalid AssertAlign!");
+
+ // There's no need to assert on a byte-aligned pointer. All pointers are at
+ // least byte aligned.
+ if (A == Align(1))
+ return Val;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val});
+ ID.AddInteger(A.value());
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ Val.getValueType(), A);
+ createOperands(N, {Val});
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, N2, Flags);
+}
+
+void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
+ SDValue &N2) const {
+ if (!TLI->isCommutativeBinOp(Opcode))
+ return;
+
+ // Canonicalize:
+ // binop(const, nonconst) -> binop(nonconst, const)
+ SDNode *N1C = isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2);
+ SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+ if ((N1C && !N2C) || (N1CFP && !N2CFP))
+ std::swap(N1, N2);
+
+ // Canonicalize:
+ // binop(splat(x), step_vector) -> binop(step_vector, splat(x))
+ else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ N2.getOpcode() == ISD::STEP_VECTOR)
+ std::swap(N1, N2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE &&
+ N2.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
+
+ canonicalizeCommutativeBinop(Opcode, N1, N2);
+
+ auto *N1C = dyn_cast<ConstantSDNode>(N1);
+ auto *N2C = dyn_cast<ConstantSDNode>(N2);
+
+ // Don't allow undefs in vector splats - we might be returning N2 when folding
+ // to zero etc.
+ ConstantSDNode *N2CV =
+ isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
+
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2CV && N2CV->isZero())
+ return N2;
+ if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2CV && N2CV->isZero())
+ return N1;
+ if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::XOR, DL, VT, N1, N2);
+ break;
+ case ISD::MUL:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::AND, DL, VT, N1, N2);
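+    // (mul (vscale C0), C1) -> (vscale (C0 * C1)) when the multiply is known
+    // not to wrap in the signed sense.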
+ if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+ const APInt &MulImm = N1->getConstantOperandAPInt(0);
+ const APInt &N2CImm = N2C->getAPIntValue();
+ return getVScale(DL, VT, MulImm * N2CImm);
+ }
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
+ // fold (add_sat x, y) -> (or x, y) for bool types.
+ if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT)
+ return getNode(ISD::OR, DL, VT, N1, N2);
+ // fold (sub_sat x, y) -> (and x, ~y) for bool types.
+ if (Opcode == ISD::SSUBSAT || Opcode == ISD::USUBSAT)
+ return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
+ }
+ break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::SMIN:
+ case ISD::UMAX:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::OR, DL, VT, N1, N2);
+ break;
+ case ISD::SMAX:
+ case ISD::UMIN:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::AND, DL, VT, N1, N2);
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
+ return V;
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+ const APInt &MulImm = N1->getConstantOperandAPInt(0);
+ const APInt &ShiftImm = N2C->getAPIntValue();
+ return getVScale(DL, VT, MulImm << ShiftImm);
+ }
+ [[fallthrough]];
+ case ISD::SRA:
+ case ISD::SRL:
+ if (SDValue V = simplifyShift(N1, N2))
+ return V;
+ [[fallthrough]];
+ case ISD::ROTL:
+ case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift return type must match the type of its first operand");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+    assert((!VT.isVector() || VT == N2.getValueType()) &&
+           "Vector shift amounts must match the type of their first operand");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueType().getScalarSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2CV && N2CV->isZero())
+ return N1;
+ break;
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
+ "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+ if (VT.getScalarType() == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorElementCount() == VT.getVectorElementCount()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
+ unsigned FromBits = EVT.getScalarSizeInBits();
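+      // Shift the narrow value into the top bits, then arithmetic-shift it back
+      // down so its sign bit is replicated across the full width.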
+ Val <<= Val.getBitWidth() - FromBits;
+ Val.ashrInPlace(Val.getBitWidth() - FromBits);
+ return getConstant(Val, DL, ConstantVT);
+ };
+
+ if (N1C) {
+ const APInt &Val = N1C->getAPIntValue();
+ return SignExtendInReg(Val, VT);
+ }
+
+ if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
+ SmallVector<SDValue, 8> Ops;
+ llvm::EVT OpVT = N1.getOperand(0).getValueType();
+ for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ SDValue Op = N1.getOperand(i);
+ if (Op.isUndef()) {
+ Ops.push_back(getUNDEF(OpVT));
+ continue;
+ }
+ ConstantSDNode *C = cast<ConstantSDNode>(Op);
+ APInt Val = C->getAPIntValue();
+ Ops.push_back(SignExtendInReg(Val, OpVT));
+ }
+ return getBuildVector(VT, DL, Ops);
+ }
+ break;
+ }
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: {
+ assert(VT.isInteger() && cast<VTSDNode>(N2)->getVT().isInteger() &&
+ N1.getValueType().isFloatingPoint() && "Invalid FP_TO_*INT_SAT");
+ assert(N1.getValueType().isVector() == VT.isVector() &&
+ "FP_TO_*INT_SAT type should be vector iff the operand type is "
+ "vector!");
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "Vector element counts must match in FP_TO_*INT_SAT");
+ assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
+ "Type to saturate to must be a scalar.");
+ assert(cast<VTSDNode>(N2)->getVT().bitsLE(VT.getScalarType()) &&
+ "Not extending!");
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
+ "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
+ element type of the vector.");
+
+ // Extract from an undefined value or using an undefined index is undefined.
+ if (N1.isUndef() || N2.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length
+ // vectors. For scalable vectors we will provide appropriate support for
+ // dealing with arbitrary indices.
+ if (N2C && N1.getValueType().isFixedLengthVector() &&
+ N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers. This only works for
+ // fixed length vectors, since we need to know the exact number of
+ // elements.
+ if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getVectorIdxConstant(N2C->getZExtValue() % Factor, DL));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
+ // lowering is expanding large vector constants.
+ if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR ||
+ N1.getOpcode() == ISD::SPLAT_VECTOR)) {
+ assert((N1.getOpcode() != ISD::BUILD_VECTOR ||
+ N1.getValueType().isFixedLengthVector()) &&
+ "BUILD_VECTOR used for scalable vectors");
+ unsigned Index =
+ N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0;
+ SDValue Elt = N1.getOperand(Index);
+
+ if (VT != Elt.getValueType())
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated, and the result implicitly
+ // extended. Make that explicit here.
+ Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element; if they are
+      // known to be different, extract the element from the original vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ if (VT.isFloatingPoint()) {
+ assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits());
+ return getFPExtendOrRound(N1.getOperand(1), DL, VT);
+ }
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+
+ // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
+ // when vector types are scalarized and v1iX is legal.
+ // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx).
+ // Here we are completely ignoring the extract element index (N2),
+ // which is fine for fixed width vectors, since any index other than 0
+ // is undefined anyway. However, this cannot be ignored for scalable
+ // vectors - in theory we could support this, but we don't want to do this
+ // without a profitability check.
+ if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getValueType().isFixedLengthVector() &&
+ N1.getValueType().getVectorNumElements() == 1) {
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
+ N1.getOperand(1));
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (N1C) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ const APInt &Val = N1C->getAPIntValue();
+ return getConstant(Val.extractBits(ElementSize, Shift), DL, VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ EVT N1VT = N1.getValueType();
+ assert(VT.isVector() && N1VT.isVector() &&
+ "Extract subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N1VT.getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) &&
+ "Cannot extract a scalable vector from a fixed length vector!");
+ assert((VT.isScalableVector() != N1VT.isScalableVector() ||
+ VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) &&
+ "Extract subvector must be from larger vector to smaller vector!");
+ assert(N2C && "Extract subvector index must be a constant");
+ assert((VT.isScalableVector() != N1VT.isScalableVector() ||
+ (VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
+ N1VT.getVectorMinNumElements()) &&
+ "Extract subvector overflow!");
+ assert(N2C->getAPIntValue().getBitWidth() ==
+ TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
+ "Constant index for EXTRACT_SUBVECTOR has an invalid size");
+
+ // Trivial extraction.
+ if (VT == N1VT)
+ return N1;
+
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+    // EXTRACT_SUBVECTOR of CONCAT_VECTORS can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorMinNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
+ }
+
+ // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+ // during shuffle legalization.
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+ VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ break;
+ }
+ }
+
+ // Perform trivial constant folding.
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
+ return SV;
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.isUndef()) {
+ if (TLI->isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::SUB:
+ return getUNDEF(VT); // fold op(undef, arg2) -> undef
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.isUndef()) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.isUndef())
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, DL, VT);
+ [[fallthrough]];
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return getUNDEF(VT); // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
+ case ISD::OR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ return getAllOnesConstant(DL, VT);
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2};
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, N2, N3, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE &&
+ N2.getOpcode() != ISD::DELETED_NODE &&
+ N3.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
+ // Perform various simplifications.
+ switch (Opcode) {
+ case ISD::FMA: {
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == VT && N2.getValueType() == VT &&
+ N3.getValueType() == VT && "FMA types must match!");
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+ ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
+ if (N1CFP && N2CFP && N3CFP) {
+ APFloat V1 = N1CFP->getValueAPF();
+ const APFloat &V2 = N2CFP->getValueAPF();
+ const APFloat &V3 = N3CFP->getValueAPF();
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::SETCC: {
+ assert(VT.isInteger() && "SETCC result type must be an integer!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ "SETCC operands must have the same type!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "SETCC type should be vector iff the operand type is vector!");
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
+ "SETCC vector element counts must match!");
+ // Use FoldSetCC to simplify SETCC's.
+ if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+ return V;
+ // Vector constant folding.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
+ NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
+ return V;
+ }
+ break;
+ }
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ if (SDValue V = simplifySelect(N1, N2, N3))
+ return V;
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::VECTOR_SPLICE: {
+ if (cast<ConstantSDNode>(N3)->isZero())
+ return N1;
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
+ // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
+ // for scalable vectors where we will generate appropriate code to
+ // deal with out-of-bounds cases correctly.
+ if (N3C && N1.getValueType().isFixedLengthVector() &&
+ N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+
+ // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
+ if (N3.isUndef())
+ return getUNDEF(VT);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (N2.isUndef())
+ return N1;
+
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ // Inserting undef into undef is still undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ EVT N2VT = N2.getValueType();
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(VT.isVector() && N2VT.isVector() &&
+ "Insert subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N2VT.getVectorElementType() &&
+ "Insert subvector VTs must have the same element type!");
+ assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) &&
+ "Cannot insert a scalable vector into a fixed length vector!");
+ assert((VT.isScalableVector() != N2VT.isScalableVector() ||
+ VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ assert(isa<ConstantSDNode>(N3) &&
+ "Insert subvector index must be constant");
+ assert((VT.isScalableVector() != N2VT.isScalableVector() ||
+ (N2VT.getVectorMinNumElements() +
+ cast<ConstantSDNode>(N3)->getZExtValue()) <=
+ VT.getVectorMinNumElements()) &&
+ "Insert subvector overflow!");
+ assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() ==
+ TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
+ "Constant index for INSERT_SUBVECTOR has an invalid size");
+
+ // Trivial insertion.
+ if (VT == N2VT)
+ return N2;
+
+ // If this is an insert of an extracted vector into an undef vector, we
+ // can just use the input to the extract.
+ if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+ return N2.getOperand(0);
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ case ISD::VP_TRUNCATE:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ // Don't create noop casts.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2, N3};
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
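+ // Incoming stack arguments live in fixed frame objects, which have
+ // negative frame indices.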
+ for (SDNode *U : getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ assert(!Value.isUndef());
+
+ unsigned NumBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
+ if (VT.isInteger()) {
+ bool IsOpaque = VT.getSizeInBits() > 64 ||
+ !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+ return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+ }
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
+ VT);
+ }
+
+ assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
+ EVT IntVT = VT.getScalarType();
+ if (!IntVT.isInteger())
+ IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
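+ // For example, splatting the byte value 0xAB to 32 bits computes
+ // 0xAB * 0x01010101 = 0xABABABAB.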
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
+ DAG.getConstant(Magic, dl, IntVT));
+ }
+
+ if (VT != Value.getValueType() && !VT.isInteger())
+ Value = DAG.getBitcast(VT.getScalarType(), Value);
+ if (VT != Value.getValueType())
+ Value = DAG.getSplatBuildVector(VT, dl, Value);
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
+/// used when a memcpy is turned into a memset when the source is a constant
+/// string ptr.
+static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const ConstantDataArraySlice &Slice) {
+ // Handle vector with all elements zero.
+ if (Slice.Array == nullptr) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, dl, VT);
+ if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ return DAG.getConstantFP(0.0, dl, VT);
+ if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ }
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length));
+
+ APInt Val(NumVTBits, 0);
+ if (DAG.getDataLayout().isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Slice[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8;
+ }
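+ // For example, the bytes "abcd" pack into 0x64636261 on a little-endian
+ // target and 0x61626364 on a big-endian target.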
+
+ // If the "cost" of materializing the integer immediate is less than the cost
+ // of a load, then it is cost effective to turn the load into the immediate.
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
+ return DAG.getConstant(Val, dl, VT);
+ return SDValue();
+}
+
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
+ const SDLoc &DL,
+ const SDNodeFlags Flags) {
+ EVT VT = Base.getValueType();
+ SDValue Index;
+
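+ // A scalable offset is expressed as vscale * KnownMinValue; a fixed offset
+ // becomes a plain constant addend.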
+ if (Offset.isScalable())
+ Index = getVScale(DL, Base.getValueType(),
+ APInt(Base.getValueSizeInBits().getFixedValue(),
+ Offset.getKnownMinValue()));
+ else
+ Index = getConstant(Offset.getFixedValue(), DL, VT);
+
+ return getMemBasePlusOffset(Base, Index, DL, Flags);
+}
+
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,
+ const SDLoc &DL,
+ const SDNodeFlags Flags) {
+ assert(Offset.getValueType().isInteger());
+ EVT BasePtrVT = Ptr.getValueType();
+ return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags);
+}
+
+/// Returns true if memcpy source is constant data.
+static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
+ uint64_t SrcDelta = 0;
+ GlobalAddressSDNode *G = nullptr;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ return getConstantDataArrayInfo(G->getGlobal(), Slice, 8,
+ SrcDelta + G->getOffset());
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF,
+ SelectionDAG &DAG) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return DAG.shouldOptForSize();
+}
+
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SmallVector<SDValue, 32> &OutChains, unsigned From,
+ unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+ SmallVector<SDValue, 16> &OutStoreChains) {
+ assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+ assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+ SmallVector<SDValue, 16> GluedLoadChains;
+ for (unsigned i = From; i < To; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ GluedLoadChains.push_back(OutLoadChains[i]);
+ }
+
+ // Chain for all loads.
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ GluedLoadChains);
+
+ for (unsigned i = From; i < To; ++i) {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+ SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ OutChains.push_back(NewStore);
+ }
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, Align Alignment,
+ bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo, AAResults *AA) {
+ // Turn a memcpy of undef to nop.
+ // FIXME: We need to honor volatile even if Src is undef.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than a potentially humongous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ MaybeAlign SrcAlign = DAG.InferPtrAlign(Src);
+ if (!SrcAlign || Alignment > *SrcAlign)
+ SrcAlign = Alignment;
+ assert(SrcAlign && "SrcAlign must be set");
+ ConstantDataArraySlice Slice;
+ // If marked as volatile, perform a copy even when marked as constant.
+ bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice);
+ bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+ const MemOp Op = isZeroConstant
+ ? MemOp::Set(Size, DstAlignCanChange, Alignment,
+ /*IsZeroMemset*/ true, isVol)
+ : MemOp::Copy(Size, DstAlignCanChange, Alignment,
+ *SrcAlign, isVol, CopyFromConstant);
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
+ if (NewAlign > Alignment) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+ Alignment = NewAlign;
+ }
+ }
+
+ // Prepare AAInfo for loads/stores after lowering this memcpy.
+ AAMDNodes NewAAInfo = AAInfo;
+ NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
+ const Value *SrcVal = dyn_cast_if_present<const Value *>(SrcPtrInfo.V);
+ bool isConstant =
+ AA && SrcVal &&
+ AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo));
+
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ SmallVector<SDValue, 16> OutLoadChains;
+ SmallVector<SDValue, 16> OutStoreChains;
+ SmallVector<SDValue, 32> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
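+ // For example, a 6-byte copy lowered as two 4-byte operations backs the
+ // second pair up by 2 bytes so it covers bytes 2..5.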
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
+ if (CopyFromConstant &&
+ (isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constantpool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ ConstantDataArraySlice SubSlice;
+ if (SrcOff < Slice.Length) {
+ SubSlice = Slice;
+ SubSlice.move(SrcOff);
+ } else {
+ // This is an out-of-bounds access and hence UB. Pretend we read zero.
+ SubSlice.Array = nullptr;
+ SubSlice.Offset = 0;
+ SubSlice.Length = VTSize;
+ }
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
+ if (Value.getNode()) {
+ Store = DAG.getStore(
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
+ OutChains.push_back(Store);
+ }
+ }
+
+ if (!Store.getNode()) {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+ // FIXME: Does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(C, VT);
+ assert(NVT.bitsGE(VT));
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+ if (isConstant)
+ SrcMMOFlags |= MachineMemOperand::MOInvariant;
+
+ Value = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, NVT, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), VT,
+ commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
+ OutLoadChains.push_back(Value.getValue(1));
+
+ Store = DAG.getTruncStore(
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
+ OutStoreChains.push_back(Store);
+ }
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ Size -= VTSize;
+ }
+
+ unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+ TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+ unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+ if (NumLdStInMemcpy) {
+ // The memcpy may have been converted to a memset when the source is
+ // constant, in which case there are only stores and no loads. In the
+ // absence of loads, there is nothing to gang up.
+ if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+ // If the target does not care, just leave the chains as they are.
+ for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ OutChains.push_back(OutStoreChains[i]);
+ }
+ } else {
+ // Ld/St less than/equal limit set by target.
+ if (NumLdStInMemcpy <= GluedLdStLimit) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ NumLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ } else {
+ unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+ unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+ unsigned GlueIter = 0;
+
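+ // For example, with 10 load/store pairs and a glue limit of 4, this glues
+ // [6,10) and then [2,6); the remaining pairs [0,2) are handled below.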
+ for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+ unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+ unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+ OutLoadChains, OutStoreChains);
+ GlueIter += GluedLdStLimit;
+ }
+
+ // Residual ld/st.
+ if (RemainingLdStInMemcpy) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ RemainingLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ }
+ }
+ }
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, Align Alignment,
+ bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo) {
+ // Turn a memmove of undef to nop.
+ // FIXME: We need to honor volatile even if Src is undef.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ MaybeAlign SrcAlign = DAG.InferPtrAlign(Src);
+ if (!SrcAlign || Alignment > *SrcAlign)
+ SrcAlign = Alignment;
+ assert(SrcAlign && "SrcAlign must be set");
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes()))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
+ if (NewAlign > Alignment) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+ Alignment = NewAlign;
+ }
+ }
+
+ // Prepare AAInfo for loads/stores after lowering this memmove.
+ AAMDNodes NewAAInfo = AAInfo;
+ NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value;
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
+ Value = DAG.getLoad(
+ VT, dl, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Store;
+
+ Store = DAG.getStore(
+ Chain, dl, LoadValues[i],
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+/// Lower the call to 'memset' intrinsic function into a series of store
+/// operations.
+///
+/// \param DAG Selection DAG where lowered code is placed.
+/// \param dl Link to corresponding IR location.
+/// \param Chain Control flow dependency.
+/// \param Dst Pointer to destination memory location.
+/// \param Src Value of byte to write into the memory.
+/// \param Size Number of bytes to write.
+/// \param Alignment Alignment of the destination in bytes.
+/// \param isVol True if destination is volatile.
+/// \param AlwaysInline Makes sure no function call is generated.
+/// \param DstPtrInfo IR information on the memory pointer.
+/// \returns New head in the control flow, if lowering was successful, empty
+/// SDValue otherwise.
+///
+/// The function tries to replace 'llvm.memset' intrinsic with several store
+/// operations and value calculation code. This is usually profitable for small
+/// memory size or when the semantic requires inlining.
+static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, Align Alignment, bool isVol,
+ bool AlwaysInline, MachinePointerInfo DstPtrInfo,
+ const AAMDNodes &AAInfo) {
+ // Turn a memset of undef to nop.
+ // FIXME: We need to honor volatile even if Src is undef.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool IsZeroVal = isNullConstant(Src);
+ unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
+
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
+ DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ const DataLayout &DL = DAG.getDataLayout();
+ Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
+ if (NewAlign > Alignment) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+ Alignment = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ // Prepare AAInfo for loads/stores after lowering this memset.
+ AAMDNodes NewAAInfo = AAInfo;
+ NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
+
+ // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment,
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+ NewAAInfo);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
+ // pointer operands can be losslessly bitcasted to pointers of address space 0
+ if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, Align Alignment,
+ bool isVol, bool AlwaysInline, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo, AAResults *AA) {
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isZero())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA);
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, Align Alignment,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo, AAResults *AA) {
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isZero())
+ return Chain;
+
+ SDValue Result = getMemmoveLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result =
+ TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size,
+ Alignment, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, Align Alignment,
+ bool isVol, bool AlwaysInline, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ const AAMDNodes &AAInfo) {
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isZero())
+ return Chain;
+
+ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, AAInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, AAInfo);
+ assert(Result &&
+ "getMemsetStores must return a valid sequence when AlwaysInline");
+ return Result;
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
+ // Emit a library call.
+ auto &Ctx = *getContext();
+ const auto& DL = getDataLayout();
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ // FIXME: pass in SDLoc
+ CLI.setDebugLoc(dl).setChain(Chain);
+
+ ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
+ const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
+ const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
+
+ // Helper function to create an Entry from Node and Type.
+ const auto CreateEntry = [](SDValue Node, Type *Ty) {
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Node;
+ Entry.Ty = Ty;
+ return Entry;
+ };
+
+ // If zeroing out and bzero is present, use it.
+ if (SrcIsZero && BzeroName) {
+ TargetLowering::ArgListTy Args;
+ Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+ CLI.setLibCallee(
+ TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
+ getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
+ } else {
+ TargetLowering::ArgListTy Args;
+ Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
+ Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+ CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(Ctx),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(DL)),
+ std::move(Args));
+ }
+
+ CLI.setDiscardResult().setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Value, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*getContext());
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDVTList VTList, ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
+ EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp,
+ MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_CLR ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FADD ||
+ Opcode == ISD::ATOMIC_LOAD_FSUB ||
+ Opcode == ISD::ATOMIC_LOAD_FMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UINC_WRAP ||
+ Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP ||
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
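+ // An atomic store produces only a chain; read-modify-write operations also
+ // return the previous value.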
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Val};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(Ops.size());
+ for (const SDValue &Op : Ops)
+ VTs.push_back(Op.getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
+}
+
+SDValue SelectionDAG::getMemIntrinsicNode(
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
+ EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
+ if (!Size && MemVT.isScalableVector())
+ Size = MemoryLocation::UnknownSize;
+ else if (!Size)
+ Size = MemVT.getStoreSize();
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
+}
+
+SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
+ SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ (Opcode <= (unsigned)std::numeric_limits<int>::max() &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
+ Opcode, dl.getIROrder(), VTList, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ ID.AddInteger(MemVT.getRawBits());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+ }
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
+ SDValue Chain, int FrameIndex,
+ int64_t Size, int64_t Offset) {
+ const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[2] = {
+ Chain,
+ getFrameIndex(FrameIndex,
+ getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
+ true)};
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(FrameIndex);
+ ID.AddInteger(Size);
+ ID.AddInteger(Offset);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
+ Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain,
+ uint64_t Guid, uint64_t Index,
+ uint32_t Attr) {
+ const unsigned Opcode = ISD::PSEUDO_PROBE;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(Guid);
+ ID.AddInteger(Index);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<PseudoProbeSDNode>(
+ Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
+ int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return Info;
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI,
+ Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
+ SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.isUndef())
+ return InferPointerInfo(Info, DAG, Ptr);
+ return Info;
+}
+
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ VT, MMO);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
+ MemVT, Alignment, MMOFlags, AAInfo);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ MemVT, MMO);
+}
+
+SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,
+ MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
+ ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding) {
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *LD = cast<VPLoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getMask(),
+ LD->getVectorLength(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),
+ nullptr, LD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Offset, SDValue Mask,
+ SDValue EVL, EVT MemVT, MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM, bool IsTruncating,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ IsTruncating, IsCompressing, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
+ return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
+ IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStoreVP(Chain, dl, Val, Ptr, getUNDEF(Ptr.getValueType()), Mask,
+ EVL, VT, MMO, ISD::UNINDEXED,
+ /*IsTruncating*/ false, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *ST = cast<VPStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,
+ Offset, ST->getMask(), ST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStoreSDNode>(
+ dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),
+ ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::UnknownSize;
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) {
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL};
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>(
+ DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N =
+ newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+ EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+ Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment,
+ MMOFlags, AAInfo, Ranges, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
+ SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+ Undef, Stride, Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+ ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+ Stride, Mask, EVL, PtrInfo, MemVT, Alignment,
+ MMOFlags, AAInfo, nullptr, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+ ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+ Stride, Mask, EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad);
+ assert(SLD->getOffset().isUndef() &&
+ "Strided load is already a indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ SLD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getStridedLoadVP(
+ AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(),
+ Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(),
+ SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags,
+ SLD->getAAInfo(), nullptr, SLD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr,
+ SDValue Offset, SDValue Stride,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM,
+ bool IsTruncating, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+ DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ VTs, AM, IsTruncating,
+ IsCompressing, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(
+ SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT,
+ Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo);
+ return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT,
+ MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr,
+ SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()),
+ Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED,
+ /*IsTruncating*/ false, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+ DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ VTs, ISD::UNINDEXED, true,
+ IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore,
+ const SDLoc &DL, SDValue Base,
+ SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *SST = cast<VPStridedStoreSDNode>(OrigStore);
+ assert(SST->getOffset().isUndef() &&
+ "Strided store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(),
+ SST->getMask(), SST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(SST->getMemoryVT().getRawBits());
+ ID.AddInteger(SST->getRawSubclassData());
+ ID.AddInteger(SST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStridedStoreSDNode>(
+ DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(),
+ SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 7 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Base, SDValue Offset, SDValue Mask,
+ SDValue PassThru, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM,
+ ISD::LoadExtType ExtTy, bool isExpanding) {
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) &&
+ "Unindexed masked load with an offset!");
+ SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ AM, ExtTy, isExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Masked load is already an indexed load!");
+ return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base,
+ Offset, LD->getMask(), LD->getPassThru(),
+ LD->getMemoryVT(), LD->getMemOperand(), AM,
+ LD->getExtensionType(), LD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Base, SDValue Offset,
+ SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM, bool IsTruncating,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) &&
+ "Unindexed masked store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Base, Offset, Mask};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
+ dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ IsTruncating, IsCompressing, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() &&
+ "Masked store is already a indexed store!");
+ return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset,
+ ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(),
+ AM, ST->isTruncatingStore(), ST->isCompressingStore());
+}
+
+SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType,
+ ISD::LoadExtType ExtTy) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
+ dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, MemVT, MMO, IndexType, ExtTy);
+ createOperands(N, Ops);
+
+ assert(N->getPassThru().getValueType() == N->getValueType(0) &&
+ "Incompatible type of the PassThru value in MaskedGatherSDNode");
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType,
+ bool IsTrunc) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
+ dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, MemVT, MMO, IndexType, IsTrunc);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+ ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+ ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
+ // select undef, T, F --> T (if T is a constant), otherwise F
+ // select ?, undef, F --> F
+ // select ?, T, undef --> T
+ if (Cond.isUndef())
+ return isConstantValueOfAnyType(T) ? T : F;
+ if (T.isUndef())
+ return F;
+ if (F.isUndef())
+ return T;
+
+ // select true, T, F --> T
+ // select false, T, F --> F
+ if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
+ return CondC->isZero() ? F : T;
+
+ // TODO: This should simplify VSELECT with non-zero constant condition using
+ // something like this (but check boolean contents to be complete?):
+ if (ConstantSDNode *CondC = isConstOrConstSplat(Cond, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true))
+ if (CondC->isZero())
+ return F;
+
+ // select ?, T, T --> T
+ if (T == F)
+ return T;
+
+ return SDValue();
+}
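+// Editorial sketch (not part of the upstream file): how a caller might hit the
+// constant-condition fold above; DAG and DL are assumed to be an in-scope
+// SelectionDAG and SDLoc.
+//
+//   SDValue Cond = DAG.getConstant(1, DL, MVT::i1);   // "true"
+//   SDValue T = DAG.getConstant(42, DL, MVT::i32);
+//   SDValue F = DAG.getConstant(7, DL, MVT::i32);
+//   SDValue Folded = DAG.simplifySelect(Cond, T, F);  // returns T
+//   // An empty SDValue() is returned when no fold applies.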
+
+SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
+ // shift undef, Y --> 0 (can always assume that the undef value is 0)
+ if (X.isUndef())
+ return getConstant(0, SDLoc(X.getNode()), X.getValueType());
+ // shift X, undef --> undef (because it may shift by the bitwidth)
+ if (Y.isUndef())
+ return getUNDEF(X.getValueType());
+
+ // shift 0, Y --> 0
+ // shift X, 0 --> X
+ if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
+ return X;
+
+ // shift X, C >= bitwidth(X) --> undef
+ // All vector elements must be too big (or undef) to avoid partial undefs.
+ auto isShiftTooBig = [X](ConstantSDNode *Val) {
+ return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
+ };
+ if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
+ return getUNDEF(X.getValueType());
+
+ return SDValue();
+}
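+// Editorial sketch (not part of the upstream file): the over-wide shift fold
+// above in caller terms, assuming DAG and DL are an in-scope SelectionDAG and
+// SDLoc.
+//
+//   SDValue X = DAG.getConstant(5, DL, MVT::i32);
+//   SDValue BigAmt = DAG.getConstant(32, DL, MVT::i32);  // >= bitwidth of X
+//   SDValue Folded = DAG.simplifyShift(X, BigAmt);       // UNDEF of i32
+//   // shift X, 0 and shift 0, Y are likewise folded to X and 0 respectively.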
+
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
+ SDNodeFlags Flags) {
+ // If this operation has 'nnan' or 'ninf' and at least one disallowed operand
+ // (an undef operand can be chosen to be NaN/Inf), then the result of this
+ // operation is poison. That result can be relaxed to undef.
+ ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true);
+ ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+ bool HasNan = (XC && XC->getValueAPF().isNaN()) ||
+ (YC && YC->getValueAPF().isNaN());
+ bool HasInf = (XC && XC->getValueAPF().isInfinity()) ||
+ (YC && YC->getValueAPF().isInfinity());
+
+ if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef()))
+ return getUNDEF(X.getValueType());
+
+ if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef()))
+ return getUNDEF(X.getValueType());
+
+ if (!YC)
+ return SDValue();
+
+ // X + -0.0 --> X
+ if (Opcode == ISD::FADD)
+ if (YC->getValueAPF().isNegZero())
+ return X;
+
+ // X - +0.0 --> X
+ if (Opcode == ISD::FSUB)
+ if (YC->getValueAPF().isPosZero())
+ return X;
+
+ // X * 1.0 --> X
+ // X / 1.0 --> X
+ if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
+ if (YC->getValueAPF().isExactlyValue(1.0))
+ return X;
+
+ // X * 0.0 --> 0.0
+ if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros())
+ if (YC->getValueAPF().isZero())
+ return getConstantFP(0.0, SDLoc(Y), Y.getValueType());
+
+ return SDValue();
+}
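+// Editorial sketch (not part of the upstream file): the identity folds above,
+// assuming DAG and DL are an in-scope SelectionDAG and SDLoc.
+//
+//   SDValue X = DAG.getConstantFP(2.5, DL, MVT::f32);
+//   SDValue NegZero = DAG.getConstantFP(-0.0, DL, MVT::f32);
+//   SDNodeFlags Flags;                                   // no fast-math flags
+//   // X + -0.0 --> X, independent of fast-math flags.
+//   SDValue Folded = DAG.simplifyFPBinop(ISD::FADD, X, NegZero, Flags);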
+
+SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue SV, unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDUse> Ops) {
+ switch (Ops.size()) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
+ return getNode(Opcode, DL, VT, NewOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, Ops, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
+ unsigned NumOps = Ops.size();
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
+ default: break;
+ }
+
+#ifndef NDEBUG
+ for (const auto &Op : Ops)
+ assert(Op.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
+#endif
+
+ switch (Opcode) {
+ default: break;
+ case ISD::BUILD_VECTOR:
+ // Attempt to simplify BUILD_VECTOR.
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ case ISD::CONCAT_VECTORS:
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ case ISD::SELECT_CC:
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ assert((!Ops[0].getValueType().isVector() ||
+ Ops[0].getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount()) &&
+ "Expected select_cc with vector result to have the same sized "
+ "comparison type!");
+ break;
+ case ISD::BR_CC:
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ // If it is a VP_ADD/VP_SUB mask operation, turn it into VP_XOR.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ Opcode = ISD::VP_XOR;
+ break;
+ case ISD::VP_MUL:
+ // If it is a VP_MUL mask operation, turn it into VP_AND.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ Opcode = ISD::VP_AND;
+ break;
+ case ISD::VP_REDUCE_MUL:
+ // If it is a VP_REDUCE_MUL mask operation, turn it into VP_REDUCE_AND.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_AND;
+ break;
+ case ISD::VP_REDUCE_ADD:
+ // If it is a VP_REDUCE_ADD mask operation, turn it into VP_REDUCE_XOR.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_XOR;
+ break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMIN:
+ // If it is a VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation, turn it into
+ // VP_REDUCE_AND.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_AND;
+ break;
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ // If it is a VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation, turn it into
+ // VP_REDUCE_OR.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_OR;
+ break;
+ }
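+ // Editorial note (not part of the upstream file): the i1 canonicalizations in
+ // the switch above follow from mod-2 / boolean arithmetic. For i1 values,
+ // a + b == a - b == a ^ b (VP_ADD/VP_SUB -> VP_XOR) and a * b == a & b
+ // (VP_MUL -> VP_AND); an smax/umin reduction is true only when all lanes are
+ // true (-> VP_REDUCE_AND), and an smin/umax reduction is true when any lane
+ // is true (-> VP_REDUCE_OR).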
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ N->setFlags(Flags);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
+ return getNode(Opcode, DL, getVTList(ResultTys), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VTList, Ops, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags);
+
+#ifndef NDEBUG
+ for (const auto &Op : Ops)
+ assert(Op.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
+#endif
+
+ switch (Opcode) {
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
+ "Invalid add/sub overflow op!");
+ assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() &&
+ Ops[0].getValueType() == Ops[1].getValueType() &&
+ Ops[0].getValueType() == VTList.VTs[0] &&
+ "Binary operator types must match!");
+ SDValue N1 = Ops[0], N2 = Ops[1];
+ canonicalizeCommutativeBinop(Opcode, N1, N2);
+
+ // (X +- 0) -> X with zero-overflow.
+ ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true);
+ if (N2CV && N2CV->isZero()) {
+ SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
+ }
+ break;
+ }
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!");
+ assert(VTList.VTs[0].isInteger() && VTList.VTs[0] == VTList.VTs[1] &&
+ VTList.VTs[0] == Ops[0].getValueType() &&
+ VTList.VTs[0] == Ops[1].getValueType() &&
+ "Binary operator types must match!");
+ break;
+ }
+ case ISD::FFREXP: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 1 && "Invalid ffrexp op!");
+ assert(VTList.VTs[0].isFloatingPoint() && VTList.VTs[1].isInteger() &&
+ VTList.VTs[0] == Ops[0].getValueType() && "frexp type mismatch");
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Ops[0])) {
+ int FrexpExp;
+ APFloat FrexpMant =
+ frexp(C->getValueAPF(), FrexpExp, APFloat::rmNearestTiesToEven);
+ SDValue Result0 = getConstantFP(FrexpMant, DL, VTList.VTs[0]);
+ SDValue Result1 =
+ getConstant(FrexpMant.isFinite() ? FrexpExp : 0, DL, VTList.VTs[1]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {Result0, Result1}, Flags);
+ }
+
+ break;
+ }
+ case ISD::STRICT_FP_EXTEND:
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
+ "Invalid STRICT_FP_EXTEND!");
+ assert(VTList.VTs[0].isFloatingPoint() &&
+ Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!");
+ assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
+ "STRICT_FP_EXTEND result type should be vector iff the operand "
+ "type is vector!");
+ assert((!VTList.VTs[0].isVector() ||
+ VTList.VTs[0].getVectorElementCount() ==
+ Ops[1].getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) &&
+ "Invalid fpext node, dst <= src!");
+ break;
+ case ISD::STRICT_FP_ROUND:
+ assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!");
+ assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
+ "STRICT_FP_ROUND result type should be vector iff the operand "
+ "type is vector!");
+ assert((!VTList.VTs[0].isVector() ||
+ VTList.VTs[0].getVectorElementCount() ==
+ Ops[1].getValueType().getVectorElementCount()) &&
+ "Vector element count mismatch!");
+ assert(VTList.VTs[0].isFloatingPoint() &&
+ Ops[1].getValueType().isFloatingPoint() &&
+ VTList.VTs[0].bitsLT(Ops[1].getValueType()) &&
+ isa<ConstantSDNode>(Ops[2]) &&
+ (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 ||
+ cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) &&
+ "Invalid STRICT_FP_ROUND!");
+ break;
+#if 0
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+#endif
+ }
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
+ }
+
+ N->setFlags(Flags);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, std::nullopt);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(2U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(3U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(4U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+ ID.AddInteger(VT4.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
+ unsigned NumVTs = VTs.size();
+ FoldingSetNodeID ID;
+ ID.AddInteger(NumVTs);
+ for (unsigned index = 0; index < NumVTs; index++) {
+ ID.AddInteger(VTs[index].getRawBits());
+ }
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ llvm::copy(VTs, Array);
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ updateDivergence(N);
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
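+// Editorial sketch (not part of the upstream file): because UpdateNodeOperands
+// may CSE into a pre-existing node, callers should use its return value rather
+// than assume N was mutated in place; N and NewChain are hypothetical values.
+//
+//   SDNode *Updated = DAG.UpdateNodeOperands(N, NewChain);
+//   if (Updated != N)
+//     ; // an equivalent node already existed and N itself was left unchanged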
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ updateDivergence(N);
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
+ unsigned NumOps = Ops.size();
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // If no operands changed just return the input node.
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ updateDivergence(N);
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs) {
+ if (NewMemRefs.empty()) {
+ N->clearMemRefs();
+ return;
+ }
+
+ // Check if we can avoid allocating by storing a single reference directly.
+ if (NewMemRefs.size() == 1) {
+ N->MemRefs = NewMemRefs[0];
+ N->NumMemRefs = 1;
+ return;
+ }
+
+ MachineMemOperand **MemRefsBuffer =
+ Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size());
+ llvm::copy(NewMemRefs, MemRefsBuffer);
+ N->MemRefs = MemRefsBuffer;
+ N->NumMemRefs = static_cast<int>(NewMemRefs.size());
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, std::nullopt);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, std::nullopt);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
+ // Reset the NodeID to -1.
+ New->setNodeId(-1);
+ if (New != N) {
+ ReplaceAllUsesWith(N, New);
+ RemoveDeadNode(N);
+ }
+ return New;
+}
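+
+// Illustrative sketch, not part of the upstream source: during instruction
+// selection a target typically morphs an ISD node into a node carrying a
+// machine opcode, in place. 'TargetOpc' stands for some target-defined
+// machine opcode, and N is assumed to be a two-operand node; the helper
+// itself is hypothetical.
+[[maybe_unused]] static SDNode *exampleSelectBinOp(SelectionDAG &DAG,
+ SDNode *N, unsigned TargetOpc) {
+ // Reuses N's storage when possible; otherwise an equivalent pre-existing
+ // node is returned and N is removed from the DAG.
+ return DAG.SelectNodeTo(N, TargetOpc, N->getValueType(0), N->getOperand(0),
+ N->getOperand(1));
+}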
+
+/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This will make the debugger work better at -O0, where there is a
+/// higher probability of having other instructions associated with that line.
+///
+/// For IROrder, we keep the smaller of the two
+SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+ N->setDebugLoc(DebugLoc());
+ }
+ unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
+ N->setIROrder(Order);
+ return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the SDLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
+/// As a consequence it isn't appropriate to use from within the DAG combiner or
+/// the legalizer which maintain worklists that would need to be updated when
+/// deleting things.
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ // If an identical node already exists, use it.
+ void *IP = nullptr;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops);
+ if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
+ return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = nullptr;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ // For MachineNode, initialize the memory references information.
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
+ MN->clearMemRefs();
+
+ // Swap for an appropriately sized array from the recycler.
+ removeOperands(N);
+ createOperands(N, Ops);
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SDNode *N : DeadNodeSet)
+ if (N->use_empty())
+ DeadNodes.push_back(N);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
+ unsigned OrigOpc = Node->getOpcode();
+ unsigned NewOpc;
+ switch (OrigOpc) {
+ default:
+ llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
+#include "llvm/IR/ConstrainedOps.def"
+ }
+
+ assert(Node->getNumValues() == 2 && "Unexpected number of results!");
+
+ // We're taking this node out of the chain, so we need to re-link things.
+ SDValue InputChain = Node->getOperand(0);
+ SDValue OutputChain = SDValue(Node, 1);
+ ReplaceAllUsesOfValueWith(OutputChain, InputChain);
+
+ SmallVector<SDValue, 3> Ops;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+ SDVTList VTs = getVTList(Node->getValueType(0));
+ SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ } else {
+ ReplaceAllUsesWith(Node, Res);
+ RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, std::nullopt);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ ArrayRef<EVT> ResultTys,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(ResultTys);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTs,
+ ArrayRef<SDValue> Ops) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = nullptr;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops);
+ IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
+ }
+ }
+
+ // Allocate a new MachineSDNode.
+ N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this);
+ return N;
+}
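+
+// Illustrative sketch, not part of the upstream source: building a machine
+// node directly, here a 32-bit IMPLICIT_DEF. Targets usually do this from
+// custom lowering or selection code; the helper name is hypothetical.
+[[maybe_unused]] static SDValue exampleImplicitDef(SelectionDAG &DAG,
+ const SDLoc &DL) {
+ MachineSDNode *Def =
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32);
+ return SDValue(Def, 0);
+}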
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
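+
+// Illustrative sketch, not part of the upstream source: extracting a
+// subregister value and inserting it back into a wider value. 'SubRegIdx'
+// stands for a target-defined subregister index (for example a "low 32 bits"
+// index); the concrete index and the helper are hypothetical.
+[[maybe_unused]] static SDValue exampleRoundTripSubreg(SelectionDAG &DAG,
+ const SDLoc &DL,
+ SDValue Wide64,
+ int SubRegIdx) {
+ SDValue Low32 = DAG.getTargetExtractSubreg(SubRegIdx, DL, MVT::i32, Wide64);
+ return DAG.getTargetInsertSubreg(SubRegIdx, DL, MVT::i64, Wide64, Low32);
+}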
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNodeIfExists(Opcode, VTList, Ops, Flags);
+}
+
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags Flags) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
+ E->intersectFlagsWith(Flags);
+ return E;
+ }
+ }
+ return nullptr;
+}
+
+/// doesNodeExist - Check if a node exists without modifying its flags.
+bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (FindNodeOrInsertPos(ID, SDLoc(), IP))
+ return true;
+ }
+ return false;
+}
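+
+// Illustrative sketch, not part of the upstream source: a combine can probe
+// the CSE maps before committing to a transform, for example only
+// reassociating when the intermediate add already exists in the DAG.
+[[maybe_unused]] static bool exampleWouldReuseAdd(SelectionDAG &DAG, EVT VT,
+ SDValue A, SDValue B) {
+ SDValue Ops[] = {A, B};
+ return DAG.doesNodeExist(ISD::ADD, DAG.getVTList(VT), Ops);
+}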
+
+/// getDbgValue - Creates an SDDbgValue node.
+///
+/// SDNode
+SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr,
+ SDNode *N, unsigned R, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromNode(N, R),
+ {}, IsIndirect, DL, O,
+ /*IsVariadic=*/false);
+}
+
+/// Constant
+SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ const Value *C,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromConst(C), {},
+ /*IsIndirect=*/false, DL, O,
+ /*IsVariadic=*/false);
+}
+
+/// FrameIndex
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+ DIExpression *Expr, unsigned FI,
+ bool IsIndirect,
+ const DebugLoc &DL,
+ unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return getFrameIndexDbgValue(Var, Expr, FI, {}, IsIndirect, DL, O);
+}
+
+/// FrameIndex with dependencies
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+ DIExpression *Expr, unsigned FI,
+ ArrayRef<SDNode *> Dependencies,
+ bool IsIndirect,
+ const DebugLoc &DL,
+ unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromFrameIdx(FI),
+ Dependencies, IsIndirect, DL, O,
+ /*IsVariadic=*/false);
+}
+
+/// VReg
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, DIExpression *Expr,
+ unsigned VReg, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromVReg(VReg),
+ {}, IsIndirect, DL, O,
+ /*IsVariadic=*/false);
+}
+
+SDDbgValue *SelectionDAG::getDbgValueList(DIVariable *Var, DIExpression *Expr,
+ ArrayRef<SDDbgOperand> Locs,
+ ArrayRef<SDNode *> Dependencies,
+ bool IsIndirect, const DebugLoc &DL,
+ unsigned O, bool IsVariadic) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, Locs, Dependencies, IsIndirect,
+ DL, O, IsVariadic);
+}
+
+void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
+ unsigned OffsetInBits, unsigned SizeInBits,
+ bool InvalidateDbg) {
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ assert(FromNode && ToNode && "Can't modify dbg values");
+
+ // PR35338
+ // TODO: assert(From != To && "Redundant dbg value transfer");
+ // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer");
+ if (From == To || FromNode == ToNode)
+ return;
+
+ if (!FromNode->getHasDebugValue())
+ return;
+
+ SDDbgOperand FromLocOp =
+ SDDbgOperand::fromNode(From.getNode(), From.getResNo());
+ SDDbgOperand ToLocOp = SDDbgOperand::fromNode(To.getNode(), To.getResNo());
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
+ if (Dbg->isInvalidated())
+ continue;
+
+ // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");
+
+ // Create a new location ops vector that is equal to the old vector, but
+ // with each instance of FromLocOp replaced with ToLocOp.
+ bool Changed = false;
+ auto NewLocOps = Dbg->copyLocationOps();
+ std::replace_if(
+ NewLocOps.begin(), NewLocOps.end(),
+ [&Changed, FromLocOp](const SDDbgOperand &Op) {
+ bool Match = Op == FromLocOp;
+ Changed |= Match;
+ return Match;
+ },
+ ToLocOp);
+ // Ignore this SDDbgValue if we didn't find a matching location.
+ if (!Changed)
+ continue;
+
+ DIVariable *Var = Dbg->getVariable();
+ auto *Expr = Dbg->getExpression();
+ // If a fragment is requested, update the expression.
+ if (SizeInBits) {
+ // When splitting a larger (e.g., sign-extended) value whose
+ // lower bits are described with an SDDbgValue, do not attempt
+ // to transfer the SDDbgValue to the upper bits.
+ if (auto FI = Expr->getFragmentInfo())
+ if (OffsetInBits + SizeInBits > FI->SizeInBits)
+ continue;
+ auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits,
+ SizeInBits);
+ if (!Fragment)
+ continue;
+ Expr = *Fragment;
+ }
+
+ auto AdditionalDependencies = Dbg->getAdditionalDependencies();
+ // Clone the SDDbgValue and move it to To.
+ SDDbgValue *Clone = getDbgValueList(
+ Var, Expr, NewLocOps, AdditionalDependencies, Dbg->isIndirect(),
+ Dbg->getDebugLoc(), std::max(ToNode->getIROrder(), Dbg->getOrder()),
+ Dbg->isVariadic());
+ ClonedDVs.push_back(Clone);
+
+ if (InvalidateDbg) {
+ // Invalidate value and indicate the SDDbgValue should not be emitted.
+ Dbg->setIsInvalidated();
+ Dbg->setIsEmitted();
+ }
+ }
+
+ for (SDDbgValue *Dbg : ClonedDVs) {
+ assert(is_contained(Dbg->getSDNodes(), ToNode) &&
+ "Transferred DbgValues should depend on the new SDNode");
+ AddDbgValue(Dbg, false);
+ }
+}
+
+void SelectionDAG::salvageDebugInfo(SDNode &N) {
+ if (!N.getHasDebugValue())
+ return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (auto *DV : GetDbgValues(&N)) {
+ if (DV->isInvalidated())
+ continue;
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
+ uint64_t Offset = N.getConstantOperandVal(1);
+
+ // Rewrite an ADD constant node into a DIExpression. Since we are
+ // performing arithmetic to compute the variable's *value* in the
+ // DIExpression, we need to mark the expression with a
+ // DW_OP_stack_value.
+ auto *DIExpr = DV->getExpression();
+ auto NewLocOps = DV->copyLocationOps();
+ bool Changed = false;
+ for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ // We're not given a ResNo to compare against because the whole
+ // node is going away. We know that any ISD::ADD only has one
+ // result, so we can assume any node match is using the result.
+ if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
+ NewLocOps[i].getSDNode() != &N)
+ continue;
+ NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
+ SmallVector<uint64_t, 3> ExprOps;
+ DIExpression::appendOffset(ExprOps, Offset);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ Changed = true;
+ }
+ (void)Changed;
+ assert(Changed && "Salvage target doesn't use N");
+
+ auto AdditionalDependencies = DV->getAdditionalDependencies();
+ SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr,
+ NewLocOps, AdditionalDependencies,
+ DV->isIndirect(), DV->getDebugLoc(),
+ DV->getOrder(), DV->isVariadic());
+ ClonedDVs.push_back(Clone);
+ DV->setIsInvalidated();
+ DV->setIsEmitted();
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
+ N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
+ }
+ }
+ }
+
+ for (SDDbgValue *Dbg : ClonedDVs) {
+ assert(!Dbg->getSDNodes().empty() &&
+ "Salvaged DbgValue should depend on a new SDNode");
+ AddDbgValue(Dbg, false);
+ }
+}
+
+/// Creates a SDDbgLabel node.
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG &d,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+} // end anonymous namespace
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of with self");
+
+ // Preserve Debug Values
+ transferDbgValues(FromN, To);
+ // Preserve extra info.
+ copyExtraInfo(From, To.getNode());
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace all of its users with To
+ // as well. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
+ } while (UI != UE && *UI == User);
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Preserve Debug Info. Only do this if there's a use.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ if (From->hasAnyUseOfValue(i)) {
+ assert((i < To->getNumValues()) && "Invalid To location");
+ transferDbgValues(SDValue(From, i), SDValue(To, i));
+ }
+ // Preserve extra info.
+ copyExtraInfo(From, To);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) {
+ // Preserve Debug Info.
+ transferDbgValues(SDValue(From, i), To[i]);
+ // Preserve extra info.
+ copyExtraInfo(From, To[i].getNode());
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this happens the
+ // uses are usually next to each other in the list. To help reduce the
+ // number of CSE and divergence recomputations, process all the uses of this
+ // user that we can find this way.
+ bool To_IsDivergent = false;
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ To_IsDivergent |= ToOp->isDivergent();
+ } while (UI != UE && *UI == User);
+
+ if (To_IsDivergent != From->isDivergent())
+ updateDivergence(User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The Deleted
+/// vector is handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial, case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To);
+ return;
+ }
+
+ // Preserve Debug Info.
+ transferDbgValues(From, To);
+ copyExtraInfo(From.getNode(), To.getNode());
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
+ } while (UI != UE && *UI == User);
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
+}
+
+namespace {
+
+/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+/// to record information about a use.
+struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+};
+
+/// operator< - Sort Memos by User.
+bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+}
+
+/// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node
+/// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that
+/// the node has already been taken care of recursively.
+class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SmallVector<UseMemo, 4> &Uses;
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ for (UseMemo &Memo : Uses)
+ if (Memo.User == N)
+ Memo.User = nullptr;
+ }
+
+public:
+ RAUOVWUpdateListener(SelectionDAG &d, SmallVector<UseMemo, 4> &uses)
+ : SelectionDAG::DAGUpdateListener(d), Uses(uses) {}
+};
+
+} // end anonymous namespace
+
+bool SelectionDAG::calculateDivergence(SDNode *N) {
+ if (TLI->isSDNodeAlwaysUniform(N)) {
+ assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) &&
+ "Conflicting divergence information!");
+ return false;
+ }
+ if (TLI->isSDNodeSourceOfDivergence(N, FLI, UA))
+ return true;
+ for (const auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent())
+ return true;
+ }
+ return false;
+}
+
+void SelectionDAG::updateDivergence(SDNode *N) {
+ SmallVector<SDNode *, 16> Worklist(1, N);
+ do {
+ N = Worklist.pop_back_val();
+ bool IsDivergent = calculateDivergence(N);
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ llvm::append_range(Worklist, N->uses());
+ }
+ } while (!Worklist.empty());
+}
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
+ DenseMap<SDNode *, unsigned> Degree;
+ Order.reserve(AllNodes.size());
+ for (auto &N : allnodes()) {
+ unsigned NOps = N.getNumOperands();
+ Degree[&N] = NOps;
+ if (0 == NOps)
+ Order.push_back(&N);
+ }
+ for (size_t I = 0; I != Order.size(); ++I) {
+ SDNode *N = Order[I];
+ for (auto *U : N->uses()) {
+ unsigned &UnsortedOps = Degree[U];
+ if (0 == --UnsortedOps)
+ Order.push_back(U);
+ }
+ }
+}
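+
+// Illustrative sketch, not part of the upstream source: consumers that need
+// to visit every node only after all of its operands can request such an
+// order from the DAG (the routine above is a standard Kahn-style sort).
+[[maybe_unused]] static void exampleVisitTopologically(SelectionDAG &DAG) {
+ std::vector<SDNode *> Order;
+ DAG.CreateTopologicalOrder(Order);
+ for (SDNode *N : Order)
+ (void)N; // Visit N here; each operand of N appears earlier in Order.
+}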
+
+#ifndef NDEBUG
+void SelectionDAG::VerifyDAGDivergence() {
+ std::vector<SDNode *> TopoOrder;
+ CreateTopologicalOrder(TopoOrder);
+ for (auto *N : TopoOrder) {
+ assert(calculateDivergence(N) == N->isDivergent() &&
+ "Divergence bit inconsistency detected");
+ }
+}
+#endif
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To);
+
+ transferDbgValues(*From, *To);
+ copyExtraInfo(From->getNode(), To->getNode());
+
+ // Record all the uses up front. This helps us process new uses that are
+ // introduced during the replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ llvm::sort(Uses);
+ RAUOVWUpdateListener Listener(*this, Uses);
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+ // If the node has been deleted by recursive CSE updates when updating
+ // another node, then just skip this entry.
+ if (User == nullptr) {
+ ++UseIndex;
+ continue;
+ }
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the number of nodes and
+/// leaves the node list itself sorted into that order.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes At SortedPos and after will contain the
+ // count of outstanding operands.
+ for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
+ checkForCycles(&N, this);
+ unsigned Degree = N.getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands, add it to the result array immediately.
+ N.setNodeId(DAGSize++);
+ allnodes_iterator Q(&N);
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N.setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (SDNode &Node : allnodes()) {
+ SDNode *N = &Node;
+ checkForCycles(N, this);
+ // N is in sorted position, so all its uses have one less operand
+ // that needs to be sorted.
+ for (SDNode *P : N->uses()) {
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P->getIterator() != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (Node.getIterator() == SortedPos) {
+#ifndef NDEBUG
+ allnodes_iterator I(N);
+ SDNode *S = &*++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull(this); dbgs() << "\n";
+ dbgs() << "Checking if this is due to cycles\n";
+ checkForCycles(this, true);
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topologic sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topologic sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) {
+ for (SDNode *SD : DB->getSDNodes()) {
+ if (!SD)
+ continue;
+ assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
+ SD->setHasDebugValue(true);
+ }
+ DbgInfo->add(DB, isParameter);
+}
+
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(DB); }
+
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
+ SDValue NewMemOpChain) {
+ assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node");
+ assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT");
+ // The new memory operation must have the same position as the old load in
+ // terms of memory dependency. Create a TokenFactor for the old load and new
+ // memory operation and update uses of the old load's output chain to use that
+ // TokenFactor.
+ if (OldChain == NewMemOpChain || OldChain.use_empty())
+ return NewMemOpChain;
+
+ SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other,
+ OldChain, NewMemOpChain);
+ ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
+ UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewMemOpChain);
+ return TokenFactor;
+}
+
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+ SDValue OldChain = SDValue(OldLoad, 1);
+ SDValue NewMemOpChain = NewMemOp.getValue(1);
+ return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain);
+}
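+
+// Illustrative sketch, not part of the upstream source: after a combine
+// replaces OldLoad with a new memory operation NewMemOp, the old load's
+// chain users must be rewired so the memory ordering is preserved, and the
+// loaded value itself is replaced separately.
+[[maybe_unused]] static void exampleReplaceLoad(SelectionDAG &DAG,
+ LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ SDValue NewChain = DAG.makeEquivalentMemoryOrdering(OldLoad, NewMemOp);
+ (void)NewChain;
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OldLoad, 0), NewMemOp);
+}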
+
+SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **OutFunction) {
+ assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
+
+ auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ auto *Module = MF->getFunction().getParent();
+ auto *Function = Module->getFunction(Symbol);
+
+ if (OutFunction != nullptr)
+ *OutFunction = Function;
+
+ if (Function != nullptr) {
+ auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace());
+ return getGlobalAddress(Function, SDLoc(Op), PtrTy);
+ }
+
+ std::string ErrorStr;
+ raw_string_ostream ErrorFormatter(ErrorStr);
+ ErrorFormatter << "Undefined external symbol ";
+ ErrorFormatter << '"' << Symbol << '"';
+ report_fatal_error(Twine(ErrorFormatter.str()));
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+bool llvm::isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isZero();
+}
+
+bool llvm::isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+bool llvm::isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnes();
+}
+
+bool llvm::isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
+bool llvm::isMinSignedConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isMinSignedValue();
+}
+
+bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
+ unsigned OperandNo) {
+ // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity().
+ // TODO: Target-specific opcodes could be added.
+ if (auto *Const = isConstOrConstSplat(V)) {
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UMAX:
+ return Const->isZero();
+ case ISD::MUL:
+ return Const->isOne();
+ case ISD::AND:
+ case ISD::UMIN:
+ return Const->isAllOnes();
+ case ISD::SMAX:
+ return Const->isMinSignedValue();
+ case ISD::SMIN:
+ return Const->isMaxSignedValue();
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return OperandNo == 1 && Const->isZero();
+ case ISD::UDIV:
+ case ISD::SDIV:
+ return OperandNo == 1 && Const->isOne();
+ }
+ } else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
+ switch (Opcode) {
+ case ISD::FADD:
+ return ConstFP->isZero() &&
+ (Flags.hasNoSignedZeros() || ConstFP->isNegative());
+ case ISD::FSUB:
+ return OperandNo == 1 && ConstFP->isZero() &&
+ (Flags.hasNoSignedZeros() || !ConstFP->isNegative());
+ case ISD::FMUL:
+ return ConstFP->isExactlyValue(1.0);
+ case ISD::FDIV:
+ return OperandNo == 1 && ConstFP->isExactlyValue(1.0);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ EVT VT = V.getValueType();
+ const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoNaNs()
+ ? APFloat::getQNaN(Semantics)
+ : !Flags.hasNoInfs()
+ ? APFloat::getInf(Semantics)
+ : APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXNUM)
+ NeutralAF.changeSign();
+
+ return ConstFP->isExactlyValue(NeutralAF);
+ }
+ }
+ }
+ return false;
+}
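+
+// Illustrative sketch, not part of the upstream source: a generic combine can
+// drop an operation whose second operand is the identity for that opcode,
+// i.e. fold 'x op identity' to 'x'. The helper is hypothetical and assumes
+// N is a binary operation; callers would check the opcode first.
+[[maybe_unused]] static SDValue exampleFoldIdentity(SDNode *N) {
+ SDValue X = N->getOperand(0), C = N->getOperand(1);
+ if (isNeutralConstant(N->getOpcode(), N->getFlags(), C, /*OperandNo=*/1))
+ return X;
+ return SDValue();
+}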
+
+SDValue llvm::peekThroughBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+ while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughTruncates(SDValue V) {
+ while (V.getOpcode() == ISD::TRUNCATE)
+ V = V.getOperand(0);
+ return V;
+}
+
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
+ if (V.getOpcode() != ISD::XOR)
+ return false;
+ V = peekThroughBitcasts(V.getOperand(1));
+ unsigned NumBits = V.getScalarValueSizeInBits();
+ ConstantSDNode *C =
+ isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+ return C && (C->getAPIntValue().countr_one() >= NumBits);
+}
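+
+// Illustrative sketch, not part of the upstream source: the predicate above
+// is typically used to recognize 'not X', i.e. (xor X, all-ones), when
+// matching and-not or or-not style patterns in combines.
+[[maybe_unused]] static bool exampleIsNotOf(SDValue V, SDValue X) {
+ return isBitwiseNot(V, /*AllowUndefs=*/false) && V.getOperand(0) == X;
+}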
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+ bool AllowTruncation) {
+ EVT VT = N.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorMinNumElements())
+ : APInt(1, 1);
+ return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation);
+}
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs,
+ bool AllowTruncation) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ // SplatVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ EVT VecEltVT = N->getValueType(0).getVectorElementType();
+ if (auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ EVT CVT = CN->getValueType(0);
+ assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension");
+ if (AllowTruncation || CVT == VecEltVT)
+ return CN;
+ }
+ }
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ // TODO: Look into whether we should allow UndefElements in non-DemandedElts
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
+ }
+
+ return nullptr;
+}
+
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
+ EVT VT = N.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorMinNumElements())
+ : APInt(1, 1);
+ return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs);
+}
+
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
+ const APInt &DemandedElts,
+ bool AllowUndefs) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN =
+ BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+ // TODO: Look into whether we should allow UndefElements in non-DemandedElts
+ if (CN && (UndefElements.none() || AllowUndefs))
+ return CN;
+ }
+
+ if (N.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
+ return CN;
+
+ return nullptr;
+}
+
+bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
+ // TODO: may want to use peekThroughBitcast() here.
+ ConstantSDNode *C =
+ isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);
+ return C && C->isZero();
+}
+
+bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
+ ConstantSDNode *C =
+ isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true);
+ return C && C->isOne();
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
+ N = peekThroughBitcasts(N);
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
+ return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
+ const DebugLoc &DL,
+ const GlobalValue *GA, EVT VT,
+ int64_t o, unsigned TF)
+ : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
+ EVT VT, unsigned SrcAS,
+ unsigned DestAS)
+ : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
+ SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+
+MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
+ : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ MemSDNodeBits.IsVolatile = MMO->isVolatile();
+ MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
+ MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
+ MemSDNodeBits.IsInvariant = MMO->isInvariant();
+
+ // We check here that the size of the memory operand fits within the size of
+ // the MMO. This is because the MMO might indicate only a possible address
+ // range instead of specifying the affected memory addresses precisely.
+ // TODO: Make MachineMemOperands aware of scalable vectors.
+ assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() &&
+ "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::VALUETYPE_SIZE);
+ for (unsigned i = 0; i < MVT::VALUETYPE_SIZE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+
+} // end anonymous namespace
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ static std::set<EVT, EVT::compareRawBits> EVTs;
+ static EVTArray SimpleVTArray;
+ static sys::SmartMutex<true> VTMutex;
+
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(VTMutex);
+ return &(*EVTs.insert(VT).first);
+ }
+ assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!");
+ return &SimpleVTArray.VTs[VT.getSimpleVT().SimpleTy];
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+/// hasAnyUseOfValue - Return true if there are any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+bool SDNode::isOnlyUserOf(const SDNode *N) const {
+ bool Seen = false;
+ for (const SDNode *User : N->uses()) {
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// Return true if the only users of N are contained in Nodes.
+bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
+ bool Seen = false;
+ for (const SDNode *User : N->uses()) {
+ if (llvm::is_contained(Nodes, User))
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperand - Return true if this node is an operand of N.
+bool SDValue::isOperandOf(const SDNode *N) const {
+ return is_contained(N->op_values(), *this);
+}
+
+bool SDNode::isOperandOf(const SDNode *N) const {
+ return any_of(N->op_values(),
+ [this](SDValue Op) { return this == Op.getNode(); });
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+///
+/// Note that we only need to examine chains when we're searching for
+/// side-effects; SelectionDAG requires that all side-effects are represented
+/// by chains, even if another operand would force a specific ordering. This
+/// constraint is necessary to allow transformations like splitting loads.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel.
+ if (getOpcode() == ISD::TokenFactor) {
+ // First, try a shallow search.
+ if (is_contained((*this)->ops(), Dest)) {
+ // We found the chain we want as an operand of this TokenFactor.
+ // Essentially, we reach the chain without side-effects if we could
+ // serialize the TokenFactor into a simple chain of operations with
+ // Dest as the last operation. This is automatically true if the
+ // chain has one use: there are no other ordering constraints.
+ // If the chain has more than one use, we give up: some other
+ // use of Dest might force a side-effect between Dest and the current
+ // node.
+ if (Dest.hasOneUse())
+ return true;
+ }
+ // Next, try a deep search: check whether every operand of the TokenFactor
+ // reaches Dest.
+ return llvm::all_of((*this)->ops(), [=](SDValue Op) {
+ return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
+ });
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (Ld->isUnordered())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
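+
+// Illustrative sketch, not part of the upstream source: a combine that wants
+// to fold a load into another operation can use the query above to make sure
+// no side-effecting operation may be ordered between the load's chain result
+// and the candidate chain. The default search depth keeps this cheap.
+[[maybe_unused]] static bool exampleSafeToFoldLoad(SDValue CandidateChain,
+ LoadSDNode *Load) {
+ return CandidateChain.reachesChainWithoutSideEffects(SDValue(Load, 1));
+}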
+
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(this);
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
+ this->Flags.intersectWith(Flags);
+}
+
+SDValue
+SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials) {
+ // The pattern must end in an extract from index 0.
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isNullConstant(Extract->getOperand(1)))
+ return SDValue();
+
+ // Match against one of the candidate binary ops.
+ SDValue Op = Extract->getOperand(0);
+ if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
+ return Op.getOpcode() == unsigned(BinOp);
+ }))
+ return SDValue();
+
+ // Floating-point reductions may require relaxed constraints on the final step
+ // of the reduction because they may reorder intermediate operations.
+ unsigned CandidateBinOp = Op.getOpcode();
+ if (Op.getValueType().isFloatingPoint()) {
+ SDNodeFlags Flags = Op->getFlags();
+ switch (CandidateBinOp) {
+ case ISD::FADD:
+ if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation())
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unhandled FP opcode for binop reduction");
+ }
+ }
+
+ // Matching failed - attempt to see if we did enough stages that a partial
+ // reduction from a subvector is possible.
+ auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) {
+ if (!AllowPartials || !Op)
+ return SDValue();
+ EVT OpVT = Op.getValueType();
+ EVT OpSVT = OpVT.getScalarType();
+ EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts);
+ if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
+ return SDValue();
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getVectorIdxConstant(0, SDLoc(Op)));
+ };
+
+ // At each stage, we're looking for something that looks like:
+ // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
+ // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
+ // i32 undef, i32 undef, i32 undef, i32 undef>
+ // %a = binop <8 x i32> %op, %s
+ // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
+ // we expect something like:
+ // <4,5,6,7,u,u,u,u>
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ // While a partial reduction match would be:
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+ SDValue PrevOp;
+ for (unsigned i = 0; i < Stages; ++i) {
+ unsigned MaskEnd = (1 << i);
+
+ if (Op.getOpcode() != CandidateBinOp)
+ return PartialReduction(PrevOp, MaskEnd);
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0);
+ if (Shuffle) {
+ Op = Op1;
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1);
+ Op = Op0;
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the binop.
+ if (!Shuffle || Shuffle->getOperand(0) != Op)
+ return PartialReduction(PrevOp, MaskEnd);
+
+ // Verify the shuffle has the expected (at this stage of the pyramid) mask.
+ for (int Index = 0; Index < (int)MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index))
+ return PartialReduction(PrevOp, MaskEnd);
+
+ PrevOp = Op;
+ }
+
+ // Handle subvector reductions, which tend to appear after the shuffle
+ // reduction stages.
+ while (Op.getOpcode() == CandidateBinOp) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op0.getOperand(0) != Op1.getOperand(0))
+ break;
+ SDValue Src = Op0.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (NumSrcElts != (2 * NumElts))
+ break;
+ if (!(Op0.getConstantOperandAPInt(1) == 0 &&
+ Op1.getConstantOperandAPInt(1) == NumElts) &&
+ !(Op1.getConstantOperandAPInt(1) == 0 &&
+ Op0.getConstantOperandAPInt(1) == NumElts))
+ break;
+ Op = Src;
+ }
+
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return Op;
+}
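+
+// Illustrative sketch, not part of the upstream source: target lowering code
+// that recognizes a horizontal integer-add reduction feeding an
+// EXTRACT_VECTOR_ELT of element 0. 'Extract' is assumed to be that extract.
+[[maybe_unused]] static SDValue exampleMatchAddReduction(SelectionDAG &DAG,
+ SDNode *Extract) {
+ ISD::NodeType BinOp;
+ // On success the result is the vector being reduced and BinOp the matched
+ // opcode; on failure an empty SDValue is returned.
+ return DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD},
+ /*AllowPartials=*/false);
+}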
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NE = VT.getVectorNumElements();
+
+ SDLoc dl(N);
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ if (N->getNumValues() == 2) {
+ SmallVector<SDValue, 8> Scalars0, Scalars1;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+ EVT VT1 = N->getValueType(1);
+ EVT EltVT1 = VT1.getVectorElementType();
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, getVectorIdxConstant(i, dl));
+ }
+
+ SDValue EltOp = getNode(N->getOpcode(), dl, {EltVT, EltVT1}, Operands);
+ Scalars0.push_back(EltOp);
+ Scalars1.push_back(EltOp.getValue(1));
+ }
+
+ SDValue Vec0 = getBuildVector(VT, dl, Scalars0);
+ SDValue Vec1 = getBuildVector(VT1, dl, Scalars1);
+ return getMergeValues({Vec0, Vec1}, dl);
+ }
+
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ unsigned i;
+  for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, getVectorIdxConstant(i, dl));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
+ break;
+ }
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
+ return getBuildVector(VecVT, dl, Scalars);
+}
+
+std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
+ SDNode *N, unsigned ResNE) {
+ unsigned Opcode = N->getOpcode();
+ assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO ||
+ Opcode == ISD::USUBO || Opcode == ISD::SSUBO ||
+ Opcode == ISD::UMULO || Opcode == ISD::SMULO) &&
+ "Expected an overflow opcode");
+
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT ResEltVT = ResVT.getVectorElementType();
+ EVT OvEltVT = OvVT.getVectorElementType();
+ SDLoc dl(N);
+
+ // If ResNE is 0, fully unroll the vector op.
+ unsigned NE = ResVT.getVectorNumElements();
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ SmallVector<SDValue, 8> LHSScalars;
+ SmallVector<SDValue, 8> RHSScalars;
+ ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
+ ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
+
+ EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
+ SDVTList VTs = getVTList(ResEltVT, SVT);
+ SmallVector<SDValue, 8> ResScalars;
+ SmallVector<SDValue, 8> OvScalars;
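+  // The scalar overflow flag is produced in the target's setcc result type
+  // (SVT), which may differ from OvEltVT; the select below normalizes it to
+  // the boolean "true" value expected for the overflow vector (1 or all-ones,
+  // per the target's boolean contents) or zero.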
+ for (unsigned i = 0; i < NE; ++i) {
+ SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
+ SDValue Ov =
+ getSelect(dl, OvEltVT, Res.getValue(1),
+ getBoolConstant(true, dl, OvEltVT, ResVT),
+ getConstant(0, dl, OvEltVT));
+
+ ResScalars.push_back(Res);
+ OvScalars.push_back(Ov);
+ }
+
+ ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
+ OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
+
+ EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
+ EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
+ return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
+ getBuildVector(NewOvVT, dl, OvScalars));
+}
+
+bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
+ LoadSDNode *Base,
+ unsigned Bytes,
+ int Dist) const {
+ if (LD->isVolatile() || Base->isVolatile())
+ return false;
+ // TODO: probably too restrictive for atomics, revisit
+ if (!LD->isSimple())
+ return false;
+ if (LD->isIndexed() || Base->isIndexed())
+ return false;
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getMemoryVT();
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
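+  // Dist is measured in units of Bytes: e.g. with 4-byte loads, Dist == 1
+  // means LD must start exactly 4 bytes after Base.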
+ auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+ auto LocDecomp = BaseIndexOffset::match(LD, *this);
+
+ int64_t Offset = 0;
+ if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
+ return (Dist * (int64_t)Bytes == Offset);
+ return false;
+}
+
+/// InferPtrAlign - Infer alignment of a load / store address. Return
+/// std::nullopt if it cannot be inferred.
+MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
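+  // E.g. if the global is known to be at least 16-byte aligned (four trailing
+  // zero bits in its address) and GVOffset is 4, the inferred alignment is 4.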
+ const GlobalValue *GV = nullptr;
+ int64_t GVOffset = 0;
+ if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ KnownBits Known(PtrWidth);
+ llvm::computeKnownBits(GV, Known, getDataLayout());
+ unsigned AlignBits = Known.countMinTrailingZeros();
+ if (AlignBits)
+ return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = INT_MIN;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != INT_MIN) {
+ const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+ return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset);
+ }
+
+ return std::nullopt;
+}
+
+/// Split the scalar node with EXTRACT_ELEMENT using the provided
+/// VTs and return the low/high part.
+std::pair<SDValue, SDValue> SelectionDAG::SplitScalar(const SDValue &N,
+ const SDLoc &DL,
+ const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(!LoVT.isVector() && !HiVT.isVector() && !N.getValueType().isVector() &&
+ "Split node must be a scalar type");
+ SDValue Lo =
+ getNode(ISD::EXTRACT_ELEMENT, DL, LoVT, N, getIntPtrConstant(0, DL));
+ SDValue Hi =
+ getNode(ISD::EXTRACT_ELEMENT, DL, HiVT, N, getIntPtrConstant(1, DL));
+ return std::make_pair(Lo, Hi);
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split (or expanded) into two not necessarily identical pieces.
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
+ // Currently all types are split in half.
+ EVT LoVT, HiVT;
+ if (!VT.isVector())
+ LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
+ else
+ LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext());
+
+ return std::make_pair(LoVT, HiVT);
+}
+
+/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a
+/// type, dependent on an enveloping VT that has been split into two identical
+/// pieces. Sets the HiIsEmpty flag when hi type has zero storage size.
+std::pair<EVT, EVT>
+SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
+ bool *HiIsEmpty) const {
+ EVT EltTp = VT.getVectorElementType();
+ // Examples:
+ // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty)
+ // custom VL=9 with enveloping VL=8/8 yields 8/1
+ // custom VL=10 with enveloping VL=8/8 yields 8/2
+ // etc.
+ ElementCount VTNumElts = VT.getVectorElementCount();
+ ElementCount EnvNumElts = EnvVT.getVectorElementCount();
+ assert(VTNumElts.isScalable() == EnvNumElts.isScalable() &&
+ "Mixing fixed width and scalable vectors when enveloping a type");
+ EVT LoVT, HiVT;
+ if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
+ *HiIsEmpty = false;
+ } else {
+    // Flag that hi type has zero storage size, but return split envelope type
+ // (this would be easier if vector types with zero elements were allowed).
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
+ *HiIsEmpty = true;
+ }
+ return std::make_pair(LoVT, HiVT);
+}
+
+/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
+/// low/high part.
+std::pair<SDValue, SDValue>
+SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(LoVT.isScalableVector() == HiVT.isScalableVector() &&
+ LoVT.isScalableVector() == N.getValueType().isScalableVector() &&
+ "Splitting vector with an invalid mixture of fixed and scalable "
+ "vector types");
+ assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <=
+ N.getValueType().getVectorMinNumElements() &&
+ "More vector elements requested than available!");
+ SDValue Lo, Hi;
+ Lo =
+ getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL));
+ // For scalable vectors it is safe to use LoVT.getVectorMinNumElements()
+ // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales
+ // IDX with the runtime scaling factor of the result vector type. For
+ // fixed-width result vectors, that runtime scaling factor is 1.
+ Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
+ getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+ return std::make_pair(Lo, Hi);
+}
+
+std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT,
+ const SDLoc &DL) {
+ // Split the vector length parameter.
+  // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl, %halfnumelts).
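+  // E.g. splitting the EVL for a v8i32 vector with %evl == 5 gives a low EVL
+  // of umin(5, 4) == 4 and a high EVL of usubsat(5, 4) == 1.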
+ EVT VT = N.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+ "Expecting the mask to be an evenly-sized vector");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? getConstant(HalfMinNumElts, DL, VT)
+ : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts);
+ SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts);
+ return std::make_pair(Lo, Hi);
+}
+
+/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
+ EVT VT = N.getValueType();
+ EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NextPowerOf2(VT.getVectorNumElements()));
+ return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
+ getVectorIdxConstant(0, DL));
+}
+
+void SelectionDAG::ExtractVectorElements(SDValue Op,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count,
+ EVT EltVT) {
+ EVT VT = Op.getValueType();
+ if (Count == 0)
+ Count = VT.getVectorNumElements();
+ if (EltVT == EVT())
+ EltVT = VT.getVectorElementType();
+ SDLoc SL(Op);
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+ Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op,
+ getVectorIdxConstant(i, SL)));
+ }
+}
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool IsBigEndian) const {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned VecWidth = VT.getSizeInBits();
+ if (MinSplatBits > VecWidth)
+ return false;
+
+ // FIXME: The widths are based on this node's type, but build vectors can
+ // truncate their operands.
+ SplatValue = APInt(VecWidth, 0);
+ SplatUndef = APInt(VecWidth, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int NumOps = getNumOperands();
+ assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltWidth = VT.getScalarSizeInBits();
+
+ for (unsigned j = 0; j < NumOps; ++j) {
+ unsigned i = IsBigEndian ? NumOps - 1 - j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltWidth;
+
+ if (OpVal.isUndef())
+ SplatUndef.setBits(BitPos, BitPos + EltWidth);
+ else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+ HasAnyUndefs = (SplatUndef != 0);
+
+ // FIXME: This does not work for vectors with elements less than 8 bits.
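+  // E.g. a v4i8 build vector of <1, 1, 1, 1> starts with a 32-bit splat value
+  // of 0x01010101; both 16-bit halves match, as do the 8-bit halves of those,
+  // so the reported SplatBitSize is 8.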
+ while (VecWidth > 8) {
+ unsigned HalfSize = VecWidth / 2;
+ APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
+ APInt LowValue = SplatValue.extractBits(HalfSize, 0);
+ APInt HighUndef = SplatUndef.extractBits(HalfSize, HalfSize);
+ APInt LowUndef = SplatUndef.extractBits(HalfSize, 0);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ VecWidth = HalfSize;
+ }
+
+ SplatBitSize = VecWidth;
+ return true;
+}
+
+SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ unsigned NumOps = getNumOperands();
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(NumOps);
+ }
+ assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
+ if (!DemandedElts)
+ return SDValue();
+ SDValue Splatted;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ SDValue Op = getOperand(i);
+ if (Op.isUndef()) {
+ if (UndefElements)
+ (*UndefElements)[i] = true;
+ } else if (!Splatted) {
+ Splatted = Op;
+ } else if (Splatted != Op) {
+ return SDValue();
+ }
+ }
+
+ if (!Splatted) {
+ unsigned FirstDemandedIdx = DemandedElts.countr_zero();
+ assert(getOperand(FirstDemandedIdx).isUndef() &&
+ "Can only have a splat without a constant for all undefs.");
+ return getOperand(FirstDemandedIdx);
+ }
+
+ return Splatted;
+}
+
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
+ return getSplatValue(DemandedElts, UndefElements);
+}
+
+bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
+ SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements) const {
+ unsigned NumOps = getNumOperands();
+ Sequence.clear();
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(NumOps);
+ }
+ assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
+ if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps))
+ return false;
+
+ // Set the undefs even if we don't find a sequence (like getSplatValue).
+ if (UndefElements)
+ for (unsigned I = 0; I != NumOps; ++I)
+ if (DemandedElts[I] && getOperand(I).isUndef())
+ (*UndefElements)[I] = true;
+
+ // Iteratively widen the sequence length looking for repetitions.
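+  // E.g. a build vector of <a, b, a, b> fails at SeqLen == 1 (a != b) but
+  // succeeds at SeqLen == 2, returning the sequence {a, b}.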
+ for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) {
+ Sequence.append(SeqLen, SDValue());
+ for (unsigned I = 0; I != NumOps; ++I) {
+ if (!DemandedElts[I])
+ continue;
+ SDValue &SeqOp = Sequence[I % SeqLen];
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+ if (!SeqOp)
+ SeqOp = Op;
+ continue;
+ }
+ if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) {
+ Sequence.clear();
+ break;
+ }
+ SeqOp = Op;
+ }
+ if (!Sequence.empty())
+ return true;
+ }
+
+ assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern");
+ return false;
+}
+
+bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
+ return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
+}
+
+int32_t
+BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const {
+ if (ConstantFPSDNode *CN =
+ dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
+ bool IsExact;
+ APSInt IntVal(BitWidth);
+ const APFloat &APF = CN->getValueAPF();
+ if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
+ APFloat::opOK ||
+ !IsExact)
+ return -1;
+
+ return IntVal.exactLogBase2();
+ }
+ return -1;
+}
+
+bool BuildVectorSDNode::getConstantRawBits(
+ bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {
+ // Early-out if this contains anything but Undef/Constant/ConstantFP.
+ if (!isConstant())
+ return false;
+
+ unsigned NumSrcOps = getNumOperands();
+ unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+
+ // Extract raw src bits.
+ SmallVector<APInt> SrcBitElements(NumSrcOps,
+ APInt::getZero(SrcEltSizeInBits));
+  BitVector SrcUndefElements(NumSrcOps, false);
+
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+      SrcUndefElements.set(I);
+ continue;
+ }
+ auto *CInt = dyn_cast<ConstantSDNode>(Op);
+ auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
+ assert((CInt || CFP) && "Unknown constant");
+ SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
+ }
+
+ // Recast to dst width.
+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,
+                SrcBitElements, UndefElements, SrcUndefElements);
+ return true;
+}
+
+void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
+ unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements) {
+ unsigned NumSrcOps = SrcBitElements.size();
+ unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+ assert(NumSrcOps == SrcUndefElements.size() &&
+ "Vector size mismatch");
+
+ unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
+ DstUndefElements.clear();
+ DstUndefElements.resize(NumDstOps, false);
+ DstBitElements.assign(NumDstOps, APInt::getZero(DstEltSizeInBits));
+
+ // Concatenate src elements constant bits together into dst element.
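+  // E.g. recasting four 8-bit elements into one 32-bit element on a
+  // little-endian target places element 0 in bits [0,8) and element 3 in
+  // bits [24,32); the destination is undef only if all four sources are.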
+ if (SrcEltSizeInBits <= DstEltSizeInBits) {
+ unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;
+ for (unsigned I = 0; I != NumDstOps; ++I) {
+ DstUndefElements.set(I);
+ APInt &DstBits = DstBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ if (SrcUndefElements[Idx])
+ continue;
+ DstUndefElements.reset(I);
+ const APInt &SrcBits = SrcBitElements[Idx];
+ assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&
+ "Illegal constant bitwidths");
+ DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);
+ }
+ }
+ return;
+ }
+
+ // Split src element constant bits into dst elements.
+ unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ if (SrcUndefElements[I]) {
+ DstUndefElements.set(I * Scale, (I + 1) * Scale);
+ continue;
+ }
+ const APInt &SrcBits = SrcBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ APInt &DstBits = DstBitElements[Idx];
+ DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);
+ }
+ }
+}
+
+bool BuildVectorSDNode::isConstant() const {
+ for (const SDValue &Op : op_values()) {
+ unsigned Opc = Op.getOpcode();
+ if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
+ return false;
+ }
+ return true;
+}
+
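+// Returns the (Start, Stride) pair if this BUILD_VECTOR is a constant
+// arithmetic sequence with a non-zero stride, e.g.
+// <i32 4, i32 6, i32 8, i32 10> yields Start = 4, Stride = 2.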
+std::optional<std::pair<APInt, APInt>>
+BuildVectorSDNode::isConstantSequence() const {
+ unsigned NumOps = getNumOperands();
+ if (NumOps < 2)
+ return std::nullopt;
+
+ if (!isa<ConstantSDNode>(getOperand(0)) ||
+ !isa<ConstantSDNode>(getOperand(1)))
+ return std::nullopt;
+
+ unsigned EltSize = getValueType(0).getScalarSizeInBits();
+ APInt Start = getConstantOperandAPInt(0).trunc(EltSize);
+ APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start;
+
+ if (Stride.isZero())
+ return std::nullopt;
+
+ for (unsigned i = 2; i < NumOps; ++i) {
+ if (!isa<ConstantSDNode>(getOperand(i)))
+ return std::nullopt;
+
+ APInt Val = getConstantOperandAPInt(i).trunc(EltSize);
+ if (Val != (Start + (Stride * i)))
+ return std::nullopt;
+ }
+
+ return std::make_pair(Start, Stride);
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
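+  // E.g. the mask <-1, 2, -1, 2> is a splat (of element 2), while
+  // <0, 2, 2, 2> is not, because element 0 selects a different input element.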
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ // If all elements are undefined, this shuffle can be considered a splat
+ // (although it should eventually get simplified away completely).
+ if (i == e)
+ return true;
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+// Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ // Treat a GlobalAddress supporting constant offset folding as a
+ // constant integer.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
+ if (GA->getOpcode() == ISD::GlobalAddress &&
+ TLI->isOffsetFoldingLegal(GA))
+ return GA;
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantSDNode>(N.getOperand(0)))
+ return N.getNode();
+ return nullptr;
+}
+
+// Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantFPSDNode>(N.getOperand(0)))
+ return N.getNode();
+
+ return nullptr;
+}
+
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+ assert(!Node->OperandList && "Node already has operands");
+ assert(SDNode::getMaxNumOperands() >= Vals.size() &&
+ "too many operands to fit into SDNode");
+ SDUse *Ops = OperandRecycler.allocate(
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+ bool IsDivergent = false;
+ for (unsigned I = 0; I != Vals.size(); ++I) {
+ Ops[I].setUser(Node);
+ Ops[I].setInitial(Vals[I]);
+    // Skip Chain. It does not carry divergence.
+    if (Ops[I].Val.getValueType() != MVT::Other)
+ IsDivergent |= Ops[I].getNode()->isDivergent();
+ }
+ Node->NumOperands = Vals.size();
+ Node->OperandList = Ops;
+ if (!TLI->isSDNodeAlwaysUniform(Node)) {
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA);
+ Node->SDNodeBits.IsDivergent = IsDivergent;
+ }
+ checkForCycles(Node);
+}
+
+SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
+ SmallVectorImpl<SDValue> &Vals) {
+ size_t Limit = SDNode::getMaxNumOperands();
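+  // If there are more operands than a single node can hold, repeatedly fold
+  // the trailing Limit values into an intermediate TokenFactor until the
+  // remainder fits in one node.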
+ while (Vals.size() > Limit) {
+ unsigned SliceIdx = Vals.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit);
+ SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs);
+ Vals.erase(Vals.begin() + SliceIdx, Vals.end());
+ Vals.emplace_back(NewTF);
+ }
+ return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
+}
+
+SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDNodeFlags Flags) {
+ switch (Opcode) {
+ default:
+ return SDValue();
+ case ISD::ADD:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UMAX:
+ return getConstant(0, DL, VT);
+ case ISD::MUL:
+ return getConstant(1, DL, VT);
+ case ISD::AND:
+ case ISD::UMIN:
+ return getAllOnesConstant(DL, VT);
+ case ISD::SMAX:
+ return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT);
+ case ISD::SMIN:
+ return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT);
+ case ISD::FADD:
+ return getConstantFP(-0.0, DL, VT);
+ case ISD::FMUL:
+ return getConstantFP(1.0, DL, VT);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
+ !Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
+ APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXNUM)
+ NeutralAF.changeSign();
+
+ return getConstantFP(NeutralAF, DL, VT);
+ }
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ // Neutral element for fminimum is Inf or FLT_MAX, depending on FMF.
+ const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics)
+ : APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXIMUM)
+ NeutralAF.changeSign();
+
+ return getConstantFP(NeutralAF, DL, VT);
+ }
+  }
+}
+
+/// Helper used to make a call to a library function that has one argument of
+/// pointer type.
+///
+/// Such functions include 'fegetmode', 'fesetenv' and some others, which are
+/// used to get or set floating-point state. They have one argument of pointer
+/// type, which points to the memory region containing bits of the
+/// floating-point state. The value returned by such function is ignored in the
+/// created call.
+///
+/// \param LibFunc Reference to library function (value of RTLIB::Libcall).
+/// \param Ptr Pointer used to save/load state.
+/// \param InChain Ingoing token chain.
+/// \returns Outgoing chain token.
+SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr,
+ SDValue InChain,
+ const SDLoc &DLoc) {
+ assert(InChain.getValueType() == MVT::Other && "Expected token chain");
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Ptr;
+ Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext());
+ Args.push_back(Entry);
+ RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc);
+ SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC),
+ TLI->getPointerTy(getDataLayout()));
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee,
+ std::move(Args));
+ return TLI->LowerCallTo(CLI).second;
+}
+
+void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
+ assert(From && To && "Invalid SDNode; empty source SDValue?");
+ auto I = SDEI.find(From);
+ if (I == SDEI.end())
+ return;
+
+ // Use of operator[] on the DenseMap may cause an insertion, which invalidates
+ // the iterator, hence the need to make a copy to prevent a use-after-free.
+ NodeExtraInfo NEI = I->second;
+ if (LLVM_LIKELY(!NEI.PCSections)) {
+ // No deep copy required for the types of extra info set.
+ //
+ // FIXME: Investigate if other types of extra info also need deep copy. This
+ // depends on the types of nodes they can be attached to: if some extra info
+ // is only ever attached to nodes where a replacement To node is always the
+ // node where later use and propagation of the extra info has the intended
+ // semantics, no deep copy is required.
+ SDEI[To] = std::move(NEI);
+ return;
+ }
+
+ // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced
+ // through the replacement of From with To. Otherwise, replacements of a node
+ // (From) with more complex nodes (To and its operands) may result in lost
+ // extra info where the root node (To) is insignificant in further propagating
+ // and using extra info when further lowering to MIR.
+ //
+ // In the first step pre-populate the visited set with the nodes reachable
+ // from the old From node. This avoids copying NodeExtraInfo to parts of the
+ // DAG that is not new and should be left untouched.
+ SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom.
+ DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From.
+ auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) {
+ if (MaxDepth == 0) {
+ // Remember this node in case we need to increase MaxDepth and continue
+ // populating FromReach from this node.
+ Leafs.emplace_back(N);
+ return;
+ }
+ if (!FromReach.insert(N).second)
+ return;
+ for (const SDValue &Op : N->op_values())
+ Self(Self, Op.getNode(), MaxDepth - 1);
+ };
+
+ // Copy extra info to To and all its transitive operands (that are new).
+ SmallPtrSet<const SDNode *, 8> Visited;
+ auto DeepCopyTo = [&](auto &&Self, const SDNode *N) {
+ if (FromReach.contains(N))
+ return true;
+ if (!Visited.insert(N).second)
+ return true;
+ if (getEntryNode().getNode() == N)
+ return false;
+ for (const SDValue &Op : N->op_values()) {
+ if (!Self(Self, Op.getNode()))
+ return false;
+ }
+ // Copy only if entry node was not reached.
+ SDEI[N] = NEI;
+ return true;
+ };
+
+ // We first try with a lower MaxDepth, assuming that the path to common
+ // operands between From and To is relatively short. This significantly
+ // improves performance in the common case. The initial MaxDepth is big
+ // enough to avoid retry in the common case; the last MaxDepth is large
+ // enough to avoid having to use the fallback below (and protects from
+ // potential stack exhaustion from recursion).
+ for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024;
+ PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) {
+ // StartFrom is the previous (or initial) set of leafs reachable at the
+ // previous maximum depth.
+ SmallVector<const SDNode *> StartFrom;
+ std::swap(StartFrom, Leafs);
+ for (const SDNode *N : StartFrom)
+ VisitFrom(VisitFrom, N, MaxDepth - PrevDepth);
+ if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To)))
+ return;
+ // This should happen very rarely (reached the entry node).
+ LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n");
+ assert(!Leafs.empty());
+ }
+
+ // This should not happen - but if it did, that means the subgraph reachable
+ // from From has depth greater or equal to maximum MaxDepth, and VisitFrom()
+ // could not visit all reachable common operands. Consequently, we were able
+ // to reach the entry node.
+ errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n";
+ assert(false && "From subgraph too complex - increase max. MaxDepth?");
+ // Best-effort fallback if assertions disabled.
+ SDEI[To] = std::move(NEI);
+}
+
+#ifndef NDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSetImpl<const SDNode*> &Visited,
+ SmallPtrSetImpl<const SDNode*> &Checked,
+ const llvm::SelectionDAG *DAG) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N).second) {
+ errs() << "Detected cycle in SelectionDAG\n";
+ dbgs() << "Offending node:\n";
+ N->dumprFull(DAG); dbgs() << "\n";
+ abort();
+ }
+
+ for (const SDValue &Op : N->op_values())
+ checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N,
+ const llvm::SelectionDAG *DAG,
+ bool force) {
+#ifndef NDEBUG
+ bool check = force;
+#ifdef EXPENSIVE_CHECKS
+ check = true;
+#endif // EXPENSIVE_CHECKS
+ if (check) {
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked, DAG);
+ }
+#endif // !NDEBUG
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+ checkForCycles(DAG->getRoot().getNode(), DAG, force);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
new file mode 100644
index 000000000000..a432d8e92bca
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -0,0 +1,324 @@
+//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+using namespace llvm;
+
+bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG,
+ int64_t &Off) const {
+  // Conservatively fail if either match failed.
+ if (!Base.getNode() || !Other.Base.getNode())
+ return false;
+ if (!hasValidOffset() || !Other.hasValidOffset())
+ return false;
+ // Initial Offset difference.
+ Off = *Other.Offset - *Offset;
+
+ if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+ // Trivial match.
+ if (Other.Base == Base)
+ return true;
+
+ // Match GlobalAddresses
+ if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if (A->getGlobal() == B->getGlobal()) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+
+ // Match Constants
+ if (auto *A = dyn_cast<ConstantPoolSDNode>(Base))
+ if (auto *B = dyn_cast<ConstantPoolSDNode>(Other.Base)) {
+ bool IsMatch =
+ A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry();
+ if (IsMatch) {
+ if (A->isMachineConstantPoolEntry())
+ IsMatch = A->getMachineCPVal() == B->getMachineCPVal();
+ else
+ IsMatch = A->getConstVal() == B->getConstVal();
+ }
+ if (IsMatch) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+ }
+
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ // Match FrameIndexes.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+ // Equal FrameIndexes - offsets are directly comparable.
+ if (A->getIndex() == B->getIndex())
+ return true;
+ // Non-equal FrameIndexes - If both frame indices are fixed
+ // we know their relative offsets and can compare them. Otherwise
+ // we must be conservative.
+ if (MFI.isFixedObjectIndex(A->getIndex()) &&
+ MFI.isFixedObjectIndex(B->getIndex())) {
+ Off += MFI.getObjectOffset(B->getIndex()) -
+ MFI.getObjectOffset(A->getIndex());
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+ const std::optional<int64_t> NumBytes0,
+ const SDNode *Op1,
+ const std::optional<int64_t> NumBytes1,
+ const SelectionDAG &DAG, bool &IsAlias) {
+
+ BaseIndexOffset BasePtr0 = match(Op0, DAG);
+ BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+ if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+ return false;
+ int64_t PtrDiff;
+ if (NumBytes0 && NumBytes1 &&
+ BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+    // If the size of a memory access is unknown, do not use it in the
+    // analysis. One example of an unknown-size memory access is a load/store
+    // of a scalable vector object on the stack.
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ if (PtrDiff >= 0 &&
+ *NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(*NumBytes0 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ *NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + *NumBytes1) <= 0);
+ return true;
+ }
+ return false;
+ }
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // The frame indices differ and at least one is not a fixed object, so
+      // we could not compute a relative offset; distinct stack objects cannot
+      // overlap, so report no alias.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+ if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+    // We can derive NoAlias in case of mismatched base types.
+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) {
+ IsAlias = false;
+ return true;
+ }
+ if (IsGV0 && IsGV1) {
+ auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal();
+ auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal();
+      // It doesn't make sense to access one global value using another
+      // global's address, so we can assume that there is no aliasing in case
+      // of two different globals (unless we have symbols that may indirectly
+      // point to each other).
+      // FIXME: This is perhaps a bit too defensive. We could try to follow
+      // the chain with aliasee information for GlobalAlias variables to find
+      // out whether the indirect symbols may alias or not.
+ if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) {
+ IsAlias = false;
+ return true;
+ }
+ }
+ }
+ return false; // Cannot determine whether the pointers alias.
+}
+
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+ const BaseIndexOffset &Other,
+ int64_t OtherBitSize, int64_t &BitOffset) const {
+ int64_t Offset;
+ if (!equalBaseIndex(Other, DAG, Offset))
+ return false;
+ if (Offset >= 0) {
+ // Other is after *this:
+ // [-------*this---------]
+ // [---Other--]
+ // ==Offset==>
+ BitOffset = 8 * Offset;
+ return BitOffset + OtherBitSize <= BitSize;
+ }
+ // Other starts strictly before *this, it cannot be fully contained.
+ // [-------*this---------]
+ // [--Other--]
+ return false;
+}
+
+/// Parses tree in Ptr for base, index, offset addresses.
+static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+ const SelectionDAG &DAG) {
+ SDValue Ptr = N->getBasePtr();
+
+ // (((B + I*M) + c)) + c ...
+ SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
+ SDValue Index = SDValue();
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+
+ // pre-inc/pre-dec ops are components of EA.
+ if (N->getAddressingMode() == ISD::PRE_INC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset += C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset -= C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ }
+
+ // Consume constant adds & ors with appropriate masking.
+ while (true) {
+ switch (Base->getOpcode()) {
+ case ISD::OR:
+ // Only consider ORs which act as adds.
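+      // E.g. (or (shl %x, 4), 3) is equivalent to (add (shl %x, 4), 3)
+      // because the low bits of the shifted value are known to be zero.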
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
+ if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
+ Offset += C->getSExtValue();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
+ continue;
+ }
+ break;
+ case ISD::ADD:
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ Offset += C->getSExtValue();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
+ continue;
+ }
+ break;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ auto *LSBase = cast<LSBaseSDNode>(Base.getNode());
+ unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0;
+ if (LSBase->isIndexed() && Base.getResNo() == IndexResNo)
+ if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) {
+ auto Off = C->getSExtValue();
+ if (LSBase->getAddressingMode() == ISD::PRE_DEC ||
+ LSBase->getAddressingMode() == ISD::POST_DEC)
+ Offset -= Off;
+ else
+ Offset += Off;
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr());
+ continue;
+ }
+ break;
+ }
+ }
+    // If we get here, break out of the loop.
+ break;
+ }
+
+ if (Base->getOpcode() == ISD::ADD) {
+ // TODO: The following code appears to be needless as it just
+ // bails on some Ptrs early, reducing the cases where we
+ // find equivalence. We should be able to remove this.
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Base is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Base->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ Index = Base->getOperand(1);
+ SDValue PotentialBase = Base->getOperand(0);
+
+ // Skip signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+    // Check for an (Index + Offset) pattern.
+ if (Index->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Index->getOperand(1)))
+ return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
+
+ Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue();
+ Index = Index->getOperand(0);
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else
+ IsIndexSignExt = false;
+ Base = PotentialBase;
+ }
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+}
+
+BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
+ const SelectionDAG &DAG) {
+ if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
+ return matchLSNode(LS0, DAG);
+ if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
+ if (LN->hasOffset())
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),
+ false);
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), false);
+ }
+ return BaseIndexOffset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+LLVM_DUMP_METHOD void BaseIndexOffset::dump() const {
+ print(dbgs());
+}
+
+void BaseIndexOffset::print(raw_ostream& OS) const {
+ OS << "BaseIndexOffset base=[";
+ Base->print(OS);
+ OS << "] index=[";
+ if (Index)
+ Index->print(OS);
+ OS << "] offset=" << Offset;
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 000000000000..20c37eb4cb11
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,11977 @@
+//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <optional>
+#include <tuple>
+
+using namespace llvm;
+using namespace PatternMatch;
+using namespace SwitchCG;
+
+#define DEBUG_TYPE "isel"
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<bool>
+ InsertAssertAlign("insert-assert-align", cl::init(true),
+ cl::desc("Insert the experimental `assertalign` node."),
+ cl::ReallyHidden);
+
+static cl::opt<unsigned, true>
+ LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision), cl::Hidden,
+ cl::init(0));
+
+static cl::opt<unsigned> SwitchPeelThreshold(
+ "switch-peel-threshold", cl::Hidden, cl::init(66),
+ cl::desc("Set the case probability threshold for peeling the case from a "
+ "switch statement. A value greater than 100 will void this "
+ "optimization"));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ std::optional<CallingConv::ID> CC);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue
+getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
+ std::optional<CallingConv::ID> CC = std::nullopt,
+ std::optional<ISD::NodeType> AssertOp = std::nullopt) {
+ // Let the target assemble the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, CC))
+ return Val;
+
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
+ CC);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
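+      // E.g. assembling an i96 value from three i32 parts first combines the
+      // first two parts into an i64 (the power-of-2 portion), then merges in
+      // the remaining i32 part via the SHL/OR sequence below.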
+ unsigned RoundParts = llvm::bit_floor(NumParts);
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT, V);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
+ OddVT, V, CC);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueSizeInBits(), DL,
+ TLI.getShiftAmountTy(
+ TotalVT, DAG.getDataLayout())));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ // PartEVT is the type of the register class that holds the value.
+ // ValueVT is the type of the inline asm operation.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
+ // Handle types that have the same size.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle types with different sizes.
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp)
+ Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+ // then truncating.
+ if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+ ValueVT.bitsLT(PartEVT)) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ report_fatal_error("Unknown mismatch in getCopyFromParts!");
+}
+
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+ const Twine &ErrMsg) {
+ const Instruction *I = dyn_cast_or_null<Instruction>(V);
+ if (!V)
+ return Ctx.emitError(ErrMsg);
+
+ const char *AsmError = ", possible invalid constraint for vector type";
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->isInlineAsm())
+ return Ctx.emitError(I, ErrMsg + AsmError);
+
+ return Ctx.emitError(I, ErrMsg);
+}
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger than ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ std::optional<CallingConv::ID> CallConv) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const bool IsABIRegCopy = CallConv.has_value();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs;
+
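+ // For an ABI register copy, the breakdown must follow the register types
+ // mangled for the calling convention rather than the generic
+ // type-legalization breakdown.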
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT.getSizeInBits() ==
+ Parts[0].getSimpleValueType().getSizeInBits() &&
+ "Part type sizes don't match!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT, V, CallConv);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT, V, CallConv);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ EVT BuiltVectorTy =
+ IntermediateVT.isVector()
+ ? EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(),
+ IntermediateVT.getVectorElementCount() * NumParts)
+ : EVT::getVectorVT(*DAG.getContext(),
+ IntermediateVT.getScalarType(),
+ NumIntermediates);
+ Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, BuiltVectorTy, Ops);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isVector()) {
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // If the parts vector has more elements than the value vector, then we
+ // have a vector widening case (e.g. <2 x float> -> <4 x float>).
+ // Extract the elements we want.
+ if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
+ assert((PartEVT.getVectorElementCount().getKnownMinValue() >
+ ValueVT.getVectorElementCount().getKnownMinValue()) &&
+ (PartEVT.getVectorElementCount().isScalable() ==
+ ValueVT.getVectorElementCount().isScalable()) &&
+ "Cannot narrow, it would be a lossy transformation");
+ PartEVT =
+ EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
+ if (PartEVT == ValueVT)
+ return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>).
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
+
+ // Promoted vector extract
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ if (ValueVT.getVectorNumElements() != 1) {
+ // Certain ABIs require that vectors are passed as integers. If the
+ // vectors are the same size, this is an obvious bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ } else if (ValueVT.bitsLT(PartEVT)) {
+ const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+ // Drop the extra bits.
+ Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
+ return DAG.getBitcast(ValueVT, Val);
+ }
+
+ diagnosePossiblyInvalidConstraint(
+ *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
+ return DAG.getUNDEF(ValueVT);
+ }
+
+ // Handle cases such as i8 -> <1 x i1>
+ EVT ValueSVT = ValueVT.getVectorElementType();
+ if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
+ unsigned ValueSize = ValueSVT.getSizeInBits();
+ if (ValueSize == PartEVT.getSizeInBits()) {
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
+ } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) {
+ // It's possible a scalar floating point type gets softened to integer and
+ // then promoted to a larger integer. If PartEVT is the larger integer
+ // we need to truncate it and then bitcast to the FP type.
+ assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types");
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
+ Val = DAG.getBitcast(ValueSVT, Val);
+ } else {
+ Val = ValueVT.isFloatingPoint()
+ ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+ : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ }
+ }
+
+ return DAG.getBuildVector(ValueVT, DL, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V,
+ std::optional<CallingConv::ID> CallConv);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void
+getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, const Value *V,
+ std::optional<CallingConv::ID> CallConv = std::nullopt,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ // Let the target split the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
+ CallConv))
+ return;
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+ CallConv);
+
+ unsigned OrigNumParts = NumParts;
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ unsigned PartBits = PartVT.getSizeInBits();
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartEVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover fewer bits than the value has, truncate the value.
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ if (PartEVT != ValueVT) {
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "scalar-to-vector conversion failed");
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
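+ // For example, an i96 copied into three i32 parts first peels off the
+ // high i32 as the odd part, then recurses on the remaining i64.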
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = llvm::bit_floor(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));
+
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
+ CallConv);
+
+ if (DAG.getDataLayout().isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
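+ // For example, four i32 parts of an i128 are produced by first extracting
+ // the two i64 halves and then splitting each half into two i32s.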
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
+
+static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
+ const SDLoc &DL, EVT PartVT) {
+ if (!PartVT.isVector())
+ return SDValue();
+
+ EVT ValueVT = Val.getValueType();
+ EVT PartEVT = PartVT.getVectorElementType();
+ EVT ValueEVT = ValueVT.getVectorElementType();
+ ElementCount PartNumElts = PartVT.getVectorElementCount();
+ ElementCount ValueNumElts = ValueVT.getVectorElementCount();
+
+ // We only support widening vectors with equivalent element types and
+ // fixed/scalable properties. If a target needs to widen a fixed-length type
+ // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
+ if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
+ PartNumElts.isScalable() != ValueNumElts.isScalable())
+ return SDValue();
+
+ // Also handle bf16, since some targets give it the same ABI as fp16.
+ if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) {
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Cannot widen to illegal type");
+ Val = DAG.getNode(ISD::BITCAST, DL,
+ ValueVT.changeVectorElementType(MVT::f16), Val);
+ } else if (PartEVT != ValueEVT) {
+ return SDValue();
+ }
+
+ // Widening a scalable vector to another scalable vector is done by inserting
+ // the vector into a larger undef one.
+ if (PartNumElts.isScalable())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
+ Val, DAG.getVectorIdxConstant(0, DL));
+
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(Val, Ops);
+ SDValue EltUndef = DAG.getUNDEF(PartEVT);
+ Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+ return DAG.getBuildVector(PartVT, DL, Ops);
+}
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V,
+ std::optional<CallingConv::ID> CallConv) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const bool IsABIRegCopy = CallConv.has_value();
+
+ if (NumParts == 1) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
+ Val = Widened;
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorElementCount() ==
+ ValueVT.getVectorElementCount()) {
+
+ // Promoted vector extract
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else if (PartEVT.isVector() &&
+ PartEVT.getVectorElementType() !=
+ ValueVT.getVectorElementType() &&
+ TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
+ TargetLowering::TypeWidenVector) {
+ // Combination of widening and promotion.
+ EVT WidenVT =
+ EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
+ PartVT.getVectorElementCount());
+ SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
+ Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
+ } else {
+ // Don't extract an integer from a float vector. This can happen if the
+ // FP type gets softened to integer and then promoted. The promotion
+ // prevents it from being picked up by the earlier bitcast case.
+ if (ValueVT.getVectorElementCount().isScalar() &&
+ (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
+ } else {
+ uint64_t ValueSize = ValueVT.getFixedSizeInBits();
+ assert(PartVT.getFixedSizeInBits() > ValueSize &&
+ "lossy conversion of vector to scalar type");
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+ Val = DAG.getBitcast(IntermediateType, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ }
+ }
+
+ assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs;
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
+ "Mixing scalable and fixed vectors when copying in parts");
+
+ std::optional<ElementCount> DestEltCnt;
+
+ if (IntermediateVT.isVector())
+ DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
+ else
+ DestEltCnt = ElementCount::getFixed(NumIntermediates);
+
+ EVT BuiltVectorTy = EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);
+
+ if (ValueVT == BuiltVectorTy) {
+ // Nothing to do.
+ } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+ } else {
+ if (BuiltVectorTy.getVectorElementType().bitsGT(
+ ValueVT.getVectorElementType())) {
+ // Integer promotion.
+ ValueVT = EVT::getVectorVT(*DAG.getContext(),
+ BuiltVectorTy.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+ Val = Widened;
+ }
+ }
+
+ assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector()) {
+ // This does something sensible for scalable vectors - see the
+ // definition of EXTRACT_SUBVECTOR for further details.
+ unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
+ } else {
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+ DAG.getVectorIdxConstant(i, DL));
+ }
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, split each value into legal
+ // parts.
+ assert(NumIntermediates != 0 && "division by zero");
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
+ CallConv);
+ }
+}
+
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+ EVT valuevt, std::optional<CallingConv::ID> CC)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+ RegCount(1, regs.size()), CallConv(CC) {}
+
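+ // Compute the legal value types for Ty and, for each of them, the number
+ // and type of registers required, assigning consecutive register numbers
+ // starting at Reg.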
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ std::optional<CallingConv::ID> CC) {
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+
+ CallConv = CC;
+
+ for (EVT ValueVT : ValueVTs) {
+ unsigned NumRegs =
+ isABIMangled()
+ ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT)
+ : TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT =
+ isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT)
+ : TLI.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ RegCount.push_back(NumRegs);
+ Reg += NumRegs;
+ }
+}
+
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ const SDLoc &dl, SDValue &Chain,
+ SDValue *Glue, const Value *V) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = RegCount[Value];
+ MVT RegisterVT = isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), *CallConv, RegVTs[Value])
+ : RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (!Glue) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Glue);
+ *Glue = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!Register::isVirtualRegister(Regs[Part + i]) ||
+ !RegisterVT.isInteger())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getScalarSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
+
+ if (NumZeroBits == RegSize) {
+ // The current value is a zero.
+ // Explicitly express that as it would be easier for
+ // optimizations to kick in.
+ Parts[i] = DAG.getConstant(0, dl, RegisterVT);
+ continue;
+ }
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt;
+ EVT FromVT(MVT::Other);
+ if (NumZeroBits) {
+ FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
+ isSExt = false;
+ } else if (NumSignBits > 1) {
+ FromVT =
+ EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
+ isSExt = true;
+ } else {
+ continue;
+ }
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
+ RegisterVT, ValueVT, V, CallConv);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
+}
+
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain, SDValue *Glue,
+ const Value *V,
+ ISD::NodeType PreferredExtendType) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ISD::NodeType ExtendKind = PreferredExtendType;
+
+ // Get the list of the values's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumParts = RegCount[Value];
+
+ MVT RegisterVT = isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), *CallConv, RegVTs[Value])
+ : RegVTs[Value];
+
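+ // If the target considers a zero-extend of this value free, prefer it over
+ // the any-extend: it defines the high bits instead of leaving them
+ // unspecified.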
+ if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
+ NumParts, RegisterVT, V, CallConv, ExtendKind);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (!Glue) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Glue);
+ *Glue = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Glue)
+ // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is
+ // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+}
+
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
+ Ops.push_back(Res);
+
+ if (Code == InlineAsm::Kind_Clobber) {
+ // Clobbers should always have a 1:1 mapping with registers, and may
+ // reference registers that have illegal (e.g. vector) types. Hence, we
+ // shouldn't try to apply any sort of splitting logic to them.
+ assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
+ "No 1:1 mapping from clobbers to regs?");
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ (void)SP;
+ for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
+ Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
+ assert(
+ (Regs[I] != SP ||
+ DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
+ "If we clobbered the stack pointer, MFI should know about it.");
+ }
+ return;
+ }
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT RegisterVT = RegVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
+ RegisterVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ unsigned TheReg = Regs[Reg++];
+ Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+ }
+ }
+}
+
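+ // Flatten the (register count, register type) pairs into one
+ // (register, size in bits) entry per register.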
+SmallVector<std::pair<unsigned, TypeSize>, 4>
+RegsForValue::getRegsAndSizes() const {
+ SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
+ unsigned I = 0;
+ for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
+ unsigned RegCount = std::get<0>(CountAndVT);
+ MVT RegisterVT = std::get<1>(CountAndVT);
+ TypeSize RegisterSize = RegisterVT.getSizeInBits();
+ for (unsigned E = I + RegCount; I != E; ++I)
+ OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
+ }
+ return OutVec;
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
+ AssumptionCache *ac,
+ const TargetLibraryInfo *li) {
+ AA = aa;
+ AC = ac;
+ GFI = gfi;
+ LibInfo = li;
+ Context = DAG.getContext();
+ LPadToCallSiteMap.clear();
+ SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
+ AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
+ *DAG.getMachineFunction().getFunction().getParent());
+}
+
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
+ CurInst = nullptr;
+ HasTailCall = false;
+ SDNodeOrder = LowestSDNodeOrder;
+ StatepointLowering.clear();
+}
+
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+// Update DAG root to include dependencies on Pending chains.
+SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
+ SDValue Root = DAG.getRoot();
+
+ if (Pending.empty())
+ return Root;
+
+ // Add current root to PendingChains, unless we already indirectly
+ // depend on it.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = Pending.size();
+ for (; i != e; ++i) {
+ assert(Pending[i].getNode()->getNumOperands() > 1);
+ if (Pending[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ Pending.push_back(Root);
+ }
+
+ if (Pending.size() == 1)
+ Root = Pending[0];
+ else
+ Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
+
+ DAG.setRoot(Root);
+ Pending.clear();
+ return Root;
+}
+
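+ // Return the current root, updated to depend on all pending loads, so that
+ // a new memory operation can be chained after them.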
+SDValue SelectionDAGBuilder::getMemoryRoot() {
+ return updateRoot(PendingLoads);
+}
+
+SDValue SelectionDAGBuilder::getRoot() {
+ // Chain up all pending constrained intrinsics together with all
+ // pending loads, by simply appending them to PendingLoads and
+ // then calling getMemoryRoot().
+ PendingLoads.reserve(PendingLoads.size() +
+ PendingConstrainedFP.size() +
+ PendingConstrainedFPStrict.size());
+ PendingLoads.append(PendingConstrainedFP.begin(),
+ PendingConstrainedFP.end());
+ PendingLoads.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
+ return getMemoryRoot();
+}
+
+SDValue SelectionDAGBuilder::getControlRoot() {
+ // We need to emit pending fpexcept.strict constrained intrinsics,
+ // so append them to the PendingExports list.
+ PendingExports.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFPStrict.clear();
+ return updateRoot(PendingExports);
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (I.isTerminator()) {
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+ }
+
+ // Add SDDbgValue nodes for any var locs here. Do so before updating
+ // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
+ if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
+ for (auto It = FnVarLocs->locs_begin(&I), End = FnVarLocs->locs_end(&I);
+ It != End; ++It) {
+ auto *Var = FnVarLocs->getDILocalVariable(It->VariableID);
+ dropDanglingDebugInfo(Var, It->Expr);
+ if (It->Values.isKillLocation(It->Expr)) {
+ handleKillDebugValue(Var, It->Expr, It->DL, SDNodeOrder);
+ continue;
+ }
+ SmallVector<Value *> Values(It->Values.location_ops());
+ if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder,
+ It->Values.hasArgList()))
+ addDanglingDebugInfo(It, SDNodeOrder);
+ }
+ }
+
+ // Increase the SDNodeOrder if dealing with a non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++SDNodeOrder;
+
+ CurInst = &I;
+
+ // Set inserted listener only if required.
+ bool NodeInserted = false;
+ std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener;
+ MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections);
+ if (PCSectionsMD) {
+ InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>(
+ DAG, [&](SDNode *) { NodeInserted = true; });
+ }
+
+ visit(I.getOpcode(), I);
+
+ if (!I.isTerminator() && !HasTailCall &&
+ !isa<GCStatepointInst>(I)) // statepoints handle their exports internally
+ CopyToExportRegsIfNeeded(&I);
+
+ // Handle metadata.
+ if (PCSectionsMD) {
+ auto It = NodeMap.find(&I);
+ if (It != NodeMap.end()) {
+ DAG.addPCSections(It->second.getNode(), PCSectionsMD);
+ } else if (NodeInserted) {
+ // This should not happen; if it does, don't let it go unnoticed so we can
+ // fix it. Relevant visit*() function is probably missing a setValue().
+ errs() << "warning: loosing !pcsections metadata ["
+ << I.getModule()->getName() << "]\n";
+ LLVM_DEBUG(I.dump());
+ assert(false);
+ }
+ }
+
+ CurInst = nullptr;
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
+#include "llvm/IR/Instruction.def"
+ }
+}
+
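+ // If the dbg_value is variadic (it carries a DIArgList), lower it now as an
+ // undef location list rather than letting it dangle, and return true.
+ // Returns false for non-variadic dbg_values, which the caller handles.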
+static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
+ DILocalVariable *Variable,
+ DebugLoc DL, unsigned Order,
+ RawLocationWrapper Values,
+ DIExpression *Expression) {
+ if (!Values.hasArgList())
+ return false;
+ // For variadic dbg_values we will now insert an undef.
+ // FIXME: We can potentially recover these!
+ SmallVector<SDDbgOperand, 2> Locs;
+ for (const Value *V : Values.location_ops()) {
+ auto *Undef = UndefValue::get(V->getType());
+ Locs.push_back(SDDbgOperand::fromConst(Undef));
+ }
+ SDDbgValue *SDV = DAG.getDbgValueList(Variable, Expression, Locs, {},
+ /*IsIndirect=*/false, DL, Order,
+ /*IsVariadic=*/true);
+ DAG.AddDbgValue(SDV, /*isParameter=*/false);
+ return true;
+}
+
+void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
+ unsigned Order) {
+ if (!handleDanglingVariadicDebugInfo(
+ DAG,
+ const_cast<DILocalVariable *>(DAG.getFunctionVarLocs()
+ ->getVariable(VarLoc->VariableID)
+ .getVariable()),
+ VarLoc->DL, Order, VarLoc->Values, VarLoc->Expr)) {
+ DanglingDebugInfoMap[VarLoc->Values.getVariableLocationOp(0)].emplace_back(
+ VarLoc, Order);
+ }
+}
+
+void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
+ unsigned Order) {
+ // We treat variadic dbg_values differently at this stage.
+ if (!handleDanglingVariadicDebugInfo(
+ DAG, DI->getVariable(), DI->getDebugLoc(), Order,
+ DI->getWrappedLocation(), DI->getExpression())) {
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE,
+ // which we should ideally fill with an extra Undef DBG_VALUE.
+ assert(DI->getNumVariableLocationOps() == 1 &&
+ "DbgValueInst without an ArgList should have a single location "
+ "operand.");
+ DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order);
+ }
+}
+
+void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr) {
+ auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
+ DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *DanglingExpr = DDI.getExpression();
+ if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI)
+ << "\n");
+ return true;
+ }
+ return false;
+ };
+
+ for (auto &DDIMI : DanglingDebugInfoMap) {
+ DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+ // If debug info is to be dropped, run it through final checks to see
+ // whether it can be salvaged.
+ for (auto &DDI : DDIV)
+ if (isMatchingDbgValue(DDI))
+ salvageUnresolvedDbgValue(DDI);
+
+ erase_if(DDIV, isMatchingDbgValue);
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
+ if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
+ return;
+
+ DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
+ for (auto &DDI : DDIV) {
+ DebugLoc DL = DDI.getDebugLoc();
+ unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *Expr = DDI.getExpression();
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+ // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+ // we couldn't resolve it directly when examining the DbgValue intrinsic
+ // in the first place we should not be more successful here). Unless we
+ // have a test case that proves this to be correct, we should avoid
+ // calling EmitFuncArgumentDbgValue here.
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
+ FuncArgumentDbgValueKind::Value, Val)) {
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI)
+ << "\n");
+ LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
+ // Increase the SDNodeOrder for the DbgValue here to make sure it is
+ // inserted after the definition of Val when emitting the instructions
+ // after ISel. An alternative could be to teach
+ // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
+ LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
+ << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
+ << ValSDNodeOrder << "\n");
+ SDV = getDbgValue(Val, Variable, Expr, DL,
+ std::max(DbgSDNodeOrder, ValSDNodeOrder));
+ DAG.AddDbgValue(SDV, false);
+ } else
+ LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
+ << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n");
+ auto Undef = UndefValue::get(V->getType());
+ auto SDV =
+ DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, false);
+ }
+ }
+ DDIV.clear();
+}
+
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+ // TODO: For the variadic implementation, instead of only checking the fail
+ // state of `handleDebugValue`, we need to know specifically which values were
+ // invalid, so that we attempt to salvage only those values when processing
+ // a DIArgList.
+ Value *V = DDI.getVariableLocationOp(0);
+ Value *OrigV = V;
+ DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *Expr = DDI.getExpression();
+ DebugLoc DL = DDI.getDebugLoc();
+ unsigned SDOrder = DDI.getSDNodeOrder();
+
+ // Currently we consider only dbg.value intrinsics -- we tell the salvager
+ // that DW_OP_stack_value is desired.
+ bool StackValue = true;
+
+ // Can this Value be encoded without any further work?
+ if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false))
+ return;
+
+ // Attempt to salvage back through as many instructions as possible. Bail if
+ // a non-instruction is seen, such as a constant expression or global
+ // variable. FIXME: Further work could recover those too.
+ while (isa<Instruction>(V)) {
+ Instruction &VAsInst = *cast<Instruction>(V);
+ // Temporary "0", awaiting real implementation.
+ SmallVector<uint64_t, 16> Ops;
+ SmallVector<Value *, 4> AdditionalValues;
+ V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ AdditionalValues);
+ // If we cannot salvage any further, and haven't yet found a suitable debug
+ // expression, bail out.
+ if (!V)
+ break;
+
+ // TODO: If AdditionalValues isn't empty, then the salvage can only be
+ // represented with a DBG_VALUE_LIST, so we give up. When we have support
+ // here for variadic dbg_values, remove that condition.
+ if (!AdditionalValues.empty())
+ break;
+
+ // New value and expr now represent this debuginfo.
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
+
+ // Some kind of simplification occurred: check whether the operand of the
+ // salvaged debug expression can be encoded in this DAG.
+ if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) {
+ LLVM_DEBUG(
+ dbgs() << "Salvaged debug location info for:\n " << *Var << "\n"
+ << *OrigV << "\nBy stripping back to:\n " << *V << "\n");
+ return;
+ }
+ }
+
+ // This was the final opportunity to salvage this debug information, and it
+ // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+ // any earlier variable location.
+ assert(OrigV && "V shouldn't be null");
+ auto *Undef = UndefValue::get(OrigV->getType());
+ auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, false);
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI)
+ << "\n");
+}
+
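+ // A kill location carries no recoverable value: emit a poison operand with
+ // the expression converted to an undef expression, marking the variable as
+ // unavailable from this point.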
+void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
+ DIExpression *Expr,
+ DebugLoc DbgLoc,
+ unsigned Order) {
+ Value *Poison = PoisonValue::get(Type::getInt1Ty(*Context));
+ DIExpression *NewExpr =
+ const_cast<DIExpression *>(DIExpression::convertToUndefExpression(Expr));
+ handleDebugValue(Poison, Var, NewExpr, DbgLoc, Order,
+ /*IsVariadic*/ false);
+}
+
+bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
+ DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc DbgLoc,
+ unsigned Order, bool IsVariadic) {
+ if (Values.empty())
+ return true;
+ SmallVector<SDDbgOperand> LocationOps;
+ SmallVector<SDNode *> Dependencies;
+ for (const Value *V : Values) {
+ // Constant value.
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
+ LocationOps.emplace_back(SDDbgOperand::fromConst(V));
+ continue;
+ }
+
+ // Look through IntToPtr constants.
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ LocationOps.emplace_back(SDDbgOperand::fromConst(CE->getOperand(0)));
+ continue;
+ }
+
+ // If the Value is a frame index, we can create a FrameIndex debug value
+ // without relying on the DAG at all.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
+ continue;
+ }
+ }
+
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ // Only emit func arg dbg value for non-variadic dbg.values for now.
+ if (!IsVariadic &&
+ EmitFuncArgumentDbgValue(V, Var, Expr, DbgLoc,
+ FuncArgumentDbgValueKind::Value, N))
+ return true;
+ if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
+ // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
+ // describe stack slot locations.
+ //
+ // Consider "int x = 0; int *px = &x;". There are two kinds of
+ // interesting debug values here after optimization:
+ //
+ // dbg.value(i32* %px, !"int *px", !DIExpression()), and
+ // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
+ //
+ // Both describe the direct values of their associated variables.
+ Dependencies.push_back(N.getNode());
+ LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
+ continue;
+ }
+ LocationOps.emplace_back(
+ SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
+ continue;
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Special rules apply for the first dbg.values of parameter variables in a
+ // function. Identify them by the fact that they reference Argument Values,
+ // that the variable is a parameter, and that the location is not inlined-at
+ // (i.e. the parameter belongs to the current function). We
+ // need to let them dangle until they get an SDNode.
+ bool IsParamOfFunc =
+ isa<Argument>(V) && Var->isParameter() && !DbgLoc.getInlinedAt();
+ if (IsParamOfFunc)
+ return false;
+
+ // The value is not used in this block yet (or it would have an SDNode).
+ // We still want the value to appear for the user if possible -- if it has
+ // an associated VReg, we can refer to that instead.
+ auto VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ unsigned Reg = VMI->second;
+ // If this is a PHI node, it may be split up into several MI PHI nodes
+ // (in FunctionLoweringInfo::set).
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType(), std::nullopt);
+ if (RFV.occupiesMultipleRegs()) {
+ // FIXME: We could potentially support variadic dbg_values here.
+ if (IsVariadic)
+ return false;
+ unsigned Offset = 0;
+ unsigned BitsToDescribe = 0;
+ if (auto VarSize = Var->getSizeInBits())
+ BitsToDescribe = *VarSize;
+ if (auto Fragment = Expr->getFragmentInfo())
+ BitsToDescribe = Fragment->SizeInBits;
+ for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
+ // Bail out if all bits are described already.
+ if (Offset >= BitsToDescribe)
+ break;
+ // TODO: handle scalable vectors.
+ unsigned RegisterSize = RegAndSize.second;
+ unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+ ? BitsToDescribe - Offset
+ : RegisterSize;
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, FragmentSize);
+ if (!FragmentExpr)
+ continue;
+ SDDbgValue *SDV = DAG.getVRegDbgValue(
+ Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder);
+ DAG.AddDbgValue(SDV, false);
+ Offset += RegisterSize;
+ }
+ return true;
+ }
+ // We can use simple vreg locations for variadic dbg_values as well.
+ LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
+ continue;
+ }
+ // We failed to create a SDDbgOperand for V.
+ return false;
+ }
+
+ // We have created a SDDbgOperand for each Value in Values.
+ // Should use Order instead of SDNodeOrder?
+ assert(!LocationOps.empty());
+ SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, DbgLoc,
+ SDNodeOrder, IsVariadic);
+ DAG.AddDbgValue(SDV, /*isParameter=*/false);
+ return true;
+}
+
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+ // Try to fixup any remaining dangling debug info -- and drop it if we can't.
+ for (auto &Pair : DanglingDebugInfoMap)
+ for (auto &DDI : Pair.second)
+ salvageUnresolvedDbgValue(DDI);
+ clearDanglingDebugInfo();
+}
+
+/// getCopyFromRegs - If there was virtual register allocated for the value V
+/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+ DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
+ SDValue Result;
+
+ if (It != FuncInfo.ValueMap.end()) {
+ Register InReg = It->second;
+
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Ty,
+ std::nullopt); // This is not an ABI copy.
+ SDValue Chain = DAG.getEntryNode();
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+ V);
+ resolveDanglingDebugInfo(V, Result);
+ }
+
+ return Result;
+}
+
+/// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
+ return copyFromReg;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) {
+ if (isIntOrFPConstant(N)) {
+ // Remove the debug location from the node as the node is about to be used
+ // in a location which may differ from the original debug location. This
+ // is relevant to Constant and ConstantFP nodes because they can appear
+ // as constant expressions inside PHI nodes.
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, getCurSDLoc(), VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C)) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return DAG.getConstant(0, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout(), AS));
+ }
+
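+ // A vscale constant is lowered to a VSCALE node with a multiplier of one.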
+ if (match(C, m_VScale()))
+ return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (const Use &U : C->operands()) {
+ SDNode *Val = getValue(U).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(Constants, getCurSDLoc());
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(Ops, getCurSDLoc());
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
+ }
+
+ return DAG.getMergeValues(Constants, getCurSDLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
+ return getValue(Equiv->getGlobalValue());
+
+ if (const auto *NC = dyn_cast<NoCFIValue>(C))
+ return getValue(NC->getGlobalValue());
+
+ VectorType *VecTy = cast<VectorType>(V->getType());
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ SmallVector<SDValue, 16> Ops;
+ unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CV->getOperand(i)));
+
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ }
+
+ if (isa<ConstantAggregateZero>(C)) {
+ EVT EltVT =
+ TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
+ else
+ Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
+
+ return NodeMap[V] = DAG.getSplat(VT, getCurSDLoc(), Op);
+ }
+
+ llvm_unreachable("Unknown vector constant");
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(
+ SI->second, TLI.getValueType(DAG.getDataLayout(), AI->getType()));
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ Register InReg = FuncInfo.InitializeRegForValue(Inst);
+
+ RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
+ Inst->getType(), std::nullopt);
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ }
+
+ if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V))
+ return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
+
+ if (const auto *BB = dyn_cast<BasicBlock>(V))
+ return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+
+ llvm_unreachable("Can't get register for value!");
+}
+
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ if (!IsSEH)
+ CatchPadMBB->setIsEHScopeEntry();
+ // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ CatchPadMBB->setIsEHFuncletEntry();
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+ TargetMBB->setIsEHCatchretTarget(true);
+ DAG.getMachineFunction().setHasEHCatchret(true);
+
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ if (IsSEH) {
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+ TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+ return;
+ }
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BB's.
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = I.getCatchSwitchParentPad();
+ const BasicBlock *SuccessorColor;
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccessorColor = &FuncInfo.Fn->getEntryBlock();
+ else
+ SuccessorColor = cast<Instruction>(ParentPad)->getParent();
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of an EH scope/funclet.
+ FuncInfo.MBB->setIsEHScopeEntry();
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ if (Pers != EHPersonality::Wasm_CXX) {
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+ }
+}
+
+// In wasm EH, even though a catchpad may not catch an exception if a tag does
+// not match, it is OK to add only the first unwind destination catchpad to the
+// successors, because there will be at least one invoke instruction within the
+// catch scope that points to the next unwind destination, if one exists, so
+ // CFGSort cannot mess up the BB sorting order.
+// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
+// call within them, and catchpads only consisting of 'catch (...)' have a
+// '__cxa_end_catch' call within them, both of which generate invokes in case
+// the next unwind destination exists, i.e., the next unwind destination is not
+// the caller.)
+//
+// Having at most one EH pad successor is also simpler and helps later
+// transformations.
+//
+// For example,
+// current:
+// invoke void @foo to ... unwind label %catch.dispatch
+// catch.dispatch:
+// %0 = catchswitch within ... [label %catch.start] unwind label %next
+// catch.start:
+// ...
+// ... in this BB or some other child BB dominated by this BB there will be an
+// invoke that points to 'next' BB as an unwind destination
+//
+// next: ; We don't need to add this to 'current' BB's successor
+// ...
+static void findWasmUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ break;
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations. We don't
+ // continue to the unwind destination of the catchswitch for wasm.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ break;
+ } else {
+ continue;
+ }
+ }
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchswitch
+/// instructions, and finds all the "real" machine
+/// basic block destinations. As those destinations may not be successors of
+/// EHPadBB, here we also calculate the edge probability to those destinations.
+/// The passed-in Prob is the edge probability to EHPadBB.
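+/// For example (illustrative numbers only): if the edge into EHPadBB has
+/// probability 1/2 and the catchswitch there unwinds onward with probability
+/// 2/5, destinations reached through that second hop are recorded with
+/// probability 1/2 * 2/5 = 1/5.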
+static void findUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+ if (IsWasmCXX) {
+ findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+ assert(UnwindDests.size() <= 1 &&
+ "There should be at most one unwind destination for wasm");
+ return;
+ }
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+}
+
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ auto UnwindDest = I.getUnwindDest();
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability UnwindDestProb =
+ (BPI && UnwindDest)
+ ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
+ }
+ FuncInfo.MBB->normalizeSuccProbs();
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
+ report_fatal_error("visitCatchSwitch not yet implemented!");
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ // Calls to @llvm.experimental.deoptimize don't generate a return value, so
+ // lower
+ //
+ // %val = call <ty> @llvm.experimental.deoptimize()
+ // ret <ty> %val
+ //
+ // differently.
+ if (I.getParent()->getTerminatingDeoptimizeCall()) {
+ LowerDeoptimizingReturn();
+ return;
+ }
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, DL,
+ PointerType::get(F->getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ PtrValueVTs);
+
+ SDValue RetPtr =
+ DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+ &Offsets, 0);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
+ for (unsigned i = 0; i != NumValues; ++i) {
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
+ TypeSize::Fixed(Offsets[i]));
+
+ SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
+ Chains[i] = DAG.getStore(
+ Chain, getCurSDLoc(), Val,
+ // FIXME: better loc info would be nice.
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+ commonAlignment(BaseAlign, Offsets[i]));
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
+ MVT::Other, Chains);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ const Function *F = I.getParent()->getParent();
+
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ I.getOperand(0)->getType(), F->getCallingConv(),
+ /*IsVarArg*/ false, DL);
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (F->getAttributes().hasRetAttr(Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ LLVMContext &Context = F->getContext();
+ bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
+
+ for (unsigned j = 0; j != NumValues; ++j) {
+ EVT VT = ValueVTs[j];
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
+
+ CallingConv::ID CC = F->getCallingConv();
+
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
+ MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurSDLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (RetInReg)
+ Flags.setInReg();
+
+ if (I.getOperand(0)->getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
+ }
+
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (j == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags,
+ Parts[i].getValueType().getSimpleVT(),
+ VT, /*isfixed=*/true, 0, 0));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ // Push the swifterror virtual register as the last element of Outs. This
+ // makes sure the swifterror virtual register will be returned in the
+ // swifterror physical register.
+ const Function *F = I.getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+ assert(SwiftError.getFunctionArg() && "Need a swift error argument");
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ Flags.setSwiftError();
+ Outs.push_back(ISD::OutputArg(
+ Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
+ /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
+ // Create SDNode for the swifterror virtual register.
+ OutVals.push_back(
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+ &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
+ EVT(TLI.getPointerTy(DL))));
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction().getCallingConv();
+ Chain = DAG.getTargetLoweringInfo().LowerReturn(
+ Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert((!V->use_empty() || isa<CallBrInst>(V)) &&
+ "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ Register Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB->isEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+BranchProbability
+SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ if (!BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return BPI->getEdgeProbability(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI)
+ Src->addSuccessorWithoutProb(Dst);
+ else {
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+ }
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb,
+ BranchProbability FProb,
+ bool InvertCond) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ ICmpInst::Predicate Pred =
+ InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ Condition = getICmpCondCode(Pred);
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ FCmpInst::Predicate Pred =
+ InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ Condition = getFCmpCondCode(Pred);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
+ TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+ CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+}
+
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc,
+ BranchProbability TProb,
+ BranchProbability FProb,
+ bool InvertCond) {
+ // If the condition is a one-use NOT, skip over it and remember to invert
+ // the op and operands at the next level.
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ InBlock(NotCond, CurBB->getBasicBlock())) {
+ FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
+ if (BOp) {
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
+ // Note that all nodes in the tree should have the same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !InBlock(BOpOp1, CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+ TProb, FProb, InvertCond);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals TrueProb for
+ // TmpBB, but the math is more complicated.
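+ // As a worked illustration (numbers chosen here for exposition): if A = 3/5
+ // and B = 2/5, BB1 gets TrueProb = 3/10 and FalseProb = 3/10 + 2/5 = 7/10,
+ // and TmpBB gets TrueProb = (3/5)/(1+2/5) = 3/7 and FalseProb = 4/7.
+ // Check: 3/10 + 7/10 * 3/7 = 3/5, the original TrueProb.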
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
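+ // As a worked illustration (numbers chosen here for exposition): if A = 3/5
+ // and B = 2/5, BB1 gets TrueProb = 3/5 + 1/5 = 4/5 and FalseProb = 1/5,
+ // and TmpBB gets TrueProb = (2*3/5)/(1+3/5) = 3/4 and FalseProb = 1/4.
+ // Check: 1/5 + 4/5 * 1/4 = 2/5, the original FalseProb.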
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) {
+ auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(Succ0MBB));
+ setValue(&I, Br);
+ DAG.setRoot(Br);
+ }
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ const Instruction *BOp = dyn_cast<Instruction>(CondVal);
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
+ BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
+ getEdgeProbability(BrMBB, Succ0MBB),
+ getEdgeProbability(BrMBB, Succ1MBB),
+ /*InvertCond=*/false);
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SL->SwitchCases)) {
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SL->SwitchCases[0], BrMBB);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
+
+ SL->SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ SDLoc dl = CB.DL;
+
+ if (CB.CC == ISD::SETTRUE) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ SwitchBB->normalizeSuccProbs();
+ if (CB.TrueBB != NextBlock(SwitchBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(CB.TrueBB)));
+ }
+ return;
+ }
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
+
+ // Build the setcc now.
+ if (!CB.CmpMHS) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else {
+ SDValue CondRHS = getValue(CB.CmpRHS);
+
+ // If a pointer's DAG type is larger than its memory type then the DAG
+ // values are zero-extended. This breaks signed comparisons so truncate
+ // back to the underlying type before doing the compare.
+ if (CondLHS.getValueType() != MemVT) {
+ CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
+ CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
+ }
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
+ }
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, dl, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
+ SwitchBB->normalizeSuccProbs();
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock(SwitchBB)) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ setValue(CurInst, BrCond);
+
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code needed to produce the
+/// index into the jump table from the switch case value.
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
+ // Subtract the lowest switch case value from the value being switched on.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+ DAG.getConstant(JTH.First, dl, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+ // may therefore require extension or truncation.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
+
+ unsigned JumpTableReg =
+ FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ if (!JTH.FallthroughUnreachable) {
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
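+ // For instance (illustrative values only): with cases covering [10, 20], a
+ // switched-on value of 7 gives Sub = 7 - 10, which wraps to a huge unsigned
+ // value; the unsigned-greater-than (20 - 10) compare below then routes it
+ // to the default block, while any value in [10, 20] maps Sub into [0, 10]
+ // and falls through to the jump table.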
+ SDValue CMP = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+ } else {
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+ DAG.getBasicBlock(JT.MBB)));
+ else
+ DAG.setRoot(CopyTo);
+ }
+}
+
+/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
+/// variable if one exists.
+static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue &Chain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
+ MachineSDNode *Node =
+ DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
+ if (Global) {
+ MachinePointerInfo MPInfo(Global);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
+ MachineMemOperand *MemRef = MF.getMachineMemOperand(
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
+ DAG.setNodeMemRefs(Node, {MemRef});
+ }
+ if (PtrTy != PtrMemTy)
+ return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
+ return SDValue(Node, 0);
+}
+
+/// Codegen a new tail for a stack protector check ParentMBB which has had its
+/// tail spliced into a stack protector check success bb.
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
+
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
+
+ SDValue Guard;
+ SDLoc dl = getCurSDLoc();
+ SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
+ Align Align =
+ DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ SDValue GuardVal = DAG.getLoad(
+ PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
+ MachineMemOperand::MOVolatile);
+
+ if (TLI.useStackGuardXorFP())
+ GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = GuardVal;
+ Entry.Ty = FnTy->getParamType(0);
+ if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
+ Entry.IsInReg = true;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(DAG.getEntryNode())
+ .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
+ getValue(GuardCheckFn), std::move(Args));
+
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return;
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ SDValue Chain = DAG.getEntryNode();
+ if (TLI.useLoadStackGuardNode()) {
+ Guard = getLoadStackGuard(DAG, dl, Chain);
+ } else {
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ SDValue GuardPtr = getValue(IRGuard);
+
+ Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+ MachinePointerInfo(IRGuard, 0), Align,
+ MachineMemOperand::MOVolatile);
+ }
+
+ // Perform the comparison via a getsetcc.
+ SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(),
+ Guard.getValueType()),
+ Guard, GuardVal, ISD::SETNE);
+
+ // If the guard and stack slot values are not equal, branch to the failure MBB.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, GuardVal.getOperand(0),
+ Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
+ // Otherwise branch to success MBB.
+ SDValue Br = DAG.getNode(ISD::BR, dl,
+ MVT::Other, BrCond,
+ DAG.getBasicBlock(SPD.getSuccessMBB()));
+
+ DAG.setRoot(Br);
+}
+
+/// Codegen the failure basic block for a stack protector check.
+///
+/// A failure stack protector machine basic block consists simply of a call to
+/// __stack_chk_fail().
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setDiscardResult(true);
+ SDValue Chain =
+ TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
+ std::nullopt, CallOptions, getCurSDLoc())
+ .second;
+ // On PS4/PS5, the "return address" must still be within the calling
+ // function, even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ if (TM.getTargetTriple().isPS())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ if (TM.getTargetTriple().isWasm())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
+ DAG.setRoot(Chain);
+}
+
+/// visitBitTestHeader - This function emits the code needed to produce a value
+/// suitable for "bit tests"
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
+ // Subtract the minimum value.
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue RangeSub =
+ DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
+
+ // Determine the type of the test operands.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT)) {
+ UsePtrType = true;
+ } else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use the pointer type; it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ SDValue Sub = RangeSub;
+ if (UsePtrType) {
+ VT = TLI.getPointerTy(DAG.getDataLayout());
+ Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
+ }
+
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ if (!B.FallthroughUnreachable)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+ SwitchBB->normalizeSuccProbs();
+
+ SDValue Root = CopyTo;
+ if (!B.FallthroughUnreachable) {
+ // Conditional branch to the default block.
+ SDValue RangeCmp = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ RangeSub.getValueType()),
+ RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
+ ISD::SETUGT);
+
+ Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+ }
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != NextBlock(SwitchBB))
+ Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(Root);
+}
+
+/// visitBitTestCase - This function produces one "bit test"
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ BranchProbability BranchProbToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+ MVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = llvm::popcount(B.Mask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
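+ // E.g. (illustrative mask): for B.Mask == 0x8, whose only set bit is at
+ // position 3, the compare below reduces to "ShiftOp == 3".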
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(llvm::countr_zero(B.Mask), dl, VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(llvm::countr_one(B.Mask), dl, VT), ISD::SETNE);
+ } else {
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
+ DAG.getConstant(1, dl, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, dl,
+ VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
+ // one as they are relative probabilities (and thus work more like weights),
+ // and hence we need to normalize them so that their sum becomes one.
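+ // E.g. (illustrative weights): relative probabilities of 3/4 and 1/2 would
+ // be rescaled to 3/5 and 2/5 so that they sum to one.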
+ SwitchBB->normalizeSuccProbs();
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != NextBlock(SwitchBB))
+ BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors. Look through artificial IR level blocks like
+ // catchswitch for successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
+ MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
+ LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget,
+ LLVMContext::OB_clang_arc_attachedcall}) &&
+ "Cannot lower invokes with arbitrary operand bundles yet!");
+
+ const Value *Callee(I.getCalledOperand());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(I, EHPadBB);
+ else if (Fn && Fn->isIntrinsic()) {
+ switch (Fn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Cannot invoke this intrinsic");
+ case Intrinsic::donothing:
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ case Intrinsic::seh_try_begin:
+ case Intrinsic::seh_scope_begin:
+ case Intrinsic::seh_try_end:
+ case Intrinsic::seh_scope_end:
+ if (EHPadMBB)
+ // This block is referenced by the EH table; mark its address as taken
+ // so the destructor funclet is not removed by optimizations.
+ EHPadMBB->setMachineBlockAddressTaken();
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(I, EHPadBB);
+ break;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
+ break;
+ case Intrinsic::wasm_rethrow: {
+ // This is usually done in visitTargetIntrinsic, but this intrinsic is
+ // special because it can be invoked, so we manually lower it to a DAG
+ // node here.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(getRoot()); // inchain
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+ SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
+ DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
+ break;
+ }
+ }
+ } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
+ // Currently we do not lower any intrinsic calls with deopt operand bundles.
+ // Eventually we will support lowering the @llvm.experimental.deoptimize
+ // intrinsic, and right now there are no plans to support other intrinsics
+ // with deopt state.
+ LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+ } else {
+ LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
+ }
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ // We already took care of the exported value for the statepoint instruction
+ // during call to the LowerStatepoint.
+ if (!isa<GCStatepointInst>(I)) {
+ CopyToExportRegsIfNeeded(&I);
+ }
+
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, Return);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
+ MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower callbrs with arbitrary operand bundles yet!");
+
+ assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
+ visitInlineAsm(I);
+ CopyToExportRegsIfNeeded(&I);
+
+ // Retrieve successors.
+ SmallPtrSet<BasicBlock *, 8> Dests;
+ Dests.insert(I.getDefaultDest());
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+ for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
+ BasicBlock *Dest = I.getIndirectDest(i);
+ MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
+ Target->setIsInlineAsmBrIndirectTarget();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
+ CallBrMBB->normalizeSuccProbs();
+
+ // Drop into default successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isEHPad() &&
+ "Call to landingpad not in landing pad!");
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return;
+
+ // If landingpad's return type is token type, we don't create DAG nodes
+ // for its exception pointer and selector value. The extraction of exception
+ // pointer or selector value from token type landingpads is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
+ return;
+
+ SmallVector<EVT, 2> ValueVTs;
+ SDLoc dl = getCurSDLoc();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
+ assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
+
+ // Get the two live-in registers as SDValues. The physregs have already been
+ // copied into virtual registers.
+ SDValue Ops[2];
+ if (FuncInfo.ExceptionPointerVirtReg) {
+ Ops[0] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionPointerVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ dl, ValueVTs[0]);
+ } else {
+ Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ Ops[1] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionSelectorVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ dl, ValueVTs[1]);
+
+ // Merge into one.
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(ValueVTs), Ops);
+ setValue(&LP, Res);
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (JumpTableBlock &JTB : SL->JTCases)
+ if (JTB.first.HeaderBB == First)
+ JTB.first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (BitTestBlock &BTB : SL->BitTestCases)
+ if (BTB.Parent == First)
+ BTB.Parent = Last;
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB).second;
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
+ addSuccessorWithProb(IndirectBrMBB, Succ);
+ }
+ IndirectBrMBB->normalizeSuccProbs();
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
+ if (!DAG.getTarget().Options.TrapUnreachable)
+ return;
+
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *I.getParent();
+ if (&I != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(&I));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return;
+ }
+ }
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
+
+void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+
+ SDValue Op = getValue(I.getOperand(0));
+ SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
+ Op, Flags);
+ setValue(&I, UnNodeValue);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+ if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
+ Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
+ }
+ if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+ Flags.setExact(ExactOp->isExact());
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
+ Op1, Op2, Flags);
+ setValue(&I, BinNodeValue);
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
+ Op1.getValueType(), DAG.getDataLayout());
+
+ // Coerce the shift amount to the right type if we can. This exposes the
+ // truncate or zext to optimization early.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
+ "Unexpected shift type");
+ Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy);
+ }
+
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+
+ if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+ }
+ SDNodeFlags Flags;
+ Flags.setExact(exact);
+ Flags.setNoSignedWrap(nsw);
+ Flags.setNoUnsignedWrap(nuw);
+ SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+ Flags);
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ SDNodeFlags Flags;
+ Flags.setExact(isa<PossiblyExactOperator>(&I) &&
+ cast<PossiblyExactOperator>(&I)->isExact());
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
+ Op2, Flags));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+ // If a pointer's DAG type is larger than its memory type then the DAG values
+ // are zero-extended. This breaks signed comparisons so truncate back to the
+ // underlying type before doing the compare.
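+ // E.g. (illustrative widths): if 32-bit pointers are carried as
+ // zero-extended 64-bit DAG values, a signed compare of 0x80000000 and 0x1
+ // sees a large positive LHS rather than a negative one; truncating back to
+ // 32 bits first preserves the i32 signed ordering.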
+ if (Op1.getValueType() != MemVT) {
+ Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+ Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+ }
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ auto *FPMO = cast<FPMathOperator>(&I);
+ if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+
+ SDNodeFlags Flags;
+ Flags.copyFMF(*FPMO);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
+}
+
+// Check that every user of the select's condition is itself a select, i.e.
+// the underlying comparison is not used by any other kind of instruction.
+static bool hasOnlySelectUsers(const Value *Cond) {
+ return llvm::all_of(Cond->users(), [](const Value *V) {
+ return isa<SelectInst>(V);
+ });
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue LHSVal = getValue(I.getOperand(1));
+ SDValue RHSVal = getValue(I.getOperand(2));
+ SmallVector<SDValue, 1> BaseOps(1, Cond);
+ ISD::NodeType OpCode =
+ Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
+
+ bool IsUnaryAbs = false;
+ bool Negate = false;
+
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+
+ Flags.setUnpredictable(
+ cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
+
+ // Min/max matching is only viable if all output VTs are the same.
+ if (all_equal(ValueVTs)) {
+ EVT VT = ValueVTs[0];
+ LLVMContext &Ctx = *DAG.getContext();
+ auto &TLI = DAG.getTargetLoweringInfo();
+
+ // We care about the legality of the operation after it has been type
+ // legalized.
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
+ VT = TLI.getTypeToTransformTo(Ctx, VT);
+
+ // If the vselect is legal, assume we want to leave this as a vector setcc +
+ // vselect. Otherwise, if this is going to be scalarized, we want to see if
+ // min/max is legal on the scalar type.
+ bool UseScalarMinMax = VT.isVector() &&
+ !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+
+ // ValueTracking's select pattern matching does not account for -0.0,
+ // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
+ // -0.0 is less than +0.0.
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY:
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
+ Opc = ISD::FMINNUM;
+ break;
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_ANY:
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
+ Opc = ISD::FMAXNUM;
+ break;
+ }
+ break;
+ case SPF_NABS:
+ Negate = true;
+ [[fallthrough]];
+ case SPF_ABS:
+ IsUnaryAbs = true;
+ Opc = ISD::ABS;
+ break;
+ default: break;
+ }
+
+ if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
+ // If the underlying comparison instruction is used by any other
+ // instruction, the consumed instructions won't be destroyed, so it is
+ // not profitable to convert to a min/max.
+ hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ RHSVal = getValue(RHS);
+ BaseOps.clear();
+ }
+
+ if (IsUnaryAbs) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ BaseOps.clear();
+ }
+ }
+
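+ // For abs/nabs the select collapses to a single unary ABS per aggregate
+ // value; nabs additionally negates the result.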
+ if (IsUnaryAbs) {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDLoc dl = getCurSDLoc();
+ EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
+ Values[i] =
+ DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
+ if (Negate)
+ Values[i] = DAG.getNegative(Values[i], dl, VT);
+ }
+ } else {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+ Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+ Values[i] = DAG.getNode(
+ OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
+ }
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // ZExt also can't be a cast to bool for the same reason, so there is nothing else to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // SExt also can't be a cast to bool for the same reason, so there is nothing else to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
+ DAG.getTargetConstant(
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I) {
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I) {
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ EVT PtrMemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
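+ // First normalize the pointer value to its in-memory width, then zero-extend
+ // or truncate to the destination integer type.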
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+
+ // BitCast assures us that source and destination are the same size so this is
+ // either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, dl,
+ DestVT, N)); // convert types.
+ // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
+ // might fold any kind of constant expression to an integer constant and that
+ // is not what we are looking for. Only recognize a bitcast of a genuine
+ // constant integer as an opaque constant.
+ else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
+ setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
+ /*isOpaque=*/true));
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ SDValue N = getValue(SV);
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+ unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+ if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
+ N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InIdx));
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+ ArrayRef<int> Mask;
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ Mask = SVI->getShuffleMask();
+ else
+ Mask = cast<ConstantExpr>(I).getShuffleMask();
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT SrcVT = Src1.getValueType();
+
+ if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
+ VT.isScalableVector()) {
+ // Canonical splat form of first element of first input vector.
+ SDValue FirstElt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
+ DAG.getVectorIdxConstant(0, DL));
+ setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
+ return;
+ }
+
+ // For now, we only handle splats for scalable vectors.
+ // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
+ // for targets that support a SPLAT_VECTOR for non-scalable vector types.
+ assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
+
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned MaskNumElts = Mask.size();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts) {
+ // The mask is longer than the source vectors. Use vector concatenation to
+ // make the mask and vector lengths match.
+
+ if (MaskNumElts % SrcNumElts == 0) {
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
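+ // ConcatSrcs[j] records which source vector (0 or 1) supplies the j-th
+ // SrcVT-sized piece of the result; -1 means the piece is entirely undef.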
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
+ }
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
+ }
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
+ return;
+ }
+ }
+
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ PaddedMaskNumElts);
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - PaddedMaskNumElts;
+ MappedOps[i] = Idx;
+ }
+
+ SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
+
+ // If the concatenated vector was padded, extract a subvector with the
+ // correct number of elements.
+ if (MaskNumElts != PaddedMaskNumElts)
+ Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getVectorIdxConstant(0, DL));
+
+ setValue(&I, Result);
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle.
+ int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
+ bool CanExtract = true;
+ for (int Idx : Mask) {
+ unsigned Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+
+ // If all the indices come from the same MaskNumElts sized portion of
+ // the sources we can use extract. Also make sure the extract wouldn't
+ // extract past the end of the source.
+ int NewStartIdx = alignDown(Idx, MaskNumElts);
+ if (NewStartIdx + MaskNumElts > SrcNumElts ||
+ (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+ CanExtract = false;
+ // Make sure we always update StartIdx as we use it to track if all
+ // elements are undef.
+ StartIdx[Input] = NewStartIdx;
+ }
+
+ if (StartIdx[0] < 0 && StartIdx[1] < 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ if (CanExtract) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (StartIdx[Input] < 0)
+ Src = DAG.getUNDEF(VT);
+ else {
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+ DAG.getVectorIdxConstant(StartIdx[Input], DL));
+ }
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps(Mask);
+ for (int &Idx : MappedOps) {
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ else if (Idx >= 0)
+ Idx -= StartIdx[0];
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
+ return;
+ }
+ }
+
+ // We can't use either concat vectors or extract subvectors, so fall back to
+ // replacing the shuffle with per-element extracts and a build vector.
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> Ops;
+ for (int Idx : Mask) {
+ SDValue Res;
+
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
+ DAG.getVectorIdxConstant(Idx, DL));
+ }
+
+ Ops.push_back(Res);
+ }
+
+ setValue(&I, DAG.getBuildVector(VT, DL, Ops));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ ArrayRef<unsigned> Indices = I.getIndices();
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
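+ // LinearIndex is the position of the inserted value within the flattened
+ // list of the aggregate's leaf values.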
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ // Ignore an insertvalue that produces an empty object
+ if (!NumAggValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(AggValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ ArrayRef<unsigned> Indices = I.getIndices();
+ const Value *Op0 = I.getOperand(0);
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+ // Ignore an extractvalue that produces an empty object
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ Value *Op0 = I.getOperand(0);
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
+ SDValue N = getValue(Op0);
+ SDLoc dl = getCurSDLoc();
+ auto &TLI = DAG.getTargetLoweringInfo();
+
+ // Normalize a vector GEP: all scalar operands must be converted to splat
+ // vectors.
+ bool IsVectorGEP = I.getType()->isVectorTy();
+ ElementCount VectorElementCount =
+ IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
+ : ElementCount::getFixed(0);
+
+ if (IsVectorGEP && !N.getValueType().isVector()) {
+ LLVMContext &Context = *DAG.getContext();
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+ N = DAG.getSplat(VT, dl, N);
+ }
+
+ for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset =
+ DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
+ DAG.getConstant(Offset, dl, N.getValueType()), Flags);
+ }
+ } else {
+ // IdxSize is the width of the arithmetic according to IR semantics.
+ // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+ // (and fix up the result later).
+ unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
+ MVT IdxTy = MVT::getIntegerVT(IdxSize);
+ TypeSize ElementSize =
+ DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
+ // We intentionally mask away the high bits here; ElementSize may not
+ // fit in IdxTy.
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
+ bool ElementScalable = ElementSize.isScalable();
+
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ const auto *C = dyn_cast<Constant>(Idx);
+ if (C && isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+
+ const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (CI && CI->isZero())
+ continue;
+ if (CI && !ElementScalable) {
+ APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
+ LLVMContext &Context = *DAG.getContext();
+ SDValue OffsVal;
+ if (IsVectorGEP)
+ OffsVal = DAG.getConstant(
+ Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+ else
+ OffsVal = DAG.getConstant(Offs, dl, IdxTy);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
+ continue;
+ }
+
+ // N = N + Idx * ElementMul;
+ SDValue IdxN = getValue(Idx);
+
+ if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), IdxN.getValueType(),
+ VectorElementCount);
+ IdxN = DAG.getSplat(VT, dl, IdxN);
+ }
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
+
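+ // For scalable element types the byte offset is Idx * (ElementMul * vscale),
+ // so the multiplier is materialized with an ISD::VSCALE node.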
+ if (ElementScalable) {
+ EVT VScaleTy = N.getValueType().getScalarType();
+ SDValue VScale = DAG.getNode(
+ ISD::VSCALE, dl, VScaleTy,
+ DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
+ if (IsVectorGEP)
+ VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+ } else {
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementMul != 1) {
+ if (ElementMul.isPowerOf2()) {
+ unsigned Amt = ElementMul.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
+ IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, dl,
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+ MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
+ if (IsVectorGEP) {
+ PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
+ PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
+ }
+
+ if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+ N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ SDLoc dl = getCurSDLoc();
+ Type *Ty = I.getAllocatedType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
+ TypeSize TySize = DL.getTypeAllocSize(Ty);
+ MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace());
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
+
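+ // For scalable types the allocation size is ArraySize * (vscale *
+ // known-min type size); otherwise it is simply ArraySize * fixed type size.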
+ if (TySize.isScalable())
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getVScale(dl, IntPtr,
+ APInt(IntPtr.getScalarSizeInBits(),
+ TySize.getKnownMinValue())));
+ else
+ AllocSize =
+ DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
+
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If it is greater than the stack
+ // alignment, we record it in the DYNAMIC_STACKALLOC node.
+ Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
+ if (*Alignment <= StackAlign)
+ Alignment = std::nullopt;
+
+ const uint64_t StackAlignMask = StackAlign.value() - 1U;
+ // Round the size of the allocation up to the stack alignment size
+ // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(~StackAlignMask, dl, IntPtr));
+
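+ // DYNAMIC_STACKALLOC operands: chain, rounded size, and the requested
+ // alignment (0 means the default stack alignment is sufficient).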
+ SDValue Ops[] = {
+ getRoot(), AllocSize,
+ DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
+}
+
+static const MDNode *getRangeMetadata(const Instruction &I) {
+ // If !noundef is not present, then !range violation results in a poison
+ // value rather than immediate undefined behavior. In theory, transferring
+ // these annotations to SDAG is fine, but in practice there are key SDAG
+ // transforms that are known not to be poison-safe, such as folding logical
+ // and/or to bitwise and/or. For now, only transfer !range if !noundef is
+ // also present.
+ if (!I.hasMetadata(LLVMContext::MD_noundef))
+ return nullptr;
+ return I.getMetadata(LLVMContext::MD_range);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitLoadFromSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+ if (Alloca->isSwiftError())
+ return visitLoadFromSwiftError(I);
+ }
+ }
+
+ SDValue Ptr = getValue(SV);
+
+ Type *Ty = I.getType();
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<TypeSize, 4> Offsets;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ Align Alignment = I.getAlign();
+ AAMDNodes AAInfo = I.getAAMetadata();
+ const MDNode *Ranges = getRangeMetadata(I);
+ bool isVolatile = I.isVolatile();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (isVolatile)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (NumValues > MaxParallelChains)
+ Root = getMemoryRoot();
+ else if (AA &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SV,
+ LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ MMOFlags |= MachineMemOperand::MOInvariant;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SDLoc dl = getCurSDLoc();
+
+ if (isVolatile)
+ Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as a failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ ArrayRef(Chains.data(), ChainI));
+ Root = Chain;
+ ChainI = 0;
+ }
+
+ // TODO: MachinePointerInfo only supports a fixed length offset.
+ MachinePointerInfo PtrInfo =
+ !Offsets[i].isScalable() || Offsets[i].isZero()
+ ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue())
+ : MachinePointerInfo();
+
+ SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
+ SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, PtrInfo, Alignment,
+ MMOFlags, AAInfo, Ranges);
+ Chains[ChainI] = L.getValue(1);
+
+ if (MemVTs[i] != ValueVTs[i])
+ L = DAG.getPtrExtOrTrunc(L, dl, ValueVTs[i]);
+
+ Values[i] = L;
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ ArrayRef(Chains.data(), ChainI));
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+ "call visitStoreToSwiftError when backend supports swifterror");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ const Value *SrcV = I.getOperand(0);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &Offsets, 0);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ SDValue Src = getValue(SrcV);
+ // Create a virtual register, then update the virtual register.
+ Register VReg =
+ SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
+ // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
+ // Chain can be getRoot or getControlRoot.
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
+ SDValue(Src.getNode(), Src.getResNo()));
+ DAG.setRoot(CopyNode);
+}
+
+void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+ "call visitLoadFromSwiftError when backend supports swifterror");
+
+ assert(!I.isVolatile() &&
+ !I.hasMetadata(LLVMContext::MD_nontemporal) &&
+ !I.hasMetadata(LLVMContext::MD_invariant_load) &&
+ "Support volatile, non temporal, invariant for load_from_swift_error");
+
+ const Value *SV = I.getOperand(0);
+ Type *Ty = I.getType();
+ assert(
+ (!AA ||
+ !AA->pointsToConstantMemory(MemoryLocation(
+ SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ I.getAAMetadata()))) &&
+ "load_from_swift_error should not be constant memory");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
+ ValueVTs, &Offsets, 0);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
+ SDValue L = DAG.getCopyFromReg(
+ getRoot(), getCurSDLoc(),
+ SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
+
+ setValue(&I, L);
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitStoreToSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+ if (Alloca->isSwiftError())
+ return visitStoreToSwiftError(I);
+ }
+ }
+
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<TypeSize, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ // Get the lowered operands. Note that we do this after
+ // checking if NumValues is zero, because with zero results
+ // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+ SDLoc dl = getCurSDLoc();
+ Align Alignment = I.getAlign();
+ AAMDNodes AAInfo = I.getAAMetadata();
+
+ auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ ArrayRef(Chains.data(), ChainI));
+ Root = Chain;
+ ChainI = 0;
+ }
+
+ // TODO: MachinePointerInfo only supports a fixed length offset.
+ MachinePointerInfo PtrInfo =
+ !Offsets[i].isScalable() || Offsets[i].isZero()
+ ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue())
+ : MachinePointerInfo();
+
+ SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
+ SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
+ SDValue St =
+ DAG.getStore(Root, dl, Val, Add, PtrInfo, Alignment, MMOFlags, AAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ ArrayRef(Chains.data(), ChainI));
+ setValue(&I, StoreNode);
+ DAG.setRoot(StoreNode);
+}
+
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
+ bool IsCompressing) {
+ SDLoc sdl = getCurSDLoc();
+
+ auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
+ Mask = I.getArgOperand(3);
+ };
+ auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
+ // llvm.masked.compressstore.*(Src0, Ptr, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Mask = I.getArgOperand(2);
+ Alignment = std::nullopt;
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ MaybeAlign Alignment;
+ if (IsCompressing)
+ getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
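+ // The store is unindexed (ISD::UNINDEXED), so the offset operand is unused
+ // and left undef.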
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+
+ EVT VT = Src0.getValueType();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
+ SDValue StoreNode =
+ DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
+ ISD::UNINDEXED, false /* Truncating */, IsCompressing);
+ DAG.setRoot(StoreNode);
+ setValue(&I, StoreNode);
+}
+
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer, it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector, we
+// extract the splat value and use it as the uniform base.
+// In all other cases the function returns 'false'.
+static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
+ ISD::MemIndexType &IndexType, SDValue &Scale,
+ SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
+ uint64_t ElemSize) {
+ SelectionDAG& DAG = SDB->DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+
+ // Handle splat constant pointer.
+ if (auto *C = dyn_cast<Constant>(Ptr)) {
+ C = C->getSplatValue();
+ if (!C)
+ return false;
+
+ Base = SDB->getValue(C);
+
+ ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
+ Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ return true;
+ }
+
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getParent() != CurBB)
+ return false;
+
+ if (GEP->getNumOperands() != 2)
+ return false;
+
+ const Value *BasePtr = GEP->getPointerOperand();
+ const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
+
+ // Make sure the base is scalar and the index is a vector.
+ if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
+ return false;
+
+ TypeSize ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ if (ScaleVal.isScalable())
+ return false;
+
+ // Target may not support the required addressing mode.
+ if (ScaleVal != 1 &&
+ !TLI.isLegalScaleForGatherScatter(ScaleVal.getFixedValue(), ElemSize))
+ return false;
+
+ Base = SDB->getValue(BasePtr);
+ Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
+
+ Scale =
+ DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ return true;
+}
+
+void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
+ const Value *Ptr = I.getArgOperand(1);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
+ ->getMaybeAlignValue()
+ .value_or(DAG.getEVTAlign(VT.getScalarType()));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ SDValue Base;
+ SDValue Index;
+ ISD::MemIndexType IndexType;
+ SDValue Scale;
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent(), VT.getScalarStoreSize());
+
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+
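+ // The target may request a wider element type for the index vector (EltTy is
+ // an out parameter of shouldExtendGSIndex); sign-extend the indices if so.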
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
+ }
+
+ SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
+ SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
+ Ops, MMO, IndexType, false);
+ DAG.setRoot(Scatter);
+ setValue(&I, Scatter);
+}
+
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
+ SDLoc sdl = getCurSDLoc();
+
+ auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
+ Mask = I.getArgOperand(2);
+ Src0 = I.getArgOperand(3);
+ };
+ auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
+ // @llvm.masked.expandload.*(Ptr, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = std::nullopt;
+ Mask = I.getArgOperand(1);
+ Src0 = I.getArgOperand(2);
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ MaybeAlign Alignment;
+ if (IsExpanding)
+ getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+
+ EVT VT = Src0.getValueType();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+
+ AAMDNodes AAInfo = I.getAAMetadata();
+ const MDNode *Ranges = getRangeMetadata(I);
+
+ // Do not serialize masked loads of constant memory with anything.
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+
+ SDValue Load =
+ DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
+ if (AddToChain)
+ PendingLoads.push_back(Load.getValue(1));
+ setValue(&I, Load);
+}
+
+void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
+ const Value *Ptr = I.getArgOperand(0);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
+ ->getMaybeAlignValue()
+ .value_or(DAG.getEVTAlign(VT.getScalarType()));
+
+ const MDNode *Ranges = getRangeMetadata(I);
+
+ SDValue Root = DAG.getRoot();
+ SDValue Base;
+ SDValue Index;
+ ISD::MemIndexType IndexType;
+ SDValue Scale;
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent(), VT.getScalarStoreSize());
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
+
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
+ }
+
+ SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
+ SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
+ Ops, MMO, IndexType, ISD::NON_EXTLOAD);
+
+ PendingLoads.push_back(Gather.getValue(1));
+ setValue(&I, Gather);
+}
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ SDLoc dl = getCurSDLoc();
+ AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
+ AtomicOrdering FailureOrdering = I.getFailureOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
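+ // The node produces the loaded value, an i1 success flag, and the out chain.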
+ SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
+ FailureOrdering);
+
+ SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MMO);
+
+ SDValue OutChain = L.getValue(2);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ SDLoc dl = getCurSDLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation");
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
+ case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
+ case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
+ case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
+ case AtomicRMWInst::UIncWrap:
+ NT = ISD::ATOMIC_LOAD_UINC_WRAP;
+ break;
+ case AtomicRMWInst::UDecWrap:
+ NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
+ break;
+ }
+ AtomicOrdering Ordering = I.getOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
+
+ SDValue L =
+ DAG.getAtomic(NT, dl, MemVT, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getValOperand()),
+ MMO);
+
+ SDValue OutChain = L.getValue(1);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ SDLoc dl = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
+ setValue(&I, N);
+ DAG.setRoot(N);
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ SDLoc dl = getCurSDLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlign().value() < MemVT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
+
+ InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
+
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for loads to prevent future divergence.
+ SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ SDValue OutChain = L.getValue(1);
+ if (!I.isUnordered())
+ DAG.setRoot(OutChain);
+ else
+ PendingLoads.push_back(OutChain);
+ return;
+ }
+
+ SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+ Ptr, MMO);
+
+ SDValue OutChain = L.getValue(1);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ SDLoc dl = getCurSDLoc();
+
+ AtomicOrdering Ordering = I.getOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlign().value() < MemVT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
+
+ SDValue Val = getValue(I.getValueOperand());
+ if (Val.getValueType() != MemVT)
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for stores to prevent future divergence.
+ SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ setValue(&I, S);
+ DAG.setRoot(S);
+ return;
+ }
+ SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+ Ptr, Val, MMO);
+
+ setValue(&I, OutChain);
+ DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ // Ignore the callsite's attributes. A specific call site may be marked with
+ // readnone, but the lowering code will expect the chain based on the
+ // definition.
+ const Function *F = I.getCalledFunction();
+ bool HasChain = !F->doesNotAccessMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+ // Info is set by getTgtMemIntrinsic
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+ DAG.getMachineFunction(),
+ Intrinsic);
+
+ // Add the intrinsic ID as an integer operand unless this is a target memory
+ // intrinsic lowered to a target-specific opcode; generic INTRINSIC_VOID and
+ // INTRINSIC_W_CHAIN nodes still carry the ID.
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
+ const Value *Arg = I.getArgOperand(i);
+ if (!I.paramHasAttr(i, Attribute::ImmArg)) {
+ Ops.push_back(getValue(Arg));
+ continue;
+ }
+
+ // Use TargetConstant instead of a regular constant for immarg.
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
+ } else {
+ Ops.push_back(
+ DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
+ }
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ // Propagate fast-math-flags from IR to node(s).
+ SDNodeFlags Flags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPMO);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+ // Create the node.
+ SDValue Result;
+ // In some cases, custom collection of operands from CallInst I may be needed.
+ TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
+ if (IsTgtIntrinsic) {
+ // This is a target intrinsic that touches memory.
+ //
+ // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic
+ // didn't yield anything useful.
+ MachinePointerInfo MPI;
+ if (Info.ptrVal)
+ MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
+ else if (Info.fallbackAddressSpace)
+ MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, MPI, Info.align, Info.flags,
+ Info.size, I.getAAMetadata());
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ }
+
+ if (HasChain) {
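+ // The chain is always the node's last result.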
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (!isa<VectorType>(I.getType()))
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+
+ MaybeAlign Alignment = I.getRetAlign();
+
+ // Insert `assertalign` node if there's an alignment.
+ if (InsertAssertAlign && Alignment) {
+ Result =
+ DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ }
+
+ setValue(&I, Result);
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of a floating-point value.
+static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, dl, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of a floating-point value.
+static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
+ const TargetLowering &TLI, const SDLoc &dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, dl, MVT::i32));
+ SDValue t1 = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, dl, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
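+
+// Worked example for the two helpers above (illustrative only): for
+// Op = 8.0f, whose IEEE-754 bits are 0x41000000, GetExponent computes
+// ((0x41000000 & 0x7f800000) >> 23) - 127 = 130 - 127 = 3, and
+// GetSignificand produces (0x41000000 & 0x007fffff) | 0x3f800000 =
+// 0x3f800000 = 1.0f, so in the log expansions below log2(8.0f)
+// decomposes as 3 + log2(1.0f).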
+
+/// getF32Constant - Get 32-bit floating point constant.
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
+ const SDLoc &dl) {
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
+ MVT::f32);
+}
+
+static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+  // IntegerPartOfX = (int32_t)t0;
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = t0 - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX =
+ DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, dl,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(
+ MVT::i32, DAG.getDataLayout())));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234, dl));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000, dl));
+ }
+
+  // Add the exponent into the result in the integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
+}
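+
+// Illustration of the decomposition used above (not generated code): for
+// t0 = 3.25, IntegerPartOfX is 3 and the fractional part X is 0.25. The
+// polynomial approximates 2^0.25 ~= 1.189, and adding (3 << 23) to that
+// value's bit pattern scales it by 2^3, giving ~9.514 ~= 2^3.25.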
+
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDNodeFlags Flags) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // t0 = Op * log2(e)
+
+ // TODO: What fast-math-flags should be set here?
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
+}
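+
+// For example, exp(1.0f) is computed above as 2^(1.0 * log2(e)) =
+// 2^1.442695 ~= 2.71828 via the limited-precision exp2 expansion.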
+
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDNodeFlags Flags) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2).
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent =
+ DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue LogOfMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
+}
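+
+// For example, log(8.0f) is computed above as 3 * ln(2) + log(1.0f) =
+// 2.07944 + 0, using the exponent/significand split from GetExponent and
+// GetSignificand.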
+
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDNodeFlags Flags) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+    // Different possible minimax approximations of the significand in
+    // floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
+}
+
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDNodeFlags Flags) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a, dl));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue Log10ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31, dl));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce, dl));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
+}
+
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDNodeFlags Flags) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+ return getLimitedPrecisionExp2(Op, dl, DAG);
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
+}
+
+/// expandPow - Lower a pow intrinsic. Handles the special sequence for
+/// limited-precision mode when the base is exactly 10.0f.
+static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI,
+ SDNodeFlags Flags) {
+ bool IsExp10 = false;
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
+ }
+ }
+
+ // TODO: What fast-math-flags should be set on the FMUL node?
+ if (IsExp10) {
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // t0 = Op * LOG2OF10;
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
+ getF32Constant(DAG, 0x40549a78, dl));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
+}
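+
+// For example, pow(10.0f, 2.0f) is rewritten above as 2^(2.0 * log2(10)) =
+// 2^6.643856 ~= 100, reusing the limited-precision exp2 expansion.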
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree if
+ // it's beneficial on the target, otherwise we end up lowering to a call to
+ // __powidf2 (for example).
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ unsigned Val = RHSC->getSExtValue();
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, DL, LHS.getValueType());
+
+ if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
+ Val, DAG.shouldOptForSize())) {
+ // Get the exponent as a positive value.
+ if ((int)Val < 0)
+ Val = -Val;
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res =
+ DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
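+
+// Example of the binary decomposition above: for powi(x, 13), 13 = 0b1101, so
+// the loop builds successive squarings (x^2, x^4, x^8, ...) and combines the
+// set bits into Res = x * x^4 * x^8 = x^13; powi(x, -13) then becomes
+// 1.0 / x^13.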
+
+static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
+ SDValue LHS, SDValue RHS, SDValue Scale,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
+ LLVMContext &Ctx = *DAG.getContext();
+
+ // If the type is legal but the operation isn't, this node might survive all
+ // the way to operation legalization. If we end up there and we do not have
+ // the ability to widen the type (if VT*2 is not legal), we cannot expand the
+ // node.
+
+ // Coax the legalizer into expanding the node during type legalization instead
+ // by bumping the size by one bit. This will force it to Promote, enabling the
+ // early expansion and avoiding the need to expand later.
+
+ // We don't have to do this if Scale is 0; that can always be expanded, unless
+ // it's a saturating signed operation. Those can experience true integer
+ // division overflow, a case which we must avoid.
+
+ // FIXME: We wouldn't have to do this (or any of the early
+ // expansion/promotion) if it was possible to expand a libcall of an
+ // illegal type during operation legalization. But it's not, so things
+ // get a bit hacky.
+ unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if ((ScaleInt > 0 || (Saturating && Signed)) &&
+ (TLI.isTypeLegal(VT) ||
+ (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
+ TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
+ Opcode, VT, ScaleInt);
+ if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
+ EVT PromVT;
+ if (VT.isScalarInteger())
+ PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
+ else if (VT.isVector()) {
+ PromVT = VT.getVectorElementType();
+ PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
+ PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
+ } else
+ llvm_unreachable("Wrong VT for DIVFIX?");
+ LHS = DAG.getExtOrTrunc(Signed, LHS, DL, PromVT);
+ RHS = DAG.getExtOrTrunc(Signed, RHS, DL, PromVT);
+ EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
+ // For saturating operations, we need to shift up the LHS to get the
+ // proper saturation width, and then shift down again afterwards.
+ if (Saturating)
+ LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
+ DAG.getConstant(1, DL, ShiftTy));
+ SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
+ DAG.getConstant(1, DL, ShiftTy));
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+ }
+
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
+}
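+
+// Rough intuition for the saturating case above: widening the type by one bit
+// doubles the saturation bounds. Pre-shifting LHS left by 1 doubles the exact
+// quotient as well, so after the arithmetic shift right by 1 the result
+// saturates at the bounds of the original, narrower type.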
+
+// getUnderlyingArgRegs - Find the underlying registers used for a truncated,
+// bitcasted, or split argument. Returns a list of <Register, size in bits>
+// pairs.
+static void
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
+ const SDValue &N) {
+ switch (N.getOpcode()) {
+ case ISD::CopyFromReg: {
+ SDValue Op = N.getOperand(1);
+ Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
+ Op.getValueType().getSizeInBits());
+ return;
+ }
+ case ISD::BITCAST:
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ case ISD::TRUNCATE:
+ getUnderlyingArgRegs(Regs, N.getOperand(0));
+ return;
+ case ISD::BUILD_PAIR:
+ case ISD::BUILD_VECTOR:
+ case ISD::CONCAT_VECTORS:
+ for (SDValue Op : N->op_values())
+ getUnderlyingArgRegs(Regs, Op);
+ return;
+ default:
+ return;
+ }
+}
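+
+// For example, an i64 argument that was split into two i32 parts and rebuilt
+// with an ISD::BUILD_PAIR of two CopyFromReg nodes yields both underlying
+// registers here, each reported with a 32-bit size.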
+
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now. At the end of
+/// instruction selection, these instructions will be inserted into the entry
+/// BB. We don't currently support this for variadic dbg_values, as they
+/// shouldn't appear for function arguments or in the prologue.
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
+ const Value *V, DILocalVariable *Variable, DIExpression *Expr,
+ DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
+ // we've been asked to pursue.
+ auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
+ bool Indirect) {
+ if (Reg.isVirtual() && MF.useDebugInstrRef()) {
+ // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
+ // pointing at the VReg, which will be patched up later.
+ auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+
+ auto *NewDIExpr = FragExpr;
+      // DBG_INSTR_REF has no "Indirect" field, so fold the indirection into
+      // the DIExpression.
+ if (Indirect)
+ NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops);
+ return BuildMI(MF, DL, Inst, false, MOs, Variable, NewDIExpr);
+ } else {
+ // Create a completely standard DBG_VALUE.
+ auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
+ return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
+ }
+ };
+
+ if (Kind == FuncArgumentDbgValueKind::Value) {
+ // ArgDbgValues are hoisted to the beginning of the entry block. So we
+ // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+ // the entry block.
+ bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+ if (!IsInEntryBlock)
+ return false;
+
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+    // variable that is also a parameter of the current function.
+    //
+    // However, if we are already at the top of the entry block, we can still
+    // emit using ArgDbgValue. This might catch situations where the dbg.value
+    // refers to an argument that isn't used in the entry block, so any
+    // CopyToReg node would be optimized out and the only way to express this
+    // DBG_VALUE is by using the physical reg (or FI) as done in this method.
+ bool VariableIsFunctionInputArg = Variable->isParameter() &&
+ !DL->getInlinedAt();
+ bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+ if (!IsInPrologue && !VariableIsFunctionInputArg)
+ return false;
+
+    // Here we assume that a function argument at the IR level can only be
+    // used to describe one input parameter at the source level. If we, for
+    // example, have source code like this
+ //
+ // struct A { long x, y; };
+ // void foo(struct A a, long b) {
+ // ...
+ // b = a.x;
+ // ...
+ // }
+ //
+ // and IR like this
+ //
+ // define void @foo(i32 %a1, i32 %a2, i32 %b) {
+ // entry:
+ // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %b, "b",
+ // ...
+ // call void @llvm.dbg.value(metadata i32 %a1, "b"
+ // ...
+ //
+    // then the last dbg.value describes the parameter "b" using a value that
+    // is an argument. But since we have already used %a1 to describe a
+    // parameter, we should not handle that last dbg.value here (that would
+    // result in an incorrect hoisting of the DBG_VALUE to the function
+    // entry). Notice that we allow one dbg.value per IR-level argument, to
+    // accommodate the situation with fragments above.
+ if (VariableIsFunctionInputArg) {
+ unsigned ArgNo = Arg->getArgNo();
+ if (ArgNo >= FuncInfo.DescribedArgs.size())
+ FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+ else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+ return false;
+ FuncInfo.DescribedArgs.set(ArgNo);
+ }
+ }
+
+ bool IsIndirect = false;
+ std::optional<MachineOperand> Op;
+ // Some arguments' frame index is recorded during argument lowering.
+ int FI = FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != std::numeric_limits<int>::max())
+ Op = MachineOperand::CreateFI(FI);
+
+ SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
+ if (!Op && N.getNode()) {
+ getUnderlyingArgRegs(ArgRegsAndSizes, N);
+ Register Reg;
+ if (ArgRegsAndSizes.size() == 1)
+ Reg = ArgRegsAndSizes.front().first;
+
+ if (Reg && Reg.isVirtual()) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ Register PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ if (Reg) {
+ Op = MachineOperand::CreateReg(Reg, false);
+ IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
+ }
+ }
+
+ if (!Op && N.getNode()) {
+ // Check if frame index is available.
+ SDValue LCandidate = peekThroughBitcasts(N);
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ Op = MachineOperand::CreateFI(FINode->getIndex());
+ }
+
+ if (!Op) {
+    // Emit a DBG_VALUE for each of the registers the value was split into.
+ auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
+ SplitRegs) {
+ unsigned Offset = 0;
+ for (const auto &RegAndSize : SplitRegs) {
+ // If the expression is already a fragment, the current register
+ // offset+size might extend beyond the fragment. In this case, only
+ // the register bits that are inside the fragment are relevant.
+ int RegFragmentSizeInBits = RegAndSize.second;
+ if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
+ uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
+ // The register is entirely outside the expression fragment,
+ // so is irrelevant for debug info.
+ if (Offset >= ExprFragmentSizeInBits)
+ break;
+ // The register is partially outside the expression fragment, only
+ // the low bits within the fragment are relevant for debug info.
+ if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
+ RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
+ }
+ }
+
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegFragmentSizeInBits);
+ Offset += RegAndSize.second;
+ // If a valid fragment expression cannot be created, the variable's
+ // correct value cannot be determined and so it is set as Undef.
+ if (!FragmentExpr) {
+ SDDbgValue *SDV = DAG.getConstantDbgValue(
+ Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, false);
+ continue;
+ }
+ MachineInstr *NewMI =
+ MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
+ Kind != FuncArgumentDbgValueKind::Value);
+ FuncInfo.ArgDbgValues.push_back(NewMI);
+ }
+ };
+
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, Register>::const_iterator
+ VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
+ V->getType(), std::nullopt);
+ if (RFV.occupiesMultipleRegs()) {
+ splitMultiRegDbgValue(RFV.getRegsAndSizes());
+ return true;
+ }
+
+ Op = MachineOperand::CreateReg(VMI->second, false);
+ IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
+ } else if (ArgRegsAndSizes.size() > 1) {
+ // This was split due to the calling convention, and no virtual register
+ // mapping exists for the value.
+ splitMultiRegDbgValue(ArgRegsAndSizes);
+ return true;
+ }
+ }
+
+ if (!Op)
+ return false;
+
+ // If the expression refers to the entry value of an Argument, use the
+ // corresponding livein physical register. As per the Verifier, this is only
+ // allowed for swiftasync Arguments.
+ if (Op->isReg() && Expr->isEntryValue()) {
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+ auto OpReg = Op->getReg();
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (OpReg == VirtReg || OpReg == PhysReg) {
+ SDDbgValue *SDV = DAG.getVRegDbgValue(
+ Variable, Expr, PhysReg,
+ Kind != FuncArgumentDbgValueKind::Value /*is indirect*/, DL,
+ SDNodeOrder);
+ DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n");
+ return true;
+ }
+
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ MachineInstr *NewMI = nullptr;
+
+ if (Op->isReg())
+ NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
+ else
+ NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
+ Variable, Expr);
+
+  // Record the new instruction; ArgDbgValues are inserted into the entry
+  // block at the end of instruction selection.
+ FuncInfo.ArgDbgValues.push_back(NewMI);
+ return true;
+}
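+
+// As an illustration of the splitting above: a struct argument occupying two
+// 32-bit registers is described by two DBG_VALUEs (or DBG_INSTR_REFs), one
+// per register, whose expressions carry DW_OP_LLVM_fragment pieces covering
+// bits 0-31 and 32-63 of the variable respectively.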
+
+/// Return the appropriate SDDbgValue based on N.
+SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
+ DILocalVariable *Variable,
+ DIExpression *Expr,
+ const DebugLoc &dl,
+ unsigned DbgSDNodeOrder) {
+ if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
+ // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
+ // stack slot locations.
+ //
+ // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
+ // debug values here after optimization:
+ //
+ // dbg.value(i32* %px, !"int *px", !DIExpression()), and
+ // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
+ //
+ // Both describe the direct values of their associated variables.
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
+ }
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
+}
+
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::smul_fix:
+ return ISD::SMULFIX;
+ case Intrinsic::umul_fix:
+ return ISD::UMULFIX;
+ case Intrinsic::smul_fix_sat:
+ return ISD::SMULFIXSAT;
+ case Intrinsic::umul_fix_sat:
+ return ISD::UMULFIXSAT;
+ case Intrinsic::sdiv_fix:
+ return ISD::SDIVFIX;
+ case Intrinsic::udiv_fix:
+ return ISD::UDIVFIX;
+ case Intrinsic::sdiv_fix_sat:
+ return ISD::SDIVFIXSAT;
+ case Intrinsic::udiv_fix_sat:
+ return ISD::UDIVFIXSAT;
+ default:
+ llvm_unreachable("Unhandled fixed point intrinsic");
+ }
+}
+
+void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
+ const char *FunctionName) {
+ assert(FunctionName && "FunctionName must not be nullptr");
+ SDValue Callee = DAG.getExternalSymbol(
+ FunctionName,
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+ LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
+}
+
+/// Given a @llvm.call.preallocated.setup, return the corresponding
+/// preallocated call.
+static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
+ assert(cast<CallBase>(PreallocatedSetup)
+ ->getCalledFunction()
+ ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+ "expected call_preallocated_setup Value");
+ for (const auto *U : PreallocatedSetup->users()) {
+ auto *UseCall = cast<CallBase>(U);
+ const Function *Fn = UseCall->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+ return UseCall;
+ }
+ }
+ llvm_unreachable("expected corresponding call to preallocated setup/arg");
+}
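+
+// Roughly, the IR shape this walks (illustrative): the token produced by
+// @llvm.call.preallocated.setup is used both by @llvm.call.preallocated.arg
+// calls and, via a "preallocated" operand bundle, by the eventual call site;
+// the loop above skips the .arg users and returns that call site.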
+
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+ unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc sdl = getCurSDLoc();
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ case Intrinsic::vscale: {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
+ return;
+ }
+ case Intrinsic::vastart: visitVAStart(I); return;
+ case Intrinsic::vaend: visitVAEnd(I); return;
+ case Intrinsic::vacopy: visitVACopy(I); return;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::addressofreturnaddress:
+ setValue(&I,
+ DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType())));
+ return;
+ case Intrinsic::sponentry:
+ setValue(&I,
+ DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType())));
+ return;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
+ TLI.getFrameIndexTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::read_volatile_register:
+ case Intrinsic::read_register: {
+ Value *Reg = I.getArgOperand(0);
+ SDValue Chain = getRoot();
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Res = DAG.getNode(ISD::READ_REGISTER, sdl,
+ DAG.getVTList(VT, MVT::Other), Chain, RegName);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::write_register: {
+ Value *Reg = I.getArgOperand(0);
+ Value *RegValue = I.getArgOperand(1);
+ SDValue Chain = getRoot();
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
+ DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
+ RegName, getValue(RegValue)));
+ return;
+ }
+ case Intrinsic::memcpy: {
+ const auto &MCI = cast<MemCpyInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = std::min(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(
+ Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memcpy_inline: {
+ const auto &MCI = cast<MemCpyInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
+ // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = std::min(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue MC = DAG.getMemcpy(
+ getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
+ /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memset: {
+ const auto &MSI = cast<MemSetInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ Align Alignment = MSI.getDestAlign().valueOrOne();
+ bool isVol = MSI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MS = DAG.getMemset(
+ Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
+ isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+ updateDAGForMaybeTailCall(MS);
+ return;
+ }
+ case Intrinsic::memset_inline: {
+ const auto &MSII = cast<MemSetInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Value = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MSII.getDestAlign().valueOrOne();
+ bool isVol = MSII.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
+ /* AlwaysInline */ true, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ I.getAAMetadata());
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memmove: {
+ const auto &MMI = cast<MemMoveInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MMI.getDestAlign().valueOrOne();
+ Align SrcAlign = MMI.getSourceAlign().valueOrOne();
+ Align Alignment = std::min(DstAlign, SrcAlign);
+ bool isVol = MMI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memmove DAG
+ // node.
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata(), AA);
+ updateDAGForMaybeTailCall(MM);
+ return;
+ }
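+
+  // Note on the alignment handling in the memcpy/memcpy.inline/memmove cases
+  // above: until the FIXMEs are addressed, a copy with, say, a 16-byte
+  // aligned destination but only a 4-byte aligned source is given the single
+  // conservative alignment of 4 on the DAG node.
+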
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
+
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue MC =
+ DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memmove_element_unordered_atomic: {
+ auto &MI = cast<AtomicMemMoveInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
+
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue MC =
+ DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memset_element_unordered_atomic: {
+ auto &MI = cast<AtomicMemSetInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Val = getValue(MI.getValue());
+ SDValue Length = getValue(MI.getLength());
+
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue MC =
+ DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::call_preallocated_setup: {
+ const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+ getRoot(), SrcValue);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::call_preallocated_arg: {
+ const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = SrcValue;
+ Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+ MVT::i32); // arg index
+ SDValue Res = DAG.getNode(
+ ISD::PREALLOCATED_ARG, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::dbg_declare: {
+ const auto &DI = cast<DbgDeclareInst>(I);
+ // Debug intrinsics are handled separately in assignment tracking mode.
+    // Some dbg.declare intrinsics are handled right after Argument lowering.
+ if (AssignmentTrackingEnabled ||
+ FuncInfo.PreprocessedDbgDeclares.count(&DI))
+ return;
+    // Assume dbg.declare cannot currently use DIArgList, i.e.
+ // it is non-variadic.
+ assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
+ assert(Variable && "Missing variable");
+ LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
+ << "\n");
+ // Check if address has undef value.
+ const Value *Address = DI.getVariableLocationOp(0);
+ if (!Address || isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
+ << " (bad/undef/unused-arg address)\n");
+ return;
+ }
+
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (isParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV =
+ DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
+ /*IsIndirect*/ true, dl, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
+ FuncArgumentDbgValueKind::Declare, N);
+ return;
+ } else {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ true, dl, SDNodeOrder);
+ }
+ DAG.AddDbgValue(SDV, isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
+ FuncArgumentDbgValueKind::Declare, N)) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
+ << " (could not emit func-arg dbg_value)\n");
+ }
+ }
+ return;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(I);
+ DILabel *Label = DI.getLabel();
+ assert(Label && "Missing label");
+
+ SDDbgLabel *SDV;
+ SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
+ DAG.AddDbgLabel(SDV);
+ return;
+ }
+ case Intrinsic::dbg_assign: {
+    // Debug intrinsics are handled separately in assignment tracking mode.
+ if (AssignmentTrackingEnabled)
+ return;
+ // If assignment tracking hasn't been enabled then fall through and treat
+ // the dbg.assign as a dbg.value.
+ [[fallthrough]];
+ }
+ case Intrinsic::dbg_value: {
+    // Debug intrinsics are handled separately in assignment tracking mode.
+ if (AssignmentTrackingEnabled)
+ return;
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ assert(DI.getVariable() && "Missing variable");
+
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
+
+ if (DI.isKillLocation()) {
+ handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder);
+ return;
+ }
+
+ SmallVector<Value *, 4> Values(DI.getValues());
+ if (Values.empty())
+ return;
+
+ bool IsVariadic = DI.hasArgList();
+ if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(),
+ SDNodeOrder, IsVariadic))
+ addDanglingDebugInfo(&DI, SDNodeOrder);
+ return;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, sdl, MVT::i32);
+ setValue(&I, Res);
+ return;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().setCallsUnwindInit(true);
+ return;
+ case Intrinsic::eh_dwarf_cfa:
+ setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0));
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return;
+ }
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI.setFunctionContextIndex(FI);
+ return;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return;
+ }
+ case Intrinsic::eh_sjlj_longjmp:
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::eh_sjlj_setup_dispatch:
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return;
+ case Intrinsic::masked_gather:
+ visitMaskedGather(I);
+ return;
+ case Intrinsic::masked_load:
+ visitMaskedLoad(I);
+ return;
+ case Intrinsic::masked_scatter:
+ visitMaskedScatter(I);
+ return;
+ case Intrinsic::masked_store:
+ visitMaskedStore(I);
+ return;
+ case Intrinsic::masked_expandload:
+ visitMaskedLoad(I, true /* IsExpanding */);
+ return;
+ case Intrinsic::masked_compressstore:
+ visitMaskedStore(I, true /* IsCompressing */);
+ return;
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return;
+ case Intrinsic::log:
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
+ return;
+ case Intrinsic::log2:
+ setValue(&I,
+ expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
+ return;
+ case Intrinsic::log10:
+ setValue(&I,
+ expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
+ return;
+ case Intrinsic::exp:
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
+ return;
+ case Intrinsic::exp2:
+ setValue(&I,
+ expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
+ return;
+ case Intrinsic::pow:
+ setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI, Flags));
+ return;
+ case Intrinsic::sqrt:
+ case Intrinsic::fabs:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::roundeven:
+ case Intrinsic::canonicalize: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
+ case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), Flags));
+ return;
+ }
+ case Intrinsic::lround:
+ case Intrinsic::llround:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::lround: Opcode = ISD::LROUND; break;
+ case Intrinsic::llround: Opcode = ISD::LLROUND; break;
+ case Intrinsic::lrint: Opcode = ISD::LRINT; break;
+ case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
+ }
+
+ EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
+ getValue(I.getArgOperand(0))));
+ return;
+ }
+ case Intrinsic::minnum:
+ setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::maxnum:
+ setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::minimum:
+ setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::maximum:
+ setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::copysign:
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::ldexp:
+ setValue(&I, DAG.getNode(ISD::FLDEXP, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::frexp: {
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ setValue(&I,
+ DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0))));
+ return;
+ }
+ case Intrinsic::arithmetic_fence: {
+ setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), Flags));
+ return;
+ }
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(
+ ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
+ return;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+ visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
+ return;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#include "llvm/IR/VPIntrinsics.def"
+ visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
+ return;
+ case Intrinsic::fptrunc_round: {
+    // Get the last argument of the call, the rounding-mode metadata, and
+    // convert it to an integer.
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+ std::optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Propagate fast-math-flags from IR to node(s).
+ SDNodeFlags Flags;
+ Flags.copyFMF(*cast<FPMathOperator>(&I));
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+ SDValue Result;
+ Result = DAG.getNode(
+ ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant((int)*RoundMode, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I, Result);
+
+ return;
+ }
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ setValue(&I, DAG.getNode(ISD::FMA, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
+ } else {
+ // TODO: Intrinsic calls should have fast-math-flags.
+ SDValue Mul = DAG.getNode(
+ ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul, getValue(I.getArgOperand(2)), Flags);
+ setValue(&I, Add);
+ }
+ return;
+ }
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+ DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant(0, sdl,
+ MVT::i32))));
+ return;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
+ return;
+ case Intrinsic::fptosi_sat: {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
+ getValue(I.getArgOperand(0)),
+ DAG.getValueType(VT.getScalarType())));
+ return;
+ }
+ case Intrinsic::fptoui_sat: {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
+ getValue(I.getArgOperand(0)),
+ DAG.getValueType(VT.getScalarType())));
+ return;
+ }
+ case Intrinsic::set_rounding:
+ Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
+ {getRoot(), getValue(I.getArgOperand(0))});
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(0));
+ return;
+ case Intrinsic::is_fpclass: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT DestVT = TLI.getValueType(DLayout, I.getType());
+ EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
+ FPClassTest Test = static_cast<FPClassTest>(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(
+ !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
+ // If ISD::IS_FPCLASS should be expanded, do it right now, because the
+    // expansion can use illegal types. Expanding early allows these types to
+    // be legalized prior to selection.
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
+ SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
+ setValue(&I, Result);
+ return;
+ }
+
+ SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
+ SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
+ setValue(&I, V);
+ return;
+ }
+ case Intrinsic::get_fpenv: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT EnvVT = TLI.getValueType(DLayout, I.getType());
+ Align TempAlign = DAG.getEVTAlign(EnvVT);
+ SDValue Chain = getRoot();
+    // Use GET_FPENV if it is legal or custom. Otherwise use a memory-based
+    // node with temporary storage on the stack.
+ if (TLI.isOperationLegalOrCustom(ISD::GET_FPENV, EnvVT)) {
+ Res = DAG.getNode(
+ ISD::GET_FPENV, sdl,
+ DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ MVT::Other),
+ Chain);
+ } else {
+ SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+ int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+ auto MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ TempAlign);
+ Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+ Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI);
+ }
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::set_fpenv: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ SDValue Env = getValue(I.getArgOperand(0));
+ EVT EnvVT = Env.getValueType();
+ Align TempAlign = DAG.getEVTAlign(EnvVT);
+ SDValue Chain = getRoot();
+    // If SET_FPENV is legal or custom, use it. Otherwise load the
+    // environment from memory.
+ if (TLI.isOperationLegalOrCustom(ISD::SET_FPENV, EnvVT)) {
+ Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env);
+ } else {
+      // Allocate stack space, copy the environment bits into it, and use
+      // that memory in SET_FPENV_MEM.
+ SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+ int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+ auto MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign,
+ MachineMemOperand::MOStore);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ TempAlign);
+ Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+ }
+ DAG.setRoot(Chain);
+ return;
+ }
+ case Intrinsic::reset_fpenv:
+ DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
+ return;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
+ return;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
+ DAG.getVTList(MVT::i64, MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::bitreverse:
+ setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
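+    // The second operand states whether a zero input is poison; when it is
+    // not, use the plain CTTZ node, which is fully defined at zero.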
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ sdl, Ty, Arg));
+ return;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ sdl, Ty, Arg));
+ return;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
+ return;
+ }
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ bool IsFSHL = Intrinsic == Intrinsic::fshl;
+ SDValue X = getValue(I.getArgOperand(0));
+ SDValue Y = getValue(I.getArgOperand(1));
+ SDValue Z = getValue(I.getArgOperand(2));
+ EVT VT = X.getValueType();
+
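+    // A funnel shift of a value with itself is simply a rotate.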
+ if (X == Y) {
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ } else {
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
+ }
+ return;
+ }
+ case Intrinsic::sadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::uadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::ssub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::usub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::sshl_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::ushl_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1.getValueType(), Op1, Op2, Op3));
+ return;
+ }
+ case Intrinsic::sdiv_fix:
+ case Intrinsic::udiv_fix:
+ case Intrinsic::sdiv_fix_sat:
+ case Intrinsic::udiv_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1, Op2, Op3, DAG, TLI));
+ return;
+ }
+ case Intrinsic::smax: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::smin: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::umax: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::umin: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::abs: {
+ // TODO: Preserve "int min is poison" arg in SDAG?
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
+ return;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::stackrestore:
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
+ return;
+ case Intrinsic::get_dynamic_area_offset: {
+ SDValue Op = getRoot();
+ EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
+ EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+    // The result type of @llvm.get.dynamic.area.offset should match the
+    // target's PtrTy.
+ if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
+ report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
+ " intrinsic!");
+ Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
+ Op);
+ DAG.setRoot(Op);
+ setValue(&I, Res);
+ return;
+ }
+ case Intrinsic::stackguard: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Module &M = *MF.getFunction().getParent();
+ SDValue Chain = getRoot();
+ if (TLI.useLoadStackGuardNode()) {
+ Res = getLoadStackGuard(DAG, sdl, Chain);
+ } else {
+ EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ const Value *Global = TLI.getSDagStackGuard(M);
+ Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
+ Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
+ MachinePointerInfo(Global, 0), Align,
+ MachineMemOperand::MOVolatile);
+ }
+ if (TLI.useStackGuardXorFP())
+ Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
+ DAG.setRoot(Chain);
+ setValue(&I, Res);
+ return;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ SDValue Src, Chain = getRoot();
+
+ if (TLI.useLoadStackGuardNode())
+ Src = getLoadStackGuard(DAG, sdl, Chain);
+ else
+ Src = getValue(I.getArgOperand(0)); // The guard's value.
+
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI.setStackProtectorIndex(FI);
+ EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(
+ Chain, sdl, Src, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MaybeAlign(), MachineMemOperand::MOVolatile);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ // Drop the intrinsic, but forward the value
+ setValue(&I, getValue(I.getOperand(0)));
+ return;
+
+ case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::var_annotation:
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, noalias scope declarations, assumptions, and
+ // artificial side-effects.
+ return;
+
+ case Intrinsic::codeview_annotation: {
+ // Emit a label associated with this metadata.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MCSymbol *Label =
+ MF.getMMI().getContext().createTempSymbol("annotation", true);
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
+ Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
+ DAG.setRoot(Res);
+ return;
+ }
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
+
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::adjust_trampoline:
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::gcroot: {
+ assert(DAG.getMachineFunction().getFunction().hasGC() &&
+ "only valid in functions with gc specified, enforced by Verifier");
+ assert(GFI && "implied by previous");
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ return;
+ }
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ case Intrinsic::get_rounding:
+ Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot());
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+
+ case Intrinsic::expect:
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return;
+
+ case Intrinsic::ubsantrap:
+ case Intrinsic::debugtrap:
+ case Intrinsic::trap: {
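+    // Without a "trap-func-name" attribute, emit the corresponding trap node;
+    // otherwise lower the trap to a call to the named function.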
+ StringRef TrapFuncName =
+ I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
+ if (TrapFuncName.empty()) {
+ switch (Intrinsic) {
+ case Intrinsic::trap:
+ DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
+ break;
+ case Intrinsic::debugtrap:
+ DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
+ break;
+ case Intrinsic::ubsantrap:
+ DAG.setRoot(DAG.getNode(
+ ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
+ DAG.getTargetConstant(
+ cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
+ MVT::i32)));
+ break;
+ default: llvm_unreachable("unknown trap intrinsic");
+ }
+ return;
+ }
+ TargetLowering::ArgListTy Args;
+ if (Intrinsic == Intrinsic::ubsantrap) {
+ Args.push_back(TargetLoweringBase::ArgListEntry());
+ Args[0].Val = I.getArgOperand(0);
+ Args[0].Node = getValue(Args[0].Val);
+ Args[0].Ty = Args[0].Val->getType();
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ CallingConv::C, I.getType(),
+ DAG.getExternalSymbol(TrapFuncName.data(),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return;
+ }
+
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
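+    // The arithmetic result keeps the operand type; the overflow flag is i1,
+    // or a vector of i1 with a matching element count for vector operands.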
+ EVT ResultVT = Op1.getValueType();
+ EVT OverflowVT = MVT::i1;
+ if (ResultVT.isVector())
+ OverflowVT = EVT::getVectorVT(
+ *Context, OverflowVT, ResultVT.getVectorElementCount());
+
+ SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
+ setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
+ return;
+ }
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
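+    // Operand 1 is the read/write specifier: 0 requests a read prefetch,
+    // modelled as a load, and 1 a write prefetch, modelled as a store.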
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags =
+        rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
+ Ops[0] = DAG.getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ SDValue Result = DAG.getMemIntrinsicNode(
+ ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
+ /* align */ std::nullopt, Flags);
+
+    // Chain the prefetch in parallel with any pending loads, to stay out of
+ // the way of later optimizations.
+ PendingLoads.push_back(Result);
+ Result = getRoot();
+ DAG.setRoot(Result);
+ return;
+ }
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+    // Stack coloring is not enabled at -O0, so discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ const int64_t ObjectSize =
+ cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
+ Value *const ObjectPtr = I.getArgOperand(1);
+ SmallVector<const Value *, 4> Allocas;
+ getUnderlyingObjects(ObjectPtr, Allocas);
+
+ for (const Value *Alloca : Allocas) {
+ const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
+
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ continue;
+
+ // First check that the Alloca is static, otherwise it won't have a
+ // valid frame index.
+ auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return;
+
+ const int FrameIndex = SI->second;
+ int64_t Offset;
+ if (GetPointerBaseWithConstantOffset(
+ ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
+ Offset = -1; // Cannot determine offset from alloca to lifetime object.
+ Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
+ Offset);
+ DAG.setRoot(Res);
+ }
+ return;
+ }
+ case Intrinsic::pseudoprobe: {
+ auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
+ auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::invariant_start:
+ // Discard region information.
+ setValue(&I,
+ DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType())));
+ return;
+ case Intrinsic::invariant_end:
+ // Discard region information.
+ return;
+ case Intrinsic::clear_cache:
+    // FunctionName may be null.
+ if (const char *FunctionName = TLI.getClearCacheBuiltinName())
+ lowerCallToExternalSymbol(I, FunctionName);
+ return;
+ case Intrinsic::donothing:
+ case Intrinsic::seh_try_begin:
+ case Intrinsic::seh_scope_begin:
+ case Intrinsic::seh_try_end:
+ case Intrinsic::seh_scope_end:
+ // ignore
+ return;
+ case Intrinsic::experimental_stackmap:
+ visitStackmap(I);
+ return;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(I);
+ return;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(cast<GCStatepointInst>(I));
+ return;
+ case Intrinsic::experimental_gc_result:
+ visitGCResult(cast<GCResultInst>(I));
+ return;
+ case Intrinsic::experimental_gc_relocate:
+ visitGCRelocate(cast<GCRelocateInst>(I));
+ return;
+ case Intrinsic::instrprof_cover:
+ llvm_unreachable("instrprof failed to lower a cover");
+ case Intrinsic::instrprof_increment:
+ llvm_unreachable("instrprof failed to lower an increment");
+ case Intrinsic::instrprof_timestamp:
+ llvm_unreachable("instrprof failed to lower a timestamp");
+ case Intrinsic::instrprof_value_profile:
+ llvm_unreachable("instrprof failed to lower a value profiling call");
+ case Intrinsic::localescape: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
+ Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+ AllocaInst *Slot = cast<AllocaInst>(Arg);
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only escape static allocas");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::LOCAL_ESCAPE))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+ }
+
+ return;
+ }
+
+ case Intrinsic::localrecover: {
+ // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Get the symbol that defines the frame offset.
+ auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
+ unsigned IdxVal =
+ unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
+
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ EVT PtrVT = FPVal.getValueType();
+
+    // Create an MCSymbol for the label to avoid any target lowering
+ // that would make this PC relative.
+ SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
+ setValue(&I, Add);
+
+ return;
+ }
+
+ case Intrinsic::eh_exceptionpointer:
+ case Intrinsic::eh_exceptioncode: {
+ // Get the exception pointer vreg, copy from it, and resize it to fit.
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
+ unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
+ if (Intrinsic == Intrinsic::eh_exceptioncode)
+ N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
+ setValue(&I, N);
+ return;
+ }
+ case Intrinsic::xray_customevent: {
+ // Here we want to make sure that the intrinsic behaves as if it has a
+ // specific calling convention.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
+ return;
+
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ SDValue LogEntryVal = getValue(I.getArgOperand(0));
+ SDValue StrSizeVal = getValue(I.getArgOperand(1));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite so that
+    // argument ordering is enforced correctly and register allocation can see
+    // that some registers are assumed clobbered and must be preserved across
+    // calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
+ sdl, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return;
+ }
+ case Intrinsic::xray_typedevent: {
+ // Here we want to make sure that the intrinsic behaves as if it has a
+ // specific calling convention.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
+ return;
+
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ // It's unclear to me how manipulating the selection DAG here forces callers
+ // to provide arguments in registers instead of on the stack.
+ SDValue LogTypeId = getValue(I.getArgOperand(0));
+ SDValue LogEntryVal = getValue(I.getArgOperand(1));
+ SDValue StrSizeVal = getValue(I.getArgOperand(2));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogTypeId);
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite so that
+    // argument ordering is enforced correctly and register allocation can see
+    // that some registers are assumed clobbered and must be preserved across
+    // calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return;
+ }
+ case Intrinsic::experimental_deoptimize:
+ LowerDeoptimizeCall(&I);
+ return;
+ case Intrinsic::experimental_stepvector:
+ visitStepVector(I);
+ return;
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmaximum:
+ case Intrinsic::vector_reduce_fminimum:
+ visitVectorReduce(I, Intrinsic);
+ return;
+
+ case Intrinsic::icall_branch_funnel: {
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(getValue(I.getArgOperand(0)));
+
+ int64_t Offset;
+ auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(1), Offset, DAG.getDataLayout()));
+ if (!Base)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
+
+ struct BranchFunnelTarget {
+ int64_t Offset;
+ SDValue Target;
+ };
+ SmallVector<BranchFunnelTarget, 8> Targets;
+
+ for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
+ auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(Op), Offset, DAG.getDataLayout()));
+ if (ElemBase != Base)
+ report_fatal_error("all llvm.icall.branch.funnel operands must refer "
+ "to the same GlobalValue");
+
+ SDValue Val = getValue(I.getArgOperand(Op + 1));
+ auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
+ if (!GA)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Targets.push_back({Offset, DAG.getTargetGlobalAddress(
+ GA->getGlobal(), sdl, Val.getValueType(),
+ GA->getOffset())});
+ }
+ llvm::sort(Targets,
+ [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
+ return T1.Offset < T2.Offset;
+ });
+
+ for (auto &T : Targets) {
+ Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
+ Ops.push_back(T.Target);
+ }
+
+ Ops.push_back(DAG.getRoot()); // Chain
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
+ MVT::Other, Ops),
+ 0);
+ DAG.setRoot(N);
+ setValue(&I, N);
+ HasTailCall = true;
+ return;
+ }
+
+ case Intrinsic::wasm_landingpad_index:
+    // The information this intrinsic contained has been transferred to the
+    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad, so we can
+    // safely drop it now.
+ return;
+
+ case Intrinsic::aarch64_settag:
+ case Intrinsic::aarch64_settag_zero: {
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+ SDValue Val = TSI.EmitTargetCodeForSetTag(
+ DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+ ZeroMemory);
+ DAG.setRoot(Val);
+ setValue(&I, Val);
+ return;
+ }
+ case Intrinsic::ptrmask: {
+ SDValue Ptr = getValue(I.getOperand(0));
+ SDValue Const = getValue(I.getOperand(1));
+
+ EVT PtrVT = Ptr.getValueType();
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
+ DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
+ return;
+ }
+ case Intrinsic::threadlocal_address: {
+ setValue(&I, getValue(I.getOperand(0)));
+ return;
+ }
+ case Intrinsic::get_active_lane_mask: {
+ EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Index = getValue(I.getOperand(0));
+ EVT ElementVT = Index.getValueType();
+
+ if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
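+    // Expand the mask manually: lane i is active iff Index + i is below the
+    // trip count, using a saturating add so the induction cannot wrap.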
+ SDValue TripCount = getValue(I.getOperand(1));
+ EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElementVT,
+ CCVT.getVectorElementCount());
+
+ SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index);
+ SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount);
+ SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
+ SDValue VectorInduction = DAG.getNode(
+ ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
+ SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
+ VectorTripCount, ISD::CondCode::SETULT);
+ setValue(&I, SetCC);
+ return;
+ }
+ case Intrinsic::experimental_get_vector_length: {
+ assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 &&
+ "Expected positive VF");
+ unsigned VF = cast<ConstantInt>(I.getOperand(1))->getZExtValue();
+ bool IsScalable = cast<ConstantInt>(I.getOperand(2))->isOne();
+
+ SDValue Count = getValue(I.getOperand(0));
+ EVT CountVT = Count.getValueType();
+
+ if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
+    // Expand to a umin between the trip count and the maximum number of
+    // elements the type can hold.
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Extend the trip count to at least the result VT.
+ if (CountVT.bitsLT(VT)) {
+ Count = DAG.getNode(ISD::ZERO_EXTEND, sdl, VT, Count);
+ CountVT = VT;
+ }
+
+ SDValue MaxEVL = DAG.getElementCount(sdl, CountVT,
+ ElementCount::get(VF, IsScalable));
+
+ SDValue UMin = DAG.getNode(ISD::UMIN, sdl, CountVT, Count, MaxEVL);
+ // Clip to the result type if needed.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, sdl, VT, UMin);
+
+ setValue(&I, Trunc);
+ return;
+ }
+ case Intrinsic::vector_insert: {
+ SDValue Vec = getValue(I.getOperand(0));
+ SDValue SubVec = getValue(I.getOperand(1));
+ SDValue Index = getValue(I.getOperand(2));
+
+ // The intrinsic's index type is i64, but the SDNode requires an index type
+ // suitable for the target. Convert the index as required.
+ MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (Index.getValueType() != VectorIdxTy)
+ Index = DAG.getVectorIdxConstant(
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
+
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
+ Index));
+ return;
+ }
+ case Intrinsic::vector_extract: {
+ SDValue Vec = getValue(I.getOperand(0));
+ SDValue Index = getValue(I.getOperand(1));
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // The intrinsic's index type is i64, but the SDNode requires an index type
+ // suitable for the target. Convert the index as required.
+ MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (Index.getValueType() != VectorIdxTy)
+ Index = DAG.getVectorIdxConstant(
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
+
+ setValue(&I,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
+ return;
+ }
+ case Intrinsic::experimental_vector_reverse:
+ visitVectorReverse(I);
+ return;
+ case Intrinsic::experimental_vector_splice:
+ visitVectorSplice(I);
+ return;
+ case Intrinsic::callbr_landingpad:
+ visitCallBrLandingPad(I);
+ return;
+ case Intrinsic::experimental_vector_interleave2:
+ visitVectorInterleave(I);
+ return;
+ case Intrinsic::experimental_vector_deinterleave2:
+ visitVectorDeinterleave(I);
+ return;
+ }
+}
+
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
+ const ConstrainedFPIntrinsic &FPI) {
+ SDLoc sdl = getCurSDLoc();
+
+ // We do not need to serialize constrained FP intrinsics against
+ // each other or against (nonvolatile) loads, so they can be
+ // chained like loads.
+ SDValue Chain = DAG.getRoot();
+ SmallVector<SDValue, 4> Opers;
+ Opers.push_back(Chain);
+ if (FPI.isUnaryOp()) {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ } else if (FPI.isTernaryOp()) {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ Opers.push_back(getValue(FPI.getArgOperand(1)));
+ Opers.push_back(getValue(FPI.getArgOperand(2)));
+ } else {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ Opers.push_back(getValue(FPI.getArgOperand(1)));
+ }
+
+ auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
+ assert(Result.getNode()->getNumValues() == 2);
+
+ // Push node to the appropriate list so that future instructions can be
+ // chained up correctly.
+ SDValue OutChain = Result.getValue(1);
+ switch (EB) {
+ case fp::ExceptionBehavior::ebIgnore:
+ // The only reason why ebIgnore nodes still need to be chained is that
+ // they might depend on the current rounding mode, and therefore must
+      // not be moved across instructions that may change that mode.
+ [[fallthrough]];
+ case fp::ExceptionBehavior::ebMayTrap:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks.
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks or read floating-point exception flags.
+ // In addition, they cannot be optimized out even if unused.
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+ };
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
+
+ SDNodeFlags Flags;
+ if (EB == fp::ExceptionBehavior::ebIgnore)
+ Flags.setNoFPExcept(true);
+
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
+ Flags.copyFMF(*FPOp);
+
+ unsigned Opcode;
+ switch (FPI.getIntrinsicID()) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ Opcode = ISD::STRICT_##DAGN; \
+ break;
+#include "llvm/IR/ConstrainedOps.def"
+ case Intrinsic::experimental_constrained_fmuladd: {
+ Opcode = ISD::STRICT_FMA;
+ // Break fmuladd into fmul and fadd.
+ if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
+ !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ Opers.pop_back();
+ SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
+ pushOutChain(Mul, EB);
+ Opcode = ISD::STRICT_FADD;
+ Opers.clear();
+ Opers.push_back(Mul.getValue(1));
+ Opers.push_back(Mul.getValue(0));
+ Opers.push_back(getValue(FPI.getArgOperand(2)));
+ }
+ break;
+ }
+ }
+
+ // A few strict DAG nodes carry additional operands that are not
+ // set up by the default code above.
+ switch (Opcode) {
+ default: break;
+ case ISD::STRICT_FP_ROUND:
+ Opers.push_back(
+ DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
+ break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
+ ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ Opers.push_back(DAG.getCondCode(Condition));
+ break;
+ }
+ }
+
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
+ pushOutChain(Result, EB);
+
+ SDValue FPResult = Result.getValue(0);
+ setValue(&FPI, FPResult);
+}
+
+static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
+ std::optional<unsigned> ResOPC;
+ switch (VPIntrin.getIntrinsicID()) {
+ case Intrinsic::vp_ctlz: {
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
+ ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
+ break;
+ }
+ case Intrinsic::vp_cttz: {
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
+ ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
+ break;
+ }
+#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
+ case Intrinsic::VPID: \
+ ResOPC = ISD::VPSD; \
+ break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+
+ if (!ResOPC)
+ llvm_unreachable(
+ "Inconsistency: no SDNode available for this VPIntrinsic!");
+
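+  // A sequential (ordered) FP reduction may be relaxed to the unordered form
+  // when the intrinsic permits reassociation.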
+ if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
+ *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
+ if (VPIntrin.getFastMathFlags().allowReassoc())
+ return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
+ : ISD::VP_REDUCE_FMUL;
+ }
+
+ return *ResOPC;
+}
+
+void SelectionDAGBuilder::visitVPLoad(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = getRangeMetadata(VPIntrin);
+ SDValue LD;
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPGather(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = getRangeMetadata(VPIntrin);
+ SDValue LD;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
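+  // If no single scalar base could be split out of the pointer vector, gather
+  // from base zero using the full pointer vector as the index with a scale of
+  // one.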
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStore(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ SDValue Ptr = OpValues[1];
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
+ OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
+ /* IsTruncating */ false, /*IsCompressing*/ false);
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
+void SelectionDAGBuilder::visitVPScatter(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
+void SelectionDAGBuilder::visitVPStridedLoad(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = getRangeMetadata(VPIntrin);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+
+ SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO,
+ false /*IsExpanding*/);
+
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStridedStore(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+
+ SDValue ST = DAG.getStridedStoreVP(
+ getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
+ OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
+ /*IsCompressing*/ false);
+
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
+void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL = getCurSDLoc();
+
+ ISD::CondCode Condition;
+ CmpInst::Predicate CondCode = VPIntrin.getPredicate();
+ bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
+ if (IsFP) {
+ // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
+ // flags, but calls that don't return floating-point types can't be
+ // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
+ Condition = getFCmpCondCode(CondCode);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ } else {
+ Condition = getICmpCondCode(CondCode);
+ }
+
+ SDValue Op1 = getValue(VPIntrin.getOperand(0));
+ SDValue Op2 = getValue(VPIntrin.getOperand(1));
+ // #2 is the condition code
+ SDValue MaskOp = getValue(VPIntrin.getOperand(3));
+ SDValue EVL = getValue(VPIntrin.getOperand(4));
+ MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
+ assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
+ "Unexpected target EVL type");
+ EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL);
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ VPIntrin.getType());
+ setValue(&VPIntrin,
+ DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL));
+}
+
+void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
+ const VPIntrinsic &VPIntrin) {
+ SDLoc DL = getCurSDLoc();
+ unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
+
+ auto IID = VPIntrin.getIntrinsicID();
+
+ if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin))
+ return visitVPCmp(*CmpI);
+
+ SmallVector<EVT, 4> ValueVTs;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID);
+
+ MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
+ assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
+ "Unexpected target EVL type");
+
+ // Request operands.
+ SmallVector<SDValue, 7> OpValues;
+ for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
+ auto Op = getValue(VPIntrin.getArgOperand(I));
+ if (I == EVLParamPos)
+ Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
+ OpValues.push_back(Op);
+ }
+
+ switch (Opcode) {
+ default: {
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
+ SDFlags.copyFMF(*FPMO);
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags);
+ setValue(&VPIntrin, Result);
+ break;
+ }
+ case ISD::VP_LOAD:
+ visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
+ break;
+ case ISD::VP_GATHER:
+ visitVPGather(VPIntrin, ValueVTs[0], OpValues);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
+ break;
+ case ISD::VP_STORE:
+ visitVPStore(VPIntrin, OpValues);
+ break;
+ case ISD::VP_SCATTER:
+ visitVPScatter(VPIntrin, OpValues);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ visitVPStridedStore(VPIntrin, OpValues);
+ break;
+ case ISD::VP_FMULADD: {
+ assert(OpValues.size() == 5 && "Unexpected number of operands");
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
+ SDFlags.copyFMF(*FPMO);
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) {
+ setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags));
+ } else {
+ SDValue Mul = DAG.getNode(
+ ISD::VP_FMUL, DL, VTs,
+ {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags);
+ SDValue Add =
+ DAG.getNode(ISD::VP_FADD, DL, VTs,
+ {Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags);
+ setValue(&VPIntrin, Add);
+ }
+ break;
+ }
+ case ISD::VP_INTTOPTR: {
+ SDValue N = OpValues[0];
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getType());
+ N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1],
+ OpValues[2]);
+ N = DAG.getVPZExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1],
+ OpValues[2]);
+ setValue(&VPIntrin, N);
+ break;
+ }
+ case ISD::VP_PTRTOINT: {
+ SDValue N = OpValues[0];
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ VPIntrin.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(),
+ VPIntrin.getOperand(0)->getType());
+ N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1],
+ OpValues[2]);
+ N = DAG.getVPZExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1],
+ OpValues[2]);
+ setValue(&VPIntrin, N);
+ break;
+ }
+ case ISD::VP_ABS:
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF: {
+ SDValue Result =
+ DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]});
+ setValue(&VPIntrin, Result);
+ break;
+ }
+ }
+}
+
+SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
+ const BasicBlock *EHPadBB,
+ MCSymbol *&BeginLabel) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().createTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
+
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
+}
+
+SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
+ const BasicBlock *EHPadBB,
+ MCSymbol *BeginLabel) {
+ assert(BeginLabel && "BeginLabel should've been set");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
+
+ // Inform MachineModuleInfo of range.
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+  // Some platforms (e.g. wasm) use funclet-style IR but do not actually use
+  // outlined funclets or their LSDA info style.
+ if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
+ assert(II && "II should've been set");
+ WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
+ } else if (!isScopedEHPersonality(Pers)) {
+ assert(EHPadBB);
+ MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
+
+ return Chain;
+}
+
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB) {
+ MCSymbol *BeginLabel = nullptr;
+
+ if (EHPadBB) {
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
+ CLI.setChain(getRoot());
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+
+ assert((CLI.IsTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted
+ // and the DAG root is already updated.
+ HasTailCall = true;
+
+ // Since there's no actual continuation from this block, nothing can be
+ // relying on us setting vregs for them.
+ PendingExports.clear();
+ } else {
+ DAG.setRoot(Result.second);
+ }
+
+ if (EHPadBB) {
+ DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
+ BeginLabel));
+ }
+
+ return Result;
+}
+
+void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
+ bool isTailCall,
+ bool isMustTailCall,
+ const BasicBlock *EHPadBB) {
+ auto &DL = DAG.getDataLayout();
+ FunctionType *FTy = CB.getFunctionType();
+ Type *RetTy = CB.getType();
+
+ TargetLowering::ArgListTy Args;
+ Args.reserve(CB.arg_size());
+
+ const Value *SwiftErrorVal = nullptr;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (isTailCall) {
+ // Avoid emitting tail calls in functions with the disable-tail-calls
+ // attribute.
+ auto *Caller = CB.getParent()->getParent();
+ if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
+ "true" && !isMustTailCall)
+ isTailCall = false;
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+    // does not support this yet; the value would have to be moved into the
+    // swifterror register before the call.
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+ }
+
+ for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
+ TargetLowering::ArgListEntry Entry;
+ const Value *V = *I;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ Entry.setAttributes(&CB, I - CB.arg_begin());
+
+ // Use swifterror virtual register as input to the call.
+ if (Entry.IsSwiftError && TLI.supportSwiftError()) {
+ SwiftErrorVal = V;
+      // Find the virtual register for the actual swifterror argument and use
+      // it instead of the Value.
+ Entry.Node =
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
+ }
+
+ Args.push_back(Entry);
+
+    // If we have an explicit sret argument that is an Instruction (i.e., it
+    // might point to function-local memory), we can't meaningfully tail-call.
+ if (Entry.IsSRet && isa<Instruction>(V))
+ isTailCall = false;
+ }
+
+ // If call site has a cfguardtarget operand bundle, create and add an
+ // additional ArgListEntry.
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
+ TargetLowering::ArgListEntry Entry;
+ Value *V = Bundle->Inputs[0];
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode;
+ Entry.Ty = V->getType();
+ Entry.IsCFGuardTarget = true;
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI->LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
+ isTailCall = false;
+
+  // Disable tail calls if there is a swifterror argument. Targets have not
+ // been updated to support tail calls.
+ if (TLI.supportSwiftError() && SwiftErrorVal)
+ isTailCall = false;
+
+ ConstantInt *CFIType = nullptr;
+ if (CB.isIndirectCall()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi)) {
+ if (!TLI.supportKCFIBundles())
+ report_fatal_error(
+ "Target doesn't support calls with kcfi operand bundles.");
+ CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
+ assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
+ }
+ }
+
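+ // Populate the CallLoweringInfo and hand the call off to lowerInvokable,
+ // which performs the actual lowering and the EH-label bookkeeping when an
+ // EH pad block is present.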
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
+ .setTailCall(isTailCall)
+ .setConvergent(CB.isConvergent())
+ .setIsPreallocated(
+ CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
+ .setCFIType(CFIType);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
+
+ if (Result.first.getNode()) {
+ Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
+ setValue(&CB, Result.first);
+ }
+
+ // The last element of CLI.InVals has the SDValue for swifterror return.
+ // Here we copy it to a virtual register and update SwiftErrorMap for
+ // book-keeping.
+ if (SwiftErrorVal && TLI.supportSwiftError()) {
+ // Get the last element of InVals.
+ SDValue Src = CLI.InVals.back();
+ Register VReg =
+ SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
+ SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
+ DAG.setRoot(CopyNode);
+ }
+}
+
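+/// Load a value of type \p LoadVT from \p PtrVal for use in an expanded
+/// memcmp, folding the load to a constant when possible and avoiding chain
+/// serialization for loads from constant memory.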
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ SelectionDAGBuilder &Builder) {
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ Type *LoadTy =
+ Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+ if (LoadVT.isVector())
+ LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
+
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ LoadTy, Builder.DAG.getDataLayout()))
+ return Builder.getValue(LoadCst);
+ }
+
+ // Otherwise, we have to emit the load. If the pointer is to unfoldable but
+ // still constant memory, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal =
+ Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
+ MachinePointerInfo(PtrVal), Align(1));
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
+ SDValue Value,
+ bool IsSigned) {
+ EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
+ Value = DAG.getExtOrTrunc(IsSigned, Value, getCurSDLoc(), VT);
+ setValue(&I, Value);
+}
+
+/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ const Value *Size = I.getArgOperand(2);
+ const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
+ if (CSize && CSize->getZExtValue() == 0) {
+ EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
+ setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
+ return true;
+ }
+
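+ // Give the target a chance to emit its own optimized memcmp sequence first.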
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+ DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+ getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
+ return false;
+
+ // If the target has a fast compare for the given size, it will return a
+ // preferred load type for that size. Require that the load VT is legal and
+ // that the target supports unaligned loads of that type. Otherwise, return
+ // INVALID.
+ auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+ if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+ // TODO: Check alignment of src and dest ptrs.
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ if (!TLI.isTypeLegal(LVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+ LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ }
+
+ return LVT;
+ };
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+ MVT LoadVT;
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
+ default:
+ return false;
+ case 16:
+ LoadVT = MVT::i16;
+ break;
+ case 32:
+ LoadVT = MVT::i32;
+ break;
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+ break;
+ }
+
+ if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return false;
+
+ SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+ SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+ // Bitcast to a wide integer type if the loads are vectors.
+ if (LoadVT.isVector()) {
+ EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+ LoadL = DAG.getBitcast(CmpVT, LoadL);
+ LoadR = DAG.getBitcast(CmpVT, LoadR);
+ }
+
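+ // A single wide inequality compare now implements the zero-equality
+ // memcmp: the i1 result is zero-extended to the call's return type.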
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+ processIntegerCallValue(I, Cmp, false);
+ return true;
+}
+
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+ const Value *Src = I.getArgOperand(0);
+ const Value *Char = I.getArgOperand(1);
+ const Value *Length = I.getArgOperand(2);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Src), getValue(Char), getValue(Length),
+ MachinePointerInfo(Src));
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+
+ Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
+ Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
+ // DAG::getMemcpy needs Alignment to be defined.
+ Align Alignment = std::min(DstAlign, SrcAlign);
+
+ SDLoc sdl = getCurSDLoc();
+
+ // In the mempcpy context we need to pass in a false value for isTailCall
+ // because the return pointer needs to be adjusted by the size of
+ // the copied memory.
+ SDValue Root = getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, false, false,
+ /*isTailCall=*/false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
+ assert(MC.getNode() != nullptr &&
+ "** memcpy should not be lowered as TailCall in mempcpy context **");
+ DAG.setRoot(MC);
+
+ // Check if Size needs to be truncated or extended.
+ Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
+
+ // Adjust return pointer to point just past the last dst byte.
+ SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
+ Dst, Size);
+ setValue(&I, DstPlusSize);
+ return true;
+}
+
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1), isStpcpy);
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ DAG.setRoot(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
+ const Value *Arg0 = I.getArgOperand(0);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it, otherwise return
+/// false and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
+ return false;
+
+ SDNodeFlags Flags;
+ Flags.copyFMF(cast<FPMathOperator>(I));
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I,
+ DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
+ return true;
+}
+
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
+ return false;
+
+ SDNodeFlags Flags;
+ Flags.copyFMF(cast<FPMathOperator>(I));
+
+ SDValue Tmp0 = getValue(I.getArgOperand(0));
+ SDValue Tmp1 = getValue(I.getArgOperand(1));
+ EVT VT = Tmp0.getValueType();
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
+ return true;
+}
+
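+/// Lower a call instruction: inline asm is handled separately, intrinsic
+/// calls are dispatched to visitIntrinsicCall, recognized libc/libm calls get
+/// the optimized lowerings above, deopt-bundle calls go through
+/// LowerCallSiteWithDeoptBundle, and everything else is lowered via
+/// LowerCallTo.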
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (I.isInlineAsm()) {
+ visitInlineAsm(I);
+ return;
+ }
+
+ diagnoseDontCall(I);
+
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // Is this an LLVM intrinsic or a target-specific intrinsic?
+ unsigned IID = F->getIntrinsicID();
+ if (!IID)
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
+ IID = II->getIntrinsicID(F);
+
+ if (IID) {
+ visitIntrinsicCall(I, IID);
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call. Don't do the check if marked as nobuiltin for
+ // some reason or the call site requires strict floating point semantics.
+ LibFunc Func;
+ if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
+ F->hasName() && LibInfo->getLibFunc(*F, Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc_bcmp:
+ if (visitMemCmpBCmpCall(I))
+ return;
+ break;
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ // We already checked this call's prototype; verify it doesn't modify
+ // errno.
+ if (I.onlyReadsMemory()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ break;
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
+ return;
+ break;
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ if (visitBinaryFloatCall(I, ISD::FMINNUM))
+ return;
+ break;
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ if (visitBinaryFloatCall(I, ISD::FMAXNUM))
+ return;
+ break;
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
+ return;
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
+ return;
+ break;
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
+ return;
+ break;
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
+ return;
+ break;
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
+ return;
+ break;
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
+ return;
+ break;
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
+ return;
+ break;
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ if (visitUnaryFloatCall(I, ISD::FROUND))
+ return;
+ break;
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
+ return;
+ break;
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
+ return;
+ break;
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
+ return;
+ break;
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ if (visitBinaryFloatCall(I, ISD::FLDEXP))
+ return;
+ break;
+ case LibFunc_memcmp:
+ if (visitMemCmpBCmpCall(I))
+ return;
+ break;
+ case LibFunc_mempcpy:
+ if (visitMemPCpyCall(I))
+ return;
+ break;
+ case LibFunc_memchr:
+ if (visitMemChrCall(I))
+ return;
+ break;
+ case LibFunc_strcpy:
+ if (visitStrCpyCall(I, false))
+ return;
+ break;
+ case LibFunc_stpcpy:
+ if (visitStrCpyCall(I, true))
+ return;
+ break;
+ case LibFunc_strcmp:
+ if (visitStrCmpCall(I))
+ return;
+ break;
+ case LibFunc_strlen:
+ if (visitStrLenCall(I))
+ return;
+ break;
+ case LibFunc_strnlen:
+ if (visitStrNLenCall(I))
+ return;
+ break;
+ }
+ }
+ }
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ // CFGuardTarget bundles are lowered in LowerCallTo.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
+ LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
+ "Cannot lower calls with arbitrary operand bundles!");
+
+ SDValue Callee = getValue(I.getCalledOperand());
+
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
+ else
+ // Check if we can potentially perform a tail call. More detailed checking
+ // is done within LowerCallTo, after more information about the call is
+ // known.
+ LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+ /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
+ }
+
+ /// Whether or not this operand accesses memory
+ bool hasMemory(const TargetLowering &TLI) const {
+ // Indirect operand accesses access memory.
+ if (isIndirect)
+ return true;
+
+ for (const auto &Code : Codes)
+ if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
+ return true;
+
+ return false;
+ }
+};
+
+
+} // end anonymous namespace
+
+/// Make sure that the output operand \p OpInfo and its corresponding input
+/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
+/// out).
+static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &MatchingOpInfo,
+ SelectionDAG &DAG) {
+ if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
+ return;
+
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
+ MatchingOpInfo.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ MatchingOpInfo.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ // FIXME: error out in a more elegant fashion
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
+}
+
+/// Get a direct memory input to behave well as an indirect operand.
+/// This may introduce stores, hence the need for a \p Chain.
+/// \return The (possibly updated) chain.
+static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
+ SDISelAsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we don't have an indirect input, put it in the constpool if we can,
+ // otherwise spill it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(
+ cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
+ return Chain;
+ }
+
+ // Otherwise, create a stack slot and emit a store to it before the asm.
+ Type *Ty = OpVal->getType();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo().CreateStackObject(
+ TySize, DL.getPrefTypeAlign(Ty), false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
+ Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(MF, SSFI),
+ TLI.getMemValueType(DL, Ty));
+ OpInfo.CallOperand = StackSlot;
+
+ return Chain;
+}
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand
+/// RefOpInfo describes the matching operand if any, the operand otherwise
+static std::optional<unsigned>
+getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
+ LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // No work to do for memory/address operands.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Address)
+ return std::nullopt;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is unset only on failure. Return immediately.
+ if (!RC)
+ return std::nullopt;
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
+
+ if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
+ // If this is an FP operand in an integer register (or vice versa), or more
+ // generally if the operand value disagrees with the register class we plan
+ // to stick it in, fix the operand type.
+ //
+ // If this is an input value, the bitcast to the new type is done now.
+ // Bitcast for output value is done at the end of visitInlineAsm().
+ if ((OpInfo.Type == InlineAsm::isOutput ||
+ OpInfo.Type == InlineAsm::isInput) &&
+ !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types). Note: output bitcast is done at the end of
+ // visitInlineAsm().
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ // Exclude indirect inputs while they are unsupported because the code
+ // to perform the load is missing and thus OpInfo.CallOperand still
+ // refers to the input address rather than the pointed-to value.
+ if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ // If the operand is an FP value and we want it in integer registers,
+ // use the corresponding integer type. This turns an f64 value into
+ // i64, which can be passed with two i32 values on a 32-bit machine.
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ if (OpInfo.Type == InlineAsm::isInput)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = VT;
+ }
+ }
+ }
+
+ // No need to allocate a matching input constraint since the constraint it's
+ // matching to has already been allocated.
+ if (OpInfo.isMatchingInputConstraint())
+ return std::nullopt;
+
+ EVT ValueVT = OpInfo.ConstraintVT;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+
+ // If this is associated with a specific register, initialize the iterator
+ // to the correct place. If virtual, make sure we have enough registers.
+
+ // Initialize iterator if necessary
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Do not check for single registers.
+ if (AssignedReg) {
+ I = std::find(I, RC->end(), AssignedReg);
+ if (I == RC->end()) {
+ // RC does not contain the selected register, which indicates a
+ // mismatch between the register and the required type/bitwidth.
+ return {AssignedReg};
+ }
+ }
+
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
+ Regs.push_back(R);
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return std::nullopt;
+}
+
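+/// Walk the INLINEASM operands already emitted and return the index of the
+/// flag operand that describes output operand \p OperandNo, so a tied input
+/// can locate and reuse its registers.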
+static unsigned
+findMatchingInlineAsmOperand(unsigned OperandNo,
+ const std::vector<SDValue> &AsmNodeOperands) {
+ // Scan until we find the definition we already emitted of this operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+ }
+ return CurOp;
+}
+
+namespace {
+
+class ExtraFlags {
+ unsigned Flags = 0;
+
+public:
+ explicit ExtraFlags(const CallBase &Call) {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
+ if (IA->hasSideEffects())
+ Flags |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ Flags |= InlineAsm::Extra_IsAlignStack;
+ if (Call.isConvergent())
+ Flags |= InlineAsm::Extra_IsConvergent;
+ Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+ }
+
+ void update(const TargetLowering::AsmOperandInfo &OpInfo) {
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an Other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for Other constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ Flags |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ Flags |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ unsigned get() const { return Flags; }
+};
+
+} // end anonymous namespace
+
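+/// Return true if \p Op directly references a function that is not marked
+/// dllimport; such operands may be treated as branch targets below.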
+static bool isFunction(SDValue Op) {
+ if (Op && Op.getOpcode() == ISD::GlobalAddress) {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ auto Fn = dyn_cast_or_null<Function>(GA->getGlobal());
+
+ // A normal "call dllimport_func" instruction (non-inline-asm) forces
+ // indirect access by choosing the call opcode, and the asm printer then
+ // emits the indirect-symbol form (e.g. "*") based on that opcode. Inline
+ // asm cannot do this today (it behaves more like a data access), so
+ // dllimport functions are ignored here.
+ if (Fn && !Fn->hasDLLImportStorageClass())
+ return true;
+ }
+ }
+ return false;
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
+ const BasicBlock *EHPadBB) {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+ DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
+
+ // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
+ // AsmDialect, MayLoad, MayStore).
+ bool HasSideEffect = IA->hasSideEffects();
+ ExtraFlags ExtraInfo(Call);
+
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ if (OpInfo.CallOperandVal)
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+
+ if (!HasSideEffect)
+ HasSideEffect = OpInfo.hasMemory(TLI);
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ // FIXME: Could we compute this on OpInfo rather than T?
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(T, SDValue());
+
+ if (T.ConstraintType == TargetLowering::C_Immediate &&
+ OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+ // We've delayed emitting this diagnostic (as for the "n" constraint)
+ // because inlining could still cause an integer constant to show up.
+ return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
+ "' expects an integer constant "
+ "expression");
+
+ ExtraInfo.update(T);
+ }
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+
+ bool EmitEHLabels = isa<InvokeInst>(Call);
+ if (EmitEHLabels) {
+ assert(EHPadBB && "InvokeInst must have an EHPadBB");
+ }
+ bool IsCallBr = isa<CallBrInst>(Call);
+
+ if (IsCallBr || EmitEHLabels) {
+ // If this is a callbr or invoke we need to flush pending exports since
+ // inlineasm_br and invoke are terminators.
+ // We need to do this before nodes are glued to the inlineasm_br node.
+ Chain = getControlRoot();
+ }
+
+ MCSymbol *BeginLabel = nullptr;
+ if (EmitEHLabels) {
+ Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
+ }
+
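+ // Collect the asm template strings; targets consult them below to
+ // recognize operands that are used as branch targets.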
+ int OpNo = -1;
+ SmallVector<StringRef> AsmStrs;
+ IA->collectAsmStrs(AsmStrs);
+
+ // Second pass over the constraints: compute which constraint option to use.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput)
+ OpNo++;
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ patchMatchingInput(OpInfo, Input, DAG);
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber) ||
+ OpInfo.ConstraintType == TargetLowering::C_Address)
+ continue;
+
+ // In the Linux PIC model, there are four cases of value/label addressing:
+ //
+ // 1: Function call or Label jmp inside the module.
+ // 2: Data access (such as global variable, static variable) inside module.
+ // 3: Function call or Label jmp outside the module.
+ // 4: Data access (such as global variable) outside the module.
+ //
+ // Because the current LLVM inline asm design does not "recognize" the asm
+ // text, it is hard to treat memory addressing differently for the same
+ // value/address used in different instructions. For example, in the PIC
+ // model a call may go through the PLT or be directly PC-relative, while a
+ // lea/mov of a function address may go through the GOT.
+ //
+ // Here we try to recognize function calls for cases 1 and 3 in inline asm
+ // and adjust their constraints accordingly.
+ //
+ // TODO: Because inline asm discourages jumping to labels outside the
+ // module, jumps to function labels are not handled here yet; this should
+ // be enhanced (especially for the PIC model) if meaningful requirements
+ // arise.
+ if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) &&
+ TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
+ TM.getCodeModel() != CodeModel::Large) {
+ OpInfo.isIndirect = false;
+ OpInfo.ConstraintType = TargetLowering::C_Address;
+ }
+
+ // If this is a memory input, and if the operand is not indirect, do what we
+ // need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value.
+ Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = nullptr;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
+ IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = Call.getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Third pass: Loop over operands to prepare DAG-level operands. As part of
+ // this, assign virtual and physical registers for inputs and outputs.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ // Assign Registers.
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : OpInfo;
+ const auto RegError =
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ if (RegError) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const char *RegName = TRI.getName(*RegError);
+ emitInlineAsmError(Call, "register '" + Twine(RegName) +
+ "' allocated for constraint '" +
+ Twine(OpInfo.ConstraintCode) +
+ "' does not match required type");
+ return;
+ }
+
+ auto DetectWriteToReservedRegister = [&]() {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
+ if (Register::isPhysicalRegister(Reg) &&
+ TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
+ const char *RegName = TRI.getName(Reg);
+ emitInlineAsmError(Call, "write to reserved register '" +
+ Twine(RegName) + "'");
+ return true;
+ }
+ }
+ return false;
+ };
+ assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
+ (OpInfo.Type == InlineAsm::isInput &&
+ !OpInfo.isMatchingInputConstraint())) &&
+ "Only address as input operand is allowed.");
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
+ MVT::i32));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ } else {
+ // Otherwise, this outputs to a register (directly for C_Register /
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(
+ Call, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ if (DetectWriteToReservedRegister())
+ return;
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(
+ OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
+ }
+ break;
+
+ case InlineAsm::isInput:
+ case InlineAsm::isLabel: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) {
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
+ AsmNodeOperands);
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ emitInlineAsmError(Call, "inline asm not supported yet: "
+ "don't know how to handle tied "
+ "indirect register inputs");
+ return;
+ }
+
+ SmallVector<unsigned, 4> Regs;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
+ Register TiedReg = R->getReg();
+ MVT RegVT = R->getSimpleValueType(0);
+ const TargetRegisterClass *RC =
+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
+ : TRI.getMinimalPhysRegClass(TiedReg);
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(MRI.createVirtualRegister(RC));
+
+ RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
+
+ SDLoc dl = getCurSDLoc();
+ // Use the produced MatchedRegs object to copy the input value into the
+ // tied registers and record them on the INLINEASM node.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(), dl,
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty()) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
+ if (isa<ConstantSDNode>(InOperandVal)) {
+ emitInlineAsmError(Call, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ emitInlineAsmError(Call,
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ llvm::append_range(AsmNodeOperands, Ops);
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert((OpInfo.isIndirect ||
+ OpInfo.ConstraintType != TargetLowering::C_Memory) &&
+ "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() ==
+ TLI.getPointerTy(DAG.getDataLayout()) &&
+ "Memory operands expect pointer values");
+
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ getCurSDLoc(),
+ MVT::i32));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Address) {
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+
+ SDValue AsmOp = InOperandVal;
+ if (isFunction(InOperandVal)) {
+ auto *GA = cast<GlobalAddressSDNode>(InOperandVal);
+ ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1);
+ AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(),
+ InOperandVal.getValueType(),
+ GA->getOffset());
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+
+ AsmNodeOperands.push_back(
+ DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
+
+ AsmNodeOperands.push_back(AsmOp);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ emitInlineAsmError(
+ Call, "Don't know how to handle indirect register inputs yet "
+ "for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(Call,
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ if (DetectWriteToReservedRegister())
+ return;
+
+ SDLoc dl = getCurSDLoc();
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue,
+ &Call);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ dl, DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber:
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, getCurSDLoc(), DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Glue.getNode()) AsmNodeOperands.push_back(Glue);
+
+ unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
+ Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ Glue = Chain.getValue(1);
+
+ // Do additional work to generate outputs.
+
+ SmallVector<EVT, 1> ResultVTs;
+ SmallVector<SDValue, 1> ResultValues;
+ SmallVector<SDValue, 8> OutChains;
+
+ llvm::Type *CallResultType = Call.getType();
+ ArrayRef<Type *> ResultTypes;
+ if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
+ ResultTypes = StructResult->elements();
+ else if (!CallResultType->isVoidTy())
+ ResultTypes = ArrayRef(CallResultType);
+
+ auto CurResultType = ResultTypes.begin();
+ auto handleRegAssign = [&](SDValue V) {
+ assert(CurResultType != ResultTypes.end() && "Unexpected value");
+ assert((*CurResultType)->isSized() && "Unexpected unsized type");
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+ ++CurResultType;
+ // If the type of the inline asm call site return value is different but
+ // has the same size as the type of the asm output, bitcast it. One example
+ // is for vectors with different width / number of elements. This can
+ // happen for register classes that can contain multiple different value
+ // types. The preg or vreg allocated may not have the same VT as was
+ // expected.
+ //
+ // This can also happen for a return value that disagrees with the register
+ // class it is put in, eg. a double in a general-purpose register on a
+ // 32-bit machine.
+ if (ResultVT != V.getValueType() &&
+ ResultVT.getSizeInBits() == V.getValueSizeInBits())
+ V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+ else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+ V.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result
+ // may have a wider width than the expected result. Extract the
+ // relevant portion.
+ V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+ }
+ assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+ ResultVTs.push_back(ResultVT);
+ ResultValues.push_back(V);
+ };
+
+ // Deal with output operands.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.Type == InlineAsm::isOutput) {
+ SDValue Val;
+ // Skip trivial output operands.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ continue;
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass:
+ Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, &Glue, &Call);
+ break;
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, getCurSDLoc(),
+ OpInfo, DAG);
+ break;
+ case TargetLowering::C_Memory:
+ break; // Already handled.
+ case TargetLowering::C_Address:
+ break; // Silence warning.
+ case TargetLowering::C_Unknown:
+ assert(false && "Unexpected unknown constraint");
+ }
+
+ // Indirect output manifest as stores. Record output chains.
+ if (OpInfo.isIndirect) {
+ const Value *Ptr = OpInfo.CallOperandVal;
+ assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+ SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+ MachinePointerInfo(Ptr));
+ OutChains.push_back(Store);
+ } else {
+ // Non-indirect outputs become part of the asm's result value(s).
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (Val.getOpcode() == ISD::MERGE_VALUES) {
+ for (const SDValue &V : Val->op_values())
+ handleRegAssign(V);
+ } else
+ handleRegAssign(Val);
+ }
+ }
+ }
+
+ // Set results.
+ if (!ResultValues.empty()) {
+ assert(CurResultType == ResultTypes.end() &&
+ "Mismatch in number of ResultTypes");
+ assert(ResultValues.size() == ResultTypes.size() &&
+ "Mismatch in number of output operands in asm result");
+
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(&Call, V);
+ }
+
+ // Collect store chains.
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
+
+ if (EmitEHLabels) {
+ Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
+ }
+
+ // Only Update Root if inline assembly has a memory effect.
+ if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
+ EmitEHLabels)
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
+ const Twine &Message) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(&Call, Message);
+
+ // Make sure we leave the DAG in a valid state
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
+
+ if (ValueVTs.empty())
+ return;
+
+ SmallVector<SDValue, 1> Ops;
+ for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
+
+ setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
+}
+
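+// The va_start/va_arg/va_end/va_copy visitors below lower directly to their
+// corresponding ISD nodes, passing the source pointer both as an SDValue and
+// as a SrcValue node.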
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ SDValue V = DAG.getVAArg(
+ TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
+ getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
+ DL.getABITypeAlign(I.getType()).value());
+ DAG.setRoot(V.getValue(1));
+
+ if (I.getType()->isPointerTy())
+ V = DAG.getPtrExtOrTrunc(
+ V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
+ setValue(&I, V);
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
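+/// If \p I carries !range metadata proving the result lies in a zero-based
+/// unsigned range, wrap \p Op in an AssertZext of the narrowest integer type
+/// covering that range so later nodes can rely on the known zero bits.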
+SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
+ const Instruction &I,
+ SDValue Op) {
+ const MDNode *Range = getRangeMetadata(I);
+ if (!Range)
+ return Op;
+
+ ConstantRange CR = getConstantRangeFromMetadata(*Range);
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
+ return Op;
+
+ APInt Lo = CR.getUnsignedMin();
+ if (!Lo.isMinValue())
+ return Op;
+
+ APInt Hi = CR.getUnsignedMax();
+ unsigned Bits = std::max(Hi.getActiveBits(),
+ static_cast<unsigned>(IntegerType::MIN_INT_BITS));
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
+
+ SDLoc SL = getCurSDLoc();
+
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
+ DAG.getValueType(SmallVT));
+ unsigned NumVals = Op.getNode()->getNumValues();
+ if (NumVals == 1)
+ return ZExt;
+
+ SmallVector<SDValue, 4> Ops;
+
+ Ops.push_back(ZExt);
+ for (unsigned I = 1; I != NumVals; ++I)
+ Ops.push_back(Op.getValue(I));
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
+/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
+/// the call being lowered.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+void SelectionDAGBuilder::populateCallLoweringInfo(
+ TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
+ unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
+ bool IsPatchPoint) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
+ ArgI != ArgE; ++ArgI) {
+ const Value *V = Call->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(Call, ArgI);
+ Args.push_back(Entry);
+ }
+
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setDiscardResult(Call->use_empty())
+ .setIsPatchPoint(IsPatchPoint)
+ .setIsPreallocated(
+ Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
+}
+
+/// Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so FinalizeISel can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
+ const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ SelectionDAG &DAG = Builder.DAG;
+ for (unsigned I = StartIdx; I < Call.arg_size(); I++) {
+ SDValue Op = Builder.getValue(Call.getArgOperand(I));
+
+ // Things on the stack are pointer-typed, meaning that they are already
+ // legal and can be emitted directly to target nodes.
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType()));
+ } else {
+ // Otherwise emit a target independent node to be legalised.
+ Ops.push_back(Builder.getValue(Call.getArgOperand(I)));
+ }
+ }
+}
+
+/// Lower llvm.experimental.stackmap.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
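+  //
+  // For example (illustrative only):
+  //   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 42, i32 4, i32 %x)
+  // records %x as a live value and reserves a 4-byte nop shadow.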
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Chain, InGlue, Callee;
+ SmallVector<SDValue, 32> Ops;
+
+ SDLoc DL = getCurSDLoc();
+ Callee = getValue(CI.getCalledOperand());
+
+  // The stackmap intrinsic only records the live variables (the arguments
+  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+  // intrinsic, this won't be lowered to a function call, so we don't have to
+  // worry about calling conventions or target-specific lowering code.
+  // Instead we perform the call lowering right here.
+ //
+ // chain, flag = CALLSEQ_START(chain, 0, 0)
+ // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+ // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+ //
+ Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
+ InGlue = Chain.getValue(1);
+
+ // Add the STACKMAP operands, starting with DAG house-keeping.
+ Ops.push_back(Chain);
+ Ops.push_back(InGlue);
+
+ // Add the <id>, <numShadowBytes> operands.
+ //
+ // These do not require legalisation, and can be emitted directly to target
+ // constant nodes.
+ SDValue ID = getValue(CI.getArgOperand(0));
+ assert(ID.getValueType() == MVT::i64);
+ SDValue IDConst = DAG.getTargetConstant(
+ cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType());
+ Ops.push_back(IDConst);
+
+ SDValue Shad = getValue(CI.getArgOperand(1));
+ assert(Shad.getValueType() == MVT::i32);
+ SDValue ShadConst = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType());
+ Ops.push_back(ShadConst);
+
+ // Add the live variables.
+ addStackMapLiveVars(CI, 2, DL, Ops, *this);
+
+ // Create the STACKMAP node.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
+ InGlue = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL);
+
+ // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
+ // Set the root to the target-lowered call chain.
+ DAG.setRoot(Chain);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
+}
+
+/// Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
+ const BasicBlock *EHPadBB) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
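+  //
+  // For example (illustrative only):
+  //   %r = call i64 (i64, i32, ptr, i32, ...)
+  //            @llvm.experimental.patchpoint.i64(i64 2, i32 12, ptr %f,
+  //                                              i32 2, i64 %a, i64 %b)
+  // reserves 12 bytes, calls %f with the two call arguments %a and %b, and
+  // records any remaining operands as live values for the stack map.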
+
+ CallingConv::ID CC = CB.getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !CB.getType()->isVoidTy();
+ SDLoc dl = getCurSDLoc();
+ SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
+
+ // Handle immediate and symbolic callees.
+ if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
+ Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
+ /*isTarget=*/true);
+ else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
+ SDLoc(SymbolicCallee),
+ SymbolicCallee->getValueType(0));
+
+ // Get the real number of arguments participating in the call <numArgs>
+ SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // Intrinsics include all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ Type *ReturnTy =
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
+ ReturnTy, true);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
+
+ SDNode *CallEnd = Result.second.getNode();
+ if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+  // Get a call instruction from the call sequence chain.
+  // Tail calls are not allowed.
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool HasGlue = Call->getGluedNode();
+
+ // Replace the target specific call node with the patchable intrinsic.
+ SmallVector<SDValue, 8> Ops;
+
+ // Push the chain.
+ Ops.push_back(*(Call->op_begin()));
+
+ // Optionally, push the glue (if any).
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end() - 1));
+
+ // Push the register mask info.
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end() - 2));
+ else
+ Ops.push_back(*(Call->op_end() - 1));
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
+ SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
+ MVT::i32));
+
+ // Add the callee.
+ Ops.push_back(Callee);
+
+ // Adjust <numArgs> to account for any arguments that have been passed on the
+ // stack instead.
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
+ NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
+
+ // Add the calling convention
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC)
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
+ Ops.push_back(getValue(CB.getArgOperand(i)));
+
+ // Push the arguments from the call instruction.
+ SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ Ops.append(Call->op_begin() + 2, e);
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
+
+ SDVTList NodeTys;
+ if (IsAnyRegCC && HasDef) {
+ // Create the return types based on the intrinsic definition
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 3> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
+ assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+ // There is always a chain and a glue type at the end
+ ValueVTs.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Glue);
+ NodeTys = DAG.getVTList(ValueVTs);
+ } else
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a PATCHPOINT node.
+ SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops);
+
+ // Update the NodeMap.
+ if (HasDef) {
+ if (IsAnyRegCC)
+ setValue(&CB, SDValue(PPV.getNode(), 0));
+ else
+ setValue(&CB, Result.first);
+ }
+
+  // Fix up the consumers of the intrinsic. The chain and glue may be used in
+  // the call sequence. Furthermore, the location of the chain and glue can
+  // change when the AnyReg calling convention is used and the intrinsic
+  // returns a value.
+ if (IsAnyRegCC && HasDef) {
+ SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+ SDValue To[] = {PPV.getValue(1), PPV.getValue(2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ DAG.ReplaceAllUsesWith(Call, PPV.getNode());
+ DAG.DeleteNode(Call);
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
+}
+
+void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
+ unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2;
+ if (I.arg_size() > 1)
+ Op2 = getValue(I.getArgOperand(1));
+ SDLoc dl = getCurSDLoc();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Res;
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+ SDFlags.copyFMF(*FPMO);
+
+ switch (Intrinsic) {
+ case Intrinsic::vector_reduce_fadd:
+ if (SDFlags.hasAllowReassociation())
+ Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
+ SDFlags);
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_fmul:
+ if (SDFlags.hasAllowReassociation())
+ Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
+ SDFlags);
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_add:
+ Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_mul:
+ Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_and:
+ Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_or:
+ Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_xor:
+ Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_smax:
+ Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_smin:
+ Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_umax:
+ Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_umin:
+ Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::vector_reduce_fmax:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_fmin:
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_fmaximum:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAXIMUM, dl, VT, Op1, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_fminimum:
+ Res = DAG.getNode(ISD::VECREDUCE_FMINIMUM, dl, VT, Op1, SDFlags);
+ break;
+ default:
+ llvm_unreachable("Unhandled vector reduce intrinsic");
+ }
+ setValue(&I, Res);
+}
+
+/// Returns an AttributeList representing the attributes applied to the return
+/// value of the given call.
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ Type *OrigRetTy = CLI.RetTy;
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ auto &DL = CLI.DAG.getDataLayout();
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0);
+
+ if (CLI.IsPostTypeLegalization) {
+ // If we are lowering a libcall after legalization, split the return type.
+ SmallVector<EVT, 4> OldRetTys;
+ SmallVector<uint64_t, 4> OldOffsets;
+ RetTys.swap(OldRetTys);
+ Offsets.swap(OldOffsets);
+
+ for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
+ EVT RetVT = OldRetTys[i];
+ uint64_t Offset = OldOffsets[i];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
+ unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
+ RetTys.append(NumRegs, RegisterVT);
+ for (unsigned j = 0; j != NumRegs; ++j)
+ Offsets.push_back(Offset + j * RegisterVTByteSZ);
+ }
+ }
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
+
+ bool CanLowerReturn =
+ this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
+ CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+ if (!CanLowerReturn) {
+ // FIXME: equivalent assert?
+ // assert(!CS.hasInAllocaArgument() &&
+ // "sret demotion is incompatible with inalloca");
+ uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
+ Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ DemoteStackIdx =
+ MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
+ Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
+ DL.getAllocaAddrSpace());
+
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
+ ArgListEntry Entry;
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsInReg = false;
+ Entry.IsSRet = true;
+ Entry.IsNest = false;
+ Entry.IsByVal = false;
+ Entry.IsByRef = false;
+ Entry.IsReturned = false;
+ Entry.IsSwiftSelf = false;
+ Entry.IsSwiftAsync = false;
+ Entry.IsSwiftError = false;
+ Entry.IsCFGuardTarget = false;
+ Entry.Alignment = Alignment;
+ CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.NumFixedArgs += 1;
+ CLI.getArgs()[0].IndirectType = CLI.RetTy;
+ CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+
+ // sret demotion isn't compatible with tail-calls, since the sret argument
+ // points into the callers stack frame.
+ CLI.IsTailCall = false;
+ } else {
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ ISD::ArgFlagsTy Flags;
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (I == RetTys.size() - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.Flags = Flags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetTy->isPointerTy()) {
+ MyFlags.Flags.setPointer();
+ MyFlags.Flags.setPointerAddrSpace(
+ cast<PointerType>(CLI.RetTy)->getAddressSpace());
+ }
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // We push in swifterror return as the last element of CLI.Ins.
+ ArgListTy &Args = CLI.getArgs();
+ if (supportSwiftError()) {
+ for (const ArgListEntry &Arg : Args) {
+ if (Arg.IsSwiftError) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = getPointerTy(DL);
+ MyFlags.ArgVT = EVT(getPointerTy(DL));
+ MyFlags.Flags.setSwiftError();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+ // FIXME: Split arguments if CLI.IsPostTypeLegalization
+ Type *FinalType = Args[i].Ty;
+ if (Args[i].IsByVal)
+ FinalType = Args[i].IndirectType;
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg, DL);
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+
+ // Certain targets (such as MIPS), may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
+ Flags.setOrigAlign(OriginalAlignment);
+
+ if (Args[i].Ty->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Args[i].Ty)->getAddressSpace());
+ }
+ if (Args[i].IsZExt)
+ Flags.setZExt();
+ if (Args[i].IsSExt)
+ Flags.setSExt();
+ if (Args[i].IsInReg) {
+        // If we are using the vectorcall calling convention, a structure that
+        // is passed InReg must be an HVA (Homogeneous Vector Aggregate).
+        if (CLI.CallConv == CallingConv::X86_VectorCall &&
+            isa<StructType>(FinalType)) {
+          // The first value of a structure is marked as the start of the HVA.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
+ Flags.setInReg();
+ }
+ if (Args[i].IsSRet)
+ Flags.setSRet();
+ if (Args[i].IsSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Args[i].IsSwiftAsync)
+ Flags.setSwiftAsync();
+ if (Args[i].IsSwiftError)
+ Flags.setSwiftError();
+ if (Args[i].IsCFGuardTarget)
+ Flags.setCFGuardTarget();
+ if (Args[i].IsByVal)
+ Flags.setByVal();
+ if (Args[i].IsByRef)
+ Flags.setByRef();
+ if (Args[i].IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ Align MemAlign;
+ if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
+ unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
+ Flags.setByValSize(FrameSize);
+
+        // For in-memory arguments, size and alignment should be passed from
+        // FE. BE will guess if this info is not there but there are cases it
+        // cannot get right.
+ if (auto MA = Args[i].Alignment)
+ MemAlign = *MA;
+ else
+ MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
+ } else if (auto MA = Args[i].Alignment) {
+ MemAlign = *MA;
+ } else {
+ MemAlign = OriginalAlignment;
+ }
+ Flags.setMemAlign(MemAlign);
+ if (Args[i].IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+
+ MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].IsSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].IsZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // Conservatively only handle 'returned' on non-vectors that can be lowered,
+ // for now.
+ if (Args[i].IsReturned && !Op.getValueType().isVector() &&
+ CanLowerReturn) {
+ assert((CLI.RetTy == Args[i].Ty ||
+ (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
+ CLI.RetTy->getPointerAddressSpace() ==
+ Args[i].Ty->getPointerAddressSpace())) &&
+ RetTys.size() == NumValues && "unexpected use of 'returned'");
+ // Before passing 'returned' to the target lowering code, ensure that
+ // either the register MVT and the actual EVT are the same size or that
+ // the return value and argument are extended in the same way; in these
+ // cases it's safe to pass the argument register value unchanged as the
+ // return register value (although it's at the target's option whether
+ // to do so)
+ // TODO: allow code generation to take advantage of partially preserved
+ // registers rather than clobbering the entire register when the
+ // parameter extension method is not compatible with the return
+ // extension method
+ if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+ (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+ CLI.RetZExt == Args[i].IsZExt))
+ Flags.setReturned();
+ }
+
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
+ CLI.CallConv, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+        // If this isn't the first piece, the alignment must be 1.
+        // For scalable vectors the scalable part is currently handled by
+        // individual targets, so we just use the known minimum size here.
+ ISD::OutputArg MyFlags(
+ Flags, Parts[j].getValueType().getSimpleVT(), VT,
+ i < CLI.NumFixedArgs, i,
+ j * Parts[j].getValueType().getStoreSize().getKnownMinValue());
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0) {
+ MyFlags.Flags.setOrigAlign(Align(1));
+ if (j == NumParts - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
+
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
+ }
+
+ if (NeedsRegBlock && Value == NumValues - 1)
+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ CLI.Chain = LowerCall(CLI, InVals);
+
+ // Update CLI.InVals to use outside of this function.
+ CLI.InVals = InVals;
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!CLI.IsTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() && "LowerCall emitted a null value!");
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ }
+#endif
+
+ SmallVector<SDValue, 4> ReturnValues;
+ if (!CanLowerReturn) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy =
+ PointerType::get(OrigRetTy->getContext(), DL.getAllocaAddrSpace());
+
+ ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ unsigned NumValues = RetTys.size();
+ ReturnValues.resize(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+ CLI.DAG.getConstant(Offsets[i], CLI.DL,
+ PtrVT), Flags);
+ SDValue L = CLI.DAG.getLoad(
+ RetTys[i], CLI.DL, CLI.Chain, Add,
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ HiddenSRetAlign);
+ ReturnValues[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+ } else {
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ std::optional<ISD::NodeType> AssertOp;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, nullptr,
+ CLI.CallConv, AssertOp));
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case. In
+ // that case, nothing will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+ }
+
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(RetTys), ReturnValues);
+ return std::make_pair(Res, CLI.Chain);
+}
+
+/// Places new result values for the node in Results (their number
+/// and types must exactly match those of the original return values of
+/// the node), or leaves Results empty, which indicates that the node is not
+/// to be custom lowered after all.
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+ if (!Res.getNode())
+ return;
+
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (N->getNumValues() == 1) {
+ Results.push_back(Res);
+ return;
+ }
+
+ // If the original node has multiple results, then the return node should
+ // have the same number of results.
+ assert((N->getNumValues() == Res->getNumValues()) &&
+ "Lowering returned the wrong number of results!");
+
+  // Place the new result values based on the result numbering of N.
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+}
+
+void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
+ unsigned Reg,
+ ISD::NodeType ExtendType) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // If this is an InlineAsm we have to match the registers required, not the
+ // notional registers required by the type.
+
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
+ std::nullopt); // This is not an ABI copy.
+ SDValue Chain = DAG.getEntryNode();
+
+ if (ExtendType == ISD::ANY_EXTEND) {
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
+ }
+ RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Uses by a switch count as uses outside the entry
+/// block, since the switch may be expanded into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (FastISel)
+ return A->use_empty();
+
+ const BasicBlock &Entry = A->getParent()->front();
+ for (const User *U : A->users())
+ if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+
+ return true;
+}
+
+using ArgCopyElisionMapTy =
+ DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
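+///
+/// A typical candidate is an entry-block pattern such as (illustrative IR):
+///   %x.addr = alloca i32
+///   store i32 %x, ptr %x.addr
+/// where %x is an argument, the store fully initializes the alloca, and no
+/// other instruction clobbers or escapes it.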
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->arg_size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info and pseudo op intrinsics, they don't escape or store
+ // to allocas.
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca.
+ // If the argument type has padding bits we can't directly forward a pointer
+ // as the upper bits may contain garbage.
+ // Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ !DL.typeSizeEqualsStoreSize(Arg->getType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
+ << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
+
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ ArrayRef<SDValue> ArgVals, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVals[0]);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ LLVM_DEBUG(
+ dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ Align RequiredAlignment = AI->getAlign();
+ if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
+ LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << DebugStr(RequiredAlignment) << " vs "
+ << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ LLVM_DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ for (SDValue ArgVal : ArgVals)
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+  // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+ SelectionDAG &DAG = SDB->DAG;
+ SDLoc dl = SDB->getCurSDLoc();
+ const DataLayout &DL = DAG.getDataLayout();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // In Naked functions we aren't going to save any registers.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return;
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ PointerType::get(F.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
+ Ins.push_back(RetArg);
+ }
+
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
+ ArgCopyElisionCandidates);
+
+ // Set up the incoming argument description vector.
+ for (const Argument &Arg : F.args()) {
+ unsigned ArgNo = Arg.getArgNo();
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ bool isArgValueUsed = !Arg.use_empty();
+ unsigned PartBase = 0;
+ Type *FinalType = Arg.getType();
+ if (Arg.hasAttribute(Attribute::ByVal))
+ FinalType = Arg.getParamByValType();
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+ FinalType, F.getCallingConv(), F.isVarArg(), DL);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+
+ if (Arg.getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Arg.getType())->getAddressSpace());
+ }
+ if (Arg.hasAttribute(Attribute::ZExt))
+ Flags.setZExt();
+ if (Arg.hasAttribute(Attribute::SExt))
+ Flags.setSExt();
+ if (Arg.hasAttribute(Attribute::InReg)) {
+        // If we are using the vectorcall calling convention, a structure that
+        // is passed InReg must be an HVA (Homogeneous Vector Aggregate).
+        if (F.getCallingConv() == CallingConv::X86_VectorCall &&
+            isa<StructType>(Arg.getType())) {
+          // The first value of a structure is marked as the start of the HVA.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
+ Flags.setInReg();
+ }
+ if (Arg.hasAttribute(Attribute::StructRet))
+ Flags.setSRet();
+ if (Arg.hasAttribute(Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (Arg.hasAttribute(Attribute::SwiftAsync))
+ Flags.setSwiftAsync();
+ if (Arg.hasAttribute(Attribute::SwiftError))
+ Flags.setSwiftError();
+ if (Arg.hasAttribute(Attribute::ByVal))
+ Flags.setByVal();
+ if (Arg.hasAttribute(Attribute::ByRef))
+ Flags.setByRef();
+ if (Arg.hasAttribute(Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.hasAttribute(Attribute::Preallocated)) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+
+ // Certain targets (such as MIPS), may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ const Align OriginalAlignment(
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL));
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Align MemAlign;
+ Type *ArgMemTy = nullptr;
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
+ Flags.isByRef()) {
+ if (!ArgMemTy)
+ ArgMemTy = Arg.getPointeeInMemoryValueType();
+
+ uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
+
+ // For in-memory arguments, size and alignment should be passed from FE.
+ // BE will guess if this info is not there but there are cases it cannot
+ // get right.
+ if (auto ParamAlign = Arg.getParamStackAlign())
+ MemAlign = *ParamAlign;
+ else if ((ParamAlign = Arg.getParamAlign()))
+ MemAlign = *ParamAlign;
+ else
+ MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
+ if (Flags.isByRef())
+ Flags.setByRefSize(MemSize);
+ else
+ Flags.setByValSize(MemSize);
+ } else if (auto ParamAlign = Arg.getParamStackAlign()) {
+ MemAlign = *ParamAlign;
+ } else {
+ MemAlign = OriginalAlignment;
+ }
+ Flags.setMemAlign(MemAlign);
+
+ if (Arg.hasAttribute(Attribute::Nest))
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
+ if (Arg.hasAttribute(Attribute::Returned))
+ Flags.setReturned();
+
+ MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+ unsigned NumRegs = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ // For scalable vectors, use the minimum size; individual targets
+ // are responsible for handling scalable vector arguments and
+ // return values.
+ ISD::InputArg MyFlags(
+ Flags, RegisterVT, VT, isArgValueUsed, ArgNo,
+ PartBase + i * RegisterVT.getStoreSize().getKnownMinValue());
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+        // If this isn't the first piece, the alignment must be 1.
+ else if (i > 0) {
+ MyFlags.Flags.setOrigAlign(Align(1));
+ if (i == NumRegs - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
+ Ins.push_back(MyFlags);
+ }
+ if (NeedsRegBlock && Value == NumValues - 1)
+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
+ PartBase += VT.getStoreSize().getKnownMinValue();
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI->LowerFormalArguments(
+ DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ LLVM_DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ PointerType::get(F.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+ std::optional<ISD::NodeType> AssertOp;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
+ nullptr, F.getCallingConv(), AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ Register SRetReg =
+ RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot =
+ SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ ++i;
+ }
+
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+ for (const Argument &Arg : F.args()) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ unsigned NumParts = 0;
+ for (EVT VT : ValueVTs)
+ NumParts += TLI->getNumRegistersForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+
+ tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ ArrayRef(&InVals[i], NumParts), ArgHasUses);
+ }
+
+    // If this argument is unused, remember its value; it is used to generate
+    // debugging information.
+ bool isSwiftErrorArg =
+ TLI->supportSwiftError() &&
+ Arg.hasAttribute(Attribute::SwiftError);
+ if (!ArgHasUses && !isSwiftErrorArg) {
+ SDB->setUnusedArgValue(&Arg, InVals[i]);
+
+ // Also remember any frame index for use in FastISel.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+ }
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+
+ // Even an apparent 'unused' swifterror argument needs to be returned. So
+ // we do generate a copy for it that can be used on return from the
+ // function.
+ if (ArgHasUses || isSwiftErrorArg) {
+ std::optional<ISD::NodeType> AssertOp;
+ if (Arg.hasAttribute(Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (Arg.hasAttribute(Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr,
+ F.getCallingConv(), AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues),
+ SDB->getCurSDLoc());
+
+ SDB->setValue(&Arg, Res);
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      // We want to associate the argument with the frame index of whichever
+      // involved operand corresponds to the lowest address. The
+      // getCopyFromParts function, called earlier, may swap the order of the
+      // operands to BUILD_PAIR depending on endianness. After that swapping,
+      // the least significant bits of the argument are in the first operand
+      // of the BUILD_PAIR node, and the most significant bits are in the
+      // second operand.
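+      // For example (illustrative), an i64 argument split into two i32 parts
+      // on a 32-bit big-endian target has its most significant half at the
+      // lower address; that half is operand 1 of the BUILD_PAIR, so the
+      // low-address operand is operand 1 there and operand 0 on little-endian.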
+ unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+ }
+
+ // Analyses past this point are naive and don't expect an assertion.
+ if (Res.getOpcode() == ISD::AssertZext)
+ Res = Res.getOperand(0);
+
+ // Update the SwiftErrorVRegDefMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg))
+ SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+ Reg);
+ }
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[&Arg] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&Arg);
+ SDB->CopyToExportRegsIfNeeded(&Arg);
+ }
+ }
+
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+ if (!ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI :
+ MF->getInStackSlotVariableDbgInfo()) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.getStackSlot());
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.updateStackSlot(I->second);
+ }
+ }
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ emitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check PHI nodes in successors that expect a value to be available from this
+ // block.
+ for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) {
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (const PHINode &PN : SuccBB->phis()) {
+      // Ignore dead PHIs.
+ if (PN.use_empty())
+ continue;
+
+ // Skip empty types
+ if (PN.getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
+
+ if (const auto *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C);
+ // We need to zero/sign extend ConstantInt phi operands to match
+ // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ CopyValueToVirtualRegister(C, RegOut, ExtendType);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, Register>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp);
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
+ for (EVT VT : ValueVTs) {
+ const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(
+ std::make_pair(&*MBBI++, Reg + i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+
+ ConstantsOut.clear();
+}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator I(MBB);
+ if (++I == FuncInfo.MF->end())
+ return nullptr;
+ return &*I;
+}
+
+/// During lowering new call nodes can be created (such as memset, etc.).
+/// Those will become new roots of the current DAG, but complications arise
+/// when they are tail calls. In such cases, the call lowering will update
+/// the root, but the builder still needs to know that a tail call has been
+/// lowered in order to avoid generating an additional return.
+void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
+ // If the node is null, we do have a tail call.
+ if (MaybeTC.getNode() != nullptr)
+ DAG.setRoot(MaybeTC);
+ else
+ HasTailCall = true;
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB) {
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI(W.MBB);
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = &*BBI;
+
+ unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+ if (Size == 2 && W.MBB == SwitchMBB) {
+    // If any two of the cases have the same destination, and if the two case
+    // values differ in exactly one bit (the bit is set in one value and unset
+    // in the other), use bit manipulation to do two compares at once.
+    // For example: "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3
+ // cases.
+ // TODO: Handle cases where W.CaseBB != SwitchBB.
+ CaseCluster &Small = *W.FirstCluster;
+ CaseCluster &Big = *W.LastCluster;
+
+ if (Small.Low == Small.High && Big.Low == Big.High &&
+ Small.MBB == Big.MBB) {
+ const APInt &SmallValue = Small.Low->getValue();
+ const APInt &BigValue = Big.Low->getValue();
+
+ // Check that there is only one bit different.
+ APInt CommonBit = BigValue ^ SmallValue;
+ if (CommonBit.isPowerOf2()) {
+ SDValue CondLHS = getValue(Cond);
+ EVT VT = CondLHS.getValueType();
+ SDLoc DL = getCurSDLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ SDValue Cond = DAG.getSetCC(
+ DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the
+ // probabilities.
+ addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+ if (BPI)
+ addSuccessorWithProb(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+ else
+ addSuccessorWithProb(SwitchMBB, DefaultMBB);
+
+ // Insert the true branch.
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.MBB));
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(DefaultMBB));
+
+ DAG.setRoot(BrCond);
+ return;
+ }
+ }
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker as clusters are guaranteed to never overlap.
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Prob != b.Prob ?
+ a.Prob > b.Prob :
+ a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of probabilities.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+ --I;
+ if (I->Prob > W.LastCluster->Prob)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+ UnhandledProbs -= I->Prob;
+
+ switch (I->Kind) {
+ case CC_JumpTable: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
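+      // E.g. (illustrative) with DefaultProb = 1/4: 1/8 is added to the
+      // CurMBB->JumpMBB edge, 1/8 is taken from the CurMBB->Fallthrough edge,
+      // and the JumpMBB->DefaultMBB edge probability is set to 1/8.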
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
+
+ // If the default clause is unreachable, propagate that knowledge into
+ // JTH->FallthroughUnreachable which will use it to suppress the range
+ // check.
+ //
+ // However, don't do this if we're doing branch target enforcement,
+ // because a table branch _without_ a range check can be a tempting JOP
+ // gadget - out-of-bounds inputs that are impossible in correct
+ // execution become possible again if an attacker can influence the
+ // control flow. So if an attacker doesn't already have a BTI bypass
+ // available, we don't want them to be able to get one out of this
+ // table branch.
+ if (FallthroughUnreachable) {
+ Function &CurFunc = CurMF->getFunction();
+ bool HasBranchTargetEnforcement = false;
+ if (CurFunc.hasFnAttribute("branch-target-enforcement")) {
+ HasBranchTargetEnforcement =
+ CurFunc.getFnAttribute("branch-target-enforcement")
+ .getValueAsBool();
+ } else {
+ HasBranchTargetEnforcement =
+ CurMF->getMMI().getModule()->getModuleFlag(
+ "branch-target-enforcement");
+ }
+ if (!HasBranchTargetEnforcement)
+ JTH->FallthroughUnreachable = true;
+ }
+
+ if (!JTH->FallthroughUnreachable)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
+
+ // The jump table header will be inserted in our current block, do the
+ // range check, and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+ JTH->Emitted = true;
+ }
+ break;
+ }
+ case CC_BitTests: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+ // If the cases in bit test don't form a contiguous range, we evenly
+ // distribute the probability on the edge to Fallthrough to two
+ // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ visitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ break;
+ }
+ case CC_Range: {
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ CC = ISD::SETEQ;
+ LHS = Cond;
+      RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ CC = ISD::SETLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // If Fallthrough is unreachable, fold away the comparison.
+ if (FallthroughUnreachable)
+ CC = ISD::SETTRUE;
+
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+ getCurSDLoc(), I->Prob, UnhandledProbs);
+
+ if (CurMBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SL->SwitchCases.push_back(CB);
+
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
+}
+
+unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
+ CaseClusterIt First,
+ CaseClusterIt Last) {
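+  // The rank is the number of clusters in [First, Last] that would be ordered
+  // ahead of CC: higher probability first, ties broken by the smaller case
+  // value. Illustrative example (probabilities assumed): among clusters with
+  // probabilities {4/9, 2/9, 2/9, 1/9}, the second 2/9 cluster has rank 2.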
+ return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
+ if (X.Prob != CC.Prob)
+ return X.Prob > CC.Prob;
+
+ // Ties are broken by comparing the case value.
+ return X.Low->getValue().slt(CC.Low->getValue());
+ });
+}
+
+void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
+ const SwitchWorkListItem &W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB) {
+ assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
+ "Clusters not sorted?");
+
+ assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+ // Balance the tree based on branch probabilities to create a near-optimal (in
+ // terms of search time given key frequency) binary search tree. See e.g. Kurt
+ // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
+ CaseClusterIt LastLeft = W.FirstCluster;
+ CaseClusterIt FirstRight = W.LastCluster;
+ auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+ auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
+
+ // Move LastLeft and FirstRight towards each other from opposite directions to
+ // find a partitioning of the clusters which balances the probability on both
+ // sides. If LeftProb and RightProb are equal, alternate which side is
+ // taken to ensure 0-probability nodes are distributed evenly.
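+  //
+  // Small worked example (probabilities assumed for illustration): for cluster
+  // probabilities {0.4, 0.1, 0.1, 0.4} and a zero DefaultProb, the loop first
+  // grows the right side to {0.1, 0.4} (alternating on the tie), then the left
+  // side to {0.4, 0.1}, leaving a balanced 0.5 / 0.5 split.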
+ unsigned I = 0;
+ while (LastLeft + 1 < FirstRight) {
+ if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+ LeftProb += (++LastLeft)->Prob;
+ else
+ RightProb += (--FirstRight)->Prob;
+ I++;
+ }
+
+ while (true) {
+ // Our binary search tree differs from a typical BST in that ours can have up
+ // to three values in each leaf. The pivot selection above doesn't take that
+ // into account, which means the tree might require more nodes and be less
+ // efficient. We compensate for this here.
+
+ unsigned NumLeft = LastLeft - W.FirstCluster + 1;
+ unsigned NumRight = W.LastCluster - FirstRight + 1;
+
+ if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
+ // If one side has less than 3 clusters, and the other has more than 3,
+ // consider taking a cluster from the other side.
+
+ if (NumLeft < NumRight) {
+ // Consider moving the first cluster on the right to the left side.
+ CaseCluster &CC = *FirstRight;
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ if (LeftSideRank <= RightSideRank) {
+ // Moving the cluster to the left does not demote it.
+ ++LastLeft;
+ ++FirstRight;
+ continue;
+ }
+ } else {
+ assert(NumRight < NumLeft);
+ // Consider moving the last element on the left to the right side.
+ CaseCluster &CC = *LastLeft;
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ if (RightSideRank <= LeftSideRank) {
+          // Moving the cluster to the right does not demote it.
+ --LastLeft;
+ --FirstRight;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+
+ assert(LastLeft + 1 == FirstRight);
+ assert(LastLeft >= W.FirstCluster);
+ assert(FirstRight <= W.LastCluster);
+
+ // Use the first element on the right as pivot since we will make less-than
+ // comparisons against it.
+ CaseClusterIt PivotCluster = FirstRight;
+ assert(PivotCluster > W.FirstCluster);
+ assert(PivotCluster <= W.LastCluster);
+
+ CaseClusterIt FirstLeft = W.FirstCluster;
+ CaseClusterIt LastRight = W.LastCluster;
+
+ const ConstantInt *Pivot = PivotCluster->Low;
+
+ // New blocks will be inserted immediately after the current one.
+ MachineFunction::iterator BBI(W.MBB);
+ ++BBI;
+
+ // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+ // we can branch to its destination directly if it's squeezed exactly in
+ // between the known lower bound and Pivot - 1.
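+  // For instance (bounds assumed): if the known lower bound W.GE is 10, the
+  // pivot is 20 and the only left-hand cluster is the range [10, 19], there is
+  // nothing left to check and we branch straight to that cluster's block.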
+ MachineBasicBlock *LeftMBB;
+ if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+ FirstLeft->Low == W.GE &&
+ (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+ LeftMBB = FirstLeft->MBB;
+ } else {
+ LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, LeftMBB);
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+ // single cluster, RHS.Low == Pivot, and we can branch to its destination
+ // directly if RHS.High equals the current upper bound.
+ MachineBasicBlock *RightMBB;
+ if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
+ W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+ RightMBB = FirstRight->MBB;
+ } else {
+ RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, RightMBB);
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Create the CaseBlock record that will be used to lower the branch.
+ CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+ getCurSDLoc(), LeftProb, RightProb);
+
+ if (W.MBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SL->SwitchCases.push_back(CB);
+}
+
+// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
+// from the switch statement.
+static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
+ BranchProbability PeeledCaseProb) {
+ if (PeeledCaseProb == BranchProbability::getOne())
+ return BranchProbability::getZero();
+ BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
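+  // The result is roughly CaseProb / SwitchProb, i.e. CaseProb rescaled to the
+  // probability mass remaining after the peel, clamped to at most one.
+  // Worked example (numbers assumed): peeling a 3/4 case leaves
+  // SwitchProb == 1/4, so a case that had probability 1/8 scales to
+  // (1/8) / (1/4) == 1/2.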
+ uint32_t Numerator = CaseProb.getNumerator();
+ uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+ return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return current MachineBasicBlock for the switch statement if the peeling
+// does not occur.
+// If the peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+ const SwitchInst &SI, CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ // Don't perform if there is only one cluster or optimizing for size.
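+  // Note that SwitchPeelThreshold is interpreted as a percentage; a value
+  // above 100 disables peeling entirely (first condition below).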
+ if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+ TM.getOptLevel() == CodeGenOpt::None ||
+ SwitchMBB->getParent()->getFunction().hasMinSize())
+ return SwitchMBB;
+
+ BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+ unsigned PeeledCaseIndex = 0;
+ bool SwitchPeeled = false;
+ for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+ CaseCluster &CC = Clusters[Index];
+ if (CC.Prob < TopCaseProb)
+ continue;
+ TopCaseProb = CC.Prob;
+ PeeledCaseIndex = Index;
+ SwitchPeeled = true;
+ }
+ if (!SwitchPeeled)
+ return SwitchMBB;
+
+ LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
+ << TopCaseProb << "\n");
+
+ // Record the MBB for the peeled switch statement.
+ MachineFunction::iterator BBI(SwitchMBB);
+ ++BBI;
+ MachineBasicBlock *PeeledSwitchMBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+ ExportFromCurrentBlock(SI.getCondition());
+ auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+ SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+ nullptr, nullptr, TopCaseProb.getCompl()};
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+ Clusters.erase(PeeledCaseIt);
+ for (CaseCluster &CC : Clusters) {
+ LLVM_DEBUG(
+        dbgs() << "Scale the probability for one cluster, before scaling: "
+ << CC.Prob << "\n");
+ CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
+ LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ }
+ PeeledCaseProb = TopCaseProb;
+ return PeeledSwitchMBB;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ // Extract cases from the switch.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (auto I : SI.cases()) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
+ const ConstantInt *CaseVal = I.getCaseValue();
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+ }
+
+ MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
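+  // For example, cases 1, 2 and 3 (values chosen purely for illustration) that
+  // all branch to the same successor become one CC_Range cluster [1, 3].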
+ sortAndRangeify(Clusters);
+
+  // The branch probability of the peeled case.
+ BranchProbability PeeledCaseProb = BranchProbability::getZero();
+ MachineBasicBlock *PeeledSwitchMBB =
+ peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
+ // If there is only the default destination, jump there directly.
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ if (Clusters.empty()) {
+ assert(PeeledSwitchMBB == SwitchMBB);
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != NextBlock(SwitchMBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
+ }
+ return;
+ }
+
+ SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
+ SL->findBitTestClusters(Clusters, &SI);
+
+ LLVM_DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branch probability for DefaultMBB if peeling occurs and
+ // DefaultMBB is not replaced.
+ if (PeeledCaseProb != BranchProbability::getZero() &&
+ DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+ DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
+ WorkList.push_back(
+ {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.pop_back_val();
+ unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
+
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+ !DefaultMBB->getParent()->getFunction().hasMinSize()) {
+ // For optimized builds, lower large range as a balanced binary tree.
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
+ continue;
+ }
+
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
+ }
+}
+
+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto DL = getCurSDLoc();
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getStepVector(DL, ResultVT));
+}
+
+void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDLoc DL = getCurSDLoc();
+ SDValue V = getValue(I.getOperand(0));
+ assert(VT == V.getValueType() && "Malformed vector.reverse!");
+
+ if (VT.isScalableVector()) {
+ setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
+ return;
+ }
+
+ // Use VECTOR_SHUFFLE for the fixed-length vector
+ // to maintain existing behavior.
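+  // E.g. for a 4-element vector the reversal mask built below is <3, 2, 1, 0>.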
+ SmallVector<int, 8> Mask;
+ unsigned NumElts = VT.getVectorMinNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(NumElts - 1 - i);
+
+ setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
+}
+
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
+ auto DL = getCurSDLoc();
+ SDValue InVec = getValue(I.getOperand(0));
+ EVT OutVT =
+ InVec.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+
+ unsigned OutNumElts = OutVT.getVectorMinNumElements();
+
+ // ISD Node needs the input vectors split into two equal parts
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+ DAG.getVectorIdxConstant(OutNumElts, DL));
+
+ // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+ // legalisation and combines.
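+  // Assuming createStrideMask(Start, Stride, VF) yields <Start, Start+Stride,
+  // ...>, a 4-element result would use <0, 2, 4, 6> for the even lanes and
+  // <1, 3, 5, 7> for the odd lanes of the concatenated Lo/Hi pair.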
+ if (OutVT.isFixedLengthVector()) {
+ SDValue Even = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+ createStrideMask(0, 2, OutNumElts));
+ SDValue Odd = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+ createStrideMask(1, 2, OutNumElts));
+ SDValue Res = DAG.getMergeValues({Even, Odd}, getCurSDLoc());
+ setValue(&I, Res);
+ return;
+ }
+
+ SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(OutVT, OutVT), Lo, Hi);
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
+ auto DL = getCurSDLoc();
+ EVT InVT = getValue(I.getOperand(0)).getValueType();
+ SDValue InVec0 = getValue(I.getOperand(0));
+ SDValue InVec1 = getValue(I.getOperand(1));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+ // legalisation and combines.
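+  // Assuming createInterleaveMask(VF, 2) yields <0, VF, 1, VF+1, ...>, two
+  // 4-element inputs are concatenated and shuffled with the mask
+  // <0, 4, 1, 5, 2, 6, 3, 7>.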
+ if (OutVT.isFixedLengthVector()) {
+ unsigned NumElts = InVT.getVectorMinNumElements();
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec0, InVec1);
+ setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT),
+ createInterleaveMask(NumElts, 2)));
+ return;
+ }
+
+ SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(InVT, InVT), InVec0, InVec1);
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
+ Res.getValue(1));
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Op = getValue(I.getOperand(0));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
+ SDValue(Op.getNode(), Op.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDLoc DL = getCurSDLoc();
+ SDValue V1 = getValue(I.getOperand(0));
+ SDValue V2 = getValue(I.getOperand(1));
+ int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
+
+ // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
+ if (VT.isScalableVector()) {
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
+ DAG.getConstant(Imm, DL, IdxVT)));
+ return;
+ }
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ uint64_t Idx = (NumElts + Imm) % NumElts;
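+  // E.g. with NumElts == 4 and Imm == -1, Idx is 3 and the mask built below is
+  // <3, 4, 5, 6>: the last element of V1 followed by the first three elements
+  // of V2.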
+
+ // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Mask.push_back(Idx + i);
+ setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
+}
+
+// Consider the following MIR after SelectionDAG, which produces output in
+// physregs in the first case or virtregs in the second case.
+//
+// INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx
+// %5:gr32 = COPY $ebx
+// %6:gr32 = COPY $edx
+// %1:gr32 = COPY %6:gr32
+// %0:gr32 = COPY %5:gr32
+//
+// INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32
+// %1:gr32 = COPY %6:gr32
+// %0:gr32 = COPY %5:gr32
+//
+// Given %0, we'd like to return $ebx in the first case and %5 in the second.
+// Given %1, we'd like to return $edx in the first case and %6 in the second.
+//
+// If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap
+// to a single virtreg (such as %0). The remaining outputs monotonically
+// increase in virtreg number from there. If a callbr has no outputs, then it
+// should not have a corresponding callbr landingpad; in fact, the callbr
+// landingpad would not even be able to refer to such a callbr.
+static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *MI = MRI.def_begin(Reg)->getParent();
+ // There is definitely at least one copy.
+ assert(MI->getOpcode() == TargetOpcode::COPY &&
+ "start of copy chain MUST be COPY");
+ Reg = MI->getOperand(1).getReg();
+ MI = MRI.def_begin(Reg)->getParent();
+ // There may be an optional second copy.
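+  // In the physreg flavour of the example above, this peels %5 = COPY $ebx so
+  // that we return the physical register defined by the INLINEASM_BR.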
+ if (MI->getOpcode() == TargetOpcode::COPY) {
+ assert(Reg.isVirtual() && "expected COPY of virtual register");
+ Reg = MI->getOperand(1).getReg();
+ assert(Reg.isPhysical() && "expected COPY of physical register");
+ MI = MRI.def_begin(Reg)->getParent();
+ }
+ // The start of the chain must be an INLINEASM_BR.
+ assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
+ "end of copy chain MUST be INLINEASM_BR");
+ return Reg;
+}
+
+// We must do this walk rather than the simpler
+// setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
+// otherwise we will end up with copies of virtregs only valid along direct
+// edges.
+void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
+ SmallVector<EVT, 8> ResultVTs;
+ SmallVector<SDValue, 8> ResultValues;
+ const auto *CBR =
+ cast<CallBrInst>(I.getParent()->getUniquePredecessor()->getTerminator());
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+
+ unsigned InitialDef = FuncInfo.ValueMap[CBR];
+ SDValue Chain = DAG.getRoot();
+
+ // Re-parse the asm constraints string.
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(DAG.getDataLayout(), TRI, *CBR);
+ for (auto &T : TargetConstraints) {
+ SDISelAsmOperandInfo OpInfo(T);
+ if (OpInfo.Type != InlineAsm::isOutput)
+ continue;
+
+ // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the
+ // individual constraint.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass: {
+ // Fill in OpInfo.AssignedRegs.Regs.
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, OpInfo);
+
+      // getRegistersForValue may produce one or more registers depending on
+      // whether OpInfo.ConstraintVT is legal on the target.
+ for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) {
+ Register OriginalDef = FollowCopyChain(MRI, InitialDef++);
+ if (Register::isPhysicalRegister(OriginalDef))
+ FuncInfo.MBB->addLiveIn(OriginalDef);
+ // Update the assigned registers to use the original defs.
+ OpInfo.AssignedRegs.Regs[i] = OriginalDef;
+ }
+
+ SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
+ DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, CBR);
+ ResultValues.push_back(V);
+ ResultVTs.push_back(OpInfo.ConstraintVT);
+ break;
+ }
+ case TargetLowering::C_Other: {
+ SDValue Flag;
+ SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ OpInfo, DAG);
+ ++InitialDef;
+ ResultValues.push_back(V);
+ ResultVTs.push_back(OpInfo.ConstraintVT);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(&I, V);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 000000000000..f2496f24973a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,803 @@
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+
+#include "StatepointLowering.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <optional>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class AAResults;
+class AllocaInst;
+class AtomicCmpXchgInst;
+class AtomicRMWInst;
+class AssumptionCache;
+class BasicBlock;
+class BranchInst;
+class CallInst;
+class CallBrInst;
+class CatchPadInst;
+class CatchReturnInst;
+class CatchSwitchInst;
+class CleanupPadInst;
+class CleanupReturnInst;
+class Constant;
+class ConstrainedFPIntrinsic;
+class DbgValueInst;
+class DataLayout;
+class DIExpression;
+class DILocalVariable;
+class DILocation;
+class FenceInst;
+class FunctionLoweringInfo;
+class GCFunctionInfo;
+class GCRelocateInst;
+class GCResultInst;
+class GCStatepointInst;
+class IndirectBrInst;
+class InvokeInst;
+class LandingPadInst;
+class LLVMContext;
+class LoadInst;
+class MachineBasicBlock;
+class PHINode;
+class ResumeInst;
+class ReturnInst;
+class SDDbgValue;
+class SelectionDAG;
+class StoreInst;
+class SwiftErrorValueTracking;
+class SwitchInst;
+class TargetLibraryInfo;
+class TargetMachine;
+class Type;
+class VAArgInst;
+class UnreachableInst;
+class Use;
+class User;
+class Value;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// The current instruction being visited.
+ const Instruction *CurInst = nullptr;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+ /// Maps argument value for unused arguments. This is used
+  /// Maps argument values for unused arguments. This is used
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ using DbgValTy = const DbgValueInst *;
+ using VarLocTy = const VarLocInfo *;
+ PointerUnion<DbgValTy, VarLocTy> Info;
+ unsigned SDNodeOrder = 0;
+
+ public:
+ DanglingDebugInfo() = default;
+ DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO)
+ : Info(DI), SDNodeOrder(SDNO) {}
+ DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO)
+ : Info(VarLoc), SDNodeOrder(SDNO) {}
+
+ DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
+ if (isa<VarLocTy>(Info))
+ return Locs->getDILocalVariable(cast<VarLocTy>(Info)->VariableID);
+ return cast<DbgValTy>(Info)->getVariable();
+ }
+ DIExpression *getExpression() const {
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->Expr;
+ return cast<DbgValTy>(Info)->getExpression();
+ }
+ Value *getVariableLocationOp(unsigned Idx) const {
+ assert(Idx == 0 && "Dangling variadic debug values not supported yet");
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->Values.getVariableLocationOp(Idx);
+ return cast<DbgValTy>(Info)->getVariableLocationOp(Idx);
+ }
+ DebugLoc getDebugLoc() const {
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->DL;
+ return cast<DbgValTy>(Info)->getDebugLoc();
+ }
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ /// Helper for printing DanglingDebugInfo. This hoop-jumping is to
+ /// accommodate the fact that an argument is required for getVariable.
+ /// Call SelectionDAGBuilder::printDDI instead of using directly.
+ struct Print {
+ Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs)
+ : DDI(DDI), VarLocs(VarLocs) {}
+ const DanglingDebugInfo &DDI;
+ const FunctionVarLocs *VarLocs;
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const DanglingDebugInfo::Print &P) {
+ OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs)
+ << ", val= " << *P.DDI.getVariableLocationOp(0)
+ << ", expr=" << *P.DDI.getExpression()
+ << ", order=" << P.DDI.getSDNodeOrder()
+ << ", loc=" << P.DDI.getDebugLoc() << ")";
+ return OS;
+ }
+ };
+ };
+
+ /// Returns an object that defines `raw_ostream &operator<<` for printing.
+ /// Usage example:
+ //// errs() << printDDI(MyDanglingInfo) << " is dangling\n";
+ DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) {
+ return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs());
+ }
+
+ /// Helper type for DanglingDebugInfoMap.
+ typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
+
+ /// Keeps track of dbg_values for which we have not yet seen the referent.
+ /// We defer handling these until we do see it.
+ MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+
+ /// Cache the module flag for whether we should use debug-info assignment
+ /// tracking.
+ bool AssignmentTrackingEnabled = false;
+
+public:
+ /// Loads are not emitted to the program immediately. We bunch them up and
+ /// then emit token factor nodes when possible. This allows us to get simple
+ /// disambiguation between loads without worrying about alias analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+
+ /// State used while lowering a statepoint sequence (gc_statepoint,
+ /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details.
+ StatepointLoweringState StatepointLowering;
+
+private:
+ /// CopyToReg nodes that copy values to virtual registers for export to other
+ /// blocks need to be emitted before any terminator instruction, but they have
+  /// no other ordering requirements. We bunch them up and then emit a single
+ /// tokenfactor for them just before terminator instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// Similar to loads, nodes corresponding to constrained FP intrinsics are
+ /// bunched up and emitted when necessary. These can be moved across each
+ /// other and any (normal) memory operation (load or store), but not across
+ /// calls or instructions having unspecified side effects. As a special
+ /// case, constrained FP intrinsics using fpexcept.strict may not be deleted
+ /// even if otherwise unused, so they need to be chained before any
+ /// terminator instruction (like PendingExports). We track the latter
+ /// set of nodes in a separate list.
+ SmallVector<SDValue, 8> PendingConstrainedFP;
+ SmallVector<SDValue, 8> PendingConstrainedFPStrict;
+
+ /// Update root to include all chains from the Pending list.
+ SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);
+
+ /// A unique monotonically increasing number used to order the SDNodes we
+ /// create.
+ unsigned SDNodeOrder;
+
+ /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
+ /// than each cluster in the range, its rank is 0.
+ unsigned caseClusterRank(const SwitchCG::CaseCluster &CC,
+ SwitchCG::CaseClusterIt First,
+ SwitchCG::CaseClusterIt Last);
+
+ /// Emit comparison and split W into two subtrees.
+ void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+ const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+ MachineBasicBlock *SwitchMBB);
+
+ /// Lower W.
+ void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB);
+
+ /// Peel the top probability case if it exceeds the threshold
+ MachineBasicBlock *
+ peelDominantCaseCluster(const SwitchInst &SI,
+ SwitchCG::CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
+
+private:
+ const TargetMachine &TM;
+
+public:
+ /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
+ /// nodes without a corresponding SDNode.
+ static const unsigned LowestSDNodeOrder = 1;
+
+ SelectionDAG &DAG;
+ AAResults *AA = nullptr;
+ AssumptionCache *AC = nullptr;
+ const TargetLibraryInfo *LibInfo = nullptr;
+
+ class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
+ public:
+ SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo)
+ : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {}
+
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown()) override {
+ SDB->addSuccessorWithProb(Src, Dst, Prob);
+ }
+
+ private:
+ SelectionDAGBuilder *SDB = nullptr;
+ };
+
+ // Data related to deferred switch lowerings. Used to construct additional
+ // Basic Blocks in SelectionDAGISel::FinishBasicBlock.
+ std::unique_ptr<SDAGSwitchLowering> SL;
+
+ /// A StackProtectorDescriptor structure used to communicate stack protector
+ /// information in between SelectBasicBlock and FinishBasicBlock.
+ StackProtectorDescriptor SPDescriptor;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// Information about the function as a whole.
+ FunctionLoweringInfo &FuncInfo;
+
+ /// Information about the swifterror values used throughout the function.
+ SwiftErrorValueTracking &SwiftError;
+
+ /// Garbage collection metadata for the function.
+ GCFunctionInfo *GFI = nullptr;
+
+ /// Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
+
+ /// This is set to true if a call in the current block has been translated as
+ /// a tail call. In this case, no subsequent DAG nodes should be created.
+ bool HasTailCall = false;
+
+ LLVMContext *Context = nullptr;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SwiftError(swifterror) {}
+
+ void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC,
+ const TargetLibraryInfo *li);
+
+ /// Clear out the current SelectionDAG and the associated state and prepare
+ /// this SelectionDAGBuilder object to be used for a new block. This doesn't
+ /// clear out information about additional blocks that are needed to complete
+ /// switch lowering or PHI node updating; that information is cleared out as
+ /// it is consumed.
+ void clear();
+
+ /// Clear the dangling debug information map. This function is separated from
+ /// the clear so that debug information that is dangling in a basic block can
+ /// be properly resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// Return the current virtual root of the Selection DAG, flushing any
+ /// PendingLoad items. This must be done before emitting a store or any other
+ /// memory node that may need to be ordered after any prior load instructions.
+ SDValue getMemoryRoot();
+
+ /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)
+  /// items. This must be done before emitting any call or any other node
+ /// that may need to be ordered after FP instructions due to other side
+ /// effects.
+ SDValue getRoot();
+
+ /// Similar to getRoot, but instead of flushing all the PendingLoad items,
+ /// flush all the PendingExports (and PendingConstrainedFPStrict) items.
+ /// It is necessary to do this before emitting a terminator instruction.
+ SDValue getControlRoot();
+
+ SDLoc getCurSDLoc() const {
+ return SDLoc(CurInst, SDNodeOrder);
+ }
+
+ DebugLoc getCurDebugLoc() const {
+ return CurInst ? CurInst->getDebugLoc() : DebugLoc();
+ }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg,
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+  /// If a virtual register has been allocated for the value V, emit a
+  /// CopyFromReg of the specified type Ty. Otherwise return an empty SDValue().
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
+ /// Register a dbg_value which relies on a Value which we have not yet seen.
+ void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order);
+ void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order);
+
+ /// If we have dangling debug info that describes \p Variable, or an
+ /// overlapping part of variable considering the \p Expr, then this method
+ /// will drop that debug info as it isn't valid any longer.
+ void dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr);
+
+ /// If we saw an earlier dbg_value referring to V, generate the debug data
+ /// structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+
+ /// For the given dangling debuginfo record, perform last-ditch efforts to
+ /// resolve the debuginfo to something that is represented in this DAG. If
+ /// this cannot be done, produce an Undef debug value record.
+ void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+
+ /// For a given list of Values, attempt to create and record a SDDbgValue in
+ /// the SelectionDAG.
+ bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc DbgLoc, unsigned Order,
+ bool IsVariadic);
+
+ /// Create a record for a kill location debug intrinsic.
+ void handleKillDebugValue(DILocalVariable *Var, DIExpression *Expr,
+ DebugLoc DbgLoc, unsigned Order);
+
+ /// Evict any dangling debug information, attempting to salvage it first.
+ void resolveOrClearDbgInfo();
+
+ SDValue getValue(const Value *V);
+
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(!N.getNode() && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(!N.getNode() && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb, BranchProbability FProb,
+ bool InvertCond);
+ bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall,
+ bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr);
+
+  // Lower range metadata from 0 to N to an assert zext to an integer whose
+  // width is the nearest floor power of two.
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
+ SDValue Op);
+
+ void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
+ const CallBase *Call, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, bool IsPatchPoint);
+
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
+
+ /// When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+ /// Describes a gc.statepoint or a gc.statepoint like thing for the purposes
+ /// of lowering into a STATEPOINT node.
+ struct StatepointLoweringInfo {
+ /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set
+ /// of gc pointers this STATEPOINT has to relocate.
+ SmallVector<const Value *, 16> Bases;
+ SmallVector<const Value *, 16> Ptrs;
+
+ /// The set of gc.relocate calls associated with this gc.statepoint.
+ SmallVector<const GCRelocateInst *, 16> GCRelocates;
+
+ /// The full list of gc arguments to the gc.statepoint being lowered.
+ ArrayRef<const Use> GCArgs;
+
+ /// The gc.statepoint instruction.
+ const Instruction *StatepointInstr = nullptr;
+
+ /// The list of gc transition arguments present in the gc.statepoint being
+ /// lowered.
+ ArrayRef<const Use> GCTransitionArgs;
+
+ /// The ID that the resulting STATEPOINT instruction has to report.
+ unsigned ID = -1;
+
+ /// Information regarding the underlying call instruction.
+ TargetLowering::CallLoweringInfo CLI;
+
+ /// The deoptimization state associated with this gc.statepoint call, if
+ /// any.
+ ArrayRef<const Use> DeoptState;
+
+ /// Flags associated with the meta arguments being lowered.
+ uint64_t StatepointFlags = -1;
+
+ /// The number of patchable bytes the call needs to get lowered into.
+ unsigned NumPatchBytes = -1;
+
+ /// The exception handling unwind destination, in case this represents an
+ /// invoke of gc.statepoint.
+ const BasicBlock *EHPadBB = nullptr;
+
+ explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {}
+ };
+
+ /// Lower \p SLI into a STATEPOINT instruction.
+ SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI);
+
+ // This function is responsible for the whole statepoint lowering process.
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(const GCStatepointInst &I,
+ const BasicBlock *EHPadBB = nullptr);
+
+ void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
+ const BasicBlock *EHPadBB);
+
+ void LowerDeoptimizeCall(const CallInst *CI);
+ void LowerDeoptimizingReturn();
+
+ void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
+ const BasicBlock *EHPadBB,
+ bool VarArgDisallowed,
+ bool ForceVoidReturnTy);
+
+ /// Returns the type of FrameIndex and TargetFrameIndex nodes.
+ MVT getFrameIndexTy() {
+ return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
+ }
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I);
+ void visitCleanupRet(const CleanupReturnInst &I);
+ void visitCatchSwitch(const CatchSwitchInst &I);
+ void visitCatchRet(const CatchReturnInst &I);
+ void visitCatchPad(const CatchPadInst &I);
+ void visitCleanupPad(const CleanupPadInst &CPI);
+
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
+
+public:
+ void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB);
+ void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+ void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
+ void visitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext, unsigned Reg,
+ SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+ void visitJumpTable(SwitchCG::JumpTable &JT);
+ void visitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitCallBr(const CallBrInst &I);
+ void visitCallBrLandingPad(const CallInst &I);
+ void visitResume(const ResumeInst &I);
+
+ void visitUnary(const User &I, unsigned Opcode);
+ void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
+
+ void visitBinary(const User &I, unsigned Opcode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); }
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+ void visitAddrSpaceCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &LP);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitMaskedLoad(const CallInst &I, bool IsExpanding = false);
+ void visitMaskedStore(const CallInst &I, bool IsCompressing = false);
+ void visitMaskedGather(const CallInst &I);
+ void visitMaskedScatter(const CallInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpBCmpCall(const CallInst &I);
+ bool visitMemPCpyCall(const CallInst &I);
+ bool visitMemChrCall(const CallInst &I);
+ bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
+ bool visitStrCmpCall(const CallInst &I);
+ bool visitStrLenCall(const CallInst &I);
+ bool visitStrNLenCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+ void visitLoadFromSwiftError(const LoadInst &I);
+ void visitStoreToSwiftError(const StoreInst &I);
+ void visitFreeze(const FreezeInst &I);
+
+ void visitInlineAsm(const CallBase &Call,
+ const BasicBlock *EHPadBB = nullptr);
+ void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPStore(const VPIntrinsic &VPIntrin,
+ const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPScatter(const VPIntrinsic &VPIntrin,
+ const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPStridedStore(const VPIntrinsic &VPIntrin,
+ const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
+ void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+ void visitStackmap(const CallInst &I);
+ void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr);
+
+ // These two are implemented in StatepointLowering.cpp
+ void visitGCRelocate(const GCRelocateInst &Relocate);
+ void visitGCResult(const GCResultInst &I);
+
+ void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+ void visitVectorReverse(const CallInst &I);
+ void visitVectorSplice(const CallInst &I);
+ void visitVectorInterleave(const CallInst &I);
+ void visitVectorDeinterleave(const CallInst &I);
+ void visitStepVector(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ void processIntegerCallValue(const Instruction &I,
+ SDValue Value, bool IsSigned);
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ void emitInlineAsmError(const CallBase &Call, const Twine &Message);
+
+  /// An enum that records, when emitting a function argument dbg value, the
+  /// kind of intrinsic it originally came from. This controls the internal
+  /// behavior of EmitFuncArgumentDbgValue.
+ enum class FuncArgumentDbgValueKind {
+ Value, // This was originally a llvm.dbg.value.
+ Declare, // This was originally a llvm.dbg.declare.
+ };
+
+  /// If V is a function argument, create the corresponding DBG_VALUE machine
+  /// instruction for it now. At the end of instruction selection, these will
+  /// be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
+ DIExpression *Expr, DILocation *DL,
+ FuncArgumentDbgValueKind Kind,
+ const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
+
+ /// Return the appropriate SDDbgValue based on N.
+ SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
+ DIExpression *Expr, const DebugLoc &dl,
+ unsigned DbgSDNodeOrder);
+
+ /// Lowers CallInst to an external symbol.
+ void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName);
+
+ SDValue lowerStartEH(SDValue Chain, const BasicBlock *EHPadBB,
+ MCSymbol *&BeginLabel);
+ SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
+ const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+};
+
+/// This struct represents the registers (physical or virtual)
+/// that a particular set of values is assigned, and the type information about
+/// the value. The most common situation is to represent one value at a time,
+/// but struct or array values are handled element-wise as multiple values. The
+/// splitting of aggregates is performed recursively, so that we never have
+/// aggregate-typed registers. The values at this point do not necessarily have
+/// legal types, so each value may require one or more registers of some legal
+/// type.
+///
+struct RegsForValue {
+ /// The value types of the values, which may not be legal, and
+  /// may need to be promoted or synthesized from one or more registers.
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// The value types of the registers. This is the same size as ValueVTs and it
+  /// records, for each value, the type of the assigned register or
+  /// registers. (Individual values are never synthesized from more than one
+ /// type of register.)
+ ///
+  /// With virtual registers, the contents of RegVTs are redundant with TLI's
+  /// getRegisterType member function; however, with physical registers
+ /// it is necessary to have a separate record of the types.
+ SmallVector<MVT, 4> RegVTs;
+
+ /// This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ SmallVector<unsigned, 4> Regs;
+
+ /// This list holds the number of registers for each value.
+ SmallVector<unsigned, 4> RegCount;
+
+  /// Records whether this value needs to be treated in an ABI-dependent
+  /// manner, different from normal type legalization.
+ std::optional<CallingConv::ID> CallConv;
+
+ RegsForValue() = default;
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+ std::optional<CallingConv::ID> CC = std::nullopt);
+ RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ std::optional<CallingConv::ID> CC);
+
+ bool isABIMangled() const { return CallConv.has_value(); }
+
+ /// Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ RegCount.push_back(RHS.Regs.size());
+ }
+
+  /// Emit a series of CopyFromReg nodes that copy from this value and return
+  /// the result as a ValueVTs value. This uses Chain/Glue as the input and
+  /// updates them for the output Chain/Glue. If the Glue pointer is nullptr,
+  /// no glue is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ const SDLoc &dl, SDValue &Chain, SDValue *Glue,
+ const Value *V = nullptr) const;
+
+  /// Emit a series of CopyToReg nodes that copy the specified value into the
+  /// registers specified by this object. This uses Chain/Glue as the input and
+  /// updates them for the output Chain/Glue. If the Glue pointer is nullptr,
+  /// no glue is used. If V is not nullptr, it is used to print better
+  /// diagnostic messages on error.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
+ SDValue &Chain, SDValue *Glue, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+
+ /// Add this value to the specified inlineasm node operand list. This adds the
+ /// code marker, matching input operand index (if applicable), and includes
+ /// the number of values added into it.
+ void AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+
+ /// Check if the total RegCount is greater than one.
+ bool occupiesMultipleRegs() const {
+ return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1;
+ }
+
+ /// Return a list of registers and their sizes.
+ SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..03a1ead5bbb4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,1096 @@
+//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdint>
+#include <iterator>
+
+using namespace llvm;
+
+static cl::opt<bool>
+VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
+ cl::desc("Display more information when dumping selection "
+ "DAG nodes."));
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return std::string(TII->getName(getMachineOpcode()));
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
+ case ISD::ATOMIC_LOAD_UINC_WRAP:
+ return "AtomicLoadUIncWrap";
+ case ISD::ATOMIC_LOAD_UDEC_WRAP:
+ return "AtomicLoadUDecWrap";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+ case ISD::AssertAlign: return "AssertAlign";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueConstant";
+ return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::SPONENTRY: return "SPONENTRY";
+ case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
+ case ISD::READ_REGISTER: return "READ_REGISTER";
+ case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
+ if (!G)
+ return "Unknown intrinsic";
+ if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueTargetConstant";
+ return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::MCSymbol: return "MCSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::VSCALE: return "vscale";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::INLINEASM_BR: return "inlineasm_br";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::ANNOTATION_LABEL: return "annotation_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FMINNUM: return "fminnum";
+ case ISD::STRICT_FMINNUM: return "strict_fminnum";
+ case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
+ case ISD::FMINNUM_IEEE: return "fminnum_ieee";
+ case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
+ case ISD::FMINIMUM: return "fminimum";
+ case ISD::STRICT_FMINIMUM: return "strict_fminimum";
+ case ISD::FMAXIMUM: return "fmaximum";
+ case ISD::STRICT_FMAXIMUM: return "strict_fmaximum";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::STRICT_FSQRT: return "strict_fsqrt";
+ case ISD::FCBRT: return "fcbrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::STRICT_FSIN: return "strict_fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::STRICT_FCOS: return "strict_fcos";
+ case ISD::FSINCOS: return "fsincos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::STRICT_FTRUNC: return "strict_ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::STRICT_FFLOOR: return "strict_ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::STRICT_FCEIL: return "strict_fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::STRICT_FRINT: return "strict_frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
+ case ISD::FROUND: return "fround";
+ case ISD::STRICT_FROUND: return "strict_fround";
+ case ISD::FROUNDEVEN: return "froundeven";
+ case ISD::STRICT_FROUNDEVEN: return "strict_froundeven";
+ case ISD::FEXP: return "fexp";
+ case ISD::STRICT_FEXP: return "strict_fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::STRICT_FEXP2: return "strict_fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::STRICT_FLOG: return "strict_flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::STRICT_FLOG2: return "strict_flog2";
+ case ISD::FLOG10: return "flog10";
+ case ISD::STRICT_FLOG10: return "strict_flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::AVGFLOORU: return "avgflooru";
+ case ISD::AVGFLOORS: return "avgfloors";
+ case ISD::AVGCEILU: return "avgceilu";
+ case ISD::AVGCEILS: return "avgceils";
+ case ISD::ABDS: return "abds";
+ case ISD::ABDU: return "abdu";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FSHL: return "fshl";
+ case ISD::FSHR: return "fshr";
+ case ISD::FADD: return "fadd";
+ case ISD::STRICT_FADD: return "strict_fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::STRICT_FSUB: return "strict_fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::STRICT_FMUL: return "strict_fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::STRICT_FDIV: return "strict_fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::STRICT_FMA: return "strict_fma";
+ case ISD::FMAD: return "fmad";
+ case ISD::FREM: return "frem";
+ case ISD::STRICT_FREM: return "strict_frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FCANONICALIZE: return "fcanonicalize";
+ case ISD::IS_FPCLASS: return "is_fpclass";
+ case ISD::FPOW: return "fpow";
+ case ISD::STRICT_FPOW: return "strict_fpow";
+ case ISD::SMIN: return "smin";
+ case ISD::SMAX: return "smax";
+ case ISD::UMIN: return "umin";
+ case ISD::UMAX: return "umax";
+
+ case ISD::FLDEXP: return "fldexp";
+ case ISD::STRICT_FLDEXP: return "strict_fldexp";
+ case ISD::FFREXP: return "ffrexp";
+ case ISD::FPOWI: return "fpowi";
+ case ISD::STRICT_FPOWI: return "strict_fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SETCCCARRY: return "setcccarry";
+ case ISD::STRICT_FSETCC: return "strict_fsetcc";
+ case ISD::STRICT_FSETCCS: return "strict_fsetccs";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::VECTOR_DEINTERLEAVE: return "vector_deinterleave";
+ case ISD::VECTOR_INTERLEAVE: return "vector_interleave";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::VECTOR_SPLICE: return "vector_splice";
+ case ISD::SPLAT_VECTOR: return "splat_vector";
+ case ISD::SPLAT_VECTOR_PARTS: return "splat_vector_parts";
+ case ISD::VECTOR_REVERSE: return "vector_reverse";
+ case ISD::STEP_VECTOR: return "step_vector";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::UADDO_CARRY: return "uaddo_carry";
+ case ISD::SADDO_CARRY: return "saddo_carry";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::USUBO_CARRY: return "usubo_carry";
+ case ISD::SSUBO_CARRY: return "ssubo_carry";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ case ISD::SADDSAT: return "saddsat";
+ case ISD::UADDSAT: return "uaddsat";
+ case ISD::SSUBSAT: return "ssubsat";
+ case ISD::USUBSAT: return "usubsat";
+ case ISD::SSHLSAT: return "sshlsat";
+ case ISD::USHLSAT: return "ushlsat";
+
+ case ISD::SMULFIX: return "smulfix";
+ case ISD::SMULFIXSAT: return "smulfixsat";
+ case ISD::UMULFIX: return "umulfix";
+ case ISD::UMULFIXSAT: return "umulfixsat";
+
+ case ISD::SDIVFIX: return "sdivfix";
+ case ISD::SDIVFIXSAT: return "sdivfixsat";
+ case ISD::UDIVFIX: return "udivfix";
+ case ISD::UDIVFIXSAT: return "udivfixsat";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::STRICT_FP_ROUND: return "strict_fp_round";
+ case ISD::FP_EXTEND: return "fp_extend";
+ case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
+ case ISD::FP_TO_SINT_SAT: return "fp_to_sint_sat";
+ case ISD::FP_TO_UINT_SAT: return "fp_to_uint_sat";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::ADDRSPACECAST: return "addrspacecast";
+ case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
+ case ISD::FP_TO_FP16: return "fp_to_fp16";
+ case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
+ case ISD::BF16_TO_FP: return "bf16_to_fp";
+ case ISD::FP_TO_BF16: return "fp_to_bf16";
+ case ISD::LROUND: return "lround";
+ case ISD::STRICT_LROUND: return "strict_lround";
+ case ISD::LLROUND: return "llround";
+ case ISD::STRICT_LLROUND: return "strict_llround";
+ case ISD::LRINT: return "lrint";
+ case ISD::STRICT_LRINT: return "strict_lrint";
+ case ISD::LLRINT: return "llrint";
+ case ISD::STRICT_LLRINT: return "strict_llrint";
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // EH instructions
+ case ISD::CATCHRET: return "catchret";
+ case ISD::CLEANUPRET: return "cleanupret";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::MLOAD: return "masked_load";
+ case ISD::MSTORE: return "masked_store";
+ case ISD::MGATHER: return "masked_gather";
+ case ISD::MSCATTER: return "masked_scatter";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::UBSANTRAP: return "ubsantrap";
+ case ISD::LIFETIME_START: return "lifetime.start";
+ case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::PSEUDO_PROBE:
+ return "pseudoprobe";
+ case ISD::GC_TRANSITION_START: return "gc_transition.start";
+ case ISD::GC_TRANSITION_END: return "gc_transition.end";
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
+ case ISD::FREEZE: return "freeze";
+ case ISD::PREALLOCATED_SETUP:
+ return "call_setup";
+ case ISD::PREALLOCATED_ARG:
+ return "call_alloc";
+
+ // Floating point environment manipulation
+ case ISD::GET_ROUNDING: return "get_rounding";
+ case ISD::SET_ROUNDING: return "set_rounding";
+ case ISD::GET_FPENV: return "get_fpenv";
+ case ISD::SET_FPENV: return "set_fpenv";
+ case ISD::RESET_FPENV: return "reset_fpenv";
+ case ISD::GET_FPENV_MEM: return "get_fpenv_mem";
+ case ISD::SET_FPENV_MEM: return "set_fpenv_mem";
+
+ // Bit manipulation
+ case ISD::ABS: return "abs";
+ case ISD::BITREVERSE: return "bitreverse";
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+ case ISD::PARITY: return "parity";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setueq";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
+ case ISD::VECREDUCE_SEQ_FADD: return "vecreduce_seq_fadd";
+ case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
+ case ISD::VECREDUCE_SEQ_FMUL: return "vecreduce_seq_fmul";
+ case ISD::VECREDUCE_ADD: return "vecreduce_add";
+ case ISD::VECREDUCE_MUL: return "vecreduce_mul";
+ case ISD::VECREDUCE_AND: return "vecreduce_and";
+ case ISD::VECREDUCE_OR: return "vecreduce_or";
+ case ISD::VECREDUCE_XOR: return "vecreduce_xor";
+ case ISD::VECREDUCE_SMAX: return "vecreduce_smax";
+ case ISD::VECREDUCE_SMIN: return "vecreduce_smin";
+ case ISD::VECREDUCE_UMAX: return "vecreduce_umax";
+ case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
+ case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
+ case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+ case ISD::VECREDUCE_FMAXIMUM: return "vecreduce_fmaximum";
+ case ISD::VECREDUCE_FMINIMUM: return "vecreduce_fminimum";
+ case ISD::STACKMAP:
+ return "stackmap";
+ case ISD::PATCHPOINT:
+ return "patchpoint";
+
+ // Vector Predication
+#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
+ case ISD::SDID: \
+ return #NAME;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+static Printable PrintNodeId(const SDNode &Node) {
+ return Printable([&Node](raw_ostream &OS) {
+#ifndef NDEBUG
+ OS << 't' << Node.PersistentId;
+#else
+ OS << (const void*)&Node;
+#endif
+ });
+}
+
+// Print the MMO with more information from the SelectionDAG.
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const MachineFunction *MF, const Module *M,
+ const MachineFrameInfo *MFI,
+ const TargetInstrInfo *TII, LLVMContext &Ctx) {
+ ModuleSlotTracker MST(M);
+ if (MF)
+ MST.incorporateFunction(MF->getFunction());
+ SmallVector<StringRef, 0> SSNs;
+ MMO.print(OS, MST, SSNs, Ctx, MFI, TII);
+}
+
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const SelectionDAG *G) {
+ if (G) {
+ const MachineFunction *MF = &G->getMachineFunction();
+ return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
+ &MF->getFrameInfo(),
+ G->getSubtarget().getInstrInfo(), *G->getContext());
+ }
+
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+
+LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+#endif
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (getFlags().hasNoUnsignedWrap())
+ OS << " nuw";
+
+ if (getFlags().hasNoSignedWrap())
+ OS << " nsw";
+
+ if (getFlags().hasExact())
+ OS << " exact";
+
+ if (getFlags().hasNoNaNs())
+ OS << " nnan";
+
+ if (getFlags().hasNoInfs())
+ OS << " ninf";
+
+ if (getFlags().hasNoSignedZeros())
+ OS << " nsz";
+
+ if (getFlags().hasAllowReciprocal())
+ OS << " arcp";
+
+ if (getFlags().hasAllowContract())
+ OS << " contract";
+
+ if (getFlags().hasApproximateFuncs())
+ OS << " afn";
+
+ if (getFlags().hasAllowReassociation())
+ OS << " reassoc";
+
+ if (getFlags().hasNoFPExcept())
+ OS << " nofpexcept";
+
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ printMemOperand(OS, **i, G);
+ if (std::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle())
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble())
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().print(OS, false);
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ GADN->getGlobal()->printAsOperand(OS);
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << printReg(R->getReg(),
+ G ? G->getSubtarget().getRegisterInfo() : nullptr);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT();
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<";
+
+ printMemOperand(OS, *LD->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *ST->getMemOperand(), G);
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) {
+ OS << "<";
+
+ printMemOperand(OS, *MLd->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MLd->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MLd->getMemoryVT();
+
+ const char *AM = getIndexedModeName(MLd->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ if (MLd->isExpandingLoad())
+ OS << ", expanding";
+
+ OS << ">";
+ } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MSt->getMemOperand(), G);
+
+ if (MSt->isTruncatingStore())
+ OS << ", trunc to " << MSt->getMemoryVT();
+
+ const char *AM = getIndexedModeName(MSt->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ if (MSt->isCompressingStore())
+ OS << ", compressing";
+
+ OS << ">";
+ } else if (const auto *MGather = dyn_cast<MaskedGatherSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MGather->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MGather->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MGather->getMemoryVT();
+
+ auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned";
+ auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled";
+ OS << ", " << Signed << " " << Scaled << " offset";
+
+ OS << ">";
+ } else if (const auto *MScatter = dyn_cast<MaskedScatterSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MScatter->getMemOperand(), G);
+
+ if (MScatter->isTruncatingStore())
+ OS << ", trunc to " << MScatter->getMemoryVT();
+
+ auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned";
+ auto Scaled = MScatter->isIndexScaled() ? "scaled" : "unscaled";
+ OS << ", " << Signed << " " << Scaled << " offset";
+
+ OS << ">";
+ } else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *M->getMemOperand(), G);
+ OS << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
+ OS << "<";
+ BA->getBlockAddress()->getFunction()->printAsOperand(OS, false);
+ OS << ", ";
+ BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false);
+ OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const AddrSpaceCastSDNode *ASC =
+ dyn_cast<AddrSpaceCastSDNode>(this)) {
+ OS << '['
+ << ASC->getSrcAddressSpace()
+ << " -> "
+ << ASC->getDestAddressSpace()
+ << ']';
+ } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
+ if (LN->hasOffset())
+ OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
+ } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) {
+ OS << '<' << AA->getAlign().value() << '>';
+ }
+
+ if (VerboseDAGDumping) {
+ if (unsigned Order = getIROrder())
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+ if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
+ OS << " # D:" << isDivergent();
+
+ if (G && !G->GetDbgValues(this).empty()) {
+ OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']';
+ for (SDDbgValue *Dbg : G->GetDbgValues(this))
+ if (!Dbg->isInvalidated())
+ Dbg->print(OS);
+ } else if (getHasDebugValue())
+ OS << " [NoOfDbgValues>0]";
+
+ if (const auto *MD = G ? G->getPCSections(this) : nullptr) {
+ OS << " [pcsections ";
+ MD->printAsOperand(OS, G->getMachineFunction().getFunction().getParent());
+ OS << ']';
+ }
+ }
+}
+
+LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
+ OS << " DbgVal(Order=" << getOrder() << ')';
+ if (isInvalidated())
+ OS << "(Invalidated)";
+ if (isEmitted())
+ OS << "(Emitted)";
+ OS << "(";
+ bool Comma = false;
+ for (const SDDbgOperand &Op : getLocationOps()) {
+ if (Comma)
+ OS << ", ";
+ switch (Op.getKind()) {
+ case SDDbgOperand::SDNODE:
+ if (Op.getSDNode())
+ OS << "SDNODE=" << PrintNodeId(*Op.getSDNode()) << ':' << Op.getResNo();
+ else
+ OS << "SDNODE";
+ break;
+ case SDDbgOperand::CONST:
+ OS << "CONST";
+ break;
+ case SDDbgOperand::FRAMEIX:
+ OS << "FRAMEIX=" << Op.getFrameIx();
+ break;
+ case SDDbgOperand::VREG:
+ OS << "VREG=" << Op.getVReg();
+ break;
+ }
+ Comma = true;
+ }
+ OS << ")";
+ if (isIndirect()) OS << "(Indirect)";
+ if (isVariadic())
+ OS << "(Variadic)";
+ OS << ":\"" << Var->getName() << '"';
+#ifndef NDEBUG
+ if (Expr->getNumElements())
+ Expr->dump();
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDDbgValue::dump() const {
+ if (isInvalidated())
+ return;
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
+/// Return true if this node is so simple that we should just print it inline
+/// if it appears as an operand.
+static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) {
+ // Avoid a lot of clutter when inline-printing nodes with associated
+ // DbgValues in verbose mode.
+ if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty())
+ return false;
+ if (Node.getOpcode() == ISD::EntryToken)
+ return false;
+ return Node.getNumOperands() == 0;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (const SDValue &Op : N->op_values()) {
+ if (shouldPrintInline(*Op.getNode(), G))
+ continue;
+ if (Op.getNode()->hasOneUse())
+ DumpNodes(Op.getNode(), indent+2, G);
+ }
+
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+LLVM_DUMP_METHOD void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
+
+ for (const SDNode &N : allnodes()) {
+ if (!N.hasOneUse() && &N != getRoot().getNode() &&
+ (!shouldPrintInline(N, this) || N.use_empty()))
+ DumpNodes(&N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n";
+
+ if (VerboseDAGDumping) {
+ if (DbgBegin() != DbgEnd())
+ dbgs() << "SDDbgValues:\n";
+ for (auto *Dbg : make_range(DbgBegin(), DbgEnd()))
+ Dbg->dump();
+ if (ByvalParmDbgBegin() != ByvalParmDbgEnd())
+ dbgs() << "Byval SDDbgValues:\n";
+ for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd()))
+ Dbg->dump();
+ }
+ dbgs() << "\n";
+}
+#endif
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << PrintNodeId(*this) << ": ";
+ print_types(OS, G);
+ OS << " = " << getOperationName(G);
+ print_details(OS, G);
+}
+
+static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
+ const SDValue Value) {
+ if (!Value.getNode()) {
+ OS << "<null>";
+ return false;
+ }
+
+ if (shouldPrintInline(*Value.getNode(), G)) {
+ OS << Value->getOperationName(G) << ':';
+ Value->print_types(OS, G);
+ Value->print_details(OS, G);
+ return true;
+ }
+
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>;
+
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N).second) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << " ";
+
+ const SDValue Op = N->getOperand(i);
+ bool printedInline = printOperand(OS, G, Op);
+ if (printedInline)
+ once.insert(Op.getNode());
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (const SDValue &Op : N->op_values())
+ DumpNodesr(OS, Op.getNode(), indent+2, G, once);
+}
+
+LLVM_DUMP_METHOD void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, nullptr, once);
+}
+
+LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+#endif
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ for (const SDValue &Op : N->op_values()) {
+ // Don't follow chain operands.
+ if (Op.getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+#endif
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ printr(OS, G);
+ // Under VerboseDAGDumping, divergence is always printed.
+ if (isDivergent() && !VerboseDAGDumping)
+ OS << " # D:1";
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ printOperand(OS, G, getOperand(i));
+ }
+ if (DebugLoc DL = getDebugLoc()) {
+ OS << ", ";
+ DL.print(OS);
+ }
+}
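For orientation, the printing routines above compose into one line per node of the form "tN: <result types> = <operation name><details> <operands>", with operand-less nodes printed inline by printOperand. A hypothetical two-line excerpt (node ids and constant values are invented for illustration) would look like:

    t5: i32,ch = CopyFromReg t0, Register:i32 %0
    t7: i32 = add nsw t5, Constant:i32<1>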
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 000000000000..35abd990f968
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,3894 @@
+//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "isel"
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+ "Number of entry blocks where fast isel failed to lower arguments");
+
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disable the abort, 1 will "
+ "abort but for args, calls and terminators, 2 will also "
+ "abort for argument lowering, and 3 will never fallback "
+ "to SelectionDAG."));
+
+static cl::opt<bool> EnableFastISelFallbackReport(
+ "fast-isel-report-on-fallback", cl::Hidden,
+ cl::desc("Emit a diagnostic when \"fast\" instruction selection "
+ "falls back to SelectionDAG."));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<std::string>
+FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
+ cl::desc("Only display the basic block whose name "
+ "matches this for all view-*-dags options"));
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post "
+ "legalize types dag combine pass"));
+static cl::opt<bool>
+ ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false,
+ ViewDAGCombineLT = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false, ViewISelDAGs = false,
+ ViewSchedDAGs = false, ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry<RegisterScheduler::FunctionPassCtor>
+ RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler>>
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler), cl::Hidden,
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+
+ //===--------------------------------------------------------------------===//
+ /// This class is used by SelectionDAGISel to temporarily override
+ /// the optimization level on a per-function basis.
+ class OptLevelChanger {
+ SelectionDAGISel &IS;
+ CodeGenOpt::Level SavedOptLevel;
+ bool SavedFastISel;
+
+ public:
+ OptLevelChanger(SelectionDAGISel &ISel,
+ CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ SavedOptLevel = IS.OptLevel;
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == SavedOptLevel)
+ return;
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
+ << NewOptLevel << "\n");
+ if (NewOptLevel == CodeGenOpt::None) {
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
+ LLVM_DEBUG(
+ dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
+ }
+ }
+
+ ~OptLevelChanger() {
+ if (IS.OptLevel == SavedOptLevel)
+ return;
+ LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O"
+ << SavedOptLevel << "\n");
+ IS.OptLevel = SavedOptLevel;
+ IS.TM.setOptLevel(SavedOptLevel);
+ IS.TM.setFastISel(SavedFastISel);
+ }
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering *TLI = IS->TLI;
+ const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
+
+ // Try first to see if the Target has its own way of selecting a scheduler
+ if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
+ return SchedulerCtor(IS, OptLevel);
+ }
+
+ if (OptLevel == CodeGenOpt::None ||
+ (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
+ TLI->getSchedulingPreference() == Sched::Source)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Fast)
+ return createFastDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Linearize)
+ return createDAGLinearizer(IS, OptLevel);
+ assert(TLI->getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
+
+} // end namespace llvm
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method must keep the successor lists and any PHI nodes in
+// the successors up to date.
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!\n";
+#endif
+ llvm_unreachable(nullptr);
+}
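Because the default implementation above is only a diagnostic stub, a hedged sketch of what a target override of this hook commonly looks like may help. MyTargetLowering, MyTarget::SELECT_PSEUDO and emitSelectPseudo are hypothetical names used for illustration; they are not part of this patch.

    // Illustrative override for a hypothetical target: expand a pseudo that was
    // marked 'usesCustomInserter' into real instructions, possibly creating new
    // blocks and rewiring MBB's successors and PHIs.
    MachineBasicBlock *
    MyTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *MBB) const {
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("unexpected instruction with custom inserter");
      case MyTarget::SELECT_PSEUDO:
        // Typically builds a small diamond of blocks, emits the real compare
        // and branch, inserts a PHI for the result, then erases the pseudo.
        return emitSelectPseudo(MI, MBB); // hypothetical helper
      }
    }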
+
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+ assert(!MI.hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm,
+ CodeGenOpt::Level OL)
+ : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
+ SwiftError(new SwiftErrorValueTracking()),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
+ OL)),
+ OptLevel(OL) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete CurDAG;
+ delete SwiftError;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addRequired<StackProtector>();
+ AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
+ // the module.
+ AU.addRequired<AssignmentTrackingAnalysis>();
+ AU.addPreserved<AssignmentTrackingAnalysis>();
+ if (OptLevel != CodeGenOpt::None)
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
+ MachineModuleInfo &MMI) {
+ // Only needed for MSVC
+ if (!TT.isWindowsMSVCEnvironment())
+ return;
+
+ // If it's already set, nothing to do.
+ if (MMI.usesMSVCFloatingPoint())
+ return;
+
+ for (const Instruction &I : instructions(F)) {
+ if (I.getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ for (const auto &Op : I.operands()) {
+ if (Op->getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ }
+ }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // If we already selected that function, we do not need to run SDISel.
+ if (mf.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
+ "-fast-isel-abort > 0 requires -fast-isel");
+
+ const Function &Fn = mf.getFunction();
+ MF = &mf;
+
+ // Decide what flavour of variable location debug-info will be used, before
+ // we change the optimisation level.
+ bool InstrRef = mf.shouldUseDebugInstrRef();
+ mf.setUseDebugInstrRef(InstrRef);
+
+ // Reset the target options before resetting the optimization
+ // level below.
+ // FIXME: This is a horrible hack and should be processed via
+ // codegen looking at the optimization level explicitly when
+ // it wants to look at it.
+ TM.resetTargetOptions(Fn);
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOpt::Level NewOptLevel = OptLevel;
+ if (OptLevel != CodeGenOpt::None && skipFunction(Fn))
+ NewOptLevel = CodeGenOpt::None;
+ OptLevelChanger OLC(*this, NewOptLevel);
+
+ TII = MF->getSubtarget().getInstrInfo();
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction());
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BlockFrequencyInfo *BFI = nullptr;
+ if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
+ BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+
+ FunctionVarLocs const *FnVarLocs = nullptr;
+ if (isAssignmentTrackingEnabled(*Fn.getParent()))
+ FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults();
+
+ LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ UniformityInfo *UA = nullptr;
+ if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>())
+ UA = &UAPass->getUniformityInfo();
+ CurDAG->init(*MF, *ORE, this, LibInfo, UA, PSI, BFI, FnVarLocs);
+ FuncInfo->set(Fn, *MF, CurDAG);
+ SwiftError->setFunction(*MF);
+
+ // Now get the optional analyses if we want to.
+ // This is based on the possibly changed OptLevel (after optnone is taken
+ // into account). That's unfortunate but OK because it just means we won't
+ // ask for passes that have been required anyway.
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ else
+ FuncInfo->BPI = nullptr;
+
+ if (OptLevel != CodeGenOpt::None)
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ else
+ AA = nullptr;
+
+ SDB->init(GFI, AA, AC, LibInfo);
+
+ MF->setHasInlineAsm(false);
+
+ FuncInfo->SplitCSR = false;
+
+ // We split CSR if the target supports it for the given function
+ // and the function has only return exits.
+ if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
+ FuncInfo->SplitCSR = true;
+
+ // Collect all the return blocks.
+ for (const BasicBlock &BB : Fn) {
+ if (!succ_empty(&BB))
+ continue;
+
+ const Instruction *Term = BB.getTerminator();
+ if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
+ continue;
+
+ // Bail out if the exit block is neither a Return nor an Unreachable.
+ FuncInfo->SplitCSR = false;
+ break;
+ }
+ }
+
+ MachineBasicBlock *EntryMBB = &MF->front();
+ if (FuncInfo->SplitCSR)
+ // This performs initialization so lowering for SplitCSR will be correct.
+ TLI->initializeSplitCSR(EntryMBB);
+
+ SelectAllBasicBlocks(Fn);
+ if (FastISelFailed && EnableFastISelFallbackReport) {
+ DiagnosticInfoISelFallback DiagFallback(Fn);
+ Fn.getContext().diagnose(DiagFallback);
+ }
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ // Note: it is important that this happens **before** the call to
+ // EmitLiveInCopies, since implementations can skip copies of unused
+ // registers. If we don't apply the reg fixups before, some registers may
+ // appear as unused and will be skipped, resulting in bad MI.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<Register, Register>::iterator I = FuncInfo->RegFixups.begin(),
+ E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ Register From = I->first;
+ Register To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ while (true) {
+ DenseMap<Register, Register>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E)
+ break;
+ To = J->second;
+ }
+ // Make sure the new register has a sufficiently constrained register class.
+ if (From.isVirtual() && To.isVirtual())
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
+ // Replace it.
+
+ // Replacing one register with another won't touch the kill flags.
+ // We need to conservatively clear the kill flags as a kill on the old
+ // register might dominate existing uses of the new register.
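+ // For instance (hypothetical): if From carried a kill flag at some
+ // instruction I but To is still read after I, keeping that flag after the
+ // replacement would wrongly end the merged register's live range.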
+ if (!MRI.use_empty(To))
+ MRI.clearKillFlags(From);
+ MRI.replaceRegWith(From, To);
+ }
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
+
+ // Insert copies in the entry block and the return blocks.
+ if (FuncInfo->SplitCSR) {
+ SmallVector<MachineBasicBlock*, 4> Returns;
+ // Collect all the return blocks.
+ for (MachineBasicBlock &MBB : mf) {
+ if (!MBB.succ_empty())
+ continue;
+
+ MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && Term->isReturn()) {
+ Returns.push_back(&MBB);
+ continue;
+ }
+ }
+ TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+ }
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (std::pair<unsigned, unsigned> LI : RegInfo->liveins())
+ if (LI.second)
+ LiveInMap.insert(LI);
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
+ assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
+ "Function parameters should not be described by DBG_VALUE_LIST.");
+ bool hasFI = MI->getDebugOperand(0).isFI();
+ Register Reg =
+ hasFI ? TRI.getFrameRegister(*MF) : MI->getDebugOperand(0).getReg();
+ if (Reg.isPhysical())
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ if (Def) {
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(std::next(InsertPos), MI);
+ } else
+ LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
+ << Register::virtReg2Index(Reg) << "\n");
+ }
+
+ // Don't try and extend through copies in instruction referencing mode.
+ if (InstrRef)
+ continue;
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ assert(!hasFI && "There's no handling of frame pointer updating here yet "
+ "- add if needed");
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
+ bool IsIndirect = MI->isIndirectDebugValue();
+ if (IsIndirect)
+ assert(MI->getDebugOffset().getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
+ "Didn't expect to see a DBG_VALUE_LIST here");
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, LDI->second, Variable, Expr);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instruction also needs a DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = nullptr;
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = RegInfo->use_instr_begin(LDI->second),
+ E = RegInfo->use_instr_end(); UI != E; ) {
+ MachineInstr *UseMI = &*(UI++);
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = nullptr; break;
+ }
+ if (CopyUseMI &&
+ TRI.getRegSizeInBits(LDI->second, MRI) ==
+ TRI.getRegSizeInBits(CopyUseMI->getOperand(0).getReg(), MRI)) {
+ // Use MI's debug location, which describes where Variable was
+ // declared, rather than whatever is attached to CopyUseMI.
+ MachineInstr *NewMI =
+ BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ CopyUseMI->getOperand(0).getReg(), Variable, Expr);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
+ }
+ }
+ }
+
+ // For debug-info, in instruction referencing mode, we need to perform some
+ // post-isel maintenance.
+ if (MF->useDebugInstrRef())
+ MF->finalizeDebugInstrRefs();
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ for (const auto &MBB : *MF) {
+ if (MFI.hasCalls() && MF->hasInlineAsm())
+ break;
+
+ for (const auto &MI : MBB) {
+ const MCInstrDesc &MCID = TII->get(MI.getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ MI.isStackAligningInlineAsm()) {
+ MFI.setHasCalls(true);
+ }
+ if (MI.isInlineAsm()) {
+ MF->setHasInlineAsm(true);
+ }
+ }
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+
+ // Determine if floating point is used for msvc
+ computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ LLVM_DEBUG(MF->print(dbgs()));
+
+ return true;
+}
+
+static void reportFastISelFailure(MachineFunction &MF,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R,
+ bool ShouldAbort) {
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || ShouldAbort)
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (ShouldAbort)
+ report_fatal_error(Twine(R.getMsg()));
+
+ ORE.emit(R);
+ LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Allow creating illegal types during DAG building for the basic block.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+ if (!ElidedArgCopyInstrs.count(&*I))
+ SDB->visit(*I);
+ }
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->resolveOrClearDbgInfo();
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode *, 16> Added;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+ Added.insert(CurDAG->getRoot().getNode());
+
+ KnownBits Known;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // Add all chain operands to the worklist.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second)
+ Worklist.push_back(Op.getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!Register::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Known = CurDAG->computeKnownBits(Src);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
+ } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ StringRef GroupName = "sdag";
+ StringRef GroupDescription = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
+#endif
+
+ // Pre-type legalization allows creation of any node types.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
+#ifndef NDEBUG
+ MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
+ FilterDAGBasicBlockName ==
+ FuncInfo->MBB->getBasicBlock()->getName());
+#endif
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT ||
+ ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockName =
+ (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
+ }
+ LLVM_DEBUG(dbgs() << "Initial selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ if (ViewDAGCombine1 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
+ }
+
+ LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize-types input for " + BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ // Only allow creation of legal node types.
+ CurDAG->NewNodesMustHaveLegalTypes = true;
+
+ if (Changed) {
+ if (ViewDAGCombineLT && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
+ GroupName, GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
+ }
+
+ LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+ }
+
+ {
+ NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ {
+ NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ if (ViewDAGCombineLT && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
+ GroupName, GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
+ }
+
+ LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+ }
+
+ if (ViewLegalizeDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("legalize", "DAG Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+ LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ if (ViewDAGCombine2 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
+ }
+
+ LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDivergence();
+#endif
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs && MatchFilterBB)
+ CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("isel", "Instruction Selection", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ LLVM_DEBUG(dbgs() << "Selected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (ViewSchedDAGs && MatchFilterBB)
+ CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
+ }
+
+ if (ViewSUnitDAGs && MatchFilterBB)
+ Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("emit", "Instruction Creation", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
+
+namespace {
+
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+
+ /// NodeInserted - Handle new nodes inserted into the graph: propagate
+ /// metadata from root nodes that also applies to new nodes, in case the root
+ /// is later deleted.
+ void NodeInserted(SDNode *N) override {
+ SDNode *CurNode = &*ISelPosition;
+ if (MDNode *MD = DAG.getPCSections(CurNode))
+ DAG.addPCSections(N, MD);
+ }
+};
+
+} // end anonymous namespace
+
+// This function is used to enforce the topological node id property
+// leveraged during instruction selection. Before the selection process all
+// nodes are given a non-negative id such that all nodes have a greater id than
+// their operands. As this holds transitively we can prune checks that a node N
+// is a predecessor of M another by not recursively checking through M's
+// operands if N's ID is larger than M's ID. This significantly improves
+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during the
+// selection we may induce a predecessor relationship between inputs and
+// outputs of distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected,
+// our legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>
+// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.
+// We use bit-negation to more clearly enforce that node id -1 can only be
+// achieved by selected nodes. As the conversion is reversible to the original
+// Id, topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally in all ISel replacement related
+// functions.
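+// As a small worked example of the encoding: a node with id 7 that becomes a
+// successor of a fused node is stored as -(7 + 1) = -8; being < -1 it cannot
+// be mistaken for a selected node (id == -1), and the original id is recovered
+// as -(-8 + 1) = 7 (see InvalidateNodeId and getUninvalidatedNodeId below).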
+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ SDNode *N = Nodes.pop_back_val();
+ for (auto *U : N->uses()) {
+ auto UId = U->getNodeId();
+ if (UId > 0) {
+ InvalidateNodeId(U);
+ Nodes.push_back(U);
+ }
+ }
+ }
+}
+
+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
+// NodeId with the equivalent node id which is invalid for topological
+// pruning.
+void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
+ int InvalidId = -(N->getNodeId() + 1);
+ N->setNodeId(InvalidId);
+}
+
+// getUninvalidatedNodeId - get original uninvalidated node id.
+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
+ int Id = N->getNodeId();
+ if (Id < -1)
+ return -(Id + 1);
+ return Id;
+}
+
+void SelectionDAGISel::DoInstructionSelection() {
+ LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
+ << printMBBReference(*FuncInfo->MBB) << " '"
+ << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function. New nodes inherit relevant metadata.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
+ // The AllNodes list is now topological-sorted. Visit the
+ // nodes by starting at the end of the list (the root of the
+ // graph) and proceeding back toward the beginning (the entry
+ // node).
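+ // Selecting users before their operands is what allows operand folding:
+ // e.g. a load feeding an add is still unselected when the add is visited,
+ // so the add's pattern may fold the load into a single instruction.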
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = &*--ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+#ifndef NDEBUG
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ auto N = Nodes.pop_back_val();
+ if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
+ continue;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op->getOpcode() == ISD::TokenFactor)
+ Nodes.push_back(Op.getNode());
+ else {
+ // We rely on topological ordering of node ids for checking for
+ // cycles when fusing nodes during selection. All unselected
+ // successors of an already selected node should have a negative id.
+ // This assertion will catch such cases. If this assertion triggers,
+ // it is likely you are using DAG-level Value/Node replacement functions
+ // (versus equivalent ISEL replacement) in backend-specific
+ // selections. See comment in EnforceNodeIdInvariant for more
+ // details.
+ assert(Op->getNodeId() != -1 &&
+ "Node has already selected predecessor node");
+ }
+ }
+ }
+#endif
+
+ // When we are using non-default rounding modes or FP exception behavior
+ // FP operations are represented by StrictFP pseudo-operations. For
+ // targets that do not (yet) understand strict FP operations directly,
+ // we convert them to normal FP opcodes instead at this point. This
+ // will allow them to be handled by existing target-specific instruction
+ // selectors.
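+ // For example (illustrative): if a target reports ISD::STRICT_FADD as
+ // Expand for the relevant type, the node is mutated below into a plain
+ // ISD::FADD so that the existing FADD patterns can select it.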
+ if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) {
+ // For some opcodes, we need to call TLI->getOperationAction using
+ // the first operand type instead of the result type. Note that this
+ // must match what SelectionDAGLegalize::LegalizeOp is doing.
+ EVT ActionVT;
+ switch (Node->getOpcode()) {
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ ActionVT = Node->getOperand(1).getValueType();
+ break;
+ default:
+ ActionVT = Node->getValueType(0);
+ break;
+ }
+ if (TLI->getOperationAction(Node->getOpcode(), ActionVT)
+ == TargetLowering::Expand)
+ Node = CurDAG->mutateStrictFPToFP(Node);
+ }
+
+ LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
+ Node->dump(CurDAG));
+
+ Select(Node);
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
+ for (const User *U : CPI->users()) {
+ if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
+ if (IID == Intrinsic::eh_exceptionpointer ||
+ IID == Intrinsic::eh_exceptioncode)
+ return true;
+ }
+ }
+ return false;
+}
+
+// The wasm.landingpad.index intrinsic associates a landing pad index number
+// with a catchpad instruction. Retrieve the landing pad index from the
+// intrinsic and store the mapping in the function.
+static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
+ const CatchPadInst *CPI) {
+ MachineFunction *MF = MBB->getParent();
+ // In case of single catch (...), we don't emit LSDA, so we don't need
+ // this information.
+ bool IsSingleCatchAllClause =
+ CPI->arg_size() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue();
+ // Catchpads for longjmp use an empty type list, e.g. catchpad within %0 [],
+ // and they don't need LSDA info.
+ bool IsCatchLongjmp = CPI->arg_size() == 0;
+ if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
+ // Create a mapping from landing pad label to landing pad index.
+ bool IntrFound = false;
+ for (const User *U : CPI->users()) {
+ if (const auto *Call = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = Call->getIntrinsicID();
+ if (IID == Intrinsic::wasm_landingpad_index) {
+ Value *IndexArg = Call->getArgOperand(1);
+ int Index = cast<ConstantInt>(IndexArg)->getZExtValue();
+ MF->setWasmLandingPadIndex(MBB, Index);
+ IntrFound = true;
+ break;
+ }
+ }
+ }
+ assert(IntrFound && "wasm.landingpad.index intrinsic not found!");
+ (void)IntrFound;
+ }
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+bool SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+ const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const TargetRegisterClass *PtrRC =
+ TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+
+ auto Pers = classifyEHPersonality(PersonalityFn);
+
+ // Catchpads have one live-in register, which typically holds the exception
+ // pointer or code.
+ if (isFuncletEHPersonality(Pers)) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
+ }
+ return true;
+ }
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->addLandingPad(MBB);
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
+ if (Pers == EHPersonality::Wasm_CXX) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
+ mapWasmLandingPadIndex(MBB, CPI);
+ } else {
+ // Assign the call site to the landing pad's begin label.
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ // Mark exception register as live in.
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
+ FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ // Mark exception selector register as live in.
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
+ FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ }
+
+ return true;
+}
+
+// Mark and Report IPToState for each Block under IsEHa
+void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ llvm::WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo();
+ if (!EHInfo)
+ return;
+ for (auto MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ const BasicBlock *BB = MBB->getBasicBlock();
+ int State = EHInfo->BlockToStateMap[BB];
+ if (BB->getFirstMayFaultInst()) {
+ // Report the IP range only for blocks containing a may-fault instruction.
+ auto MBBb = MBB->getFirstNonPHI();
+ MachineInstr *MIb = &*MBBb;
+ if (MIb->isTerminator())
+ continue;
+
+ // Insert EH Labels
+ MCSymbol *BeginLabel = MMI.getContext().createTempSymbol();
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ EHInfo->addIPToStateRange(State, BeginLabel, EndLabel);
+ BuildMI(*MBB, MBBb, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL))
+ .addSym(BeginLabel);
+ auto MBBe = MBB->instr_end();
+ MachineInstr *MIe = &*(--MBBe);
+ // Insert before the (possibly multiple) terminators.
+ while (MIe->isTerminator())
+ MIe = &*(--MBBe);
+ ++MBBe;
+ BuildMI(*MBB, MBBe, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL))
+ .addSym(EndLabel);
+ }
+ }
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ const FunctionLoweringInfo &FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !I->isTerminator() && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
+ !FuncInfo.isExportedInst(I); // Exported instrs must be computed.
+}
+
+static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo,
+ const Value *Arg, DIExpression *Expr,
+ DILocalVariable *Var,
+ DebugLoc DbgLoc) {
+ if (!Expr->isEntryValue() || !isa<Argument>(Arg))
+ return false;
+
+ auto ArgIt = FuncInfo.ValueMap.find(Arg);
+ if (ArgIt == FuncInfo.ValueMap.end())
+ return false;
+ Register ArgVReg = ArgIt->getSecond();
+
+ // Find the corresponding livein physical register to this argument.
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (VirtReg == ArgVReg) {
+ FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc);
+ LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
+ << ", Expr=" << *Expr << ", MCRegister=" << PhysReg
+ << ", DbgLoc=" << DbgLoc << "\n");
+ return true;
+ }
+ return false;
+}
+
+static bool processDbgDeclare(FunctionLoweringInfo &FuncInfo,
+ const Value *Address, DIExpression *Expr,
+ DILocalVariable *Var, DebugLoc DbgLoc) {
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *Var
+ << " (bad address)\n");
+ return false;
+ }
+
+ if (processIfEntryValueDbgDeclare(FuncInfo, Address, Expr, Var, DbgLoc))
+ return true;
+
+ MachineFunction *MF = FuncInfo.MF;
+ const DataLayout &DL = MF->getDataLayout();
+
+ assert(Var && "Missing variable");
+ assert(DbgLoc && "Missing location");
+
+ // Look through casts and constant offset GEPs. These mostly come from
+ // inalloca.
+ APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
+ Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // Check if the variable is a static alloca or a byval or inalloca
+ // argument passed in memory. If it is not, then we will ignore this
+ // intrinsic and handle this during isel like dbg.value.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ FI = SI->second;
+ } else if (const auto *Arg = dyn_cast<Argument>(Address))
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
+
+ if (FI == std::numeric_limits<int>::max())
+ return false;
+
+ if (Offset.getBoolValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
+ Offset.getZExtValue());
+
+ LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
+ << ", Expr=" << *Expr << ", FI=" << FI
+ << ", DbgLoc=" << DbgLoc << "\n");
+ MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc);
+ return true;
+}
+
+/// Collect llvm.dbg.declare information. This is done after argument lowering
+/// in case the declarations refer to arguments.
+static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
+ for (const auto &I : instructions(*FuncInfo.Fn)) {
+ const auto *DI = dyn_cast<DbgDeclareInst>(&I);
+ if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
+ DI->getVariable(), DI->getDebugLoc()))
+ FuncInfo.PreprocessedDbgDeclares.insert(DI);
+ }
+}
+
+/// Collect single location variable information generated with assignment
+/// tracking. This is done after argument lowering in case the declarations
+/// refer to arguments.
+static void processSingleLocVars(FunctionLoweringInfo &FuncInfo,
+ FunctionVarLocs const *FnVarLocs) {
+ for (auto It = FnVarLocs->single_locs_begin(),
+ End = FnVarLocs->single_locs_end();
+ It != End; ++It) {
+ assert(!It->Values.hasArgList() && "Single loc variadic ops not supported");
+ processDbgDeclare(FuncInfo, It->Values.getVariableLocationOp(0), It->Expr,
+ FnVarLocs->getDILocalVariable(It->VariableID), It->DL);
+ }
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ FastISelFailed = false;
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = nullptr;
+ if (TM.Options.EnableFastISel) {
+ LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
+ FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+ }
+
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+
+ // Lower arguments up front. An RPO iteration always visits the entry block
+ // first.
+ assert(*RPOT.begin() == &Fn.getEntryBlock());
+ ++NumEntryBlocks;
+
+ // Set up FuncInfo for ISel. Entry blocks never have PHIs.
+ FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
+ FuncInfo->InsertPt = FuncInfo->MBB->begin();
+
+ CurDAG->setFunctionLoweringInfo(FuncInfo.get());
+
+ if (!FastIS) {
+ LowerArguments(Fn);
+ } else {
+ // See if fast isel can lower the arguments.
+ FastIS->startNewBlock();
+ if (!FastIS->lowerArguments()) {
+ FastISelFailed = true;
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Fn.getSubprogram(),
+ &Fn.getEntryBlock());
+ R << "FastISel didn't lower all arguments: "
+ << ore::NV("Prototype", Fn.getFunctionType());
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(nullptr);
+ }
+
+ bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
+
+ if (FastIS && Inserted)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+
+ if (isAssignmentTrackingEnabled(*Fn.getParent())) {
+ assert(CurDAG->getFunctionVarLocs() &&
+ "expected AssignmentTrackingAnalysis pass results");
+ processSingleLocVars(*FuncInfo, CurDAG->getFunctionVarLocs());
+ } else {
+ processDbgDeclares(*FuncInfo);
+ }
+
+ // Iterate over all basic blocks in the function.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ for (const BasicBlock *LLVMBB : RPOT) {
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const BasicBlock *Pred : predecessors(LLVMBB)) {
+ if (!FuncInfo->VisitedBBs.count(Pred)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->ComputePHILiveOutRegInfo(&PN);
+ } else {
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ BasicBlock::const_iterator const Begin =
+ LLVMBB->getFirstNonPHI()->getIterator();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ if (!FuncInfo->MBB)
+ continue; // Some blocks like catchpads have no code or MBB.
+
+ // Insert new instructions after any phi or argument setup code.
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Setup an EH landing-pad block.
+ FuncInfo->ExceptionPointerVirtReg = 0;
+ FuncInfo->ExceptionSelectorVirtReg = 0;
+ if (LLVMBB->isEHPad())
+ if (!PrepareEHLandingPad())
+ continue;
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ if (LLVMBB != &Fn.getEntryBlock())
+ FastIS->startNewBlock();
+
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
+
+ // Pre-assign swifterror vregs.
+ SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
+
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = &*std::prev(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, *FuncInfo) ||
+ ElidedArgCopyInstrs.count(Inst)) {
+ --NumFastIselRemaining;
+ continue;
+ }
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->selectInstruction(Inst)) {
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != &*Begin) {
+ BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
+ // If we succeeded, don't re-select the load.
+ LLVM_DEBUG(dbgs()
+ << "FastISel folded load: " << *BeforeInst << "\n");
+ BI = std::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
+ continue;
+ }
+
+ FastISelFailed = true;
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ // We cannot separate out GCrelocates to their own blocks since we need
+ // to keep track of gc-relocates for a particular gc-statepoint. This is
+ // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
+ // visitGCRelocate.
+ if (isa<CallInst>(Inst) && !isa<GCStatepointInst>(Inst) &&
+ !isa<GCRelocateInst>(Inst) && !isa<GCResultInst>(Inst)) {
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ R << "FastISel missed call";
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+
+ R << ": " << InstStr.str();
+ }
+
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
+
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
+ !Inst->use_empty()) {
+ Register &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst);
+ }
+
+ bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
+ SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
+ if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
+ --BI;
+ break;
+ }
+
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+ NumFastIselRemaining = RemainingNow;
+ continue;
+ }
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ bool ShouldAbort = EnableFastISelAbort;
+ if (Inst->isTerminator()) {
+ // Use a different message for terminator misses.
+ R << "FastISel missed terminator";
+ // Don't abort for terminator unless the level is really high
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
+ R << "FastISel missed";
+ }
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+ R << ": " << InstStr.str();
+ }
+
+ reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
+
+ NumFastIselFailures += NumFastIselRemaining;
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ }
+
+ if (SP.shouldEmitSDCheck(*LLVMBB)) {
+ bool FunctionBasedInstrumentation =
+ TLI->getSSPStackGuardCheck(*Fn.getParent());
+ SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
+ FunctionBasedInstrumentation);
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+
+ // But if FastISel was run, we already selected some of the block.
+ // If we emitted a tail-call, we need to delete any previously emitted
+ // instruction that follows it.
+ if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
+ }
+
+ if (FastIS)
+ FastIS->finishBasicBlock();
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ ElidedArgCopyInstrs.clear();
+ }
+
+ // AsynchEH: Report Block State under -AsynchEH
+ if (Fn.getParent()->getModuleFlag("eh-asynch"))
+ reportIPToStateForBlocks(MF);
+
+ SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
+ SwiftError->propagateVRegs();
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+ SDB->SPDescriptor.resetPerFunctionState();
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+ LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e;
+ ++i) dbgs()
+ << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know which MBB the LLVM BB expanded into last, update
+ // PHI nodes in successors.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+
+ // Handle stack protector.
+ if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ // The target provides a guard check function. There is no need to
+ // generate error handling code or to split the current basic block.
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+ // Add load and check to the basicblock.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt =
+ findSplitPointForStackProtector(ParentMBB, *TII);
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ } else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This prevents us needing to deal with Live-ins and many other
+ // register allocation issues caused by us splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint =
+ findSplitPointForStackProtector(ParentMBB, *TII);
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
+ SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt = ParentMBB->end();
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ FuncInfo->MBB = FailureMBB;
+ FuncInfo->InsertPt = FailureMBB->end();
+ SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ }
+
+ // Lower each BitTestBlock.
+ for (auto &BTB : SDB->SL->BitTestCases) {
+ // Lower header first, if it wasn't already lowered
+ if (!BTB.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = BTB.Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(BTB, FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = BTB.Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
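+ // Illustrative example: if contiguous cases were lowered to two bit tests,
+ // the header's range check already guarantees one of them matches, so when
+ // the first test fails the second must succeed; the final test is dropped
+ // and its predecessor falls through to that test's target instead.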
+
+ MachineBasicBlock *NextMBB;
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range or omitted range
+ // check: fall through to the target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j],
+ FuncInfo->MBB);
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+
+ // Update PHI Nodes
+ for (const std::pair<MachineInstr *, unsigned> &P :
+ FuncInfo->PHINodesToUpdate) {
+ MachineInstrBuilder PHI(*MF, P.first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB, unless the latter was skipped.
+ if (PHIBB == BTB.Default) {
+ PHI.addReg(P.second).addMBB(BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ PHI.addReg(P.second).addMBB(BTB.Cases.back().ThisBB);
+ }
+ }
+ // One of "cases" BB.
+ for (const SwitchCG::BitTestCase &BT : BTB.Cases) {
+ MachineBasicBlock* cBB = BT.ThisBB;
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(P.second).addMBB(cBB);
+ }
+ }
+ }
+ SDB->SL->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB
+ for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->SL->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second,
+ SDB->SL->JTCases[i].first, FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->SL->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->SL->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->SL->JTCases[i].first.HeaderBB);
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
+ }
+ }
+ SDB->SL->JTCases.clear();
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SL->SwitchCases[i].TrueBB);
+ if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SL->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SL->SwitchCases.clear();
+}
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ return ISHeuristic(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
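+/// For example, if the pattern asks for (and X, 255) but the combiner shrank
+/// the constant to (and X, 15) because bits 4-7 of X are already known zero,
+/// the narrower mask still matches: the bits missing from the actual mask are
+/// known-zero inputs anyway.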
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual AND mask allows bits that the desired mask does not,
+ // this doesn't match.
+ if (!ActualMask.isSubsetOf(DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
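+/// For example, if the pattern asks for (or X, 255) but the combiner shrank
+/// the constant to (or X, 240) because bits 0-3 of X are already known one,
+/// the narrower mask still matches: the bits missing from the actual mask are
+/// known-one inputs anyway.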
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual OR mask sets bits that the desired mask does not,
+ // this doesn't match.
+ if (!ActualMask.isSubsetOf(DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already one or is not demanded. Check for known one input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ KnownBits Known = CurDAG->computeKnownBits(LHS);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if (NeededMask.isSubsetOf(Known.One))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
+ const SDLoc &DL) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::isMemKind(Flags)
+ ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size())
+ : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size());
+ NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
+ llvm::append_range(Ops, SelOps);
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
+/// SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return nullptr;
+}
+
+ /// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
+ /// that does not go through "ImmedUse". We may ignore chains as they are
+ /// checked separately.
+static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
+ bool IgnoreChains) {
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 16> WorkList;
+ // Only check if we have non-immediate uses of Def.
+ if (ImmedUse->isOnlyUserOf(Def))
+ return false;
+
+ // We don't care about paths to Def that go through ImmedUse so mark it
+ // visited and mark non-def operands as used.
+ Visited.insert(ImmedUse);
+ for (const SDValue &Op : ImmedUse->op_values()) {
+ SDNode *N = Op.getNode();
+ // Ignore chain deps (they are validated by
+ // HandleMergeInputChains) and immediate uses
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+ continue;
+ if (!Visited.insert(N).second)
+ continue;
+ WorkList.push_back(N);
+ }
+
+ // Initialize worklist to operands of Root.
+ if (Root != ImmedUse) {
+ for (const SDValue &Op : Root->op_values()) {
+ SDNode *N = Op.getNode();
+ // Ignore chains (they are validated by HandleMergeInputChains)
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+ continue;
+ if (!Visited.insert(N).second)
+ continue;
+ WorkList.push_back(N);
+ }
+ }
+
+ return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ // If Root can somehow reach N through a path that doesn't contain U, then
+ // folding N would create a cycle. For example, in the following
+ // diagram, Root can reach N through X. If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+ // glued set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (!GU)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, HandleMergeInputChains will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+ return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
+}
+
+void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ SDLoc DL(N);
+
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops, DL);
+
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
+ SDValue New = CurDAG->getNode(N->getOpcode(), DL, VTs, Ops);
+ New->setNodeId(-1);
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
+}
+
+void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
+
+ EVT VT = Op->getValueType(0);
+ LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
+ Register Reg =
+ TLI->getRegisterByName(RegStr->getString().data(), Ty,
+ CurDAG->getMachineFunction());
+ SDValue New = CurDAG->getCopyFromReg(
+ Op->getOperand(0), dl, Reg, Op->getValueType(0));
+ New->setNodeId(-1);
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
+}
+
+void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
+
+ EVT VT = Op->getOperand(2).getValueType();
+ LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
+
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty,
+ CurDAG->getMachineFunction());
+ SDValue New = CurDAG->getCopyToReg(
+ Op->getOperand(0), dl, Reg, Op->getOperand(2));
+ New->setNodeId(-1);
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
+}
+
+void SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
+}
+
+void SelectionDAGISel::Select_FREEZE(SDNode *N) {
+ // TODO: There is no FREEZE pseudo-instruction at the MachineInstr level yet.
+ // If one is added later, the code below must be changed as well.
+ CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0),
+ N->getOperand(0));
+}
+
+void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0),
+ N->getOperand(0));
+}
+
+void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::MEMBARRIER, N->getValueType(0),
+ N->getOperand(0));
+}
+
+void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
+ SDValue OpVal, SDLoc DL) {
+ SDNode *OpNode = OpVal.getNode();
+
+ // FrameIndex nodes should have been directly emitted to TargetFrameIndex
+ // nodes at DAG-construction time.
+ assert(OpNode->getOpcode() != ISD::FrameIndex);
+
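+ // Constants are emitted as a StackMaps::ConstantOp marker followed by the
+ // constant value rather than being passed through as a plain operand.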
+ if (OpNode->getOpcode() == ISD::Constant) {
+ Ops.push_back(
+ CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ Ops.push_back(
+ CurDAG->getTargetConstant(cast<ConstantSDNode>(OpNode)->getZExtValue(),
+ DL, OpVal.getValueType()));
+ } else {
+ Ops.push_back(OpVal);
+ }
+}
+
+void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
+ SmallVector<SDValue, 32> Ops;
+ auto *It = N->op_begin();
+ SDLoc DL(N);
+
+ // Stash the chain and glue operands so we can move them to the end.
+ SDValue Chain = *It++;
+ SDValue InGlue = *It++;
+
+ // <id> operand.
+ SDValue ID = *It++;
+ assert(ID.getValueType() == MVT::i64);
+ Ops.push_back(ID);
+
+ // <numShadowBytes> operand.
+ SDValue Shad = *It++;
+ assert(Shad.getValueType() == MVT::i32);
+ Ops.push_back(Shad);
+
+ // Live variable operands.
+ for (; It != N->op_end(); It++)
+ pushStackMapLiveVariable(Ops, *It, DL);
+
+ Ops.push_back(Chain);
+ Ops.push_back(InGlue);
+
+ SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
+}
+
+void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
+ SmallVector<SDValue, 32> Ops;
+ auto *It = N->op_begin();
+ SDLoc DL(N);
+
+ // Cache arguments that will be moved to the end in the target node.
+ SDValue Chain = *It++;
+ std::optional<SDValue> Glue;
+ if (It->getValueType() == MVT::Glue)
+ Glue = *It++;
+ SDValue RegMask = *It++;
+
+ // <id> operand.
+ SDValue ID = *It++;
+ assert(ID.getValueType() == MVT::i64);
+ Ops.push_back(ID);
+
+ // <numShadowBytes> operand.
+ SDValue Shad = *It++;
+ assert(Shad.getValueType() == MVT::i32);
+ Ops.push_back(Shad);
+
+ // Add the callee.
+ Ops.push_back(*It++);
+
+ // Add <numArgs>.
+ SDValue NumArgs = *It++;
+ assert(NumArgs.getValueType() == MVT::i32);
+ Ops.push_back(NumArgs);
+
+ // Calling convention.
+ Ops.push_back(*It++);
+
+ // Push the args for the call.
+ for (uint64_t I = cast<ConstantSDNode>(NumArgs)->getZExtValue(); I != 0; I--)
+ Ops.push_back(*It++);
+
+ // Now push the live variables.
+ for (; It != N->op_end(); It++)
+ pushStackMapLiveVariable(Ops, *It, DL);
+
+ // Finally, the regmask, chain and (if present) glue are moved to the end.
+ Ops.push_back(RegMask);
+ Ops.push_back(Chain);
+ if (Glue.has_value())
+ Ops.push_back(*Glue);
+
+ SDVTList NodeTys = N->getVTList();
+ CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops);
+}
+
+ /// GetVBR - Decode a VBR-encoded value whose top bit is set.
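+ /// Each byte supplies seven payload bits; a set high bit means another byte
+ /// follows.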
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+/// When a match is complete, this method updates uses of interior chain results
+/// to use the new results.
+void SelectionDAGISel::UpdateChains(
+ SDNode *NodeToMatch, SDValue InputChain,
+ SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+ // If ChainNode is null, it's because we replaced it on a previous
+ // iteration and we cleared it out of the map. Just skip it.
+ if (!ChainNode)
+ continue;
+
+ assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted node left in chain");
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ SelectionDAG::DAGNodeDeletedListener NDL(
+ *CurDAG, [&](SDNode *N, SDNode *E) {
+ std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
+ static_cast<SDNode *>(nullptr));
+ });
+ if (ChainNode->getOpcode() != ISD::TokenFactor)
+ ReplaceUses(ChainVal, InputChain);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
+ !llvm::is_contained(NowDeadNodes, ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
+
+ LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n");
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+ /// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+ /// induce cycles in the DAG) and, if so, create a TokenFactor node that will
+ /// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ SmallVector<SDValue, 3> InputChains;
+ unsigned int Max = 8192;
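+ // Max bounds the number of nodes the cycle check below may visit.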
+
+ // Quick exit on trivial merge.
+ if (ChainNodesMatched.size() == 1)
+ return ChainNodesMatched[0]->getOperand(0);
+
+ // Add chains that aren't already added (internal). Peek through
+ // token factors.
+ std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
+ if (V.getValueType() != MVT::Other)
+ return;
+ if (V->getOpcode() == ISD::EntryToken)
+ return;
+ if (!Visited.insert(V.getNode()).second)
+ return;
+ if (V->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : V->op_values())
+ AddChains(Op);
+ } else
+ InputChains.push_back(V);
+ };
+
+ for (auto *N : ChainNodesMatched) {
+ Worklist.push_back(N);
+ Visited.insert(N);
+ }
+
+ while (!Worklist.empty())
+ AddChains(Worklist.pop_back_val()->getOperand(0));
+
+ // Skip the search if there are no chain dependencies.
+ if (InputChains.size() == 0)
+ return CurDAG->getEntryNode();
+
+ // If any of the to-be-merged nodes is a predecessor of one of these input
+ // chains, then some node would be both a predecessor and a successor of the
+ // to-be-merged nodes, creating a cycle. Fail.
+ Visited.clear();
+ for (SDValue V : InputChains)
+ Worklist.push_back(V.getNode());
+
+ for (auto *N : ChainNodesMatched)
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
+ return SDValue();
+
+ // Return merged chain.
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
+ MVT::Other, InputChains);
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
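+ // Machine opcodes are stored bit-inverted in the SDNode opcode field, hence
+ // the ~TargetOpc here.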
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ ReplaceUses(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults - 1));
+
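+ // The glue result, if any, is always last; skip past it so that the chain
+ // result is found at ResNumResults-1 below.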
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ ReplaceUses(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults - 1));
+
+ // If an equivalent node already existed (so MorphNodeTo returned a different
+ // node rather than updating in place), replace uses of the old node with it.
+ if (Res != Node) {
+ ReplaceNode(Node, Res);
+ } else {
+ EnforceNodeIdInvariant(Res);
+ }
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckChildSame - Implements OP_CheckChildXSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
+ const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
+ RecordedNodes);
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const TargetLowering *TLI, const DataLayout &DL) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI,
+ DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ if (2 >= N.getNumOperands())
+ return false;
+ return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
+}
+
+// Bit 0 stores the sign of the immediate. The upper bits contain the magnitude
+// shifted left by 1.
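+ // For example, 4 (0b100) decodes to 2, 5 (0b101) decodes to -2, and the
+ // encoding 1 decodes to INT64_MIN.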
+static uint64_t decodeSignRotatedValue(uint64_t V) {
+ if ((V & 1) == 0)
+ return V >> 1;
+ if (V != 1)
+ return -(V >> 1);
+ // There is no such thing as -0 with integers. "-0" really means MININT.
+ return 1ULL << 63;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ Val = decodeSignRotatedValue(Val);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+ /// IsPredicateKnownToFail - If we know how to evaluate the current predicate
+ /// without pushing a scope, do so. If the predicate is known to fail, set
+ /// Result=true (the return value is then irrelevant). If the predicate is
+ /// known to pass or cannot be evaluated here, set Result=false and return the
+ /// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Same:
+ case SelectionDAGISel::OPC_CheckChild1Same:
+ case SelectionDAGISel::OPC_CheckChild2Same:
+ case SelectionDAGISel::OPC_CheckChild3Same:
+ Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ case SelectionDAGISel::OPC_CheckTypeRes: {
+ unsigned Res = Table[Index++];
+ Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ }
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(
+ Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(),
+ Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild2CondCode:
+ Result = !::CheckChild2CondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Integer:
+ case SelectionDAGISel::OPC_CheckChild1Integer:
+ case SelectionDAGISel::OPC_CheckChild2Integer:
+ case SelectionDAGISel::OPC_CheckChild3Integer:
+ case SelectionDAGISel::OPC_CheckChild4Integer:
+ Result = !::CheckChildInteger(Table, Index, N,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched;
+};
+
+ /// A DAG update listener to keep the matching state (i.e. RecordedNodes and
+ /// MatchScopes) up to date if the target is allowed to change the DAG while
+ /// matching. The X86 addressing mode matcher is an example of this.
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+ SDNode **NodeToMatch;
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
+
+public:
+ MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
+ SmallVectorImpl<MatchScope> &MS)
+ : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch),
+ RecordedNodes(RN), MatchScopes(MS) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ // Some early-returns here to avoid the search if we deleted the node or
+ // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+ // do, so it's unnecessary to update matching state at that point).
+ // Neither of these can occur currently because we only install this
+ // update listener while matching a complex pattern.
+ if (!E || E->isMachineOpcode())
+ return;
+ // Check if NodeToMatch was updated.
+ if (N == *NodeToMatch)
+ *NodeToMatch = E;
+ // Performing linear search here does not matter because we almost never
+ // run this code. You'd have to have a CSE during complex pattern
+ // matching.
+ for (auto &I : RecordedNodes)
+ if (I.first.getNode() == N)
+ I.first.setNode(E);
+
+ for (auto &I : MatchScopes)
+ for (auto &J : I.NodeStack)
+ if (J.getNode() == N)
+ J.setNode(E);
+ }
+};
+
+} // end anonymous namespace
+
+void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
+ const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::RegisterMask:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::MCSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::PSEUDO_PROBE:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::AssertAlign:
+ ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
+ CurDAG->RemoveDeadNode(NodeToMatch);
+ return;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ Select_INLINEASM(NodeToMatch);
+ return;
+ case ISD::READ_REGISTER:
+ Select_READ_REGISTER(NodeToMatch);
+ return;
+ case ISD::WRITE_REGISTER:
+ Select_WRITE_REGISTER(NodeToMatch);
+ return;
+ case ISD::UNDEF:
+ Select_UNDEF(NodeToMatch);
+ return;
+ case ISD::FREEZE:
+ Select_FREEZE(NodeToMatch);
+ return;
+ case ISD::ARITH_FENCE:
+ Select_ARITH_FENCE(NodeToMatch);
+ return;
+ case ISD::MEMBARRIER:
+ Select_MEMBARRIER(NodeToMatch);
+ return;
+ case ISD::STACKMAP:
+ Select_STACKMAP(NodeToMatch);
+ return;
+ case ISD::PATCHPOINT:
+ Select_PATCHPOINT(NodeToMatch);
+ return;
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching; if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+
+ LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n");
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (true) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (true) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or the first several) will
+ // fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (true) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ LLVM_DEBUG(
+ dbgs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate << ", continuing at "
+ << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+ // Otherwise, we know that this case of the Scope is guaranteed to fail, so
+ // move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
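+ // The top of NodeStack is N itself; the entry below it, if any, is N's
+ // parent in the pattern currently being matched.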
+ SDNode *Parent = nullptr;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ if (auto *MN = dyn_cast<MemSDNode>(N))
+ MatchedMemRefs.push_back(MN->getMemOperand());
+ else {
+ LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG);
+ dbgs() << '\n');
+ }
+
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveChild0: case OPC_MoveChild1:
+ case OPC_MoveChild2: case OPC_MoveChild3:
+ case OPC_MoveChild4: case OPC_MoveChild5:
+ case OPC_MoveChild6: case OPC_MoveChild7: {
+ unsigned ChildNo = Opcode-OPC_MoveChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+
+ case OPC_CheckChild0Same: case OPC_CheckChild1Same:
+ case OPC_CheckChild2Same: case OPC_CheckChild3Same:
+ if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
+ Opcode-OPC_CheckChild0Same))
+ break;
+ continue;
+
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckPredicateWithOperands: {
+ unsigned OpNum = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Operands;
+
+ for (unsigned i = 0; i < OpNum; ++i)
+ Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first);
+
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands))
+ break;
+ continue;
+ }
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+ // If target can modify DAG during matching, keep the matching state
+ // consistent.
+ std::unique_ptr<MatchStateUpdater> MSU;
+ if (ComplexPatternFuncMutatesDAG())
+ MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes,
+ MatchScopes));
+
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+
+ case OPC_CheckTypeRes: {
+ unsigned Res = MatcherTable[MatcherIndex++];
+ if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ }
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (true) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to "
+ << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getSimpleValueType();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (true) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << CurNodeVT
+ << "] from " << SwitchStart << " to " << MatcherIndex
+ << '\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout(),
+ Opcode - OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckChild2CondCode:
+ if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
+ case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
+ case OPC_CheckChild4Integer:
+ if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
+ Opcode-OPC_CheckChild0Integer)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckImmAllOnesV:
+ if (!ISD::isConstantSplatVectorAllOnes(N.getNode()))
+ break;
+ continue;
+ case OPC_CheckImmAllZerosV:
+ if (!ISD::isConstantSplatVectorAllZeros(N.getNode()))
+ break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use (ignoring chains, which are handled in UpdateChains).
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) {
+ unsigned NNonChainUses = 0;
+ SDNode *NS = NodeStack[i].getNode();
+ for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI)
+ if (UI.getUse().getValueType() != MVT::Other)
+ if (++NNonChainUses > 1) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger:
+ case OPC_EmitStringInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ if (Opcode == OPC_EmitInteger)
+ Val = decodeSignRotatedValue(Val);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
+ VT), nullptr));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets with more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1
+ case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(!InputChain.getNode() &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0;
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // If the chained node is not the root, we can't fold it if it has
+ // multiple uses.
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (!InputChain.getNode())
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(!InputChain.getNode() &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // If the chained node is not the root, we can't fold it if it has
+ // multiple uses.
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (!InputChain.getNode())
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg:
+ case OPC_EmitCopyToReg2: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+ if (Opcode == OPC_EmitCopyToReg2)
+ DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
+
+ if (!InputChain.getNode())
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
+ continue;
+ }
+ case OPC_Coverage: {
+ // This is emitted right before MorphNode/EmitNode, so it should be safe
+ // to assume that this node has been selected.
+ unsigned index = MatcherTable[MatcherIndex++];
+ index |= (MatcherTable[MatcherIndex++] << 8);
+ dbgs() << "COVERED: " << getPatternForIndex(index) << "\n";
+ dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n";
+ continue;
+ }
+
+ case OPC_EmitNode: case OPC_MorphNodeTo:
+ case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
+ case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs;
+ // If this is one of the compressed forms, get the number of VTs based
+ // on the Opcode. Otherwise read the next byte from the table.
+ if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
+ NumVTs = Opcode - OPC_MorphNodeTo0;
+ else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
+ NumVTs = Opcode - OPC_EmitNode0;
+ else
+ NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR)
+ VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs);
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
+ Ops.push_back(InputGlue);
+
+ // Check whether any matched node could raise an FP exception. Since all
+ // such nodes must have a chain, it suffices to check ChainNodesMatched.
+ // We need to perform this check before potentially modifying one of the
+ // nodes via MorphNode.
+ bool MayRaiseFPException =
+ llvm::any_of(ChainNodesMatched, [this](SDNode *N) {
+ return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept();
+ });
+
+ // Create the node.
+ MachineSDNode *Res = nullptr;
+ bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
+ (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
+ if (!IsMorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch),
+ VTList, Ops);
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ nullptr));
+ }
+ } else {
+ assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
+ "NodeToMatch was removed partway through selection");
+ SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
+ SDNode *E) {
+ CurDAG->salvageDebugInfo(*N);
+ auto &Chain = ChainNodesMatched;
+ assert((!E || !is_contained(Chain, N)) &&
+ "Chain node replaced during MorphNode");
+ llvm::erase_value(Chain, N);
+ });
+ Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
+ Ops, EmitNodeInfo));
+ }
+
+ // Set the NoFPExcept flag when no original matched node could
+ // raise an FP exception, but the new node potentially might.
+ if (!MayRaiseFPException && mayRaiseFPException(Res)) {
+ SDNodeFlags Flags = Res->getFlags();
+ Flags.setNoFPExcept(true);
+ Res->setFlags(Flags);
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+ // FIXME: This is vastly incorrect for patterns with multiple output
+ // instructions that access memory and for ComplexPatterns that match
+ // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TII->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ // We expect to have relatively few of these so just filter them into a
+ // temporary buffer so that we can easily add them to the instruction.
+ SmallVector<MachineMemOperand *, 4> FilteredMemRefs;
+ for (MachineMemOperand *MMO : MatchedMemRefs) {
+ if (MMO->isLoad()) {
+ if (mayLoad)
+ FilteredMemRefs.push_back(MMO);
+ } else if (MMO->isStore()) {
+ if (mayStore)
+ FilteredMemRefs.push_back(MMO);
+ } else {
+ FilteredMemRefs.push_back(MMO);
+ }
+ }
+
+ CurDAG->setNodeMemRefs(Res, FilteredMemRefs);
+ }
+
+ LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
+ << " Dropping mem operands\n";
+ dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created")
+ << " node: ";
+ Res->dump(CurDAG););
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (IsMorphNodeTo) {
+ // Update chain uses.
+ UpdateChains(Res, InputChain, ChainNodesMatched, true);
+ return;
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed and any new nodes have been created. Patch
+ // up references to the matched DAG to use the newly created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+ assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueSizeInBits()) &&
+ "invalid replacement");
+ ReplaceUses(SDValue(NodeToMatch, i), Res);
+ }
+
+ // Update chain uses.
+ UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false);
+
+ // If the root node defines glue, we need to update it to the glue result.
+ // TODO: This never happens in our tests and I think it can be removed /
+ // replaced with an assert, but doing it this way keeps the change NFC.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
+ MVT::Glue &&
+ InputGlue.getNode())
+ ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1),
+ InputGlue);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+ CurDAG->RemoveDeadNode(NodeToMatch);
+
+ return;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex
+ << "\n");
+ ++NumDAGIselRetries;
+ while (true) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
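The ResSlot and NumToSkip reads above follow the matcher table's variable-width integer convention: a byte with bit 7 set carries the low seven bits of a larger value, and continuation bytes follow until one without bit 7. A minimal standalone sketch of that decoding, with made-up names rather than the real GetVBR helper:

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Illustrative decoder: FirstByte has already been read from the table and
    // has bit 7 set; keep consuming bytes, seven payload bits at a time, until
    // a byte without the continuation bit is seen.
    static uint64_t decodeVBR(uint64_t FirstByte,
                              const std::vector<uint8_t> &Table, size_t &Idx) {
      assert((FirstByte & 128) && "only continued values are decoded here");
      uint64_t Val = FirstByte & 127;
      unsigned Shift = 7;
      uint8_t Next;
      do {
        Next = Table[Idx++];
        Val |= uint64_t(Next & 127) << Shift;
        Shift += 7;
      } while (Next & 128);
      return Val;
    }

    int main() {
      // 300 encodes as {0xAC, 0x02}: low seven bits plus continuation, then 2.
      std::vector<uint8_t> Table = {0xAC, 0x02};
      size_t Idx = 0;
      uint64_t V = Table[Idx++];
      if (V & 128)
        V = decodeVBR(V, Table, Idx);
      std::cout << V << "\n"; // prints 300
    }
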
+/// Return whether the node may raise an FP exception.
+bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const {
+ // For machine opcodes, consult the MCID flag.
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ return MCID.mayRaiseFPException();
+ }
+
+ // For ISD opcodes, only StrictFP opcodes may raise an FP
+ // exception.
+ if (N->isTargetOpcode())
+ return N->isTargetStrictFPOpcode();
+ return N->isStrictFPOpcode();
+}
+
+bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return false;
+
+ // Detect when "or" is used to add an offset to a stack object.
+ if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ Align A = MFI.getObjectAlign(FN->getIndex());
+ int32_t Off = C->getSExtValue();
+ // If the alleged offset fits in the zero bits guaranteed by
+ // the alignment, then this or is really an add.
+ return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off));
+ }
+ return false;
+}
+
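A small self-contained illustration of the check in isOrEquivalentToAdd above, using plain integers rather than LLVM types: when the base is known to be aligned to A bytes, an OR with a non-negative offset that fits entirely in the guaranteed-zero low bits behaves exactly like an ADD.

    #include <cstdint>
    #include <iostream>

    // Returns true when OR-ing Off into a pointer aligned to Alignment bytes is
    // equivalent to adding it: Off must be non-negative and touch only the low
    // bits that the alignment guarantees to be zero.
    static bool orActsLikeAdd(uint64_t Alignment, int64_t Off) {
      return Off >= 0 && ((Alignment - 1) & uint64_t(Off)) == uint64_t(Off);
    }

    int main() {
      std::cout << orActsLikeAdd(16, 12) << "\n"; // 1: 12 fits in the four zero bits
      std::cout << orActsLikeAdd(16, 20) << "\n"; // 0: bit 4 of the base may be set
    }
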
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getName();
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Twine(Msg.str()));
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 000000000000..b66eeb6d2bb1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,314 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "dag-printer"
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return std::string(G->getMachineFunction().getName());
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static std::string getNodeIdentifierLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+#ifndef NDEBUG
+ OS << 't' << Node->PersistentId;
+#else
+ OS << static_cast<const void *>(Node);
+#endif
+ return R;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getName(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// Just dump the dot graph to a user-provided path with the given title.
+/// This doesn't open the dot viewer program, which helps visualization
+/// outside of a debugging session. FileName expects an absolute path; if it
+/// is provided without any path separators, the file will be created in the
+/// current directory. An error will be emitted if the path is insane.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName,
+ const Twine &Title) {
+ dumpDotGraphToFile(this, FileName, Title);
+}
+#endif
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited,
+ int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+ for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N);
+ i != iend;
+ ++i) {
+ hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
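The helper above is a depth-limited DFS with a visited set; a generic standalone sketch of the same shape follows (the Node type and the depth cap are illustrative, not LLVM's).

    #include <set>
    #include <vector>

    struct Node {
      std::vector<Node *> Operands;
    };

    // Visit N and everything reachable from it, refusing to descend more than
    // 20 levels. Returns true if the walk was truncated so the caller can
    // react (e.g. re-color with a different color, as setSubgraphColor does).
    static bool colorSubgraph(Node *N, std::set<Node *> &Visited, int Level) {
      if (Level >= 20)
        return true;
      if (!Visited.insert(N).second)
        return false; // already handled
      // ... apply the color to N here ...
      bool Truncated = false;
      for (Node *Op : N->Operands)
        Truncated |= colorSubgraph(Op, Visited, Level + 1);
      return Truncated;
    }

    int main() {
      Node A, B;
      A.Operands = {&B};
      std::set<Node *> Visited;
      return colorSubgraph(&A, Visited, 0) ? 1 : 0;
    }
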
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
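For reference, a tiny standalone program that emits a Graphviz file following the same visual conventions as the traits above: Mrecord-shaped nodes, chain (MVT::Other) edges drawn blue and dashed, glue (MVT::Glue) edges red and bold, and a plaintext GraphRoot node pointing at the root. The node names and output file name are invented for illustration.

    #include <fstream>

    int main() {
      std::ofstream OS("dag-sketch.dot");
      OS << "digraph dag {\n"
         << "  node [shape=Mrecord];\n"
         << "  t0 [label=\"CopyToReg\"];\n"
         << "  t1 [label=\"ADD\"];\n"
         << "  t2 [label=\"EntryToken\"];\n"
         << "  t0 -> t1;\n"                           // ordinary value edge
         << "  t0 -> t2 [color=blue,style=dashed];\n" // chain edge
         << "  GraphRoot [shape=plaintext];\n"
         << "  GraphRoot -> t0 [color=blue,style=dashed];\n"
         << "}\n";
      return 0;
    }
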
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
new file mode 100644
index 000000000000..3a2df6f60593
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -0,0 +1,17 @@
+//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGTargetInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+using namespace llvm;
+
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 000000000000..5afd05648772
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,1313 @@
+//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code use by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GCStrategy.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+STATISTIC(NumSlotsAllocatedForStatepoints,
+ "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+ "Maximum number of stack slots required for a singe statepoint");
+
+cl::opt<bool> UseRegistersForDeoptValues(
+ "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for non pointer deopt args"));
+
+cl::opt<bool> UseRegistersForGCPointersInLandingPad(
+ "use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for gc pointer in landing pad"));
+
+cl::opt<unsigned> MaxRegistersForGCPointers(
+ "max-registers-for-gc-values", cl::Hidden, cl::init(0),
+ cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
+
+typedef FunctionLoweringInfo::StatepointRelocationRecord RecordType;
+
+static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
+ SelectionDAGBuilder &Builder, uint64_t Value) {
+ SDLoc L = Builder.getCurSDLoc();
+ Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L,
+ MVT::i64));
+ Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64));
+}
+
+void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+ // Consistency check
+ assert(PendingGCRelocateCalls.empty() &&
+ "Trying to visit statepoint before finished processing previous one");
+ Locations.clear();
+ NextSlotToAllocate = 0;
+ // We need to resize this on each safepoint: the two must stay in sync, and
+ // SelectionDAGBuilder's clearing patterns bear no relation to
+ // FunctionLoweringInfo's. We also need to ensure used bits get cleared.
+ AllocatedStackSlots.clear();
+ AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+}
+
+void StatepointLoweringState::clear() {
+ Locations.clear();
+ AllocatedStackSlots.clear();
+ assert(PendingGCRelocateCalls.empty() &&
+ "cleared before statepoint sequence completed");
+}
+
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+ SelectionDAGBuilder &Builder) {
+ NumSlotsAllocatedForStatepoints++;
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+
+ unsigned SpillSize = ValueType.getStoreSize();
+ assert((SpillSize * 8) ==
+ (-8u & (7 + ValueType.getSizeInBits())) && // Round up modulo 8.
+ "Size not in bytes?");
+
+ // First look for a previously created stack slot which is not in use
+ // (accounting for the fact that arbitrary slots may already be reserved);
+ // if none is found, create a new stack slot and use it.
+
+ const size_t NumSlots = AllocatedStackSlots.size();
+ assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
+
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
+
+ for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
+ if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
+ const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+ if (MFI.getObjectSize(FI) == SpillSize) {
+ AllocatedStackSlots.set(NextSlotToAllocate);
+ // TODO: Is ValueType the right thing to use here?
+ return Builder.DAG.getFrameIndex(FI, ValueType);
+ }
+ }
+ }
+
+ // Couldn't find a free slot, so create a new one:
+
+ SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+ const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MFI.markAsStatepointSpillSlotObjectIndex(FI);
+
+ Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+ AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true);
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
+
+ StatepointMaxSlotsRequired.updateMax(
+ Builder.FuncInfo.StatepointStackSlots.size());
+
+ return SpillSlot;
+}
+
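A standalone caricature of the reuse policy in allocateStackSlot: scan forward over previously created slots, take the first free one whose size matches, and otherwise append a brand new slot. The SlotPool type is invented for illustration and is not an LLVM data structure.

    #include <cstddef>
    #include <vector>

    struct SlotPool {
      std::vector<bool> Allocated;  // plays the role of AllocatedStackSlots
      std::vector<unsigned> Sizes;  // plays the role of the frame objects' sizes
      size_t NextToTry = 0;

      int allocate(unsigned Size) {
        for (; NextToTry < Allocated.size(); ++NextToTry) {
          if (!Allocated[NextToTry] && Sizes[NextToTry] == Size) {
            Allocated[NextToTry] = true; // reuse an existing slot of this size
            return static_cast<int>(NextToTry);
          }
        }
        Allocated.push_back(true);       // nothing reusable: create a new slot
        Sizes.push_back(Size);
        return static_cast<int>(Sizes.size() - 1);
      }
    };

    int main() {
      SlotPool Pool;
      int A = Pool.allocate(8);   // creates slot 0
      (void)A;
      Pool.Allocated[0] = false;  // simulate starting a new statepoint
      Pool.NextToTry = 0;
      int B = Pool.allocate(8);   // reuses slot 0
      return B;
    }
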
+/// Utility function for reservePreviousStackSlotForValue. Tries to find the
+/// stack slot index to which we have spilled a value for previous
+/// statepoints. LookUpDepth specifies the maximum DFS depth this function is
+/// allowed to search.
+static std::optional<int> findPreviousSpillSlot(const Value *Val,
+ SelectionDAGBuilder &Builder,
+ int LookUpDepth) {
+ // Can not look any further - give up now
+ if (LookUpDepth <= 0)
+ return std::nullopt;
+
+ // Spill location is known for gc relocates
+ if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
+ const Value *Statepoint = Relocate->getStatepoint();
+ assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) &&
+ "GetStatepoint must return one of two types");
+ if (isa<UndefValue>(Statepoint))
+ return std::nullopt;
+
+ const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps
+ [cast<GCStatepointInst>(Statepoint)];
+
+ auto It = RelocationMap.find(Relocate);
+ if (It == RelocationMap.end())
+ return std::nullopt;
+
+ auto &Record = It->second;
+ if (Record.type != RecordType::Spill)
+ return std::nullopt;
+
+ return Record.payload.FI;
+ }
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val))
+ return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
+
+ // Look through phi nodes
+ // All incoming values should have same known stack slot, otherwise result
+ // is unknown.
+ if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
+ std::optional<int> MergedResult;
+
+ for (const auto &IncomingValue : Phi->incoming_values()) {
+ std::optional<int> SpillSlot =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
+ if (!SpillSlot)
+ return std::nullopt;
+
+ if (MergedResult && *MergedResult != *SpillSlot)
+ return std::nullopt;
+
+ MergedResult = SpillSlot;
+ }
+ return MergedResult;
+ }
+
+ // TODO: We can do better for PHI nodes. In cases like this:
+ // ptr = phi(relocated_pointer, not_relocated_pointer)
+ // statepoint(ptr)
+ // We will return that stack slot for ptr is unknown. And later we might
+ // assign different stack slots for ptr and relocated_pointer. This limits
+ // llvm's ability to remove redundant stores.
+ // Unfortunately this is hard to accomplish in the current infrastructure.
+ // We use this function to eliminate the spill store completely, while in
+ // the example above we would still need to emit the store, just to a
+ // special "preferred" location instead of an arbitrary one.
+
+ // TODO: handle simple updates. If a value is modified and the original
+ // value is no longer live, it would be nice to put the modified value in the
+ // same slot. This allows folding of the memory accesses for some
+ // instructions types (like an increment).
+ // statepoint (i)
+ // i1 = i+1
+ // statepoint (i1)
+ // However we need to be careful for cases like this:
+ // statepoint(i)
+ // i1 = i+1
+ // statepoint(i, i1)
+ // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just
+ // put handling of simple modifications in this function like it's done
+ // for bitcasts we might end up reserving i's slot for 'i+1' because order in
+ // which we visit values is unspecified.
+
+ // Don't know any information about this instruction
+ return std::nullopt;
+}
+
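The PHI handling above is an "all inputs must agree" merge over optional results; the same logic in standalone form, with plain integers standing in for frame indices:

    #include <iostream>
    #include <optional>
    #include <vector>

    static std::optional<int>
    mergeKnownSlots(const std::vector<std::optional<int>> &Incoming) {
      std::optional<int> Merged;
      for (const auto &Slot : Incoming) {
        if (!Slot)
          return std::nullopt; // one unknown input makes the result unknown
        if (Merged && *Merged != *Slot)
          return std::nullopt; // conflicting slots: give up
        Merged = Slot;
      }
      return Merged;
    }

    int main() {
      std::cout << mergeKnownSlots({3, 3, 3}).value_or(-1) << "\n"; // 3
      std::cout << mergeKnownSlots({3, 4}).value_or(-1) << "\n";    // -1 (conflict)
    }
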
+/// Return true if-and-only-if the given SDValue can be lowered as either a
+/// constant argument or a stack reference. The key point is that the value
+/// doesn't need to be spilled or tracked as a vreg use.
+static bool willLowerDirectly(SDValue Incoming) {
+ // We are making an unchecked assumption that the frame size <= 2^16 as that
+ // is the largest offset which can be encoded in the stackmap format.
+ if (isa<FrameIndexSDNode>(Incoming))
+ return true;
+
+ // The largest constant describable in the StackMap format is 64 bits.
+ // Potential optimization: constant values are sign extended by the consumer,
+ // and thus there are many constants of static type > 64 bits whose value
+ // happens to be sext(Con64) and could thus be lowered directly.
+ if (Incoming.getValueType().getSizeInBits() > 64)
+ return false;
+
+ return isIntOrFPConstant(Incoming) || Incoming.isUndef();
+}
+
+/// Try to find existing copies of the incoming values in stack slots used for
+/// statepoint spilling. If we can find a spill slot for the incoming value,
+/// mark that slot as allocated, and reuse the same slot for this safepoint.
+/// This helps to avoid series of loads and stores that only serve to reshuffle
+/// values on the stack between calls.
+static void reservePreviousStackSlotForValue(const Value *IncomingValue,
+ SelectionDAGBuilder &Builder) {
+ SDValue Incoming = Builder.getValue(IncomingValue);
+
+ // If we won't spill this, we don't need to check for previously allocated
+ // stack slots.
+ if (willLowerDirectly(Incoming))
+ return;
+
+ SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
+ if (OldLocation.getNode())
+ // Duplicates in input
+ return;
+
+ const int LookUpDepth = 6;
+ std::optional<int> Index =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
+ if (!Index)
+ return;
+
+ const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
+
+ auto SlotIt = find(StatepointSlots, *Index);
+ assert(SlotIt != StatepointSlots.end() &&
+ "Value spilled to the unknown stack slot");
+
+ // This is one of our dedicated lowering slots
+ const int Offset = std::distance(StatepointSlots.begin(), SlotIt);
+ if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+ // stack slot already assigned to someone else, can't use it!
+ // TODO: currently we reserve space for gc arguments after doing
+ // normal allocation for deopt arguments. We should reserve for
+ // _all_ deopt and gc arguments, then start allocating. This
+ // will prevent some moves being inserted when vm state changes,
+ // but gc state doesn't between two calls.
+ return;
+ }
+ // Reserve this stack slot
+ Builder.StatepointLowering.reserveStackSlot(Offset);
+
+ // Cache this slot so we find it when going through the normal
+ // assignment loop.
+ SDValue Loc =
+ Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy());
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+}
+
+/// Extract call from statepoint, lower it and return pointer to the
+/// call node. Also update NodeMap so that getValue(statepoint) will
+/// reference lowered call result
+static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder) {
+ SDValue ReturnValue, CallEndVal;
+ std::tie(ReturnValue, CallEndVal) =
+ Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
+ SDNode *CallEnd = CallEndVal.getNode();
+
+ // Get a call instruction from the call sequence chain. Tail calls are not
+ // allowed. The following code is essentially reverse engineering X86's
+ // LowerCallTo.
+ //
+ // We are expecting DAG to have the following form:
+ //
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // get_return_value ch, glue
+ //
+ // get_return_value can either be a sequence of CopyFromReg instructions
+ // to grab the return value from the return register(s), or it can be a LOAD
+ // to load a value returned by reference via a stack slot.
+
+ bool HasDef = !SI.CLI.RetTy->isVoidTy();
+ if (HasDef) {
+ if (CallEnd->getOpcode() == ISD::LOAD)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ else
+ while (CallEnd->getOpcode() == ISD::CopyFromReg)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ }
+
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
+ return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
+}
+
+static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
+ FrameIndexSDNode &FI) {
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex());
+ auto MMOFlags = MachineMemOperand::MOStore |
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ auto &MFI = MF.getFrameInfo();
+ return MF.getMachineMemOperand(PtrInfo, MMOFlags,
+ MFI.getObjectSize(FI.getIndex()),
+ MFI.getObjectAlign(FI.getIndex()));
+}
+
+/// Spill a value incoming to the statepoint. It might be part of either the
+/// vmstate or the gcstate. In both cases unconditionally spill it on the
+/// stack unless it is a null constant. Return a tuple whose elements are the
+/// frame index containing the saved value, the outgoing chain from the
+/// emitted store, and the corresponding memory operand.
+static std::tuple<SDValue, SDValue, MachineMemOperand*>
+spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
+ SelectionDAGBuilder &Builder) {
+ SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+ MachineMemOperand* MMO = nullptr;
+
+ // Emit new store if we didn't do it for this ptr before
+ if (!Loc.getNode()) {
+ Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
+ Builder);
+ int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
+ // We use TargetFrameIndex so that isel will not select it into LEA
+ Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
+
+ // Right now we always allocate spill slots that are of the same
+ // size as the value we're about to spill (the size of spillee can
+ // vary since we spill vectors of pointers too). At some point we
+ // can consider allowing spills of smaller values to larger slots
+ // (i.e. change the '==' in the assert below to a '>=').
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ assert((MFI.getObjectSize(Index) * 8) ==
+ (-8 & (7 + // Round up modulo 8.
+ (int64_t)Incoming.getValueSizeInBits())) &&
+ "Bad spill: stack slot does not match!");
+
+ // Note: Using the alignment of the spill slot (rather than the abi or
+ // preferred alignment) is required for correctness when dealing with spill
+ // slots with preferred alignments larger than the frame alignment.
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *StoreMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
+ Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
+ StoreMMO);
+
+ MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
+
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+ }
+
+ assert(Loc.getNode());
+ return std::make_tuple(Loc, Chain, MMO);
+}
+
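The assertions in allocateStackSlot and spillIncomingStatepointValue both rely on the identity that -8 & (7 + Bits) rounds a bit count up to the next multiple of eight; a quick standalone check of that expression:

    #include <cstdint>
    #include <iostream>

    static uint64_t roundBitsUpToByteMultiple(uint64_t Bits) {
      return uint64_t(-8) & (7 + Bits); // clears the low three bits after adding 7
    }

    int main() {
      std::cout << roundBitsUpToByteMultiple(1) << " "    // 8
                << roundBitsUpToByteMultiple(8) << " "    // 8
                << roundBitsUpToByteMultiple(33) << "\n"; // 40
    }
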
+/// Lower a single value incoming to a statepoint node. This value can be
+/// either a deopt value or a gc value, the handling is the same. We special
+/// case constants and allocas, then fall back to spilling if required.
+static void
+lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
+ SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs,
+ SelectionDAGBuilder &Builder) {
+
+ if (willLowerDirectly(Incoming)) {
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value. For GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+ return;
+ }
+
+ assert(Incoming.getValueType().getSizeInBits() <= 64);
+
+ if (Incoming.isUndef()) {
+ // Put in an easily recognized constant that's unlikely to be a valid
+ // value so that uses of undef by the consumer of the stackmap are
+ // easily recognized. This is legal since the compiler is always
+ // allowed to choose an arbitrary value for undef.
+ pushStackMapConstant(Ops, Builder, 0xFEFEFEFE);
+ return;
+ }
+
+ // If the original value was a constant, make sure it gets recorded as
+ // such in the stackmap. This is required so that the consumer can
+ // parse any internal format to the deopt state. It also handles null
+ // pointers and other constant pointers in GC states.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+ pushStackMapConstant(Ops, Builder, C->getSExtValue());
+ return;
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Incoming)) {
+ pushStackMapConstant(Ops, Builder,
+ C->getValueAPF().bitcastToAPInt().getZExtValue());
+ return;
+ }
+
+ llvm_unreachable("unhandled direct lowering case");
+ }
+
+
+
+ if (!RequireSpillSlot) {
+ // If this value is live in (not live-on-return, or live-through), we can
+ // treat it the same way patchpoint treats its "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
+ // handled by the register allocator. Note that we do not have the notion
+ // of a late use so these values might be placed in registers which are
+ // clobbered by the call. This is fine for live-in. For live-through
+ // values, a fix-up pass should be executed to force spilling of such
+ // registers.
+ Ops.push_back(Incoming);
+ } else {
+ // Otherwise, locate a spill slot and explicitly spill it so it can be
+ // found by the runtime later. Note: We know all of these spills are
+ // independent, but don't bother to exploit that chain wise. DAGCombine
+ // will happily do so as needed, so doing it here would be a small compile
+ // time win at most.
+ SDValue Chain = Builder.getRoot();
+ auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
+ Ops.push_back(std::get<0>(Res));
+ if (auto *MMO = std::get<2>(Res))
+ MemRefs.push_back(MMO);
+ Chain = std::get<1>(Res);
+ Builder.DAG.setRoot(Chain);
+ }
+
+}
+
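To summarize the branches above, here is a hypothetical (non-LLVM) enumeration of the three outcomes for an incoming statepoint value: constants, undef, and frame indices are recorded directly; values that don't require a spill slot are passed as live-in virtual-register operands; everything else is stored to a dedicated spill slot.

    // Exposition only; these names do not exist in LLVM.
    enum class StatepointOperandKind { Direct, LiveInVReg, SpillSlot };

    static StatepointOperandKind
    classifyIncoming(bool WillLowerDirectly, bool RequireSpillSlot) {
      if (WillLowerDirectly)
        return StatepointOperandKind::Direct;     // constant / undef / frame index
      if (!RequireSpillSlot)
        return StatepointOperandKind::LiveInVReg; // left to the register allocator
      return StatepointOperandKind::SpillSlot;    // explicitly spilled to the stack
    }

    int main() {
      return classifyIncoming(false, true) == StatepointOperandKind::SpillSlot ? 0 : 1;
    }
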
+/// Return true if value V represents a GC value. The behavior is
+/// conservative: if it cannot be determined that the value is not a GC
+/// pointer, the function returns true.
+static bool isGCValue(const Value *V, SelectionDAGBuilder &Builder) {
+ auto *Ty = V->getType();
+ if (!Ty->isPtrOrPtrVectorTy())
+ return false;
+ if (auto *GFI = Builder.GFI)
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ return *IsManaged;
+ return true; // conservative
+}
+
+/// Lower deopt state and gc pointer arguments of the statepoint. The actual
+/// lowering is described in lowerIncomingStatepointValue. This function is
+/// responsible for lowering everything in the right position and playing some
+/// tricks to avoid redundant stack manipulation where possible. On
+/// completion, 'Ops' will contain ready to use operands for machine code
+/// statepoint. The chain nodes will have already been created and the DAG root
+/// will be set to the last value spilled (if any were).
+static void
+lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs,
+ SmallVectorImpl<SDValue> &GCPtrs,
+ DenseMap<SDValue, int> &LowerAsVReg,
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder) {
+ // Lower the deopt and gc arguments for this statepoint. Layout will be:
+ // deopt argument length, deopt arguments.., gc arguments...
+
+ // Figure out what lowering strategy we're going to use for each part
+ // Note: It is conservatively correct to lower both "live-in" and "live-out"
+ // as "live-through". A "live-through" variable is one which is "live-in",
+ // "live-out", and live throughout the lifetime of the call (i.e. we can find
+ // it from any PC within the transitive callee of the statepoint). In
+ // particular, if the callee spills callee preserved registers we may not
+ // be able to find a value placed in that register during the call. This is
+ // fine for live-out, but not for live-through. If we were willing to make
+ // assumptions about the code generator producing the callee, we could
+ // potentially allow live-through values in callee saved registers.
+ const bool LiveInDeopt =
+ SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+
+ // Decide which derived pointers will go on VRegs.
+ unsigned MaxVRegPtrs = MaxRegistersForGCPointers.getValue();
+
+ // Pointers used on the exceptional path of an invoke statepoint.
+ // We cannot assign them to VRegs.
+ SmallSet<SDValue, 8> LPadPointers;
+ if (!UseRegistersForGCPointersInLandingPad)
+ if (const auto *StInvoke =
+ dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) {
+ LandingPadInst *LPI = StInvoke->getLandingPadInst();
+ for (const auto *Relocate : SI.GCRelocates)
+ if (Relocate->getOperand(0) == LPI) {
+ LPadPointers.insert(Builder.getValue(Relocate->getBasePtr()));
+ LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr()));
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Deciding how to lower GC Pointers:\n");
+
+ // List of unique lowered GC Pointer values.
+ SmallSetVector<SDValue, 16> LoweredGCPtrs;
+ // Map lowered GC Pointer value to the index in above vector
+ DenseMap<SDValue, unsigned> GCPtrIndexMap;
+
+ unsigned CurNumVRegs = 0;
+
+ auto canPassGCPtrOnVReg = [&](SDValue SD) {
+ if (SD.getValueType().isVector())
+ return false;
+ if (LPadPointers.count(SD))
+ return false;
+ return !willLowerDirectly(SD);
+ };
+
+ auto processGCPtr = [&](const Value *V) {
+ SDValue PtrSD = Builder.getValue(V);
+ if (!LoweredGCPtrs.insert(PtrSD))
+ return; // skip duplicates
+ GCPtrIndexMap[PtrSD] = LoweredGCPtrs.size() - 1;
+
+ assert(!LowerAsVReg.count(PtrSD) && "must not have been seen");
+ if (LowerAsVReg.size() == MaxVRegPtrs)
+ return;
+ assert(V->getType()->isVectorTy() == PtrSD.getValueType().isVector() &&
+ "IR and SD types disagree");
+ if (!canPassGCPtrOnVReg(PtrSD)) {
+ LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG));
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG));
+ LowerAsVReg[PtrSD] = CurNumVRegs++;
+ };
+
+ // Process derived pointers first to give them more chance to go on VReg.
+ for (const Value *V : SI.Ptrs)
+ processGCPtr(V);
+ for (const Value *V : SI.Bases)
+ processGCPtr(V);
+
+ LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n");
+
+ auto requireSpillSlot = [&](const Value *V) {
+ if (!Builder.DAG.getTargetLoweringInfo().isTypeLegal(
+ Builder.getValue(V).getValueType()))
+ return true;
+ if (isGCValue(V, Builder))
+ return !LowerAsVReg.count(Builder.getValue(V));
+ return !(LiveInDeopt || UseRegistersForDeoptValues);
+ };
+
+ // Before we actually start lowering (and allocating spill slots for values),
+ // reserve any stack slots which we judge to be profitable to reuse for a
+ // particular value. This is purely an optimization over the code below and
+ // doesn't change semantics at all. It is important for performance that we
+ // reserve slots for both deopt and gc values before lowering either.
+ for (const Value *V : SI.DeoptState) {
+ if (requireSpillSlot(V))
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+
+ for (const Value *V : SI.Ptrs) {
+ SDValue SDV = Builder.getValue(V);
+ if (!LowerAsVReg.count(SDV))
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+
+ for (const Value *V : SI.Bases) {
+ SDValue SDV = Builder.getValue(V);
+ if (!LowerAsVReg.count(SDV))
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+
+ // First, prefix the list with the number of unique values to be
+ // lowered. Note that this is the number of *Values* not the
+ // number of SDValues required to lower them.
+ const int NumVMSArgs = SI.DeoptState.size();
+ pushStackMapConstant(Ops, Builder, NumVMSArgs);
+
+ // The vm state arguments are lowered in an opaque manner. We do not know
+ // what type of values are contained within.
+ LLVM_DEBUG(dbgs() << "Lowering deopt state\n");
+ for (const Value *V : SI.DeoptState) {
+ SDValue Incoming;
+ // If this is a function argument at a static frame index, generate it as
+ // the frame index.
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != INT_MAX)
+ Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy());
+ }
+ if (!Incoming.getNode())
+ Incoming = Builder.getValue(V);
+ LLVM_DEBUG(dbgs() << "Value " << *V
+ << " requireSpillSlot = " << requireSpillSlot(V) << "\n");
+ lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
+ Builder);
+ }
+
+ // Finally, go ahead and lower all the gc arguments.
+ pushStackMapConstant(Ops, Builder, LoweredGCPtrs.size());
+ for (SDValue SDV : LoweredGCPtrs)
+ lowerIncomingStatepointValue(SDV, !LowerAsVReg.count(SDV), Ops, MemRefs,
+ Builder);
+
+ // Copy to out vector. LoweredGCPtrs will be empty after this point.
+ GCPtrs = LoweredGCPtrs.takeVector();
+
+ // If there are any explicit spill slots passed to the statepoint, record
+ // them, but otherwise do not do anything special. These are user provided
+ // allocas and give control over placement to the consumer. In this case,
+ // it is the contents of the slot which may get updated, not the pointer to
+ // the alloca
+ SmallVector<SDValue, 4> Allocas;
+ for (Value *V : SI.GCArgs) {
+ SDValue Incoming = Builder.getValue(V);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
+ Allocas.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+ }
+ }
+ pushStackMapConstant(Ops, Builder, Allocas.size());
+ Ops.append(Allocas.begin(), Allocas.end());
+
+ // Now construct the GC base/derived map.
+ pushStackMapConstant(Ops, Builder, SI.Ptrs.size());
+ SDLoc L = Builder.getCurSDLoc();
+ for (unsigned i = 0; i < SI.Ptrs.size(); ++i) {
+ SDValue Base = Builder.getValue(SI.Bases[i]);
+ assert(GCPtrIndexMap.count(Base) && "base not found in index map");
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(GCPtrIndexMap[Base], L, MVT::i64));
+ SDValue Derived = Builder.getValue(SI.Ptrs[i]);
+ assert(GCPtrIndexMap.count(Derived) && "derived not found in index map");
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(GCPtrIndexMap[Derived], L, MVT::i64));
+ }
+}
+
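Putting the pieces of lowerStatepointMetaArgs together, the meta-argument section it appends to Ops has a count-prefixed layout; the struct below is purely descriptive shorthand for that order and is not an LLVM type.

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct StatepointMetaArgsLayout {
      uint64_t NumDeoptArgs;        // pushed first, then the deopt values themselves
      uint64_t NumGCPointers;       // then the unique lowered GC pointers
      uint64_t NumAllocas;          // then any explicit alloca frame indices
      uint64_t NumBaseDerivedPairs; // finally, for each derived pointer, a
      // (base index, derived index) pair referring back into the GC pointer
      // list above.
      std::vector<std::pair<uint64_t, uint64_t>> BaseDerivedIndexPairs;
    };

    int main() { return 0; }
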
+SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI) {
+ // The basic scheme here is that information about both the original call and
+ // the safepoint is encoded in the CallInst. We create a temporary call and
+ // lower it, then reverse engineer the calling sequence.
+
+ NumOfStatepoints++;
+ // Clear state
+ StatepointLowering.startNewStatepoint(*this);
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
+ assert((GFI || SI.Bases.empty()) &&
+ "No gc specified, so cannot relocate pointers!");
+
+ LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
+#ifndef NDEBUG
+ for (const auto *Reloc : SI.GCRelocates)
+ if (Reloc->getParent() == SI.StatepointInstr->getParent())
+ StatepointLowering.scheduleRelocCall(*Reloc);
+#endif
+
+ // Lower statepoint vmstate and gcstate arguments
+
+ // All lowered meta args.
+ SmallVector<SDValue, 10> LoweredMetaArgs;
+ // Lowered GC pointers (subset of above).
+ SmallVector<SDValue, 16> LoweredGCArgs;
+ SmallVector<MachineMemOperand*, 16> MemRefs;
+ // Maps derived pointer SDValue to statepoint result of relocated pointer.
+ DenseMap<SDValue, int> LowerAsVReg;
+ lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LoweredGCArgs, LowerAsVReg,
+ SI, *this);
+
+ // Now that we've emitted the spills, we need to update the root so that the
+ // call sequence is ordered correctly.
+ SI.CLI.setChain(getRoot());
+
+ // Get call node, we will replace it later with statepoint
+ SDValue ReturnVal;
+ SDNode *CallNode;
+ std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this);
+
+ // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
+ // nodes with all the appropriate arguments and return values.
+
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ SDValue Chain = CallNode->getOperand(0);
+
+ SDValue Glue;
+ bool CallHasIncomingGlue = CallNode->getGluedNode();
+ if (CallHasIncomingGlue) {
+ // Glue is always last operand
+ Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+ }
+
+ // Build the GC_TRANSITION_START node if necessary.
+ //
+ // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
+ // order in which they appear in the call to the statepoint intrinsic. If
+ // any of the operands is a pointer-typed, that operand is immediately
+ // followed by a SRCVALUE for the pointer that may be used during lowering
+ // (e.g. to form MachinePointerInfo values for loads/stores).
+ const bool IsGCTransition =
+ (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) ==
+ (uint64_t)StatepointFlags::GCTransition;
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TSOps;
+
+ // Add chain
+ TSOps.push_back(Chain);
+
+ // Add GC transition arguments
+ for (const Value *V : SI.GCTransitionArgs) {
+ TSOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TSOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue if necessary
+ if (CallHasIncomingGlue)
+ TSOps.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDValue GCTransitionStart =
+ DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps);
+
+ Chain = GCTransitionStart.getValue(0);
+ Glue = GCTransitionStart.getValue(1);
+ }
+
+ // TODO: Currently, all of these operands are being marked as read/write in
+ // PrologEpilogInserter.cpp; we should special case the VMState arguments
+ // and flags to be read-only.
+ SmallVector<SDValue, 40> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32));
+
+ // Calculate and push starting position of vmstate arguments
+ // Get number of arguments incoming directly into call node
+ unsigned NumCallRegArgs =
+ CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));
+
+ // Add call target
+ SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
+ Ops.push_back(CallTarget);
+
+ // Add call arguments
+ // Get position of register mask in the call
+ SDNode::op_iterator RegMaskIt;
+ if (CallHasIncomingGlue)
+ RegMaskIt = CallNode->op_end() - 2;
+ else
+ RegMaskIt = CallNode->op_end() - 1;
+ Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
+
+ // Add a constant argument for the calling convention
+ pushStackMapConstant(Ops, *this, SI.CLI.CallConv);
+
+ // Add a constant argument for the flags
+ uint64_t Flags = SI.StatepointFlags;
+ assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
+ "Unknown flag used");
+ pushStackMapConstant(Ops, *this, Flags);
+
+ // Insert all vmstate and gcstate arguments
+ llvm::append_range(Ops, LoweredMetaArgs);
+
+ // Add register mask from call node
+ Ops.push_back(*RegMaskIt);
+
+ // Add chain
+ Ops.push_back(Chain);
+
+ // Same for the glue, but we add it only if original call had it
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Compute return values. Provide a glue output since we consume one as
+ // input. This allows someone else to chain off us as needed.
+ SmallVector<EVT, 8> NodeTys;
+ for (auto SD : LoweredGCArgs) {
+ if (!LowerAsVReg.count(SD))
+ continue;
+ NodeTys.push_back(SD.getValueType());
+ }
+ LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n");
+ assert(NodeTys.size() == LowerAsVReg.size() && "Inconsistent GC Ptr lowering");
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+
+ unsigned NumResults = NodeTys.size();
+ MachineSDNode *StatepointMCNode =
+ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+ DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
+
+ // For values lowered to tied-defs, create the virtual registers if used
+ // in other blocks. For local gc.relocate record appropriate statepoint
+ // result in StatepointLoweringState.
+ DenseMap<SDValue, Register> VirtRegs;
+ for (const auto *Relocate : SI.GCRelocates) {
+ Value *Derived = Relocate->getDerivedPtr();
+ SDValue SD = getValue(Derived);
+ if (!LowerAsVReg.count(SD))
+ continue;
+
+ SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
+
+ // Handle local relocate. Note that different relocates might
+ // map to the same SDValue.
+ if (SI.StatepointInstr->getParent() == Relocate->getParent()) {
+ SDValue Res = StatepointLowering.getLocation(SD);
+ if (Res)
+ assert(Res == Relocated);
+ else
+ StatepointLowering.setLocation(SD, Relocated);
+ continue;
+ }
+
+ // Handle multiple gc.relocates of the same input efficiently.
+ if (VirtRegs.count(SD))
+ continue;
+
+ auto *RetTy = Relocate->getType();
+ Register Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy, std::nullopt);
+ SDValue Chain = DAG.getRoot();
+ RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+
+ VirtRegs[SD] = Reg;
+ }
+
+ // Record for later use how each relocation was lowered. This is needed to
+ // allow later gc.relocates to mirror the lowering chosen.
+ const Instruction *StatepointInstr = SI.StatepointInstr;
+ auto &RelocationMap = FuncInfo.StatepointRelocationMaps[StatepointInstr];
+ for (const GCRelocateInst *Relocate : SI.GCRelocates) {
+ const Value *V = Relocate->getDerivedPtr();
+ SDValue SDV = getValue(V);
+ SDValue Loc = StatepointLowering.getLocation(SDV);
+
+ bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent());
+
+ RecordType Record;
+ if (IsLocal && LowerAsVReg.count(SDV)) {
+ // Result is already stored in StatepointLowering
+ Record.type = RecordType::SDValueNode;
+ } else if (LowerAsVReg.count(SDV)) {
+ Record.type = RecordType::VReg;
+ assert(VirtRegs.count(SDV));
+ Record.payload.Reg = VirtRegs[SDV];
+ } else if (Loc.getNode()) {
+ Record.type = RecordType::Spill;
+ Record.payload.FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ } else {
+ Record.type = RecordType::NoRelocate;
+ // If we didn't relocate a value, we'll essentially end up inserting an
+ // additional use of the original value when lowering the gc.relocate.
+ // We need to make sure the value is available at the new use, which
+ // might be in another block.
+ if (Relocate->getParent() != StatepointInstr->getParent())
+ ExportFromCurrentBlock(V);
+ }
+ RelocationMap[Relocate] = Record;
+ }
+
+
+
+ SDNode *SinkNode = StatepointMCNode;
+
+ // Build the GC_TRANSITION_END node if necessary.
+ //
+ // See the comment above regarding GC_TRANSITION_START for the layout of
+ // the operands to the GC_TRANSITION_END node.
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TEOps;
+
+ // Add chain
+ TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2));
+
+ // Add GC transition arguments
+ for (const Value *V : SI.GCTransitionArgs) {
+ TEOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TEOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue
+ TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1));
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDValue GCTransitionEnd =
+ DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps);
+
+ SinkNode = GCTransitionEnd.getNode();
+ }
+
+ // Replace original call
+ // Call: ch,glue = CALL ...
+ // Statepoint: [gc relocates],ch,glue = STATEPOINT ...
+ unsigned NumSinkValues = SinkNode->getNumValues();
+ SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2),
+ SDValue(SinkNode, NumSinkValues - 1)};
+ DAG.ReplaceAllUsesWith(CallNode, StatepointValues);
+ // Remove original call node
+ DAG.DeleteNode(CallNode);
+
+ // Since we always emit CopyToRegs (even for local relocates), we must
+ // update root, so that they are emitted before any local uses.
+ (void)getControlRoot();
+
+ // TODO: A better future implementation would be to emit a single variable
+ // argument, variable return value STATEPOINT node here and then hookup the
+ // return value of each gc.relocate to the respective output of the
+ // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
+ // to actually be possible today.
+
+ return ReturnVal;
+}
+
+/// Return up to two gc.results if present. The first result is a block-local
+/// gc.result, the second a non-block-local gc.result. The corresponding
+/// entry will be nullptr if not present.
+static std::pair<const GCResultInst*, const GCResultInst*>
+getGCResultLocality(const GCStatepointInst &S) {
+ std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr);
+ for (const auto *U : S.users()) {
+ auto *GRI = dyn_cast<GCResultInst>(U);
+ if (!GRI)
+ continue;
+ if (GRI->getParent() == S.getParent())
+ Res.first = GRI;
+ else
+ Res.second = GRI;
+ }
+ return Res;
+}
+
+void
+SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
+ const BasicBlock *EHPadBB /*= nullptr*/) {
+ assert(I.getCallingConv() != CallingConv::AnyReg &&
+ "anyregcc is not supported on statepoints!");
+
+#ifndef NDEBUG
+ // Check that the associated GCStrategy expects to encounter statepoints.
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
+#endif
+
+ SDValue ActualCallee;
+ SDValue Callee = getValue(I.getActualCalledOperand());
+
+ if (I.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint then don't lower the call target, but use a constant
+ // `undef` instead. Not lowering the call target lets statepoint clients
+ // get away without providing a physical address for the symbolic call
+ // target at link time.
+ ActualCallee = DAG.getUNDEF(Callee.getValueType());
+ } else {
+ ActualCallee = Callee;
+ }
+
+ StatepointLoweringInfo SI(DAG);
+ populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos,
+ I.getNumCallArgs(), ActualCallee,
+ I.getActualReturnType(), false /* IsPatchPoint */);
+
+ // There may be duplication in the gc.relocate list; such as two copies of
+ // each relocation on normal and exceptional path for an invoke. We only
+ // need to spill once and record one copy in the stackmap, but we need to
+ // reload once per gc.relocate. (Dedupping gc.relocates is trickier and best
+ // handled as a CSE problem elsewhere.)
+ // TODO: There are a couple of major stackmap size optimizations we could do
+ // here if we wished.
+ // 1) If we've encountered a derived pair {B, D}, we don't need to actually
+ // record {B,B} if it's seen later.
+ // 2) Due to rematerialization, actual derived pointers are somewhat rare;
+ // given that, we could change the format to record base pointer relocations
+ // separately with half the space. This would require a format rev and a
+ // fairly major rework of the STATEPOINT node though.
+ SmallSet<SDValue, 8> Seen;
+ for (const GCRelocateInst *Relocate : I.getGCRelocates()) {
+ SI.GCRelocates.push_back(Relocate);
+
+ SDValue DerivedSD = getValue(Relocate->getDerivedPtr());
+ if (Seen.insert(DerivedSD).second) {
+ SI.Bases.push_back(Relocate->getBasePtr());
+ SI.Ptrs.push_back(Relocate->getDerivedPtr());
+ }
+ }
+
+ // If we find a deopt value which isn't explicitly added, we need to
+ // ensure it gets lowered such that gc cycles occurring before the
+ // deoptimization event during the lifetime of the call don't invalidate
+ // the pointer we're deopting with. Note that we assume that all
+ // pointers passed to deopt are base pointers; relaxing that assumption
+ // would require relatively large changes to how we represent relocations.
+ for (Value *V : I.deopt_operands()) {
+ if (!isGCValue(V, *this))
+ continue;
+ if (Seen.insert(getValue(V)).second) {
+ SI.Bases.push_back(V);
+ SI.Ptrs.push_back(V);
+ }
+ }
+
+ SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end());
+ SI.StatepointInstr = &I;
+ SI.ID = I.getID();
+
+ SI.DeoptState = ArrayRef<const Use>(I.deopt_begin(), I.deopt_end());
+ SI.GCTransitionArgs = ArrayRef<const Use>(I.gc_transition_args_begin(),
+ I.gc_transition_args_end());
+
+ SI.StatepointFlags = I.getFlags();
+ SI.NumPatchBytes = I.getNumPatchBytes();
+ SI.EHPadBB = EHPadBB;
+
+ SDValue ReturnValue = LowerAsSTATEPOINT(SI);
+
+ // Export the result value if needed
+ const auto GCResultLocality = getGCResultLocality(I);
+
+ if (!GCResultLocality.first && !GCResultLocality.second) {
+ // The return value is not needed, just generate a poison value.
+ // Note: This covers the void return case.
+ setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ return;
+ }
+
+ if (GCResultLocality.first) {
+ // The result value will be used in the same basic block. Don't export it or
+ // perform any explicit register copies; the gc_result will simply grab
+ // this value.
+ setValue(&I, ReturnValue);
+ }
+
+ if (!GCResultLocality.second)
+ return;
+ // The result value will be used in a different basic block, so we need to
+ // export it now. The default exporting mechanism will not work here because
+ // the statepoint call has a different type than the actual call, which means
+ // that by default llvm would create an export register of the wrong type
+ // (always i32 in our case). Instead we need to create the export register
+ // with the correct type manually.
+ // TODO: To eliminate this problem we could remove the gc.result intrinsic
+ // completely and make the statepoint call return a tuple.
+ Type *RetTy = GCResultLocality.second->getType();
+ Register Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy,
+ I.getCallingConv());
+ SDValue Chain = DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+ FuncInfo.ValueMap[&I] = Reg;
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
+ bool VarArgDisallowed, bool ForceVoidReturnTy) {
+ StatepointLoweringInfo SI(DAG);
+ unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
+ populateCallLoweringInfo(
+ SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
+ ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
+ false);
+ if (!VarArgDisallowed)
+ SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
+
+ auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt);
+
+ unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
+
+ auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
+ SI.ID = SD.StatepointID.value_or(DefaultID);
+ SI.NumPatchBytes = SD.NumPatchBytes.value_or(0);
+
+ SI.DeoptState =
+ ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
+ SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None);
+ SI.EHPadBB = EHPadBB;
+
+ // NB! The GC arguments are deliberately left empty.
+
+ if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
+ ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
+ setValue(Call, ReturnVal);
+ }
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) {
+ LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB,
+ /* VarArgDisallowed = */ false,
+ /* ForceVoidReturnTy = */ false);
+}
+
+void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
+ // The result value of the gc_result is simply the result of the actual
+ // call. We've already emitted this, so just grab the value.
+ const Value *SI = CI.getStatepoint();
+ assert((isa<GCStatepointInst>(SI) || isa<UndefValue>(SI)) &&
+ "GetStatepoint must return one of two types");
+ if (isa<UndefValue>(SI))
+ return;
+
+ if (cast<GCStatepointInst>(SI)->getParent() == CI.getParent()) {
+ setValue(&CI, getValue(SI));
+ return;
+ }
+ // The statepoint is in a different basic block, so we should have stored the
+ // call result in a virtual register.
+ // We cannot use the default getValue() functionality to copy the value from
+ // this register because the statepoint and actual call return types can be
+ // different, and getValue() would use CopyFromReg of the wrong type,
+ // which is always i32 in our case.
+ Type *RetTy = CI.getType();
+ SDValue CopyFromReg = getCopyFromRegs(SI, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
+}
+
+void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
+ const Value *Statepoint = Relocate.getStatepoint();
+#ifndef NDEBUG
+ // Consistency check
+ // We skip this check for relocates not in the same basic block as their
+ // statepoint. It would be too expensive to preserve validation info through
+ // different basic blocks.
+ assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) &&
+ "GetStatepoint must return one of two types");
+ if (isa<UndefValue>(Statepoint))
+ return;
+
+ if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent())
+ StatepointLowering.relocCallVisited(Relocate);
+#endif
+
+ const Value *DerivedPtr = Relocate.getDerivedPtr();
+ auto &RelocationMap =
+ FuncInfo.StatepointRelocationMaps[cast<GCStatepointInst>(Statepoint)];
+ auto SlotIt = RelocationMap.find(&Relocate);
+ assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value");
+ const RecordType &Record = SlotIt->second;
+
+ // If the relocation was done via a virtual register...
+ if (Record.type == RecordType::SDValueNode) {
+ assert(cast<GCStatepointInst>(Statepoint)->getParent() ==
+ Relocate.getParent() &&
+ "Nonlocal gc.relocate mapped via SDValue");
+ SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr));
+ assert(SDV.getNode() && "empty SDValue");
+ setValue(&Relocate, SDV);
+ return;
+ }
+ if (Record.type == RecordType::VReg) {
+ Register InReg = Record.payload.Reg;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Relocate.getType(),
+ std::nullopt); // This is not an ABI copy.
+ // We generate copy to/from regs even for local uses, hence we must
+ // chain with current root to ensure proper ordering of copies w.r.t.
+ // statepoint.
+ SDValue Chain = DAG.getRoot();
+ SDValue Relocation = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, nullptr, nullptr);
+ setValue(&Relocate, Relocation);
+ return;
+ }
+
+ if (Record.type == RecordType::Spill) {
+ unsigned Index = Record.payload.FI;
+ SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
+
+ // All the reloads are independent and are reading memory only modified by
+ // statepoints (i.e. no other aliasing stores); informing SelectionDAG of
+ // this lets CSE kick in for free and allows reordering of
+ // instructions if possible. The lowering for statepoint sets the root,
+ // so this is ordering all reloads with either
+ // a) the statepoint node itself, or
+ // b) the entry of the current block for an invoke statepoint.
+ const SDValue Chain = DAG.getRoot(); // != Builder.getRoot()
+
+ auto &MF = DAG.getMachineFunction();
+ auto &MFI = MF.getFrameInfo();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
+
+ auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType());
+
+ SDValue SpillLoad =
+ DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO);
+ PendingLoads.push_back(SpillLoad.getValue(1));
+
+ assert(SpillLoad.getNode());
+ setValue(&Relocate, SpillLoad);
+ return;
+ }
+
+ assert(Record.type == RecordType::NoRelocate);
+ SDValue SD = getValue(DerivedPtr);
+
+ if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
+ // Lower relocate(undef) to an arbitrary constant. The current constant value
+ // is chosen such that it's unlikely to be a valid pointer.
+ setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64));
+ return;
+ }
+
+ // We didn't need to spill these special cases (constants and allocas).
+ // See the handling in spillIncomingValueForStatepoint for details.
+ setValue(&Relocate, SD);
+}
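+
+// Note on the record kinds handled above: SDValueNode is used for values
+// relocated within the statepoint's own block, VReg for values exported via a
+// virtual register, Spill for values reloaded from a statepoint stack slot,
+// and NoRelocate for values that never needed relocation (e.g. constants and
+// allocas).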
+
+void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ // We don't lower calls to __llvm_deoptimize as varargs, but as a regular
+ // call. We also do not lower the return value to any virtual register, and
+ // change the immediately following return to a trap instruction.
+ LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr,
+ /* VarArgDisallowed = */ true,
+ /* ForceVoidReturnTy = */ true);
+}
+
+void SelectionDAGBuilder::LowerDeoptimizingReturn() {
+ // We do not lower the return value from llvm.deoptimize to any virtual
+ // register, and change the immediately following return to a trap
+ // instruction.
+ if (DAG.getTarget().Options.TrapUnreachable)
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
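+
+// Illustrative example (simplified IR) of the deoptimization lowering above:
+//   %r = call i32 (...) @llvm.experimental.deoptimize.i32(i32 %x) [ "deopt"(i32 %s) ]
+//   ret i32 %r
+// is emitted as a statepoint-wrapped call to the __llvm_deoptimize symbol with
+// a void return type, and the return that follows is turned into an ISD::TRAP
+// node when TargetOptions::TrapUnreachable is set.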
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
new file mode 100644
index 000000000000..addc0a7eef3a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,126 @@
+//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include <cassert>
+
+namespace llvm {
+
+class SelectionDAGBuilder;
+
+/// This class tracks both per-statepoint and per-selectiondag information.
+/// For each statepoint it tracks the locations of its gc values (incoming and
+/// relocated) and the list of gc.relocate calls scheduled for visiting (the
+/// latter is used for a debug mode consistency check only). The spill slot
+/// tracking works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+ StatepointLoweringState() = default;
+
+ /// Reset all state tracking for a newly encountered safepoint. Also
+ /// performs some consistency checking.
+ void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+ /// Clear the memory usage of this object. This is called from
+ /// SelectionDAGBuilder::clear. We require this is never called in the
+ /// midst of processing a statepoint sequence.
+ void clear();
+
+ /// Returns the spill location of a value incoming to the current
+ /// statepoint. Will return SDValue() if this value hasn't been
+ /// spilled. Otherwise, the value has already been spilled and no
+ /// further action is required by the caller.
+ SDValue getLocation(SDValue Val) {
+ auto I = Locations.find(Val);
+ if (I == Locations.end())
+ return SDValue();
+ return I->second;
+ }
+
+ void setLocation(SDValue Val, SDValue Location) {
+ assert(!Locations.count(Val) &&
+ "Trying to allocate already allocated location");
+ Locations[Val] = Location;
+ }
+
+ /// Record the fact that we expect to encounter a given gc_relocate
+ /// before the next statepoint. If we don't see it, we'll trigger an
+ /// assertion failure.
+ void scheduleRelocCall(const GCRelocateInst &RelocCall) {
+ // We are not interested in lowering dead instructions.
+ if (!RelocCall.use_empty())
+ PendingGCRelocateCalls.push_back(&RelocCall);
+ }
+
+ /// Remove this gc_relocate from the list we're expecting to see
+ /// before the next statepoint. If we weren't expecting to see
+ /// it, we'll trigger an assertion failure.
+ void relocCallVisited(const GCRelocateInst &RelocCall) {
+ // We are not interested in lowering dead instructions.
+ if (RelocCall.use_empty())
+ return;
+ auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
+ assert(I != PendingGCRelocateCalls.end() &&
+ "Visited unexpected gcrelocate call");
+ PendingGCRelocateCalls.erase(I);
+ }
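+
+ // Typical flow (informal sketch): LowerAsSTATEPOINT calls
+ // startNewStatepoint and schedules each same-block gc.relocate via
+ // scheduleRelocCall; visitGCRelocate then reports each of them through
+ // relocCallVisited, and the consistency checking verifies the pending list
+ // has drained before the next statepoint is processed.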
+
+ // TODO: Should add consistency tracking to ensure we encounter
+ // expected gc_result calls too.
+
+ /// Get a stack slot we can use to store a value of type ValueType. This
+ /// will hopefully be a recycled slot from another statepoint.
+ SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+ void reserveStackSlot(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ assert(!AllocatedStackSlots.test(Offset) && "already reserved!");
+ assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+ AllocatedStackSlots.set(Offset);
+ }
+
+ bool isStackSlotAllocated(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ return AllocatedStackSlots.test(Offset);
+ }
+
+private:
+ /// Maps a pre-relocation value (a gc pointer directly incoming into the
+ /// statepoint) to its location (currently only stack slots).
+ DenseMap<SDValue, SDValue> Locations;
+
+ /// A boolean indicator for each slot listed in the FunctionInfo as to
+ /// whether it has been used in the current statepoint. Since we try to
+ /// preserve stack slots across safepoints, there can be gaps in which
+ /// slots have been allocated.
+ SmallBitVector AllocatedStackSlots;
+
+ /// Points just beyond the last slot known to have been allocated
+ unsigned NextSlotToAllocate = 0;
+
+ /// Keep track of pending gcrelocate calls for consistency check
+ SmallVector<const GCRelocateInst *, 10> PendingGCRelocateCalls;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 000000000000..a84d35a6ea4e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,10800 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cctype>
+using namespace llvm;
+
+/// NOTE: The TargetMachine owns TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm)
+ : TargetLoweringBase(tm) {}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return nullptr;
+}
+
+bool TargetLowering::isPositionIndependent() const {
+ return getTargetMachine().isPositionIndependent();
+}
+
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function &F = DAG.getMachineFunction().getFunction();
+
+ // First, check if tail calls have been disabled in this function.
+ if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
+ return false;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore following attributes because they don't affect the
+ // call sequence.
+ AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
+ for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull, Attribute::NoAlias,
+ Attribute::NonNull, Attribute::NoUndef})
+ CallerAttrs.removeAttribute(Attr);
+
+ if (CallerAttrs.hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.contains(Attribute::ZExt) ||
+ CallerAttrs.contains(Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
+}
+
+bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
+ const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<SDValue> &OutVals) const {
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ const CCValAssign &ArgLoc = ArgLocs[I];
+ if (!ArgLoc.isRegLoc())
+ continue;
+ MCRegister Reg = ArgLoc.getLocReg();
+ // Only look at callee saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+ continue;
+ // Check that we pass the value used for the caller.
+ // (We look for a CopyFromReg reading a virtual register that is used
+ // for the function live-in value of register Reg)
+ SDValue Value = OutVals[I];
+ if (Value->getOpcode() == ISD::AssertZext)
+ Value = Value.getOperand(0);
+ if (Value->getOpcode() != ISD::CopyFromReg)
+ return false;
+ Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ if (MRI.getLiveInPhysReg(ArgReg) != Reg)
+ return false;
+ }
+ return true;
+}
+
+/// Set CallLoweringInfo attribute flags based on a call instruction
+/// and called function attributes.
+void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
+ unsigned ArgIdx) {
+ IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
+ IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
+ IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ Alignment = Call->getParamStackAlign(ArgIdx);
+ IndirectType = nullptr;
+ assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
+ "multiple ABI attributes?");
+ if (IsByVal) {
+ IndirectType = Call->getParamByValType(ArgIdx);
+ if (!Alignment)
+ Alignment = Call->getParamAlign(ArgIdx);
+ }
+ if (IsPreallocated)
+ IndirectType = Call->getParamPreallocatedType(ArgIdx);
+ if (IsInAlloca)
+ IndirectType = Call->getParamInAllocaType(ArgIdx);
+ if (IsSRet)
+ IndirectType = Call->getParamStructRetType(ArgIdx);
+}
+
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
+ ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl,
+ SDValue InChain) const {
+ if (!InChain)
+ InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ Args.reserve(Ops.size());
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ SDValue NewOp = Ops[i];
+ Entry.Node = NewOp;
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
+ CallOptions.IsSExt);
+ Entry.IsZExt = !Entry.IsSExt;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
+ Entry.IsSExt = Entry.IsZExt = false;
+ }
+ Args.push_back(Entry);
+ }
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported library call operation!");
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
+ bool zeroExtend = !signExtend;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
+ signExtend = zeroExtend = false;
+ }
+
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setNoReturn(CallOptions.DoesNotReturn)
+ .setDiscardResult(!CallOptions.IsReturnValueUsed)
+ .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
+ .setSExtResult(signExtend)
+ .setZExtResult(zeroExtend);
+ return LowerCallTo(CLI);
+}
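+
+// Example usage (illustrative): a target lowering an f64 remainder to a
+// libcall might do something like
+//   SDValue Ops[2] = {LHS, RHS};
+//   MakeLibCallOptions CallOptions;
+//   std::pair<SDValue, SDValue> R =
+//       makeLibCall(DAG, RTLIB::REM_F64, MVT::f64, Ops, CallOptions, dl, Chain);
+// where R.first is the call result and R.second is the output chain.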
+
+bool TargetLowering::findOptimalMemOpLowering(
+ std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
+ unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
+ Op.getSrcAlign() < Op.getDstAlign())
+ return false;
+
+ EVT VT = getOptimalMemOpType(Op, FuncAttributes);
+
+ if (VT == MVT::Other) {
+ // Use the largest integer type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater than or
+ // equal to DstAlign (or zero).
+ VT = MVT::i64;
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+
+ // Find the largest legal integer type.
+ MVT LVT = MVT::i64;
+ while (!isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ // If the type we've chosen is larger than the largest legal integer type
+ // then use that instead.
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ uint64_t Size = Op.size();
+ while (Size) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ unsigned Fast;
+ if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
+ allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
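+
+// Illustrative example: for a 15-byte memcpy where getOptimalMemOpType
+// returns MVT::Other and i64 is legal, MemOps typically ends up as
+// {i64, i32, i16, i8}; if Op.allowOverlap() and fast misaligned accesses are
+// available, it may instead be two overlapping i64 accesses.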
+
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS) const {
+ SDValue Chain;
+ return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
+ OldRHS, Chain);
+}
+
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS,
+ SDValue &Chain,
+ bool IsSignaling) const {
+ // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
+ // not supporting it. We can update this code when libgcc provides such
+ // functions.
+
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
+ && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ bool ShouldInvertCC = false;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 :
+ (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETO:
+ ShouldInvertCC = true;
+ [[fallthrough]];
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+ break;
+ case ISD::SETONE:
+ // SETONE = O && UNE
+ ShouldInvertCC = true;
+ [[fallthrough]];
+ case ISD::SETUEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+ break;
+ default:
+ // Invert CC for unordered comparisons
+ ShouldInvertCC = true;
+ switch (CCCode) {
+ case ISD::SETULT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+ break;
+ case ISD::SETULE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+ break;
+ case ISD::SETUGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
+ }
+ }
+
+ // Use the target specific return value for comparison lib calls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = {NewLHS, NewRHS};
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { OldLHS.getValueType(),
+ OldRHS.getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+ auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
+ NewLHS = Call.first;
+ NewRHS = DAG.getConstant(0, dl, RetVT);
+
+ CCCode = getCmpLibcallCC(LC1);
+ if (ShouldInvertCC) {
+ assert(RetVT.isInteger());
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ }
+
+ if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
+ // Update Chain.
+ Chain = Call.second;
+ } else {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
+ SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
+ auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
+ CCCode = getCmpLibcallCC(LC2);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
+ Call2.second);
+ NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
+ Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
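+
+// Illustrative example: softening (setole f32 %a, %b) emits a call to the
+// OLE_F32 comparison libcall (usually __lesf2) and rewrites the comparison to
+// test the libcall's integer result against zero with the condition code
+// reported by getCmpLibcallCC (SETLE by default for this libcall).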
+
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (!isPositionIndependent())
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
+
+ return Table;
+}
+
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,MCContext &Ctx) const{
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ const TargetMachine &TM = getTargetMachine();
+ const GlobalValue *GV = GA->getGlobal();
+
+ // If the address is not even local to this DSO we will have to load it from
+ // a got and then add the offset.
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return false;
+
+ // If the code is position independent we will have to add a base register.
+ if (isPositionIndependent())
+ return false;
+
+ // Otherwise we can do it.
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// If the specified instruction has a constant integer operand and there are
+/// bits set in that constant that are not demanded, then clear those bits and
+/// return true.
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const {
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
+
+ // Early-out if we've ended up calling an undemanded node; leave this to
+ // constant folding.
+ if (DemandedBits.isZero() || DemandedElts.isZero())
+ return false;
+
+ // Do target-specific constant optimization.
+ if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return TLO.New.getNode();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Opcode) {
+ default:
+ break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Op1C || Op1C->isOpaque())
+ return false;
+
+ // If this is a 'not' op, don't touch it because that's a canonical form.
+ const APInt &C = Op1C->getAPIntValue();
+ if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
+ return false;
+
+ if (!C.isSubsetOf(DemandedBits)) {
+ EVT VT = Op.getValueType();
+ SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
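+
+// Illustrative example: for (and X, 0xFF) where only the low four bits are
+// demanded, the constant is not a subset of the demanded bits, so it is
+// shrunk and the node is rewritten as (and X, 0x0F).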
+
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ TargetLoweringOpt &TLO) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
+}
+
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
+bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
+ const APInt &DemandedBits,
+ TargetLoweringOpt &TLO) const {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ EVT VT = Op.getValueType();
+ SelectionDAG &DAG = TLO.DAG;
+ SDLoc dl(Op);
+
+ // Early return, as this function cannot handle vector types.
+ if (VT.isVector())
+ return false;
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned DemandedSize = DemandedBits.getActiveBits();
+ for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
+ SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(
+ Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
+ assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
+ SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
+ return TLO.CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
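+
+// Illustrative example: for an i64 add whose single user only demands the low
+// 8 bits, on a target where truncation to and zero extension from a narrower
+// type are free, the add is rewritten as
+//   (any_extend (add (truncate x), (truncate y)))
+// using the smallest power-of-2 integer type with such free casts.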
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ KnownBits Known;
+
+ bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return Simplified;
+}
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ KnownBits Known;
+
+ bool Simplified =
+ SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return Simplified;
+}
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+ AssumeSingleUse);
+}
+
+// TODO: Under what circumstances can we create nodes? Constant folding?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Ignore UNDEFs.
+ if (Op.isUndef())
+ return SDValue();
+
+ // Not demanding any bits/elts from Op.
+ if (DemandedBits == 0 || DemandedElts == 0)
+ return DAG.getUNDEF(VT);
+
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned BitWidth = DemandedBits.getBitWidth();
+ KnownBits LHSKnown, RHSKnown;
+ switch (Op.getOpcode()) {
+ case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ return SDValue();
+
+ SDValue Src = peekThroughBitcasts(Op.getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == DstVT)
+ return Src;
+
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
+ if (NumSrcEltBits == NumDstEltBits)
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
+ unsigned Scale = NumDstEltBits / NumSrcEltBits;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
+ if (!Sub.isZero()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ // TODO - bigendian once we have test coverage.
+ if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
+ unsigned Scale = NumSrcEltBits / NumDstEltBits;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * NumDstEltBits;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ break;
+ }
+ case ISD::AND: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::OR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::XOR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::SHL: {
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
+ if (const APInt *MaxSA =
+ DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned NumSignBits =
+ DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
+ return Op0;
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
+ getBooleanContents(Op0.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return Op0;
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ SDValue Op0 = Op.getOperand(0);
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ExBits = ExVT.getScalarSizeInBits();
+ if (DemandedBits.getActiveBits() <= ExBits &&
+ shouldRemoveRedundantExtend(Op))
+ return Op0;
+ // If the input is already sign extended, just drop the extension.
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ if (NumSignBits >= (BitWidth - ExBits + 1))
+ return Op0;
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // If we only want the lowest element and none of the extended bits, then we can
+ // return the bitcasted source vector.
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (IsLE && DemandedElts == 1 &&
+ DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
+ return DAG.getBitcast(DstVT, Src);
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ // If we don't demand the inserted subvector, return the base vector.
+ if (DemandedSubElts == 0)
+ return Vec;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ AllUndef = false;
+ IdentityLHS &= (M == (int)i);
+ IdentityRHS &= ((M - NumElts) == i);
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(Op.getValueType());
+ if (IdentityLHS)
+ return Op.getOperand(0);
+ if (IdentityRHS)
+ return Op.getOperand(1);
+ break;
+ }
+ default:
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (VT.isScalableVector())
+ return SDValue();
+
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth))
+ return V;
+ break;
+ }
+ return SDValue();
+}
+
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
+ Depth);
+}
+
+SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
+ SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
+ return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
+ Depth);
+}
+
+// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
+// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
+static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+ // Is the right shift using an immediate value of 1?
+ ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (!N1C || !N1C->isOne())
+ return SDValue();
+
+ // We are looking for an avgfloor
+ // add(ext, ext)
+ // or one of these as an avgceil
+ // add(add(ext, ext), 1)
+ // add(add(ext, 1), ext)
+ // add(ext, add(ext, 1))
+ SDValue Add = Op.getOperand(0);
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue ExtOpA = Add.getOperand(0);
+ SDValue ExtOpB = Add.getOperand(1);
+ SDValue Add2;
+ auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
+ ConstantSDNode *ConstOp;
+ if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op1;
+ ExtOpB = Op3;
+ Add2 = A;
+ return true;
+ }
+ if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op1;
+ ExtOpB = Op2;
+ Add2 = A;
+ return true;
+ }
+ return false;
+ };
+ bool IsCeil =
+ (ExtOpA.getOpcode() == ISD::ADD &&
+ MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
+ (ExtOpB.getOpcode() == ISD::ADD &&
+ MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
+
+ // If the shift is signed (sra):
+ // - Needs >= 2 sign bits for both operands.
+ // - Needs >= 2 zero bits.
+ // If the shift is unsigned (srl):
+ // - Needs >= 1 zero bit for both operands.
+ // - Needs 1 demanded bit zero and >= 2 sign bits.
+ unsigned ShiftOpc = Op.getOpcode();
+ bool IsSigned = false;
+ unsigned KnownBits;
+ unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
+ unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
+ unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
+ unsigned NumZeroA =
+ DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
+ unsigned NumZeroB =
+ DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
+ unsigned NumZero = std::min(NumZeroA, NumZeroB);
+
+ switch (ShiftOpc) {
+ default:
+ llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
+ case ISD::SRA: {
+ if (NumZero >= 2 && NumSigned < NumZero) {
+ IsSigned = false;
+ KnownBits = NumZero;
+ break;
+ }
+ if (NumSigned >= 1) {
+ IsSigned = true;
+ KnownBits = NumSigned;
+ break;
+ }
+ return SDValue();
+ }
+ case ISD::SRL: {
+ if (NumZero >= 1 && NumSigned < NumZero) {
+ IsSigned = false;
+ KnownBits = NumZero;
+ break;
+ }
+ if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
+ IsSigned = true;
+ KnownBits = NumSigned;
+ break;
+ }
+ return SDValue();
+ }
+ }
+
+ unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
+ : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
+
+ // Find the smallest power-of-2 type that is legal for this vector size and
+ // operation, given the original type size and the number of known sign/zero
+ // bits.
+ EVT VT = Op.getValueType();
+ unsigned MinWidth =
+ std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
+ if (VT.isVector())
+ NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
+ // If the AVG operation is not legal at the narrower type, but (both) adds
+ // are known never to overflow, we can use the larger type size to do the
+ // transform.
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
+ return SDValue();
+
+ if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
+ Add.getOperand(1)) ==
+ SelectionDAG::OFK_Never &&
+ (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
+ Add2.getOperand(1)) ==
+ SelectionDAG::OFK_Never))
+ NVT = VT;
+ else
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ SDValue ResultAVG =
+ DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
+ DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
+ return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
+ ResultAVG);
+}
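+
+// Illustrative example: with i8 values zero-extended to i32,
+//   (srl (add (zext %a), (zext %b)), 1)
+// is recognized as an unsigned floor average and rewritten as a
+// zero-extended ISD::AVGFLOORU on a narrower legal type, with the operands
+// truncated to that type.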
+
+/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of Known bits for the expression (used to simplify the
+/// caller). The Known bits may only be accurate for those bits in the
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth, bool AssumeSingleUse) const {
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
+ assert(Op.getScalarValueSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+
+ // Don't know anything.
+ Known = KnownBits(BitWidth);
+
+ EVT VT = Op.getValueType();
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
+ unsigned NumElts = OriginalDemandedElts.getBitWidth();
+ assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ APInt DemandedBits = OriginalDemandedBits;
+ APInt DemandedElts = OriginalDemandedElts;
+ SDLoc dl(Op);
+ auto &DL = TLO.DAG.getDataLayout();
+
+ // Undef operand.
+ if (Op.isUndef())
+ return false;
+
+ // We can't simplify target constants.
+ if (Op.getOpcode() == ISD::TargetConstant)
+ return false;
+
+ if (Op.getOpcode() == ISD::Constant) {
+ // We know all of the bits for a constant!
+ Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
+ return false;
+ }
+
+ if (Op.getOpcode() == ISD::ConstantFP) {
+ // We know all of the bits for a floating point constant!
+ Known = KnownBits::makeConstant(
+ cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
+ return false;
+ }
+
+ // Other users may use these bits.
+ bool HasMultiUse = false;
+ if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
+ if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
+ return false;
+ }
+ // Allow multiple uses, just set the DemandedBits/Elts to all bits.
+ DemandedBits = APInt::getAllOnes(BitWidth);
+ DemandedElts = APInt::getAllOnes(NumElts);
+ HasMultiUse = true;
+ } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+ // Not demanding any bits/elts from Op.
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
+ return false;
+ }
+
+ KnownBits Known2;
+ switch (Op.getOpcode()) {
+ case ISD::SCALAR_TO_VECTOR: {
+ if (VT.isScalableVector())
+ return false;
+ if (!DemandedElts[0])
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+ KnownBits SrcKnown;
+ SDValue Src = Op.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
+ return true;
+
+ // Upper elements are undef, so only get the knownbits if we just demand
+ // the bottom element.
+ if (DemandedElts == 1)
+ Known = SrcKnown.anyextOrTrunc(BitWidth);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every demanded element.
+ // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::LOAD: {
+ auto *LD = cast<LoadSDNode>(Op);
+ if (getTargetConstantFromLoad(LD)) {
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ }
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ EVT MemVT = LD->getMemoryVT();
+ unsigned MemBits = MemVT.getScalarSizeInBits();
+ Known.Zero.setBitsFrom(MemBits);
+ return false; // Don't fall through, will infinitely loop.
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ return false;
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+
+ // If index isn't constant, assume we need all vector elements AND the
+ // inserted element.
+ APInt DemandedVecElts(DemandedElts);
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+ unsigned Idx = CIdx->getZExtValue();
+ DemandedVecElts.clearBit(Idx);
+
+ // Inserted element is not required.
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ }
+
+ KnownBits KnownScl;
+ unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+ APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ Known = KnownScl.anyextOrTrunc(BitWidth);
+
+ KnownBits KnownVec;
+ if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
+ Depth + 1))
+ return true;
+
+ if (!!DemandedVecElts)
+ Known = Known.intersectWith(KnownVec);
+
+ return false;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return false;
+ // Demand any elements from the subvector and the remainder from the src it
+ // is inserted into.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
+
+ KnownBits KnownSub, KnownSrc;
+ if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
+ Depth + 1))
+ return true;
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedSubElts)
+ Known = Known.intersectWith(KnownSub);
+ if (!!DemandedSrcElts)
+ Known = Known.intersectWith(KnownSrc);
+
+ // Attempt to avoid multi-use src if we don't need anything from it.
+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
+ !DemandedSrcElts.isAllOnes()) {
+ SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
+ SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewSub || NewSrc) {
+ NewSub = NewSub ? NewSub : Sub;
+ NewSrc = NewSrc ? NewSrc : Src;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
+ Op.getOperand(2));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return false;
+ // Offset the demanded elts by the subvector index.
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+
+ if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
+ Depth + 1))
+ return true;
+
+ // Attempt to avoid multi-use src if we don't need anything from it.
+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
+ SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (DemandedSrc) {
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
+ Op.getOperand(1));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ if (VT.isScalableVector())
+ return false;
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ APInt DemandedSubElts =
+ DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ // Known bits are shared by every demanded subvector element.
+ if (!!DemandedSubElts)
+ Known = Known.intersectWith(Known2);
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // Collect demanded elements from the shuffle operands.
+ APInt DemandedLHS, DemandedRHS;
+ if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
+ DemandedRHS))
+ break;
+
+ if (!!DemandedLHS || !!DemandedRHS) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known = Known.intersectWith(Known2);
+ }
+ if (!!DemandedRHS) {
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known = Known.intersectWith(Known2);
+ }
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS; here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
+ // Do not increment Depth here; that can cause an infinite loop.
+ KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
+ if ((LHSKnown.Zero & DemandedBits) ==
+ (~RHSC->getAPIntValue() & DemandedBits))
+ return TLO.CombineTo(Op, Op0);
+
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
+ DemandedElts, TLO))
+ return true;
+
+ // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+ // constant, but if this 'and' is only clearing bits that were just set by
+ // the xor, then this 'and' can be eliminated by shrinking the mask of
+ // the xor. For example, for a 32-bit X:
+ // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+ if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+ LHSKnown.One == ~RHSC->getAPIntValue()) {
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
+ return TLO.CombineTo(Op, Xor);
+ }
+ }
+
+ // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
+ // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
+ if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
+ (Op0.getOperand(0).isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
+ Op0->hasOneUse()) {
+ unsigned NumSubElts =
+ Op0.getOperand(1).getValueType().getVectorNumElements();
+ unsigned SubIdx = Op0.getConstantOperandVal(2);
+ APInt DemandedSub =
+ APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
+ KnownBits KnownSubMask =
+ TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
+ if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
+ SDValue NewAnd =
+ TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
+ SDValue NewInsert =
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
+ Op0.getOperand(1), Op0.getOperand(2));
+ return TLO.CombineTo(Op, NewInsert);
+ }
+ }
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
+ TLO))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ Known &= Known2;
+ break;
+ }
+ case ISD::OR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
+ // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
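+ // For example, with C1 = 0x0F and C2 = 0xF0 (illustrative constants):
+ // (or (and X, 0x0F), (and (or X, Y), 0xF0))
+ //   -> (or (and X, 0xFF), (and Y, 0xF0))
+ // since ((X | Y) & C2) == (X & C2) | (Y & C2) and the two X masks merge.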
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ // Attempt to match all commutations - m_c_Or would've been useful!
+ for (int I = 0; I != 2; ++I) {
+ SDValue X = Op.getOperand(I).getOperand(0);
+ SDValue C1 = Op.getOperand(I).getOperand(1);
+ SDValue Alt = Op.getOperand(1 - I).getOperand(0);
+ SDValue C2 = Op.getOperand(1 - I).getOperand(1);
+ if (Alt.getOpcode() == ISD::OR) {
+ for (int J = 0; J != 2; ++J) {
+ if (X == Alt.getOperand(J)) {
+ SDValue Y = Alt.getOperand(1 - J);
+ if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
+ {C1, C2})) {
+ SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
+ SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Known |= Known2;
+ break;
+ }
+ case ISD::XOR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if (DemandedBits.isSubsetOf(Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other,
+ // turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
+
+ ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
+ if (C) {
+ // If one side is a constant, and all of the set bits in the constant are
+ // also known set on the other side, turn this into an AND, as we know
+ // the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if (C->getAPIntValue() == Known2.One) {
+ SDValue ANDC =
+ TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
+ }
+
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a 'not' op, and that is better for combining
+ // and codegen.
+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
+ }
+
+ unsigned Op0Opcode = Op0.getOpcode();
+ if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
+ if (ConstantSDNode *ShiftC =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ // Don't crash on an oversized shift. We cannot guarantee that a
+ // bogus shift has been simplified to undef.
+ if (ShiftC->getAPIntValue().ult(BitWidth)) {
+ uint64_t ShiftAmt = ShiftC->getZExtValue();
+ APInt Ones = APInt::getAllOnes(BitWidth);
+ Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
+ : Ones.lshr(ShiftAmt);
+ const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
+ if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
+ TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
+ // If the xor constant is a demanded mask, do a 'not' before the
+ // shift:
+ // xor (X << ShiftC), XorC --> (not X) << ShiftC
+ // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
+ SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
+ Op0.getOperand(1)));
+ }
+ }
+ }
+ }
+ }
+
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (!C || !C->isAllOnes())
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ Known ^= Known2;
+ break;
+ }
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ Known = Known.intersectWith(Known2);
+ break;
+ case ISD::VSELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ Known = Known.intersectWith(Known2);
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ Known = Known.intersectWith(Known2);
+ break;
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
+ getBooleanContents(Op0.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return TLO.CombineTo(Op, Op0);
+
+ // TODO: Should we check for other forms of sign-bit comparisons?
+ // Examples: X <= -1, X >= 0
+ }
+ if (getBooleanContents(Op0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case ISD::SHL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
+
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
+ unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ // TODO - support non-uniform vector amounts.
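+ // For example (i32, illustrative): if bits 0-4 of the result are not
+ // demanded, ((X >>u 3) << 5) can become (X << 2); the two only differ in
+ // the undemanded low bits.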
+ if (Op0.getOpcode() == ISD::SRL) {
+ if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ }
+ }
+ }
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ // TODO - support non-uniform vector amounts.
+ if (Op0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = Op0.getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ unsigned InnerBits = InnerVT.getScalarSizeInBits();
+ if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ SDValue NarrowShl = TLO.DAG.getNode(
+ ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ }
+
+ // Repeat the SHL optimization above in cases where an extension
+ // intervenes: (shl (anyext (shr x, c1)), c2) to
+ // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+ // aren't demanded (as above) and that the shifted upper c1 bits of
+ // x aren't demanded.
+ // TODO - support non-uniform vector amounts.
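+ // For example (illustrative widths): with x:i16 extended to i32,
+ // (shl (anyext (srl x, 2)), 5) -> (shl (anyext x), 3), which is valid when
+ // only bits [5,19) of the result are demanded.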
+ if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
+ InnerOp.hasOneUse()) {
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
+ unsigned InnerShAmt = SA2->getZExtValue();
+ if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
+ DemandedBits.getActiveBits() <=
+ (InnerBits - InnerShAmt + ShAmt) &&
+ DemandedBits.countr_zero() >= ShAmt) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
+ }
+ }
+ }
+ }
+
+ APInt InDemandedMask = DemandedBits.lshr(ShAmt);
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
+ // low bits known zero.
+ Known.Zero.setLowBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // Try shrinking the operation as long as the shift amount will still be
+ // in range.
+ if ((ShAmt < DemandedBits.getActiveBits()) &&
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+ } else {
+ // This is a variable shift, so we can't shift the demand mask by a known
+ // amount. But if we are not demanding high bits, then we are not
+ // demanding those bits from the pre-shifted operand either.
+ if (unsigned CTLZ = DemandedBits.countl_zero()) {
+ APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
+ if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
+ Depth + 1)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
+ return true;
+ }
+ Known.resetAll();
+ }
+ }
+
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
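+ // For example (illustrative): if Op0:i32 has 24 sign bits and we shift
+ // left by at most 4 while demanding only bits 16-31, every demanded bit of
+ // both Op0 and (Op0 << ShAmt) is a copy of the sign bit, so Op0 can be
+ // used directly.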
+ if (const APInt *MaxSA =
+ TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
+ return TLO.CombineTo(Op, Op0);
+ }
+ break;
+ }
+ case ISD::SRL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
+
+ // Try to match AVG patterns.
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
+ unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ // TODO - support non-uniform vector amounts.
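+ // For example (i32, illustrative): if the top 5 bits of the result are not
+ // demanded, ((X << 3) >>u 5) can become (X >>u 2).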
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ }
+ }
+ }
+
+ APInt InDemandedMask = (DemandedBits << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
+
+ // Narrow shift to lower half - similar to ShrinkDemandedOp.
+ // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
+ if ((BitWidth % 2) == 0 && !VT.isVector() &&
+ ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
+ TLO.DAG.MaskedValueIsZero(
+ Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) {
+ EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
+ if (isNarrowingProfitable(VT, HalfVT) &&
+ isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
+ SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
+ ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShift =
+ TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ } else {
+ // Use generic knownbits computation as it has support for non-uniform
+ // shift amounts.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ }
+ break;
+ }
+ case ISD::SRA: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
+
+ // If we only want bits that already match the signbit then we don't need
+ // to shift.
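+ // For example (illustrative): if Op0:i32 has 25 sign bits and only bits
+ // 8-31 are demanded, every demanded bit of both Op0 and the shift result
+ // is a copy of the sign bit, so Op0 can be used directly.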
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
+ if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
+ NumHiDemandedBits)
+ return TLO.CombineTo(Op, Op0);
+
+ // If this is an arithmetic shift right and only the low bit is demanded, we
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedBits.isOne())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+
+ // Try to match AVG patterns.
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
+ unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ APInt InDemandedMask = (DemandedBits << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (DemandedBits.countl_zero() < ShAmt)
+ InDemandedMask.setSignBit();
+
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ DemandedBits.countl_zero() >= ShAmt) {
+ SDNodeFlags Flags;
+ Flags.setExact(Op->getFlags().hasExact());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
+ }
+
+ int Log2 = DemandedBits.exactLogBase2();
+ if (Log2 >= 0) {
+ // The bit must come from the sign.
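+ // For example (i32, illustrative): if only bit 30 of (X >>s 8) is demanded,
+ // that bit is pure sign extension (it would be bit 38 of X), so it equals
+ // the sign bit and (X >>u 1) produces the same demanded bit.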
+ SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
+ }
+
+ if (Known.One[BitWidth - ShAmt - 1])
+ // New bits are known one.
+ Known.One.setHighBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::FSHL:
+ case ISD::FSHR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
+
+ // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
+ // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
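+ // For example (i8 fshl with Amt = 3, illustrative): the result is
+ // (Op0 << 3) | (Op1 >> 5), so demanding only result bits 0-2 demands no
+ // bits of Op0 and only the top 3 bits of Op1.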
+ APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
+ APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+
+ Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known = Known.unionWith(Known2);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
+ !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
+ DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
+ DemandedOp1, Op2);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ }
+
+ // For pow-2 bitwidths the shift amount is used modulo the bitwidth, so we
+ // only demand the bottom log2(BitWidth) bits of the amount.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ }
+ break;
+ }
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ bool IsROTL = (Op.getOpcode() == ISD::ROTL);
+
+ // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
+ if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, Op0);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+ unsigned RevAmt = BitWidth - Amt;
+
+ // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
+ // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
+ APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ // rot*(x, 0) --> x
+ if (Amt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // See if we don't demand either half of the rotated bits.
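+ // For example (i8, illustrative): rotl(X, 3) == (X << 3) | (X >>u 5); if
+ // bits 0-2 are not demanded, the wrapped-around half is irrelevant and a
+ // plain (X << 3) suffices.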
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
+ DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
+ }
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
+ DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ }
+ }
+
+ // For pow-2 bitwidths the shift amount is used modulo the bitwidth, so we
+ // only demand the bottom log2(BitWidth) bits of the amount.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ }
+ break;
+ }
+ case ISD::UMIN: {
+ // Check if one arg is always less than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known0, Known1);
+ if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
+ if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
+ break;
+ }
+ case ISD::UMAX: {
+ // Check if one arg is always greater than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known0, Known1);
+ if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
+ if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
+ break;
+ }
+ case ISD::BITREVERSE: {
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedSrcBits = DemandedBits.reverseBits();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.reverseBits();
+ Known.Zero = Known2.Zero.reverseBits();
+ break;
+ }
+ case ISD::BSWAP: {
+ SDValue Src = Op.getOperand(0);
+
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedBits.countl_zero();
+ unsigned NTZ = DemandedBits.countr_zero();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ = alignDown(NLZ, 8);
+ NTZ = alignDown(NTZ, 8);
+ // If we need exactly one byte, we can do this transformation.
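+ // For example (i32, illustrative): if only bits 8-15 of bswap(X) are
+ // demanded, they come from bits 16-23 of X (NLZ = 16, NTZ = 8), so
+ // (X >>u 8) produces the same demanded byte.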
+ if (BitWidth - NLZ - NTZ == 8) {
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
+ if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
+ unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
+ SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
+ SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ APInt DemandedSrcBits = DemandedBits.byteSwap();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.byteSwap();
+ Known.Zero = Known2.Zero.byteSwap();
+ break;
+ }
+ case ISD::CTPOP: {
+ // If only 1 bit is demanded, replace with PARITY as long as we're before
+ // op legalization.
+ // FIXME: Limit to scalars for now.
+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
+ Op.getOperand(0)));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ SDValue Op0 = Op.getOperand(0);
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
+
+ // If we only care about the highest bit, don't bother shifting right.
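+ // For example (illustrative): for (sext_inreg X, i8) on i32 with only bit
+ // 31 demanded, the result's sign bit is bit 7 of X, so (shl X, 24) yields
+ // the same demanded bit.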
+ if (DemandedBits.isSignMask()) {
+ unsigned MinSignedBits =
+ TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;
+ // However if the input is already sign extended we expect the sign
+ // extension to be dropped altogether later and do not simplify.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
+ getShiftAmountTy(VT, DL));
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
+ }
+ }
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (DemandedBits.getActiveBits() <= ExVTBits)
+ return TLO.CombineTo(Op, Op0);
+
+ APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits.setBit(ExVTBits - 1);
+
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (Known.Zero[ExVTBits - 1])
+ return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
+
+ APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ Known.One.setBitsFrom(ExVTBits);
+ Known.Zero &= Mask;
+ } else { // Input sign bit unknown
+ Known.Zero &= Mask;
+ Known.One &= Mask;
+ }
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT HalfVT = Op.getOperand(0).getValueType();
+ unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
+
+ APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
+
+ KnownBits KnownLo, KnownHi;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
+ return true;
+
+ Known = KnownHi.concat(KnownLo);
+ break;
+ }
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
+ break;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::SIGN_EXTEND: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InDemandedBits.setBit(InBits - 1);
+
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (Known.isNonNegative()) {
+ unsigned Opc =
+ IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::ANY_EXTEND: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
+
+ // If we only need the bottom element then we can just bitcast.
+ // TODO: Handle ANY_EXTEND?
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.anyext(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
+ break;
+ }
+ case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
+ APInt TruncMask = DemandedBits.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ Known = Known.trunc(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ switch (Src.getOpcode()) {
+ default:
+ break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+
+ if (Src.getNode()->hasOneUse()) {
+ const APInt *ShAmtC =
+ TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
+ if (!ShAmtC || ShAmtC->uge(BitWidth))
+ break;
+ uint64_t ShVal = ShAmtC->getZExtValue();
+
+ APInt HighBits =
+ APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
+ HighBits.lshrInPlace(ShVal);
+ HighBits = HighBits.trunc(BitWidth);
+
+ if (!(HighBits & DemandedBits)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewShAmt = TLO.DAG.getConstant(
+ ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
+ }
+ }
+ break;
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
+ EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
+ TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ Known.Zero |= ~InMask;
+ Known.One &= (~Known.Zero);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
+ unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+
+ if (SrcEltCnt.isScalable())
+ return false;
+
+ // Demand the bits from every vector element without a constant index.
+ unsigned NumSrcElts = SrcEltCnt.getFixedValue();
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
+ if (CIdx->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
+
+ // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+ // anything about the extended bits.
+ APInt DemandedSrcBits = DemandedBits;
+ if (BitWidth > EltBitWidth)
+ DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
+
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ Known = Known2;
+ if (BitWidth > EltBitWidth)
+ Known = Known.anyext(BitWidth);
+ break;
+ }
+ case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ return false;
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
+ DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
+ SrcVT.isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
+ SrcVT != MVT::f128) {
+ // Cannot eliminate/lower SHL for f128 yet.
+ EVT Ty = OpVTLegal ? VT : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
+ unsigned OpVTSizeInBits = Op.getValueSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
+ unsigned ShVal = Op.getValueSizeInBits() - 1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ }
+ }
+
+ // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
+ // Demand the elt/bit if any of the original elts/bits are demanded.
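+ // For example (little-endian, illustrative): for (i64 bitcast (v2i32 X)),
+ // demanding bits 32-47 of the i64 demands bits 0-15 of element 1 of X.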
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
+ unsigned Scale = BitWidth / NumSrcEltBits;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
+ if (!Sub.isZero()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
+ return true;
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
+ } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
+ // TODO - bigendian once we have test coverage.
+ unsigned Scale = NumSrcEltBits / BitWidth;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * BitWidth;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SrcVT.isVector()) {
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
+ return true;
+ }
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ }
+
+ // If this is a bitcast, let computeKnownBits handle it. Only do this on a
+ // recursive call where Known may be useful to the caller.
+ if (Depth > 0) {
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false;
+ }
+ break;
+ }
+ case ISD::MUL:
+ if (DemandedBits.isPowerOf2()) {
+ // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
+ // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
+ // odd (has LSB set), then the left-shifted low bit of X is the answer.
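+ // For example (illustrative): demanding only bit 4 of (X * 48) qualifies,
+ // since 48 == 3 << 4 with 3 odd; bit 4 of the product is bit 0 of X, so
+ // (X << 4) is equivalent for that bit.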
+ unsigned CTZ = DemandedBits.countr_zero();
+ ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (C && C->getAPIntValue().countr_zero() == CTZ) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
+ return TLO.CombineTo(Op, Shl);
+ }
+ }
+ // For a squared value "X * X", the bottom 2 bits are known: bit 0 equals
+ // X[0] and bit 1 is always zero, since writing X = 2a + X[0] gives
+ // X * X = 4*(a*a + a*X[0]) + X[0].
+ if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
+ SDValue One = TLO.DAG.getConstant(1, dl, VT);
+ SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
+ return TLO.CombineTo(Op, And1);
+ }
+ [[fallthrough]];
+ case ISD::ADD:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ unsigned DemandedBitsLZ = DemandedBits.countl_zero();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
+ KnownBits KnownOp0, KnownOp1;
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
+ Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
+ Depth + 1) ||
+ // See if the operation should be performed at a smaller bit width.
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
+ return true;
+ }
+
+ // neg x with only low bit demanded is simply x.
+ if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
+ isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
+ return TLO.CombineTo(Op, Op1);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // If we have a constant operand, we may be able to turn it into -1 if we
+ // do not demand the high bits. This can make the constant smaller to
+ // encode, allow more general folding, or match specialized instruction
+ // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
+ // is probably not useful (and could be detrimental).
+ ConstantSDNode *C = isConstOrConstSplat(Op1);
+ APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
+ if (C && !C->isAllOnes() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnes()) {
+ SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+
+ // Match a multiply with a disguised negated-power-of-2 and convert to an
+ // equivalent shift-left amount.
+ // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
+ auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
+ if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
+ return 0;
+
+ // Don't touch opaque constants. Also, ignore zero and power-of-2
+ // multiplies. Those will get folded later.
+ ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
+ if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
+ !MulC->getAPIntValue().isPowerOf2()) {
+ APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
+ if (UnmaskedC.isNegatedPowerOf2())
+ return (-UnmaskedC).logBase2();
+ }
+ return 0;
+ };
+
+ auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
+ SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
+ return TLO.CombineTo(Op, Res);
+ };
+
+ if (isOperationLegalOrCustom(ISD::SHL, VT)) {
+ if (Op.getOpcode() == ISD::ADD) {
+ // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op0))
+ return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
+ // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op1))
+ return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
+ }
+ if (Op.getOpcode() == ISD::SUB) {
+ // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op1))
+ return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
+ }
+ }
+
+ if (Op.getOpcode() == ISD::MUL) {
+ Known = KnownBits::mul(KnownOp0, KnownOp1);
+ } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
+ Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
+ Flags.hasNoSignedWrap(), KnownOp0,
+ KnownOp1);
+ }
+ break;
+ }
+ default:
+ // We also ask the target about intrinsics (which could be specific to it).
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (Op.getValueType().isScalableVector())
+ break;
+ if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
+ Known, TLO, Depth))
+ return true;
+ break;
+ }
+
+ // Just use computeKnownBits to compute output bits.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if (!isTargetCanonicalConstantNode(Op) &&
+ DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+ // Avoid folding to a constant if any OpaqueConstant is involved.
+ const SDNode *N = Op.getNode();
+ for (SDNode *Op :
+ llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->isOpaque())
+ return false;
+ }
+ if (VT.isInteger())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ if (VT.isFloatingPoint())
+ return TLO.CombineTo(
+ Op,
+ TLO.DAG.getConstantFP(
+ APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
+ }
+
+  // A multi-use 'all demanded elts' simplify failed to find any known bits.
+ // Try again just for the original demanded elts.
+ // Ensure we do this AFTER constant folding above.
+ if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
+ Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
+
+ return false;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+
+ APInt KnownUndef, KnownZero;
+ bool Simplified =
+ SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ return Simplified;
+}
+
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
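+/// For example, a lane whose two inputs are both undef typically
+/// constant-folds to undef, so that output lane is marked as known-undef.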
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+ const APInt &UndefOp0,
+ const APInt &UndefOp1) {
+ EVT VT = BO.getValueType();
+ assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
+ "Vector binop only");
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
+ assert(UndefOp0.getBitWidth() == NumElts &&
+ UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+ auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+ const APInt &UndefVals) {
+ if (UndefVals[Index])
+ return DAG.getUNDEF(EltVT);
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ // Try hard to make sure that the getNode() call is not creating temporary
+ // nodes. Ignore opaque integers because they do not constant fold.
+ SDValue Elt = BV->getOperand(Index);
+ auto *C = dyn_cast<ConstantSDNode>(Elt);
+ if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+ return Elt;
+ }
+
+ return SDValue();
+ };
+
+ APInt KnownUndef = APInt::getZero(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If both inputs for this element are either constant or undef and match
+ // the element type, compute the constant/undef result for this element of
+ // the vector.
+ // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+ // not handle FP constants. The code within getNode() should be refactored
+ // to avoid the danger of creating a bogus temporary node here.
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+ SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+ if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+ if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+ KnownUndef.setBit(i);
+ }
+ return KnownUndef;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(
+ SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
+ APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
+ APInt DemandedElts = OriginalDemandedElts;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ assert(VT.isVector() && "Expected vector op");
+
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
+
+ const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
+ if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
+ return false;
+
+ // TODO: For now we assume we know nothing about scalable vectors.
+ if (VT.isScalableVector())
+ return false;
+
+ assert(VT.getVectorNumElements() == NumElts &&
+ "Mask size mismatches value type element count!");
+
+ // Undef operand.
+ if (Op.isUndef()) {
+ KnownUndef.setAllBits();
+ return false;
+ }
+
+ // If Op has other users, assume that all elements are needed.
+ if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
+ DemandedElts.setAllBits();
+
+ // Not demanding any elements from Op.
+ if (DemandedElts == 0) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return false;
+
+ SDLoc DL(Op);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
+
+ // Helper for demanding the specified elements and all the bits of both binary
+ // operands.
+ auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
+ SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
+ TLO.DAG, Depth + 1);
+ SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
+ TLO.DAG, Depth + 1);
+ if (NewOp0 || NewOp1) {
+ SDValue NewOp = TLO.DAG.getNode(
+ Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ return false;
+ };
+
+ switch (Opcode) {
+ case ISD::SCALAR_TO_VECTOR: {
+ if (!DemandedElts[0]) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ SDValue ScalarSrc = Op.getOperand(0);
+ if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Src = ScalarSrc.getOperand(0);
+ SDValue Idx = ScalarSrc.getOperand(1);
+ EVT SrcVT = Src.getValueType();
+
+ ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
+
+ if (SrcEltCnt.isScalable())
+ return false;
+
+ unsigned NumSrcElts = SrcEltCnt.getFixedValue();
+ if (isNullConstant(Idx)) {
+ APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
+ APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
+ APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+ }
+ }
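+    // Only lane 0 of a SCALAR_TO_VECTOR is defined; all other lanes are undef.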
+ KnownUndef.setHighBits(NumElts - 1);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // We only handle vectors here.
+ // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
+ if (!SrcVT.isVector())
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ if (NumSrcElts == NumElts)
+ return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1);
+
+ APInt SrcDemandedElts, SrcZero, SrcUndef;
+
+    // Bitcast from 'large element' src vector to 'small element' vector: we
+    // must demand a source element if any DemandedElt maps to it.
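+    // e.g. for a v2i64 -> v4i32 bitcast, demanding i32 element 2 or 3 demands
+    // i64 source element 1.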
+ if ((NumElts % NumSrcElts) == 0) {
+ unsigned Scale = NumElts / NumSrcElts;
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+ // of the large element.
+ // TODO - bigendian once we have test coverage.
+ if (IsLE) {
+ unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Ofs = (i % Scale) * EltSizeInBits;
+ SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+ }
+
+ KnownBits Known;
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ // The bitcast has split each wide element into a number of
+ // narrow subelements. We have just computed the Known bits
+ // for wide elements. See if element splitting results in
+ // some subelements being zero. Only for demanded elements!
+ for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
+ if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
+ .isAllOnes())
+ continue;
+ for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
+ unsigned Elt = Scale * SrcElt + SubElt;
+ if (DemandedElts[Elt])
+ KnownZero.setBit(Elt);
+ }
+ }
+ }
+
+      // If the src element is zero/undef then the output elements it covers
+      // will be too - only demanded elements are guaranteed to be correct.
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ if (SrcDemandedElts[i]) {
+ if (SrcZero[i])
+ KnownZero.setBits(i * Scale, (i + 1) * Scale);
+ if (SrcUndef[i])
+ KnownUndef.setBits(i * Scale, (i + 1) * Scale);
+ }
+ }
+ }
+
+    // Bitcast from 'small element' src vector to 'large element' vector: we
+    // demand all smaller source elements covered by the larger demanded
+    // element of this vector.
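+    // e.g. for a v4i32 -> v2i64 bitcast, demanding i64 element 0 demands i32
+    // source elements 0 and 1.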
+ if ((NumSrcElts % NumElts) == 0) {
+ unsigned Scale = NumSrcElts / NumElts;
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // If all the src elements covering an output element are zero/undef, then
+ // the output element will be as well, assuming it was demanded.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (DemandedElts[i]) {
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
+ KnownZero.setBit(i);
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
+ KnownUndef.setBit(i);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Check all elements and simplify any unused elements with UNDEF.
+ if (!DemandedElts.isAllOnes()) {
+ // Don't simplify BROADCASTS.
+ if (llvm::any_of(Op->op_values(),
+ [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
+ SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
+ bool Updated = false;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i] && !Ops[i].isUndef()) {
+ Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
+ KnownUndef.setBit(i);
+ Updated = true;
+ }
+ }
+ if (Updated)
+ return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
+ }
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (SrcOp.isUndef()) {
+ KnownUndef.setBit(i);
+ } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
+ (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, i * NumSubElts);
+ KnownZero.insertBits(SubZero, i * NumSubElts);
+ }
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnes()) {
+ bool FoundNewSub = false;
+ SmallVector<SDValue, 2> DemandedSubOps;
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
+ SubOp, SubElts, TLO.DAG, Depth + 1);
+ DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
+ FoundNewSub = NewSubOp ? true : FoundNewSub;
+ }
+ if (FoundNewSub) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+    // Demand any elements from the subvector and the remainder from the src
+    // it is inserted into.
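+    // e.g. inserting a v2i32 subvector at index 2 into a v8i32 source: result
+    // elements [2,3] come from the subvector, the rest from the source.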
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
+
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+
+ // If none of the src operand elements are demanded, replace it with undef.
+ if (!DemandedSrcElts && !Src.isUndef())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT), Sub,
+ Op.getOperand(2)));
+
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, Idx);
+ KnownZero.insertBits(SubZero, Idx);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
+ SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedSrcElts, TLO.DAG, Depth + 1);
+ SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
+ Sub, DemandedSubElts, TLO.DAG, Depth + 1);
+ if (NewSrc || NewSub) {
+ NewSrc = NewSrc ? NewSrc : Src;
+ NewSub = NewSub ? NewSub : Sub;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
+ NewSub, Op.getOperand(2));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
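+    // e.g. extracting a v4i32 subvector at index 4 from a v8i32 source:
+    // demanding result element 1 demands source element 5.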
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnes()) {
+ SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewSrc) {
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
+ Op.getOperand(1));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+
+ // For a legal, constant insertion index, if we don't need this insertion
+ // then strip it, else remove it from the demanded elts.
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
+ unsigned Idx = CIdx->getZExtValue();
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+
+ APInt DemandedVecElts(DemandedElts);
+ DemandedVecElts.clearBit(Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ KnownUndef.setBitVal(Idx, Scl.isUndef());
+
+ KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
+ break;
+ }
+
+ APInt VecUndef, VecZero;
+ if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
+ Depth + 1))
+ return true;
+ // Without knowing the insertion index we can't set KnownUndef/KnownZero.
+ break;
+ }
+ case ISD::VSELECT: {
+ SDValue Sel = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
+ // Try to transform the select condition based on the current demanded
+ // elements.
+ APInt UndefSel, UndefZero;
+ if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
+ Depth + 1))
+ return true;
+
+ // See if we can simplify either vselect operand.
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
+ return true;
+
+ KnownUndef = UndefLHS & UndefRHS;
+ KnownZero = ZeroLHS & ZeroRHS;
+
+ // If we know that the selected element is always zero, we don't need the
+ // select value element.
+ APInt DemandedSel = DemandedElts & ~KnownZero;
+ if (DemandedSel != DemandedElts)
+ if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
+ Depth + 1))
+ return true;
+
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from shuffle operands.
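+    // Mask values in [0, NumElts) select element M from LHS; values in
+    // [NumElts, 2*NumElts) select element (M - NumElts) from RHS.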
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ // See if we can simplify either shuffle operand.
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
+ return true;
+
+ // Simplify mask using undef elements from LHS/RHS.
+ bool Updated = false;
+ bool IdentityLHS = true, IdentityRHS = true;
+ SmallVector<int, 32> NewMask(ShuffleMask);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int &M = NewMask[i];
+ if (M < 0)
+ continue;
+ if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
+ (M >= (int)NumElts && UndefRHS[M - NumElts])) {
+ Updated = true;
+ M = -1;
+ }
+ IdentityLHS &= (M < 0) || (M == (int)i);
+ IdentityRHS &= (M < 0) || ((M - NumElts) == i);
+ }
+
+    // Update legal shuffle masks based on demanded elements if it won't reduce
+    // to Identity, which can cause premature removal of the shuffle mask.
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
+ SDValue LegalShuffle =
+ buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
+ if (LegalShuffle)
+ return TLO.CombineTo(Op, LegalShuffle);
+ }
+
+ // Propagate undef/zero elements from LHS/RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ KnownUndef.setBit(i);
+ } else if (M < (int)NumElts) {
+ if (UndefLHS[M])
+ KnownUndef.setBit(i);
+ if (ZeroLHS[M])
+ KnownZero.setBit(i);
+ } else {
+ if (UndefRHS[M - NumElts])
+ KnownUndef.setBit(i);
+ if (ZeroRHS[M - NumElts])
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
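+    // *_EXTEND_VECTOR_INREG only uses the low NumElts elements of the source;
+    // e.g. a v8i16 -> v4i32 sign_extend_vector_inreg reads only source
+    // elements 0-3.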
+ APInt SrcUndef, SrcZero;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+ if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
+ DemandedSrcElts == 1) {
+ // aext - if we just need the bottom element then we can bitcast.
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+ }
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
+ Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
+ }
+ break;
+ }
+
+ // TODO: There are more binop opcodes that could be handled here - MIN,
+ // MAX, saturated math, etc.
+ case ISD::ADD: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1, /*AssumeSingleUse*/ true))
+ return true;
+ }
+ [[fallthrough]];
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
+ return true;
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
+ return true;
+
+ KnownZero = ZeroLHS & ZeroRHS;
+ KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnes())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
+ break;
+ }
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
+ return true;
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
+ return true;
+
+ KnownZero = ZeroLHS;
+ KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnes())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
+ break;
+ }
+ case ISD::MUL:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+    // If we know that a demanded element was zero in Op1 we don't need to
+    // demand it in Op0 - it's guaranteed to be zero.
+ APInt DemandedElts0 = DemandedElts & ~SrcZero;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+
+ KnownUndef &= DemandedElts0;
+ KnownZero &= DemandedElts0;
+
+ // If every element pair has a zero/undef then just fold to zero.
+ // fold (and x, undef) -> 0 / (and x, 0) -> 0
+ // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
+ if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ // If either side has a zero element, then the result element is zero, even
+ // if the other is an UNDEF.
+ // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+ // and then handle 'and' nodes with the rest of the binop opcodes.
+ KnownZero |= SrcZero;
+ KnownUndef &= SrcUndef;
+ KnownUndef &= ~KnownZero;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnes())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
+ break;
+ }
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
+ break;
+ default: {
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth))
+ return true;
+ } else {
+ KnownBits Known;
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
+ if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
+ TLO, Depth, AssumeSingleUse))
+ return true;
+ }
+ break;
+ }
+ }
+ assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
+
+ // Constant fold all undef cases.
+ // TODO: Handle zero cases as well.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+ return false;
+}
+
+/// Determine which of the bits specified in Mask are known to be either zero or
+/// one and return them in the Known.
+void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ Known.resetAll();
+}
+
+void TargetLowering::computeKnownBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, KnownBits &Known,
+ const APInt &DemandedElts, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ Known.resetAll();
+}
+
+void TargetLowering::computeKnownBitsForFrameIndex(
+ const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
+}
+
+Align TargetLowering::computeKnownAlignForTargetInstr(
+ GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ return Align(1);
+}
+
+/// This method can be implemented by targets that want to expose additional
+/// information about sign bits to the DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &,
+ const SelectionDAG &,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+unsigned TargetLowering::computeNumSignBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI, unsigned Depth) const {
+ return 1;
+}
+
+bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedVectorElts if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
+bool TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedBits if you don't know whether Op"
+ " is a target node!");
+ computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
+ return false;
+}
+
+SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
+ " is a target node!");
+ return SDValue();
+}
+
+SDValue
+TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const {
+ bool LegalMask = isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(N0, N1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (!LegalMask)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
+}
+
+const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
+ return nullptr;
+}
+
+bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
+bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use canCreateUndefOrPoison if you don't know whether Op"
+ " is a target node!");
+ // Be conservative and return true.
+ return true;
+}
+
+bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isKnownNeverNaN if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
+bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &UndefElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isSplatValue if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
+// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
+// work with truncating build vectors and vectors with elements of less than
+// 8 bits.
+bool TargetLowering::isConstTrueVal(SDValue N) const {
+ if (!N)
+ return false;
+
+ unsigned EltWidth;
+ APInt CVal;
+ if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
+ CVal = CN->getAPIntValue();
+ EltWidth = N.getValueType().getScalarSizeInBits();
+ } else
+ return false;
+
+ // If this is a truncating splat, truncate the splat value.
+ // Otherwise, we may fail to match the expected values below.
+ if (EltWidth < CVal.getBitWidth())
+ CVal = CVal.trunc(EltWidth);
+
+ switch (getBooleanContents(N.getValueType())) {
+ case UndefinedBooleanContent:
+ return CVal[0];
+ case ZeroOrOneBooleanContent:
+ return CVal.isOne();
+ case ZeroOrNegativeOneBooleanContent:
+ return CVal.isAllOnes();
+ }
+
+ llvm_unreachable("Invalid boolean contents");
+}
+
+bool TargetLowering::isConstFalseVal(SDValue N) const {
+ if (!N)
+ return false;
+
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) {
+ const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+    // Only interested in constant splats; we don't care about undef
+    // elements in identifying boolean constants, and getConstantSplatNode
+    // returns NULL if all ops are undef.
+ CN = BV->getConstantSplatNode();
+ if (!CN)
+ return false;
+ }
+
+ if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
+ return !CN->getAPIntValue()[0];
+
+ return CN->isZero();
+}
+
+bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
+ bool SExt) const {
+ if (VT == MVT::i1)
+ return N->isOne();
+
+ TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
+ switch (Cnt) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // An extended value of 1 is always true, unless its original type is i1,
+ // in which case it will be sign extended to -1.
+ return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return N->isAllOnes() && SExt;
+ }
+ llvm_unreachable("Unexpected enumeration.");
+}
+
+/// This helper function of SimplifySetCC tries to optimize the comparison when
+/// either operand of the SetCC node is a bitwise-and instruction.
+SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
+ if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+ std::swap(N0, N1);
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
+ if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
+ (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+ return SDValue();
+
+ // (X & Y) != 0 --> zextOrTrunc(X & Y)
+ // iff everything but LSB is known zero:
+ if (Cond == ISD::SETNE && isNullConstant(N1) &&
+ (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
+ getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
+ unsigned NumEltBits = OpVT.getScalarSizeInBits();
+ APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
+ if (DAG.MaskedValueIsZero(N0, UpperBits))
+ return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
+ }
+
+ // Try to eliminate a power-of-2 mask constant by converting to a signbit
+ // test in a narrow type that we can truncate to with no cost. Examples:
+ // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
+ // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
+ // TODO: This conservatively checks for type legality on the source and
+ // destination types. That may inhibit optimizations, but it also
+ // allows setcc->shift transforms that may be more beneficial.
+ auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
+ isTypeLegal(OpVT) && N0.hasOneUse()) {
+ EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
+ AndC->getAPIntValue().getActiveBits());
+ if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
+ SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
+ SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
+ return DAG.getSetCC(DL, VT, Trunc, Zero,
+ Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
+ }
+ }
+
+ // Match these patterns in any of their permutations:
+ // (X & Y) == Y
+ // (X & Y) != Y
+ SDValue X, Y;
+ if (N0.getOperand(0) == N1) {
+ X = N0.getOperand(1);
+ Y = N0.getOperand(0);
+ } else if (N0.getOperand(1) == N1) {
+ X = N0.getOperand(0);
+ Y = N0.getOperand(1);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
+ // Note that where Y is variable and is known to have at most one bit set
+ // (for example, if it is Z & 1) we cannot do this; the expressions are not
+ // equivalent when Y == 0.
+ assert(OpVT.isInteger());
+ Cond = ISD::getSetCCInverse(Cond, OpVT);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N0.getSimpleValueType()))
+ return DAG.getSetCC(DL, VT, N0, Zero, Cond);
+ } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
+ // If the target supports an 'and-not' or 'and-complement' logic operation,
+ // try to use that to make a comparison operation more efficient.
+ // But don't do this transform if the mask is a single bit because there are
+ // more efficient ways to deal with that case (for example, 'bt' on x86 or
+ // 'rlwinm' on PPC).
+
+ // Bail out if the compare operand that we want to turn into a zero is
+ // already a zero (otherwise, infinite loop).
+ auto *YConst = dyn_cast<ConstantSDNode>(Y);
+ if (YConst && YConst->isZero())
+ return SDValue();
+
+ // Transform this into: ~X & Y == 0.
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+ return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
+ }
+
+ return SDValue();
+}
+
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy or not. The pattern that is
+/// best at the IR level may not lower optimally. Thus, we want to unfold it.
+/// We are looking for the following pattern: (KeptBits is a constant)
+/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
+/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
+/// We will unfold it into the natural trunc+sext pattern:
+/// ((%x << C) a>> C) dstcond %x
+/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
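+/// e.g. for i16 %x with KeptBits == 8:
+///   (add %x, 128) u< 256  -->  ((%x << 8) a>> 8) == %x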
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ APInt I1 = C1->getAPIntValue();
+
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT) {
+ NewCond = ISD::CondCode::SETEQ;
+ } else if (Cond == ISD::CondCode::SETULE) {
+ NewCond = ISD::CondCode::SETEQ;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGT) {
+ NewCond = ISD::CondCode::SETNE;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGE) {
+ NewCond = ISD::CondCode::SETNE;
+ } else
+ return SDValue();
+
+ APInt I01 = C01->getAPIntValue();
+
+ auto checkConstants = [&I1, &I01]() -> bool {
+    // Both of them must be powers of two, and the constant from the setcc
+    // must be bigger.
+ return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
+ };
+
+ if (checkConstants()) {
+ // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
+ } else {
+ // What if we invert constants? (and the target predicate)
+ I1.negate();
+ I01.negate();
+ assert(XVT.isInteger());
+ NewCond = getSetCCInverse(NewCond, XVT);
+ if (!checkConstants())
+ return SDValue();
+ // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
+ }
+
+ // They are power-of-two, so which bit is set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
+
+ // Magic!
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // We don't want to do this in every single case.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
+// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const {
+ assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
+ "Should be a comparison with 0.");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Valid only for [in]equality comparisons.");
+
+ unsigned NewShiftOpcode;
+ SDValue X, C, Y;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Look for '(C l>>/<< Y)'.
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+ // The shift should be one-use.
+ if (!V.hasOneUse())
+ return false;
+ unsigned OldShiftOpcode = V.getOpcode();
+ switch (OldShiftOpcode) {
+ case ISD::SHL:
+ NewShiftOpcode = ISD::SRL;
+ break;
+ case ISD::SRL:
+ NewShiftOpcode = ISD::SHL;
+ break;
+ default:
+ return false; // must be a logical shift.
+ }
+ // We should be shifting a constant.
+ // FIXME: best to use isConstantOrConstantVector().
+ C = V.getOperand(0);
+ ConstantSDNode *CC =
+ isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ if (!CC)
+ return false;
+ Y = V.getOperand(1);
+
+ ConstantSDNode *XC =
+ isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+ };
+
+  // LHS of comparison should be a one-use 'and'.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ X = N0.getOperand(0);
+ SDValue Mask = N0.getOperand(1);
+
+ // 'and' is commutative!
+ if (!Match(Mask)) {
+ std::swap(X, Mask);
+ if (!Match(Mask))
+ return SDValue();
+ }
+
+ EVT VT = X.getValueType();
+
+ // Produce:
+ // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
+ SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+ SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+ return T2;
+}
+
+/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
+/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
+/// handle the commuted versions of these patterns.
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
+ unsigned BOpcode = N0.getOpcode();
+ assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
+ "Unexpected binop");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+ // (X + Y) == X --> Y == 0
+ // (X - Y) == X --> Y == 0
+ // (X ^ Y) == X --> Y == 0
+ SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+ if (X == N1)
+ return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
+
+ if (Y != N1)
+ return SDValue();
+
+ // (X + Y) == Y --> X == 0
+ // (X ^ Y) == Y --> X == 0
+ if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
+
+ // The shift would not be valid if the operands are boolean (i1).
+ if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
+ return SDValue();
+
+ // (X - Y) == Y --> X == Y << 1
+ EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
+ !DCI.isBeforeLegalize());
+ SDValue One = DAG.getConstant(1, DL, ShiftVT);
+ SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(YShl1.getNode());
+ return DAG.getSetCC(DL, VT, X, YShl1, Cond);
+}
+
+static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
+ SDValue N0, const APInt &C1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // Look through truncs that don't change the value of a ctpop.
+ // FIXME: Add vector support? Need to be careful with setcc result type below.
+ SDValue CTPOP = N0;
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
+ N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
+ return SDValue();
+
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // Expand a power-of-2-or-zero comparison based on ctpop:
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
+ // Keep the CTPOP if it is a legal vector op.
+ if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ return SDValue();
+
+ unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
+ if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
+ return SDValue();
+ if (C1 == 0 && (Cond == ISD::SETULT))
+ return SDValue(); // This is handled elsewhere.
+
+ unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
+
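+    // Repeatedly clear the lowest set bit: after 'Passes' iterations the
+    // result is zero iff ctpop(CTOp) was at most Passes.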
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Result = CTOp;
+ for (unsigned i = 0; i < Passes; i++) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
+ Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
+ }
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
+ }
+
+ // Expand a power-of-2 comparison based on ctpop:
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
+ // Keep the CTPOP if it is legal.
+ if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ return SDValue();
+
+ SDValue Zero = DAG.getConstant(0, dl, CTVT);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ assert(CTVT.isInteger());
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1,
+    // so check before emitting a potentially unnecessary op.
+ if (DAG.isKnownNeverZero(CTOp))
+ return RHS;
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+ unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+ return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
+
+ return SDValue();
+}
+
+static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+ return SDValue();
+
+ auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+ if (!C1 || !(C1->isZero() || C1->isAllOnes()))
+ return SDValue();
+
+ auto getRotateSource = [](SDValue X) {
+ if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
+ return X.getOperand(0);
+ return SDValue();
+ };
+
+ // Peek through a rotated value compared against 0 or -1:
+ // (rot X, Y) == 0/-1 --> X == 0/-1
+ // (rot X, Y) != 0/-1 --> X != 0/-1
+ if (SDValue R = getRotateSource(N0))
+ return DAG.getSetCC(dl, VT, R, N1, Cond);
+
+ // Peek through an 'or' of a rotated value compared against 0:
+ // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
+ // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
+ //
+ // TODO: Add the 'and' with -1 sibling.
+ // TODO: Recurse through a series of 'or' ops to find the rotate.
+ EVT OpVT = N0.getValueType();
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
+ if (SDValue R = getRotateSource(N0.getOperand(0))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (SDValue R = getRotateSource(N0.getOperand(1))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // If we are testing for all-bits-clear, we might be able to do that with
+ // less shifting since bit-order does not matter.
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+ return SDValue();
+
+ auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+ if (!C1 || !C1->isZero())
+ return SDValue();
+
+ if (!N0.hasOneUse() ||
+ (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
+ return SDValue();
+
+ unsigned BitWidth = N0.getScalarValueSizeInBits();
+ auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
+ if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+ return SDValue();
+
+ // Canonicalize fshr as fshl to reduce pattern-matching.
+ unsigned ShAmt = ShAmtC->getZExtValue();
+ if (N0.getOpcode() == ISD::FSHR)
+ ShAmt = BitWidth - ShAmt;
+
+ // Match an 'or' with a specific operand 'Other' in either commuted variant.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
+ if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
+ return false;
+ if (Or.getOperand(0) == Other) {
+ X = Or.getOperand(0);
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (Or.getOperand(1) == Other) {
+ X = Or.getOperand(1);
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ EVT OpVT = N0.getValueType();
+ EVT ShAmtVT = N0.getOperand(2).getValueType();
+ SDValue F0 = N0.getOperand(0);
+ SDValue F1 = N0.getOperand(1);
+ if (matchOr(F0, F1)) {
+ // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
+ SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (matchOr(F1, F0)) {
+ // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
+ SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+
+ return SDValue();
+}
+
+/// Try to simplify a setcc built with the specified operands and cc. If it is
+/// unable to simplify it, return a null SDValue.
+SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI,
+ const SDLoc &dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+ const DataLayout &Layout = DAG.getDataLayout();
+ EVT OpVT = N0.getValueType();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // Constant fold or commute setcc.
+ if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
+ return Fold;
+
+ bool N0ConstOrSplat =
+ isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+ bool N1ConstOrSplat =
+ isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+
+ // Canonicalize toward having the constant on the RHS.
+ // TODO: Handle non-splat vector constants. All undef causes trouble.
+ // FIXME: We can't yet fold constant scalable vector splats, so avoid an
+ // infinite loop here when we encounter one.
+ ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+ if (N0ConstOrSplat && !N1ConstOrSplat &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
+ // If we have a subtract with the same 2 non-constant operands as this setcc
+ // -- but in reverse order -- then try to commute the operands of this setcc
+ // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+ // instruction on some targets.
+ if (!N0ConstOrSplat && !N1ConstOrSplat &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+ DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
+ !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
+ if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
+ if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
+ if (auto *N1C = isConstOrConstSplat(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // Optimize some CTPOP cases.
+ if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
+ return V;
+
+ // For equality to 0 of a no-wrap multiply, decompose and test each op:
+ // X * Y == 0 --> (X == 0) || (Y == 0)
+ // X * Y != 0 --> (X != 0) && (Y != 0)
+ // TODO: This bails out if minsize is set, but if the target doesn't have a
+ // single instruction multiply for this type, it would likely be
+ // smaller to decompose.
+ if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
+ (N0->getFlags().hasNoUnsignedWrap() ||
+ N0->getFlags().hasNoSignedWrap()) &&
+ !Attr.hasFnAttr(Attribute::MinSize)) {
+ SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
+ unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
+ return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
+ }
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
+ if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
+ Cond);
+ }
+ }
+ }
+ }
+
+ // FIXME: Support vectors.
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // (zext x) == C --> x == (trunc C)
+ // (sext x) == C --> x == (trunc C)
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ DCI.isBeforeLegalize() && N0->hasOneUse()) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreExt;
+ bool Signed = false;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countr_one();
+ PreExt = N0->getOperand(0);
+ }
+ } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
+ // SExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ Signed = true;
+ } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD / SEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
+ } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
+ Signed = true;
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
+ }
+ }
+
+ // Figure out how many bits we need to preserve this constant.
+ unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits > 0 &&
+ MinBits < C1.getBitWidth() &&
+ MinBits >= ReqdBits) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
+ if (MinBits == 1 && C1 == 1)
+ // Invert the condition.
+ return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+
+ // If truncating the setcc operands is not desirable, we can still
+ // simplify the expression in some cases:
+ // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
+ // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
+ // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
+ // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
+ SDValue TopSetCC = N0->getOperand(0);
+ unsigned N0Opc = N0->getOpcode();
+ bool SExt = (N0Opc == ISD::SIGN_EXTEND);
+ if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
+ TopSetCC.getOpcode() == ISD::SETCC &&
+ (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
+ (isConstFalseVal(N1) ||
+ isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
+
+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
+ (!N1C->isZero() && Cond == ISD::SETNE);
+
+ if (!Inverse)
+ return TopSetCC;
+
+ ISD::CondCode InvCond = ISD::getSetCCInverse(
+ cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
+ TopSetCC.getOperand(0).getValueType());
+ return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
+ TopSetCC.getOperand(1),
+ InvCond);
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0, the test is for
+ // equality or unsigned, and all 1 bits of the const are in the same
+ // partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ !ISD::isSignedIntSetCC(Cond) &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (Lod->isSimple() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueSizeInBits();
+ unsigned maskWidth = origWidth;
+        // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
+        // 8 bits, but we have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask = N0.getConstantOperandAPInt(1);
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if (Mask.isSubsetOf(newMask)) {
+ if (Layout.isLittleEndian())
+ bestOffset = (uint64_t)offset * (width/8);
+ else
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask <<= width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound() &&
+ shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
+ SDValue NewLoad =
+ DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ Lod->getOriginalAlign());
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ dl, newVT)),
+ DAG.getConstant(0LL, dl, newVT), Cond);
+ }
+ }
+ }
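+
+    // Worked example of the load-narrowing fold above (illustrative
+    // annotation; assumes a little-endian target and an i32 load):
+    //   (and (load i32 %p), 0xFF00) == 0
+    // The width scan tries 16 and then 8 bits; the narrowest match wins with
+    // width = 8, offset = 1 and mask = 0xFF, so (if profitable for the
+    // target) the compare becomes
+    //   (and (load i8 (%p + 1)), 0xFF) == 0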
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ:
+ return DAG.getConstant(0, dl, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE:
+ return DAG.getConstant(1, dl, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), dl, VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), dl, VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
+ EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
+ SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
+
+ SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
+ NewConst, Cond);
+ return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
+ }
+ break;
+ }
+ default:
+        break; // TODO: Be more careful with signed comparisons.
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
+ OpVT)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getSignificantBits() > ExtSrcTyBits)
+ return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
+
+ assert(ExtDstTy == N0.getOperand(0).getValueType() &&
+ ExtDstTy != ExtSrcTy && "Unexpected types!");
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
+ DAG.getConstant(Imm, dl, ExtDstTy));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
+ } else if ((N1C->isZero() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
+ (N0.getValueType() == MVT::i1 ||
+ getBooleanContents(N0.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isOneConstant(N0.getOperand(1))) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR) {
+ Val = N0.getOperand(0);
+ } else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->isOne()) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ SDValue XorLHS = Op0.getOperand(0);
+ SDValue XorRHS = Op0.getOperand(1);
+ // Ensure that the input setccs return an i1 type or 0/1 value.
+ if (Op0.getValueType() == MVT::i1 ||
+ (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent &&
+ getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
+ }
+ }
+ if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, dl, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, dl, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
+ KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ }
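+
+    // Illustrative example for the urem fold above (annotation only): if %x
+    // is known to have at most one bit set (zero or a power of two) and
+    // %y = 12 has two bits set, then no power of two is a nonzero multiple
+    // of 12, so (urem %x, 12) == 0 holds exactly when %x == 0.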
+
+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
+ N1C && N1C->isAllOnes()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, OpVT),
+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
+ }
+
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
+ }
+
+ // These simplifications apply to splat vectors as well.
+ // TODO: Handle more splat vector cases.
+ if (auto *N1C = isConstOrConstSplat(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ // X >= MIN --> true
+ if (C1 == MinVal)
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
+
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ // X >= C0 --> X > (C0 - 1)
+ APInt C = C1 - 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+ }
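+
+    // Examples of the canonicalization above (illustrative annotation):
+    //   setge  X, 5 --> setgt  X, 4
+    //   setuge X, 1 --> setugt X, 0
+    // while X >= MIN folds directly to true.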
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ // X <= MAX --> true
+ if (C1 == MaxVal)
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
+
+ // X <= C0 --> X < (C0 + 1)
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ APInt C = C1 + 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+ }
+
+ if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
+ if (C1 == MinVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setlt X, Max --> setne X, Max
+ if (C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if (C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
+
+ if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
+ if (C1 == MaxVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setgt X, Min --> setne X, Min
+ if (C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if (C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
+
+ if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ if (C1.isZero())
+ if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ VT, N0, N1, Cond, DCI, dl))
+ return CC;
+
+ // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
+ // For example, when high 32-bits of i64 X are known clear:
+ // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
+ // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
+ bool CmpZero = N1C->isZero();
+ bool CmpNegOne = N1C->isAllOnes();
+ if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
+ // Match or(lo,shl(hi,bw/2)) pattern.
+ auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
+ unsigned EltBits = V.getScalarValueSizeInBits();
+ if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
+ return false;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
+          // Unshifted element must have zero upper bits.
+ if (RHS.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(RHS.getOperand(1)) &&
+ RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
+ DAG.MaskedValueIsZero(LHS, HiBits)) {
+ Lo = LHS;
+ Hi = RHS.getOperand(0);
+ return true;
+ }
+ if (LHS.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
+ DAG.MaskedValueIsZero(RHS, HiBits)) {
+ Lo = RHS;
+ Hi = LHS.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
+ unsigned EltBits = N0.getScalarValueSizeInBits();
+ unsigned HalfBits = EltBits / 2;
+ APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
+ SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
+ SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
+ SDValue NewN0 =
+ DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
+ SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
+ return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
+ };
+
+ SDValue Lo, Hi;
+ if (IsConcat(N0, Lo, Hi))
+ return MergeConcat(Lo, Hi);
+
+ if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
+ SDValue Lo0, Lo1, Hi0, Hi1;
+ if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
+ IsConcat(N0.getOperand(1), Lo1, Hi1)) {
+ return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
+ DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
+ }
+ }
+ }
+ }
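+
+    // Example of the concat fold above (illustrative annotation): for an i64
+    // value N0 = (or X, (shl Y, 32)) where the high 32 bits of X are known
+    // zero,
+    //   N0 ==  0 --> (or  X, (and Y, 0xffffffff)) == 0
+    //   N0 == -1 --> (and X, (and Y, 0xffffffff)) == 0xffffffff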
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ // SETUGE X, SINTMIN -> SETLT X, 0
+ if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C1.isMinSignedValue()))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, dl, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ // SETULE X, SINTMAX -> SETGT X, -1
+ if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
+ (Cond == ISD::SETULE && C1.isMaxSignedValue()))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getAllOnesConstant(dl, N1.getValueType()),
+ ISD::SETGT);
+ }
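+
+    // Example (illustrative annotation, i32 operands): SETUGT X, 0x7fffffff
+    // asks whether the sign bit of X is set and becomes SETLT X, 0, while
+    // SETULT X, 0x80000000 becomes SETGT X, -1; both replace a large
+    // unsigned immediate with 0 or -1.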
+ }
+
+ // Back to non-vector simplifications.
+ // TODO: Can we do these for vector splats?
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const APInt &C1 = N1C->getAPIntValue();
+ EVT ShValTy = N0.getValueType();
+
+ // Fold bit comparisons when we can. This will result in an
+ // incorrect value when boolean false is negative one, unless
+ // the bitsize is 1 in which case the false value is the same
+ // in practice regardless of the representation.
+ if ((VT.getSizeInBits() == 1 ||
+ getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
+ N0.getOpcode() == ISD::AND) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy =
+ getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ unsigned ShCt = AndRHS->getAPIntValue().logBase2();
+ if (AndRHS->getAPIntValue().isPowerOf2() &&
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ unsigned ShCt = C1.logBase2();
+ if (C1.isPowerOf2() &&
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
+ }
+ }
+ }
+ }
+
+ if (C1.getSignificantBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countr_zero();
+ if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift =
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countr_one();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countr_zero();
+ }
+ NewC.lshrInPlace(ShiftBits);
+ if (ShiftBits && NewC.getSignificantBits() <= 64 &&
+ isLegalICmpImmediate(NewC.getSExtValue()) &&
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
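+
+    // Worked example of the immediate-shrinking folds above (illustrative
+    // annotation): when 256 is not a legal compare immediate,
+    //   (X & 0xffffff00) == 256 --> (X >> 8) == 1
+    // and an i64 unsigned range check such as X u< 0x100000000 becomes
+    //   (X >> 32) u< 1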
+ }
+
+ if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
+ auto *CFP = cast<ConstantFPSDNode>(N1);
+ assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // setcc (fneg x), C -> setcc swap(pred) x, -C
+ if (N0.getOpcode() == ISD::FNEG) {
+ ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
+ SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
+ }
+ }
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ bool IsNegInf = CFP->getValueAPF().isNegative();
+ ISD::CondCode NewCond = ISD::SETCC_INVALID;
+ switch (Cond) {
+ case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
+ case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
+ case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
+ case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
+ default: break;
+ }
+ if (NewCond != ISD::SETCC_INVALID &&
+ isCondCodeLegal(NewCond, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+ }
+ }
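+
+  // Illustrative example for the infinity rewrite above (annotation only):
+  // when SETOEQ is awkward for the target, X == +inf can be tested as
+  // X >= +inf (SETOGE) and X == -inf as X <= -inf (SETOLE), since no ordered
+  // value lies beyond the infinities.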
+
+ if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ assert(!N0.getValueType().isInteger() &&
+ "Integer types should be handled by FoldSetCC");
+
+ bool EqTrue = ISD::isTrueWhenEqual(Cond);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+ if (UOF == unsigned(EqTrue))
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCond, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ // ~X > ~Y --> Y > X
+ // ~X < ~Y --> Y < X
+ // ~X < C --> X > ~C
+ // ~X > C --> X < ~C
+ if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
+ N0.getValueType().isInteger()) {
+ if (isBitwiseNot(N0)) {
+ if (isBitwiseNot(N1))
+ return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue Not = DAG.getNOT(dl, N1, OpVT);
+ return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
+ }
+ }
+ }
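+
+  // Example for the bitwise-not folds above (illustrative annotation):
+  // ~X == -X - 1 reverses both the signed and the unsigned order, so e.g.
+  // (~X <s ~Y) is equivalent to (Y <s X) and (~X <u C) to (X >u ~C).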
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+
+ // Turn (X^C1) == C2 --> X == C1^C2
+ if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
+ }
+
+ // (X+Y) == X --> Y == 0 and similar folds.
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Y has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.hasOneUse())
+ if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
+ return V;
+ }
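+
+    // Examples of the constant re-association above (illustrative
+    // annotation):
+    //   (X + 3) == 10 --> X == 7
+    //   (X ^ 5) == 12 --> X == 9     (5 ^ 12 == 9)
+    //   (7 - X) ==  2 --> X == 5     (7 - 2 == 5)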
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR)
+ if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
+ return V;
+
+ if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
+ return V;
+ }
+
+ // Fold remainder of division by a constant.
+ if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
+ N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // When division is cheap or optimizing for minimum size,
+ // fall through to DIVREM creation by skipping this fold.
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
+ if (N0.getOpcode() == ISD::UREM) {
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ } else if (N0.getOpcode() == ISD::SREM) {
+ if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
+ SDValue Temp;
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, OpVT);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
+ break;
+ }
+ if (VT.getScalarType() != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
+ N0 = DAG.getNode(ExtendCode, dl, VT, N0);
+ }
+ return N0;
+ }
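+
+  // Example of the boolean expansion above (illustrative annotation): for i1
+  // operands, X <u Y is satisfied only by X == 0, Y == 1, which is exactly
+  // the value of (~X & Y).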
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// Returns true (and the GlobalValue and the offset) if the node is a
+/// GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
+ int64_t &Offset) const {
+
+ SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
+
+ if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
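+
+// Illustrative note for isGAPlusOffset (annotation, not part of the code
+// above): for a node such as
+//   (add (add (GlobalAddress @g, 8), (Constant 16)), (Constant 4))
+// the recursion walks through the ISD::ADD operands and reports GA == @g
+// with Offset increased by 8 + 16 + 4.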
+
+SDValue TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(StringRef Constraint) const {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r':
+ return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'p': // Address.
+ return C_Address;
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ return C_Immediate;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 's': // Relocatable Constant
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
+ if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
+ return C_Memory;
+ return C_Register;
+ }
+ return C_Unknown;
+}
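+
+// Examples (illustrative annotation): getConstraintType("r") yields
+// C_RegisterClass, "m" yields C_Memory, "i" yields C_Other, "{eax}" yields
+// C_Register, and "{memory}" is special-cased to C_Memory.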
+
+/// Try to replace an X constraint, which matches anything, with another that
+/// has more specific requirements based on the type of the corresponding
+/// operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return nullptr;
+}
+
+SDValue TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+ return SDValue();
+}
+
+/// Lower the specified operand into the Ops vector.
+/// If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+
+ ConstantSDNode *C;
+ uint64_t Offset = 0;
+
+    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
+    // etc., since getelementptr is variadic. We can't use
+    // SelectionDAG::FoldSymbolOffset because it expects the GA to be
+    // accessible, while in this case the GA may be furthest from the root
+    // node, which is likely an ISD::ADD.
+ while (true) {
+ if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
+ BooleanContent BCont = getBooleanContents(MVT::i64);
+ ISD::NodeType ExtOpc =
+ IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
+ int64_t ExtVal =
+ ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
+ Ops.push_back(
+ DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
+ return;
+ }
+ if (ConstraintLetter != 'n') {
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+ GA->getValueType(0),
+ Offset + GA->getOffset()));
+ return;
+ }
+ if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ Ops.push_back(DAG.getTargetBlockAddress(
+ BA->getBlockAddress(), BA->getValueType(0),
+ Offset + BA->getOffset(), BA->getTargetFlags()));
+ return;
+ }
+ if (isa<BasicBlockSDNode>(Op)) {
+ Ops.push_back(Op);
+ return;
+ }
+ }
+ const unsigned OpCode = Op.getOpcode();
+ if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+ if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+ Op = Op.getOperand(1);
+ // Subtraction is not commutative.
+ else if (OpCode == ISD::ADD &&
+ (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+ Op = Op.getOperand(0);
+ else
+ return;
+ Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+ continue;
+ }
+ return;
+ }
+ break;
+ }
+ }
+}
+
+void TargetLowering::CollectTargetIntrinsicOperands(
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+ StringRef Constraint,
+ MVT VT) const {
+ if (Constraint.empty() || Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
+ assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
+
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
+
+ // Figure out which register class contains this reg.
+ for (const TargetRegisterClass *RC : RI->regclasses()) {
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ if (!isLegalRC(*RI, *RC))
+ continue;
+
+ for (const MCPhysReg &PR : *RC) {
+ if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
+ std::pair<unsigned, const TargetRegisterClass *> S =
+ std::make_pair(PR, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RI->isTypeLegalForClass(*RC, VT))
+ return S;
+ if (!R.second)
+ R = S;
+ }
+ }
+ }
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// Return true if this is an input operand that is a matching constraint
+/// like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector
+TargetLowering::ParseConstraints(const DataLayout &DL,
+ const TargetRegisterInfo *TRI,
+ const CallBase &Call) const {
+ /// Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
+
+ for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ ConstraintOperands.emplace_back(std::move(CI));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
+ OpInfo.ConstraintVT =
+ getSimpleValueType(DL, STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT =
+ getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
+ break;
+ case InlineAsm::isLabel:
+ OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
+ ++LabelNo;
+ continue;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ OpTy = Call.getParamElementType(ArgNo);
+ assert(OpTy && "Indirect operand must have elementtype attribute");
+ }
+
+      // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(OpTy->getContext(), BitSize);
+ break;
+ }
+ }
+
+ EVT VT = getAsmOperandValueType(DL, OpTy, true);
+ OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
+ ArgNo++;
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (!ConstraintOperands.empty()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag it as an maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (AsmOperandInfo &cInfo : ConstraintOperands)
+ if (cInfo.Type != InlineAsm::isClobber)
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+/// Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ case TargetLowering::C_Address:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (const std::string &rCode : *rCodes) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, rCode.c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+  case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then using the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this, the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // Indirect 'other' or 'immediate' constraints are not allowed.
+ if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
+ CType == TargetLowering::C_Register ||
+ CType == TargetLowering::C_RegisterClass))
+ continue;
+
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if ((CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// Determines the constraint code and constraint type to use for the specific
+/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Constants are handled elsewhere. For Functions, the type here is the
+ // type of the result, which is not what we want to look at; leave them
+ // alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<ConstantInt>(v) || isa<Function>(v)) {
+ return;
+ }
+
+ if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
+ OpInfo.ConstraintCode = "i";
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
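+
+// Illustrative example (annotation): for a multi-option constraint such as
+// "imr" with an integer-constant operand, ChooseConstraint tries the
+// 'other'/'immediate' codes first via LowerAsmOperandForConstraint and keeps
+// 'i' if the constant lowers successfully; otherwise it falls back to the
+// most general remaining option, preferring 'm' over 'r'.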
+
+/// Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ bool UseSRA = false;
+ SmallVector<SDValue, 16> Shifts, Factors;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isZero())
+ return false;
+ APInt Divisor = C->getAPIntValue();
+ unsigned Shift = Divisor.countr_zero();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t;
+ APInt Factor = Divisor;
+ while ((t = Divisor * Factor) != 1)
+ Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue Shift, Factor;
+ if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(Shifts.size() == 1 && Factors.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
+ } else {
+ assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ SDValue Res = Op0;
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ if (UseSRA) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
+ Created.push_back(Res.getNode());
+ }
+
+ return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
+}
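+
+// Worked example for BuildExactSDIV (illustrative annotation): for an exact
+// sdiv i32 %x, 6 the divisor splits into 2 * 3, so Shift = 1 and Factor is
+// the multiplicative inverse of 3 modulo 2^32, 0xAAAAAAAB (3 * 0xAAAAAAAB
+// wraps to 1 in 32 bits). The emitted sequence is roughly
+//   %t = sra exact %x, 1
+//   %q = mul %t, 0xAAAAAAAB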
+
+SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+ return SDValue();
+}
+
+SDValue
+TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SREM as SREM
+ return SDValue();
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ EVT MulVT;
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT)) {
+ // Limit this to simple scalars for now.
+ if (VT.isVector() || !VT.isSimple())
+ return SDValue();
+
+ // If this type will be promoted to a large enough type with a legal
+ // multiply operation, we can go ahead and do this transform.
+ if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
+ return SDValue();
+
+ MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ if (MulVT.getSizeInBits() < (2 * EltBits) ||
+ !isOperationLegal(ISD::MUL, MulVT))
+ return SDValue();
+ }
+
+ // If the sdiv has an 'exact' bit we can use a simpler lowering.
+ if (N->getFlags().hasExact())
+ return BuildExactSDIV(*this, N, dl, DAG, Created);
+
+ SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isZero())
+ return false;
+
+ const APInt &Divisor = C->getAPIntValue();
+    SignedDivisionByConstantInfo magics =
+        SignedDivisionByConstantInfo::get(Divisor);
+ int NumeratorFactor = 0;
+ int ShiftMask = -1;
+
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
+ // If d is +1/-1, we just multiply the numerator by +1/-1.
+ NumeratorFactor = Divisor.getSExtValue();
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
+ ShiftMask = 0;
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
+ // If d > 0 and m < 0, add the numerator.
+ NumeratorFactor = 1;
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
+ // If d < 0 and m > 0, subtract the numerator.
+ NumeratorFactor = -1;
+ }
+
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
+ Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
+ ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts / magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue MagicFactor, Factor, Shift, ShiftMask;
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
+ Shifts.size() == 1 && ShiftMasks.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
+ } else {
+ assert(isa<ConstantSDNode>(N1) && "Expected a constant");
+ MagicFactor = MagicFactors[0];
+ Factor = Factors[0];
+ Shift = Shifts[0];
+ ShiftMask = ShiftMasks[0];
+ }
+
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
+ auto GetMULHS = [&](SDValue X, SDValue Y) {
+ // If the type isn't legal, use a wider mul of the type calculated
+ // earlier.
+ if (!isTypeLegal(VT)) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
+ DAG.getShiftAmountConstant(EltBits, MulVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
+
+ if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
+ return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
+ if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ // If a type twice as wide is legal, widen and use a mul plus a shift.
+ unsigned Size = VT.getScalarSizeInBits();
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
+ DAG.getShiftAmountConstant(EltBits, WideVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
+ return SDValue();
+ };
+
+ SDValue Q = GetMULHS(N0, MagicFactor);
+ if (!Q)
+ return SDValue();
+
+ Created.push_back(Q.getNode());
+
+ // (Optionally) Add/subtract the numerator using Factor.
+ Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
+ Created.push_back(Q.getNode());
+
+ // Shift right algebraic by shift value.
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
+ Created.push_back(Q.getNode());
+
+ // Extract the sign bit, mask it and add it to the quotient.
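+ // (For a negative quotient this adds one, converting the SRA's rounding
+ // toward negative infinity into the round-toward-zero result sdiv requires;
+ // ShiftMask is zero for +1/-1 divisor lanes, where no fixup is wanted.)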
+ SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
+ SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
+ Created.push_back(T.getNode());
+ T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
+ Created.push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
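+///
+/// Roughly, for a magic constant m with pre-shift p and post-shift s, the
+/// sequence built below is (sketch):
+///   q = mulhu(n >> p, m);
+///   if the magic requires the "add" fixup:
+///     q = q + ((n - q) >> 1);
+///   q = q >> s;
+/// followed by a select that returns n unchanged for a divisor of 1.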
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ EVT MulVT;
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT)) {
+ // Limit this to simple scalars for now.
+ if (VT.isVector() || !VT.isSimple())
+ return SDValue();
+
+ // If this type will be promoted to a large enough type with a legal
+ // multiply operation, we can go ahead and do this transform.
+ if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
+ return SDValue();
+
+ MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ if (MulVT.getSizeInBits() < (2 * EltBits) ||
+ !isOperationLegal(ISD::MUL, MulVT))
+ return SDValue();
+ }
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Try to use leading zeros of the dividend to reduce the multiplier and
+ // avoid expensive fixups.
+ // TODO: Support vectors.
+ unsigned LeadingZeros = 0;
+ if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
+ assert(!isOneConstant(N1) && "Unexpected divisor");
+ LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
+ // UnsignedDivisionByConstantInfo doesn't work correctly if the leading zeros
+ // in the dividend exceed the leading zeros of the divisor.
+ LeadingZeros = std::min(
+ LeadingZeros, cast<ConstantSDNode>(N1)->getAPIntValue().countl_zero());
+ }
+
+ bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
+ SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isZero())
+ return false;
+ const APInt &Divisor = C->getAPIntValue();
+
+ SDValue PreShift, MagicFactor, NPQFactor, PostShift;
+
+ // Magic algorithm doesn't work for division by 1. We need to emit a select
+ // at the end.
+ if (Divisor.isOne()) {
+ PreShift = PostShift = DAG.getUNDEF(ShSVT);
+ MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
+ } else {
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
+
+ MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) &&
+ "Unexpected pre-shift");
+ PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
+ PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
+ NPQFactor = DAG.getConstant(
+ magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits),
+ dl, SVT);
+ UseNPQ |= magics.IsAdd;
+ UsePreShift |= magics.PreShift != 0;
+ UsePostShift |= magics.PostShift != 0;
+ }
+
+ PreShifts.push_back(PreShift);
+ MagicFactors.push_back(MagicFactor);
+ NPQFactors.push_back(NPQFactor);
+ PostShifts.push_back(PostShift);
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+ PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+ PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
+ NPQFactors.size() == 1 && PostShifts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one for scalable vectors");
+ PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
+ MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
+ NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
+ PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
+ } else {
+ assert(isa<ConstantSDNode>(N1) && "Expected a constant");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ SDValue Q = N0;
+ if (UsePreShift) {
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+ }
+
+ // FIXME: We should support doing a MUL in a wider type.
+ auto GetMULHU = [&](SDValue X, SDValue Y) {
+ // If the type isn't legal, use a wider mul of the type calculated
+ // earlier.
+ if (!isTypeLegal(VT)) {
+ X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
+ Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
+ DAG.getShiftAmountConstant(EltBits, MulVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
+
+ if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
+ return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+ if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ // If a type twice as wide is legal, widen and use a mul plus a shift.
+ unsigned Size = VT.getScalarSizeInBits();
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
+ Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
+ DAG.getShiftAmountConstant(EltBits, WideVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
+ return SDValue(); // No mulhu or equivalent
+ };
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = GetMULHU(Q, MagicFactor);
+ if (!Q)
+ return SDValue();
+
+ Created.push_back(Q.getNode());
+
+ if (UseNPQ) {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
+ Created.push_back(NPQ.getNode());
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
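+ // Note that ((N - Q) >> 1) + Q equals (N + Q) >> 1 but cannot overflow the
+ // intermediate sum, which is why the fixup is expressed this way.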
+ if (VT.isVector())
+ NPQ = GetMULHU(NPQ, NPQFactor);
+ else
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
+
+ Created.push_back(NPQ.getNode());
+
+ Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ Created.push_back(Q.getNode());
+ }
+
+ if (UsePostShift) {
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+ }
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, IsOne, N0, Q);
+}
+
+/// If all values in Values that *don't* match the predicate are the same
+/// 'splat' value, then replace all values with that splat value.
+/// Else, if AlternativeReplacement was provided, then replace all values that
+/// do match the predicate with the AlternativeReplacement value.
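+/// E.g. with Predicate = isNullConstant, {0, 5, 0, 5} becomes {5, 5, 5, 5},
+/// while for {0, 5, 0, 7} no splat is found, so the zeros are only replaced
+/// if an AlternativeReplacement was provided.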
+static void
+turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
+ std::function<bool(SDValue)> Predicate,
+ SDValue AlternativeReplacement = SDValue()) {
+ SDValue Replacement;
+ // Is there a value for which the Predicate does *NOT* match? What is it?
+ auto SplatValue = llvm::find_if_not(Values, Predicate);
+ if (SplatValue != Values.end()) {
+ // Does Values consist only of SplatValue and values matching the Predicate?
+ if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
+ return Value == *SplatValue || Predicate(Value);
+ })) // Then we shall replace values matching predicate with SplatValue.
+ Replacement = *SplatValue;
+ }
+ if (!Replacement) {
+ // Oops, we did not find the "baseline" splat value.
+ if (!AlternativeReplacement)
+ return; // Nothing to do.
+ // Let's replace with provided value then.
+ Replacement = AlternativeReplacement;
+ }
+ std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
+}
+
+/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 5> Built;
+ if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - Q = floor(((2^W) - 1) / D)
+ // where W is the width of the common type of N and D.
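+ // Small worked example (illustrative, W = 8): for D = 6 we get D0 = 3,
+ // K = 1, P = 171 (since 3 * 171 == 1 (mod 256)) and Q = floor(255 / 6) = 42,
+ // so (N u% 6 == 0) becomes (rotr(N * 171, 1) u<= 42).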
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ bool ComparingWithAllZeros = true;
+ bool AllComparisonsWithNonZerosAreTautological = true;
+ bool HadTautologicalLanes = false;
+ bool AllLanesAreTautological = true;
+ bool HadEvenDivisor = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ bool HadTautologicalInvertedLanes = false;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
+
+ auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (CDiv->isZero())
+ return false;
+
+ const APInt &D = CDiv->getAPIntValue();
+ const APInt &Cmp = CCmp->getAPIntValue();
+
+ ComparingWithAllZeros &= Cmp.isZero();
+
+ // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we will only be able to produce the comparison that will give the
+ // opposite tautological answer. So this lane would need to be fixed up.
+ bool TautologicalInvertedLane = D.ule(Cmp);
+ HadTautologicalInvertedLanes |= TautologicalInvertedLane;
+
+ // If all lanes are tautological (either all divisors are ones, or divisor
+ // is not greater than the constant we are comparing with),
+ // we will prefer to avoid the fold.
+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
+ HadTautologicalLanes |= TautologicalLane;
+ AllLanesAreTautological &= TautologicalLane;
+
+ // If we are comparing with non-zero, we'll need to subtract said
+ // comparison value from the LHS. But there is no point in doing that if
+ // every lane where we are comparing with non-zero is tautological.
+ if (!Cmp.isZero())
+ AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countr_zero();
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ // D is even if it has trailing zeros.
+ HadEvenDivisor |= (K != 0);
+ // D is a power-of-two if D0 is one.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOne();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
+
+ // Q = floor((2^W - 1) u/ D)
+ // R = ((2^W - 1) u% D)
+ APInt Q, R;
+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
+
+ // If we are comparing with zero, then the comparison constant Q is okay as
+ // is; otherwise, if Cmp > R, it needs to be one less.
+ if (Cmp.ugt(R))
+ Q -= 1;
+
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the lane is tautological the result can be constant-folded.
+ if (TautologicalLane) {
+ // Set P and K to bogus values so we can try to splat them.
+ P = 0;
+ K = -1;
+ // And ensure that the comparison constant is tautological,
+ // i.e. it will always compare true/false.
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
+ return SDValue();
+
+ // If all lanes are tautological, the result can be constant-folded.
+ if (AllLanesAreTautological)
+ return SDValue();
+
+ // If this is a urem by a power-of-two, avoid the fold since it can be
+ // best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, KVal, QVal;
+ if (D.getOpcode() == ISD::BUILD_VECTOR) {
+ if (HadTautologicalLanes) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
+ "Expected matchBinaryPredicate to return one element for "
+ "SPLAT_VECTORs");
+ PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
+ KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
+ QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
+ } else {
+ PVal = PAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
+ return SDValue(); // FIXME: Could/should use `ISD::ADD`?
+ assert(CompTargetNode.getValueType() == N.getValueType() &&
+ "Expecting that the types on LHS and RHS of comparisons match.");
+ N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ // UREM: (rotr (mul N, P), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // UREM: (setule/setugt (rotr (mul N, P), K), Q)
+ SDValue NewCC =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ if (!HadTautologicalInvertedLanes)
+ return NewCC;
+
+ // If any lanes previously compared always-false, the NewCC will give
+ // an always-true result for them, so we need to fix up those lanes.
+ // Or the other way around for the inequality predicate.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+ Created.push_back(NewCC.getNode());
+
+ // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we have produced the comparison that will give the
+ // opposite tautological answer. So these lanes would need to be fixed up.
+ SDValue TautologicalInvertedChannels =
+ DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
+ Created.push_back(TautologicalInvertedChannels.getNode());
+
+ // NOTE: we avoid letting illegal types through even if we're before legalize
+ // ops; legalization has a hard time producing good code for this.
+ if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
+ // If we have a vector select, let's replace the comparison results in the
+ // affected lanes with the correct tautological result.
+ SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
+ DL, SETCCVT, SETCCVT);
+ return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
+ Replacement, NewCC);
+ }
+
+ // Else, we can just invert the comparison result in the appropriate lanes.
+ //
+ // NOTE: see the note above the VSELECT check.
+ if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
+ return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
+ TautologicalInvertedChannels);
+
+ return SDValue(); // Don't know how to lower.
+}
+
+/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 7> Built;
+ if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ assert(Built.size() <= 7 && "Max size prediction failed.");
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // Fold:
+ // (seteq/ne (srem N, D), 0)
+ // To:
+ // (setule/ugt (rotr (add (mul N, P), A), K), Q)
+ //
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
+ // - Q = floor((2 * A) / (2^K))
+ // where W is the width of the common type of N and D.
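+ // Small worked example (illustrative, W = 8): for D = 6 we get D0 = 3,
+ // K = 1, P = 171 (since 3 * 171 == 1 (mod 256)), A = floor(127 / 3) & -2 = 42
+ // and Q = (2 * 42) / 2 = 42, so (N s% 6 == 0) becomes
+ // (rotr(N * 171 + 42, 1) u<= 42).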
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If we are after ops legalization, and MUL is unavailable, we cannot
+ // proceed.
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Could support comparing with non-zero too.
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!CompTarget || !CompTarget->isZero())
+ return SDValue();
+
+ bool HadIntMinDivisor = false;
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool NeedToApplyOffset = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
+
+ auto BuildSREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isZero())
+ return false;
+
+ // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
+
+ // WARNING: this fold is only valid for positive divisors!
+ APInt D = C->getAPIntValue();
+ if (D.isNegative())
+ D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
+
+ HadIntMinDivisor |= D.isMinSignedValue();
+
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOne();
+ AllDivisorsAreOnes &= D.isOne();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countr_zero();
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ if (!D.isMinSignedValue()) {
+ // D is even if it has trailing zeros; unless it's INT_MIN, in which case
+ // we don't care about this lane in this fold; we'll special-handle it.
+ HadEvenDivisor |= (K != 0);
+ }
+
+ // D is a power-of-two if D0 is one. This includes INT_MIN.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOne();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
+
+ // A = floor((2^(W - 1) - 1) / D0) & -2^K
+ APInt A = APInt::getSignedMaxValue(W).udiv(D0);
+ A.clearLowBits(K);
+
+ if (!D.isMinSignedValue()) {
+ // If the divisor is INT_MIN, then we don't care about this lane in this
+ // fold; we'll special-handle it.
+ NeedToApplyOffset |= A != 0;
+ }
+
+ // Q = floor((2 * A) / (2^K))
+ APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
+
+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
+ "We are expecting that A is always less than all-ones for SVT");
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded. Likewise, we
+ // don't care about INT_MIN lanes; those can be set to undef if appropriate.
+ if (D.isOne()) {
+ // Set P, A and K to bogus values so we can try to splat them.
+ P = 0;
+ A = -1;
+ K = -1;
+
+ // x ?% 1 == 0 <--> true <--> x u<= -1
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ AAmts.push_back(DAG.getConstant(A, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
+ return SDValue();
+
+ // If this is a srem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+ // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
+ // since it can be best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, AVal, KVal, QVal;
+ if (D.getOpcode() == ISD::BUILD_VECTOR) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn AAmts into a splat, since we don't care about the
+ // values that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, SVT));
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ AVal = DAG.getBuildVector(VT, DL, AAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
+ QAmts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
+ AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
+ KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
+ QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
+ } else {
+ assert(isa<ConstantSDNode>(D) && "Expected a constant");
+ PVal = PAmts[0];
+ AVal = AAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ if (NeedToApplyOffset) {
+ // We need ADD to do this.
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
+ return SDValue();
+
+ // (add (mul N, P), A)
+ Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ // SREM: (rotr (add (mul N, P), A), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
+ SDValue Fold =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+
+ // If we didn't have lanes with INT_MIN divisor, then we're done.
+ if (!HadIntMinDivisor)
+ return Fold;
+
+ // That fold is only valid for positive divisors, which effectively means it
+ // is invalid for INT_MIN divisors. So if we have such a lane,
+ // we must fix up the results for said lanes.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+
+ // NOTE: we avoid letting illegal types through even if we're before legalize
+ // ops; legalization has a hard time producing good code for the code that
+ // follows.
+ if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
+ return SDValue();
+
+ Created.push_back(Fold.getNode());
+
+ SDValue IntMin = DAG.getConstant(
+ APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue IntMax = DAG.getConstant(
+ APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
+
+ // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
+ SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
+ Created.push_back(DivisorIsIntMin.getNode());
+
+ // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
+ SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
+ Created.push_back(Masked.getNode());
+ SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
+ Created.push_back(MaskedIsZero.getNode());
+
+ // To produce the final result we need to blend 2 vectors: 'Fold' and
+ // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
+ // from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
+ // constant-folded, the select can get lowered to a shuffle with a constant
+ // mask.
+ SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
+ MaskedIsZero, Fold);
+
+ return Blended;
+}
+
+bool TargetLowering::
+verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(0))) {
+ DAG.getContext()->emitError("argument to '__builtin_return_address' must "
+ "be a constant integer");
+ return true;
+ }
+
+ return false;
+}
+
+SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+
+ // This is specifically a check for the handling of denormal inputs, not the
+ // result.
+ if (Mode.Input == DenormalMode::PreserveSign ||
+ Mode.Input == DenormalMode::PositiveZero) {
+ // Test = X == 0.0
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ }
+
+ // Test for a denormal input, to avoid using a wrong estimate.
+ //
+ // Test = fabs(X) < SmallestNormal
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+}
+
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ NegatibleCost &Cost,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
+ Cost = NegatibleCost::Cheaper;
+ return Op.getOperand(0);
+ }
+
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Pre-increment recursion depth for use in recursive calls.
+ ++Depth;
+ const SDNodeFlags Flags = Op->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
+
+ // Don't allow anything with multiple uses unless we know it is free.
+ if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
+ bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType());
+ if (!IsFreeExtend)
+ return SDValue();
+ }
+
+ auto RemoveDeadNode = [&](SDValue N) {
+ if (N && N.getNode()->use_empty())
+ DAG.RemoveDeadNode(N.getNode());
+ };
+
+ SDLoc DL(Op);
+
+ // Because getNegatedExpression can delete nodes we need a handle to keep
+ // temporary nodes alive in case the recursion manages to create an identical
+ // node.
+ std::list<HandleSDNode> Handles;
+
+ switch (Opcode) {
+ case ISD::ConstantFP: {
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ bool IsOpLegal =
+ isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ OptForSize);
+
+ if (LegalOps && !IsOpLegal)
+ break;
+
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ SDValue CFP = DAG.getConstantFP(V, DL, VT);
+
+ // If we already have the use of the negated floating constant, it is free
+ // to negate it even if it has multiple uses.
+ if (!Op.hasOneUse() && CFP.use_empty())
+ break;
+ Cost = NegatibleCost::Neutral;
+ return CFP;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ break;
+
+ bool IsOpLegal =
+ (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
+ llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ OptForSize);
+ });
+
+ if (LegalOps && !IsOpLegal)
+ break;
+
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
+ }
+ Cost = NegatibleCost::Neutral;
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+ case ISD::FADD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ break;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ break;
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+
+ // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
+ // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ // Negate the X if its cost is less than or equal to the cost of Y.
+ if (NegX && (CostX <= CostY)) {
+ Cost = CostX;
+ SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
+ if (NegY != N)
+ RemoveDeadNode(NegY);
+ return N;
+ }
+
+ // Negate the Y if it is not expensive.
+ if (NegY) {
+ Cost = CostY;
+ SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
+ if (NegX != N)
+ RemoveDeadNode(NegX);
+ return N;
+ }
+ break;
+ }
+ case ISD::FSUB: {
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ break;
+
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+ // fold (fneg (fsub 0, Y)) -> Y
+ if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
+ if (C->isZero()) {
+ Cost = NegatibleCost::Cheaper;
+ return Y;
+ }
+
+ // fold (fneg (fsub X, Y)) -> (fsub Y, X)
+ Cost = NegatibleCost::Neutral;
+ return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
+ }
+ case ISD::FMUL:
+ case ISD::FDIV: {
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ // Negate the X if its cost is less than or equal to the cost of Y.
+ if (NegX && (CostX <= CostY)) {
+ Cost = CostX;
+ SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
+ if (NegY != N)
+ RemoveDeadNode(NegY);
+ return N;
+ }
+
+ // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+ if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+ break;
+
+ // Negate the Y if it is not expensive.
+ if (NegY) {
+ Cost = CostY;
+ SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
+ if (NegX != N)
+ RemoveDeadNode(NegX);
+ return N;
+ }
+ break;
+ }
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ break;
+
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
+ NegatibleCost CostZ = NegatibleCost::Expensive;
+ SDValue NegZ =
+ getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
+ // Give up if we fail to negate the Z.
+ if (!NegZ)
+ break;
+
+ // Prevent this node from being deleted by the next two calls.
+ Handles.emplace_back(NegZ);
+
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ // Negate the X if its cost is less than or equal to the cost of Y.
+ if (NegX && (CostX <= CostY)) {
+ Cost = std::min(CostX, CostZ);
+ SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
+ if (NegY != N)
+ RemoveDeadNode(NegY);
+ return N;
+ }
+
+ // Negate the Y if it is not expensive.
+ if (NegY) {
+ Cost = std::min(CostY, CostZ);
+ SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
+ if (NegX != N)
+ RemoveDeadNode(NegX);
+ return N;
+ }
+ break;
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
+ OptForSize, Cost, Depth))
+ return DAG.getNode(Opcode, DL, VT, NegV);
+ break;
+ case ISD::FP_ROUND:
+ if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
+ OptForSize, Cost, Depth))
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
+ break;
+ case ISD::SELECT:
+ case ISD::VSELECT: {
+ // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
+ // iff at least one cost is cheaper and the other is neutral/cheaper
+ SDValue LHS = Op.getOperand(1);
+ NegatibleCost CostLHS = NegatibleCost::Expensive;
+ SDValue NegLHS =
+ getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
+ if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
+ RemoveDeadNode(NegLHS);
+ break;
+ }
+
+ // Prevent this node from being deleted by the next call.
+ Handles.emplace_back(NegLHS);
+
+ SDValue RHS = Op.getOperand(2);
+ NegatibleCost CostRHS = NegatibleCost::Expensive;
+ SDValue NegRHS =
+ getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
+ (CostLHS != NegatibleCost::Cheaper &&
+ CostRHS != NegatibleCost::Cheaper)) {
+ RemoveDeadNode(NegLHS);
+ RemoveDeadNode(NegRHS);
+ break;
+ }
+
+ Cost = std::min(CostLHS, CostRHS);
+ return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Legalization Utilities
+//===----------------------------------------------------------------------===//
+
+bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ MulExpansionKind Kind, SDValue LL,
+ SDValue LH, SDValue RL, SDValue RH) const {
+ assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
+ Opcode == ISD::SMUL_LOHI);
+
+ bool HasMULHS = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
+ bool HasMULHU = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
+ bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
+ bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
+
+ if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
+ return false;
+
+ unsigned OuterBitSize = VT.getScalarSizeInBits();
+ unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
+
+ // LL, LH, RL, and RH must be either all NULL or all set to a value.
+ assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
+ (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
+
+ SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
+ auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
+ bool Signed) -> bool {
+ if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
+ Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
+ }
+ if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
+ Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
+ return true;
+ }
+ return false;
+ };
+
+ SDValue Lo, Hi;
+
+ if (!LL.getNode() && !RL.getNode() &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
+ RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
+ }
+
+ if (!LL.getNode())
+ return false;
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(LHS, HighMask) &&
+ DAG.MaskedValueIsZero(RHS, HighMask)) {
+ // The inputs are both zero-extended.
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
+ if (Opcode != ISD::MUL) {
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ Result.push_back(Zero);
+ Result.push_back(Zero);
+ }
+ return true;
+ }
+ }
+
+ if (!VT.isVector() && Opcode == ISD::MUL &&
+ DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
+ DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
+ // The input values are both sign-extended.
+ // TODO non-MUL case?
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
+ return true;
+ }
+ }
+
+ unsigned ShiftAmount = OuterBitSize - InnerBitSize;
+ SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
+
+ if (!LH.getNode() && !RH.getNode() &&
+ isOperationLegalOrCustom(ISD::SRL, VT) &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
+ LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
+ RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
+ RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
+ }
+
+ if (!LH.getNode())
+ return false;
+
+ if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
+ return false;
+
+ Result.push_back(Lo);
+
+ if (Opcode == ISD::MUL) {
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ Result.push_back(Hi);
+ return true;
+ }
+
+ // Compute the full width result.
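+ // With I = InnerBitSize, this is the usual schoolbook decomposition:
+ //   (LH * 2^I + LL) * (RH * 2^I + RL)
+ //     = LL*RL + (LL*RH + LH*RL) * 2^I + LH*RH * 2^(2*I)
+ // with the cross terms accumulated below using explicit carry handling.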
+ auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+ };
+
+ SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
+ return false;
+
+ // This is effectively the add part of a multiply-add of half-sized operands,
+ // so it cannot overflow.
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
+ return false;
+
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
+ isOperationLegalOrCustom(ISD::ADDE, VT));
+ if (UseGlue)
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+ else
+ Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
+
+ SDValue Carry = Next.getValue(1);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+
+ if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
+ return false;
+
+ if (UseGlue)
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ else
+ Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Zero, Carry);
+
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (Opcode == ISD::SMUL_LOHI) {
+ SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
+ Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
+
+ NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
+ Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
+ }
+
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ return true;
+}
+
+bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
+ SelectionDAG &DAG, MulExpansionKind Kind,
+ SDValue LL, SDValue LH, SDValue RL,
+ SDValue RH) const {
+ SmallVector<SDValue, 2> Result;
+ bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
+ N->getOperand(0), N->getOperand(1), Result, HiLoVT,
+ DAG, Kind, LL, LH, RL, RH);
+ if (Ok) {
+ assert(Result.size() == 2);
+ Lo = Result[0];
+ Hi = Result[1];
+ }
+ return Ok;
+}
+
+// Optimize unsigned division or remainder by constants for types twice as large
+// as a legal VT.
+//
+// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
+// can be computed as:
+//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
+//   Remainder = Sum % Constant
+// This is based on "Remainder by Summing Digits" from Hacker's Delight.
+//
+// For division, we can compute the remainder using the algorithm described
+// above, subtract it from the dividend to get an exact multiple of Constant.
+// Then multiply that exact multiple by the multiplicative inverse modulo
+// (1 << BitWidth) to get the quotient.
+
+// If Constant is even, we can shift right the dividend and the divisor by the
+// number of trailing zeros in Constant before applying the remainder algorithm.
+// If we're after the quotient, we can subtract this value from the shifted
+// dividend and multiply by the multiplicative inverse of the shifted divisor.
+// If we want the remainder, we shift the value left by the number of trailing
+// zeros and add the bits that were shifted out of the dividend.
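+//
+// For example (illustrative): for a 64-bit udiv/urem by 3 with a legal i32,
+// 2^32 % 3 == 1, so Remainder = (Lo + Hi + carry) % 3 can be computed in
+// 32 bits, and for the quotient (N - Remainder) is multiplied by the inverse
+// of 3 modulo 2^64.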
+bool TargetLowering::expandDIVREMByConstant(SDNode *N,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ SDValue LL, SDValue LH) const {
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+
+ // TODO: Support signed division/remainder.
+ if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
+ return false;
+ assert(
+ (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
+ "Unexpected opcode");
+
+ auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CN)
+ return false;
+
+ APInt Divisor = CN->getAPIntValue();
+ unsigned BitWidth = Divisor.getBitWidth();
+ unsigned HBitWidth = BitWidth / 2;
+ assert(VT.getScalarSizeInBits() == BitWidth &&
+ HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
+
+ // Divisor needs to be less than (1 << HBitWidth).
+ APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
+ if (Divisor.uge(HalfMaxPlus1))
+ return false;
+
+ // We depend on the UREM-by-constant optimization in DAGCombiner, which
+ // requires a high multiply.
+ if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
+ !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
+ return false;
+
+ // Don't expand if optimizing for size.
+ if (DAG.shouldOptForSize())
+ return false;
+
+ // Early out for 0 or 1 divisors.
+ if (Divisor.ule(1))
+ return false;
+
+ // If the divisor is even, shift it until it becomes odd.
+ unsigned TrailingZeros = 0;
+ if (!Divisor[0]) {
+ TrailingZeros = Divisor.countr_zero();
+ Divisor.lshrInPlace(TrailingZeros);
+ }
+
+ SDLoc dl(N);
+ SDValue Sum;
+ SDValue PartialRem;
+
+ // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
+ // then add in the carry.
+ // TODO: If we can't split it in half, we might be able to split into 3 or
+ // more pieces using a smaller bit width.
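+ // Adding the carry back in keeps the sum congruent to the original value
+ // modulo the divisor: N = Hi * 2^HBitWidth + Lo and 2^HBitWidth == 1
+ // (mod divisor), so N == Hi + Lo (mod divisor), and a wrap-around bit is
+ // itself worth 2^HBitWidth == 1.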
+ if (HalfMaxPlus1.urem(Divisor).isOne()) {
+ assert(!LL == !LH && "Expected both input halves or no input halves!");
+ if (!LL)
+ std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
+
+ // Shift the input by the number of TrailingZeros in the divisor. The
+ // shifted out bits will be added to the remainder later.
+ if (TrailingZeros) {
+ // Save the shifted off bits if we need the remainder.
+ if (Opcode != ISD::UDIV) {
+ APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
+ PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
+ DAG.getConstant(Mask, dl, HiLoVT));
+ }
+
+ LL = DAG.getNode(
+ ISD::OR, dl, HiLoVT,
+ DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
+ DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
+ DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
+ HiLoVT, dl)));
+ LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
+ }
+
+ // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
+ if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
+ SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
+ Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
+ Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
+ DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
+ } else {
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
+ SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
+ // If the boolean for the target is 0 or 1, we can add the setcc result
+ // directly.
+ if (getBooleanContents(HiLoVT) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
+ else
+ Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
+ DAG.getConstant(0, dl, HiLoVT));
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
+ }
+ }
+
+ // If we didn't find a sum, we can't do the expansion.
+ if (!Sum)
+ return false;
+
+ // Perform a HiLoVT urem on the Sum using truncated divisor.
+ SDValue RemL =
+ DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
+ DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
+ SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
+
+ if (Opcode != ISD::UREM) {
+ // Subtract the remainder from the shifted dividend.
+ SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+ SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
+
+ Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
+
+ // Multiply by the multiplicative inverse of the divisor modulo
+ // (1 << BitWidth).
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
+ APInt MulFactor = Divisor.zext(BitWidth + 1);
+ MulFactor = MulFactor.multiplicativeInverse(Mod);
+ MulFactor = MulFactor.trunc(BitWidth);
+
+ SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
+ DAG.getConstant(MulFactor, dl, VT));
+
+ // Split the quotient into low and high parts.
+ SDValue QuotL, QuotH;
+ std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
+ Result.push_back(QuotL);
+ Result.push_back(QuotH);
+ }
+
+ if (Opcode != ISD::UDIV) {
+ // If we shifted the input, shift the remainder left and add the bits we
+ // shifted off the input.
+ if (TrailingZeros) {
+ APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
+ RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
+ RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
+ }
+ Result.push_back(RemL);
+ Result.push_back(DAG.getConstant(0, dl, HiLoVT));
+ }
+
+ return true;
+}
+
+// Check that (every element of) Z is undef or not an exact multiple of BW.
+static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
+ return ISD::matchUnaryPredicate(
+ Z,
+ [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
+ true);
+}
+
+static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
+ EVT VT = Node->getValueType(0);
+ SDValue ShX, ShY;
+ SDValue ShAmt, InvShAmt;
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+ SDValue Mask = Node->getOperand(3);
+ SDValue VL = Node->getOperand(4);
+
+ unsigned BW = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
+ VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
+ VL);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+ SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
+ DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
+ } else {
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
+ }
+
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
+ SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
+ }
+ }
+ return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
+}
+
+SDValue TargetLowering::expandFunnelShift(SDNode *Node,
+ SelectionDAG &DAG) const {
+ if (Node->isVPOpcode())
+ return expandVPFunnelShift(Node, DAG);
+
+ EVT VT = Node->getValueType(0);
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return SDValue();
+
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+
+ unsigned BW = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+
+ // If a funnel shift in the other direction is better supported, use it.
+ unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
+ if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
+ isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl X, Y, Z -> fshr X, Y, -Z
+ // fshr X, Y, Z -> fshl X, Y, -Z
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+ Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
+ } else {
+ // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+ // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
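+ // Sketch of why this holds (illustrative; BW is a power of two here, so
+ // ~Z is congruent to BW - 1 - Z modulo BW): fshr(X, Y, 1) is the low half of
+ // the concatenation (X:Y) shifted right by 1, so feeding (srl X, 1) and
+ // fshr(X, Y, 1) into an fshr by ~Z shifts (X:Y) right by a total of
+ // 1 + (BW - 1 - Z) = BW - Z, which matches fshl X, Y, Z for every Z,
+ // including Z == 0.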
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
+ X = DAG.getNode(ISD::SRL, DL, VT, X, One);
+ } else {
+ X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
+ Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
+ }
+ Z = DAG.getNOT(DL, Z, ShVT);
+ }
+ return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
+ }
+
+ SDValue ShX, ShY;
+ SDValue ShAmt, InvShAmt;
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
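+ // The extra ">> 1" / "<< 1" keeps every individual shift amount within
+ // [0, BW - 1] even when Z % BW == 0. For example (illustrative, BW == 32,
+ // Z == 0): fshl yields X | (Y >> 1 >> 31) == X | 0 == X, the required
+ // result, without ever shifting by 32.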
+ SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
+ } else {
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
+ }
+
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
+ SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
+ }
+ }
+ return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
+}
+
+// TODO: Merge with expandFunnelShift.
+SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Op1.getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+
+ // If a rotate in the other direction is better supported, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
+ isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
+ return DAG.getNode(RevRot, DL, VT, Op0, Sub);
+ }
+
+ if (!AllowVectorOps && VT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return SDValue();
+
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ SDValue ShVal;
+ SDValue HsVal;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
+ // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
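+ // For example (illustrative, w == 32): c == 8 gives x << 8 | x >> 24, and
+ // c == 0 gives x << 0 | x >> 0 == x, since -0 & 31 == 0; no shift amount
+ // ever reaches w.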
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
+ SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
+ SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
+ } else {
+ // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
+ // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
+ ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
+ SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ HsVal =
+ DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
+ }
+ return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
+}
+
+void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
+ SelectionDAG &DAG) const {
+ assert(Node->getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Node->getValueType(0);
+ unsigned VTBits = VT.getScalarSizeInBits();
+ assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
+
+ bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
+ bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Node->getOperand(0);
+ SDValue ShOpHi = Node->getOperand(1);
+ SDValue ShAmt = Node->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
+ SDLoc dl(Node);
+
+ // ISD::FSHL and ISD::FSHR have defined behavior for out-of-range shift
+ // amounts, but ISD::SHL, ISD::SRL and ISD::SRA do not. Insert an AND to be
+ // safe; it is usually optimized away during isel.
+ SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(VTBits - 1, dl, ShAmtVT));
+ SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, dl, ShAmtVT))
+ : DAG.getConstant(0, dl, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (IsSHL) {
+ Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
+ } else {
+ Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
+ }
+
+ // If the shift amount is larger than or equal to the width of a part, we
+ // use the result from the FSHL/FSHR. Insert a test and select the appropriate
+ // values for large shift amounts.
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(VTBits, dl, ShAmtVT));
+ SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
+ DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
+
+ if (IsSHL) {
+ Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
+ Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
+ } else {
+ Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
+ Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
+ }
+}
+
+bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ SDLoc dl(SDValue(Node, 0));
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (SrcVT != MVT::f32 || DstVT != MVT::i64)
+ return false;
+
+ if (Node->isStrictFPOpcode())
+ // When a NaN is converted to an integer a trap is allowed. We can't
+ // use this expansion here because it would eliminate that trap. Other
+ // traps are also allowed and cannot be eliminated. See
+ // IEEE 754-2008 sec 5.8.
+ return false;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
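+ // Worked example (illustrative only): 5.5f has bit pattern 0x40B00000, so
+ // ExponentBits = 129, Exponent = 129 - 127 = 2, Sign = 0, and
+ // R = 0x00300000 | 0x00800000 = 0x00B00000. Since Exponent < 23, R is
+ // shifted right by 23 - 2 = 21, giving 5, and the sign/negative-exponent
+ // fixups leave the result at 5.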
+ unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+ EVT IntVT = SrcVT.changeTypeToInteger();
+ EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
+
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
+ SDValue Bias = DAG.getConstant(127, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
+ SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
+
+ SDValue ExponentBits = DAG.getNode(
+ ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, DstVT);
+
+ R = DAG.getSelectCC(
+ dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, DstVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, IntShVT)),
+ DAG.getNode(ISD::SRL, dl, DstVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, IntShVT)),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
+ DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
+
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
+ DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
+ return true;
+}
+
+bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
+ SelectionDAG &DAG) const {
+ SDLoc dl(SDValue(Node, 0));
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ ISD::FP_TO_SINT;
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
+ return false;
+
+ // If the maximum float value is smaller than the signed integer range,
+ // the destination signmask can't be represented by the float, so we can
+ // just use FP_TO_SINT directly.
+ const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
+ APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
+ if (APFloat::opOverflow &
+ APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
+ if (Node->isStrictFPOpcode()) {
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ return true;
+ }
+
+ // Don't expand it if there isn't a cheap fsub instruction.
+ if (!isOperationLegalOrCustom(
+ Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
+ return false;
+
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue Sel;
+
+ if (Node->isStrictFPOpcode()) {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Node->getOperand(0), /*IsSignaling*/ true);
+ Chain = Sel.getValue(1);
+ } else {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ }
+
+ bool Strict = Node->isStrictFPOpcode() ||
+ shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+
+ if (Strict) {
+ // Expand based on the maximum range of FP_TO_SINT: if the value exceeds the
+ // signmask, offset it first (the result of which should be fully
+ // representable).
+ // Sel = Src < 0x8000000000000000
+ // FltOfs = select Sel, 0, 0x8000000000000000
+ // IntOfs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
+
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
+ DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+ SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
+ DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
+ SDValue SInt;
+ if (Node->isStrictFPOpcode()) {
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Chain, Src, FltOfs });
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Val.getValue(1), Val });
+ Chain = SInt.getValue(1);
+ } else {
+ SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
+ SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
+ }
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
+ } else {
+ // Expand based on maximum range of FP_TO_SINT:
+ // True = fp_to_sint(Src)
+ // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
+ // Result = select (Src < 0x8000000000000000), True, False
+
+ SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ False = DAG.getNode(ISD::XOR, dl, DstVT, False,
+ DAG.getConstant(SignMask, dl, DstVT));
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+ Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+ }
+ return true;
+}
+
+bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
+ SelectionDAG &DAG) const {
+ // This transform is not correct for converting 0 when the rounding mode is
+ // set to round toward negative infinity, as it would produce -0.0. So
+ // disable it under strictfp.
+ if (Node->isStrictFPOpcode())
+ return false;
+
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ SDLoc dl(SDValue(Node, 0));
+ EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation performs rounding
+ // correctly in all rounding modes with the exception of converting 0
+ // when rounding toward negative infinity. In that case the fsub will produce
+ // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
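+ // Why this works (illustrative sketch): the bit pattern 0x4330... is the
+ // double 2^52, and OR'ing Lo into its low mantissa bits yields exactly
+ // 2^52 + Lo; likewise 0x4530... is 2^84, whose mantissa LSB has weight 2^32,
+ // so OR'ing Hi in yields exactly 2^84 + Hi * 2^32. The computation below is
+ // (2^84 + Hi * 2^32 - (2^84 + 2^52)) + (2^52 + Lo) = Hi * 2^32 + Lo, with
+ // the only rounding happening in the final FADD.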
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ return true;
+}
+
+SDValue
+TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
+ Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
+ "Wrong opcode");
+
+ if (Node->getFlags().hasNoNaNs()) {
+ ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+ SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
+ // Copy FMF flags, but always set the no-signed-zeros flag
+ // as this is implied by the FMINNUM/FMAXNUM semantics.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoSignedZeros(true);
+ SelCC->setFlags(Flags);
+ return SelCC;
+ }
+
+ return SDValue();
+}
+
+SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
+ ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ EVT VT = Node->getValueType(0);
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
+
+ if (isOperationLegalOrCustom(NewOp, VT)) {
+ SDValue Quiet0 = Node->getOperand(0);
+ SDValue Quiet1 = Node->getOperand(1);
+
+ if (!Node->getFlags().hasNoNaNs()) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+ if (!DAG.isKnownNeverSNaN(Quiet0)) {
+ Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
+ Node->getFlags());
+ }
+ if (!DAG.isKnownNeverSNaN(Quiet1)) {
+ Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
+ Node->getFlags());
+ }
+ }
+
+ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
+ }
+
+ // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
+ // instead if there are no NaNs and there can't be an incompatible zero
+ // compare: at least one operand isn't +/-0, or there are no signed-zeros.
+ if ((Node->getFlags().hasNoNaNs() ||
+ (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
+ DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
+ (Node->getFlags().hasNoSignedZeros() ||
+ DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
+ DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
+ unsigned IEEE2018Op =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+ if (isOperationLegalOrCustom(IEEE2018Op, VT))
+ return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
+ Node->getOperand(1), Node->getFlags());
+ }
+
+ if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
+ return SelCC;
+
+ return SDValue();
+}
+
+/// Returns a true value if this FPClassTest can be performed with an ordered
+/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
+/// std::nullopt if it cannot be performed as a compare with 0.
+static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
+ const fltSemantics &Semantics,
+ const MachineFunction &MF) {
+ FPClassTest OrderedMask = Test & ~fcNan;
+ FPClassTest NanTest = Test & fcNan;
+ bool IsOrdered = NanTest == fcNone;
+ bool IsUnordered = NanTest == fcNan;
+
+ // Skip cases that are testing for only a qnan or snan.
+ if (!IsOrdered && !IsUnordered)
+ return std::nullopt;
+
+ if (OrderedMask == fcZero &&
+ MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
+ return IsOrdered;
+ if (OrderedMask == (fcZero | fcSubnormal) &&
+ MF.getDenormalMode(Semantics).inputsAreZero())
+ return IsOrdered;
+ return std::nullopt;
+}
+
+SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
+ FPClassTest Test, SDNodeFlags Flags,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ EVT OperandVT = Op.getValueType();
+ assert(OperandVT.isFloatingPoint());
+
+ // Degenerate cases.
+ if (Test == fcNone)
+ return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
+ if ((Test & fcAllFlags) == fcAllFlags)
+ return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
+
+ // PPC double double is a pair of doubles, of which the higher part determines
+ // the value class.
+ if (OperandVT == MVT::ppcf128) {
+ Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
+ DAG.getConstant(1, DL, MVT::i32));
+ OperandVT = MVT::f64;
+ }
+
+ // Some checks may be represented as the inversion of a simpler check, for example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
+ IsInverted = true;
+ Test = InvertedCheck;
+ }
+
+ // Floating-point type properties.
+ EVT ScalarFloatVT = OperandVT.getScalarType();
+ const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
+ const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
+ bool IsF80 = (ScalarFloatVT == MVT::f80);
+
+ // Some checks can be implemented using float comparisons, if floating point
+ // exceptions are ignored.
+ if (Flags.hasNoFPExcept() &&
+ isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
+ ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
+ ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+
+ if (std::optional<bool> IsCmp0 =
+ isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+ IsCmp0 && (isCondCodeLegalOrCustom(
+ *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
+ OperandVT.getScalarType().getSimpleVT()))) {
+
+ // If denormals could be implicitly treated as 0, this is not equivalent
+ // to a compare with 0 since it will also be true for denormals.
+ return DAG.getSetCC(DL, ResultVT, Op,
+ DAG.getConstantFP(0.0, DL, OperandVT),
+ *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
+ }
+
+ if (Test == fcNan &&
+ isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
+ OperandVT.getScalarType().getSimpleVT())) {
+ return DAG.getSetCC(DL, ResultVT, Op, Op,
+ IsInverted ? ISD::SETO : ISD::SETUO);
+ }
+
+ if (Test == fcInf &&
+ isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ OperandVT.getScalarType().getSimpleVT()) &&
+ isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+ // isinf(x) --> fabs(x) == inf
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
+ SDValue Inf =
+ DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
+ return DAG.getSetCC(DL, ResultVT, Abs, Inf,
+ IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ }
+ }
+
+ // In the general case use integer operations.
+ unsigned BitSize = OperandVT.getScalarSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
+ if (OperandVT.isVector())
+ IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
+ OperandVT.getVectorElementCount());
+ SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
+
+ // Various masks.
+ APInt SignBit = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+ const unsigned ExplicitIntBitInF80 = 63;
+ APInt ExpMask = Inf;
+ if (IsF80)
+ ExpMask.clearBit(ExplicitIntBitInF80);
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
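+ // For example (illustrative, f32): SignBit = 0x80000000,
+ // ValueMask = 0x7FFFFFFF, Inf = ExpMask = 0x7F800000,
+ // AllOneMantissa = 0x007FFFFF and QNaNBitMask = 0x00400000.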
+ APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
+
+ SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
+ SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
+ SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
+ SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
+ SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
+ SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
+
+ SDValue Res;
+ const auto appendResult = [&](SDValue PartialRes) {
+ if (PartialRes) {
+ if (Res)
+ Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
+ else
+ Res = PartialRes;
+ }
+ };
+
+ SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
+ const auto getIntBitIsSet = [&]() -> SDValue {
+ if (!IntBitIsSetV) {
+ APInt IntBitMask(BitSize, 0);
+ IntBitMask.setBit(ExplicitIntBitInF80);
+ SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
+ SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
+ IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
+ }
+ return IntBitIsSetV;
+ };
+
+ // Split the value into sign bit and absolute value.
+ SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
+ SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
+ DAG.getConstant(0, DL, IntVT), ISD::SETLT);
+
+ // Tests that involve more than one class should be processed first.
+ SDValue PartialRes;
+
+ if (IsF80)
+ ; // Detect finite numbers of f80 by checking individual classes because
+ // they have different settings of the explicit integer bit.
+ else if ((Test & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ Test &= ~fcFinite;
+ } else if ((Test & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
+ Test &= ~fcPosFinite;
+ } else if ((Test & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ Test &= ~fcNegFinite;
+ }
+ appendResult(PartialRes);
+
+ if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ appendResult(ExpIsZero);
+ Test &= ~PartialCheck & fcAllFlags;
+ }
+ }
+
+ // Check for individual classes.
+
+ if (unsigned PartialCheck = Test & fcZero) {
+ if (PartialCheck == fcPosZero)
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
+ else if (PartialCheck == fcZero)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
+ else // ISD::fcNegZero
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
+ SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+ SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+ SDValue VMinusOneV =
+ DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+ PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+ if (PartialCheck == fcNegSubnormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcInf) {
+ if (PartialCheck == fcPosInf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
+ else if (PartialCheck == fcInf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
+ else { // ISD::fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
+ }
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcNan) {
+ APInt InfWithQnanBit = Inf | QNaNBitMask;
+ SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) > int(inf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
+ if (IsF80) {
+ // Recognize unsupported values as NaNs for compatibility with glibc.
+ // For such values, (exp(V) == 0) == int_bit.
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ SDValue IsPseudo =
+ DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
+ PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
+ }
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
+ PartialRes =
+ DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
+ } else { // ISD::fcSNan
+ // issignaling(V) ==> abs(V) > unsigned(Inf) &&
+ // abs(V) < (unsigned(Inf) | quiet_bit)
+ SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
+ SDValue IsNotQnan =
+ DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
+ }
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcNormal) {
+ // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
+ APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+ SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
+ SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
+ APInt ExpLimit = ExpMask - ExpLSB;
+ SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
+ if (PartialCheck == fcNegNormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ else if (PartialCheck == fcPosNormal) {
+ SDValue PosSignV =
+ DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
+ }
+ if (IsF80)
+ PartialRes =
+ DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
+ appendResult(PartialRes);
+ }
+
+ if (!Res)
+ return DAG.getConstant(IsInverted, DL, ResultVT);
+ if (IsInverted)
+ Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
+ return Res;
+}
+
+// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
+ assert(VT.isVector() && "Expected vector type");
+ unsigned Len = VT.getScalarSizeInBits();
+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
+}
+
+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return SDValue();
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
+ return SDValue();
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
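+ // Worked example (illustrative, 8-bit v = 0b11011010, popcount 5):
+ // step 1 gives 0b10010101 (per-2-bit counts 2,1,1,1), step 2 gives
+ // 0b00110010 (per-nibble counts 3,2), and step 3 gives 0b00000101 = 5.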
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, dl, ShVT))),
+ Mask0F);
+
+ if (Len <= 8)
+ return Op;
+
+ // Avoid the multiply if we only have 2 bytes to add.
+ // TODO: Only doing this for scalars because vectors weren't as obviously
+ // improved.
+ if (Len == 16 && !VT.isVector()) {
+ // v = (v + (v >> 8)) & 0x00FF;
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(8, dl, ShVT))),
+ DAG.getConstant(0xFF, dl, VT));
+ }
+
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ return DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
+}
+
+SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return SDValue();
+
+ // This is the same algorithm as in expandCTPOP, from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT), Mask, VL),
+ Mask55, Mask, VL);
+ Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
+
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT), Mask, VL),
+ Mask33, Mask, VL);
+ Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
+
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
+ Mask, VL);
+ Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
+ Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
+
+ if (Len <= 8)
+ return Op;
+
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ return DAG.getNode(ISD::VP_LSHR, dl, VT,
+ DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+ DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
+}
+
+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return SDValue();
+
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+}
+
+SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
+ VL);
+ }
+ Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
+ VL);
+ return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
+}
+
+SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
+ const SDLoc &DL, EVT VT, SDValue Op,
+ unsigned BitWidth) const {
+ if (BitWidth != 32 && BitWidth != 64)
+ return SDValue();
+ APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
+ : APInt(64, 0x0218A392CD3D5DBFULL);
+ const DataLayout &TD = DAG.getDataLayout();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+ SDValue Lookup = DAG.getNode(
+ ISD::SRL, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
+ DAG.getConstant(DeBruijn, DL, VT)),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
+
+ SmallVector<uint8_t> Table(BitWidth, 0);
+ for (unsigned i = 0; i < BitWidth; i++) {
+ APInt Shl = DeBruijn.shl(i);
+ APInt Lshr = Shl.lshr(ShiftAmt);
+ Table[Lshr.getZExtValue()] = i;
+ }
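+ // How the lookup works (illustrative): Op & -Op isolates the lowest set
+ // bit, 2^t with t = cttz(Op), so the multiply is a left shift of the
+ // De Bruijn constant by t; because every Log2(BitWidth)-bit window of that
+ // constant is unique, the top bits kept by the SRL identify t via the table.
+ // E.g. for 32 bits and Op = 8 (t = 3), (0x077CB531 << 3) >> 27 == 7 and
+ // Table[7] == 3.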
+
+ // Create a ConstantDataArray in the constant pool.
+ auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
+ SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
+ TD.getPrefTypeAlign(CA->getType()));
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
+ DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
+ PtrInfo, MVT::i8);
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ return ExtLoad;
+
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, SrcIsZero,
+ DAG.getConstant(BitWidth, DL, VT), ExtLoad);
+}
+
+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTTZ, VT))
+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return SDValue();
+
+ // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
+ if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
+ !isOperationLegal(ISD::CTLZ, VT))
+ if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
+ return V;
+
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
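+ // For example (illustrative, 8-bit x = 0b01101000): x - 1 = 0b01100111 and
+ // ~x = 0b10010111, so ~x & (x - 1) = 0b00000111 and popcount gives 3, the
+ // number of trailing zeros.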
+ SDValue Tmp = DAG.getNode(
+ ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
+
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
+ }
+
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
+}
+
+SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
+ SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
+ DAG.getConstant(-1, dl, VT), Mask, VL);
+ SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
+ DAG.getConstant(1, dl, VT), Mask, VL);
+ SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
+ return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
+}
+
+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = N->getOperand(0);
+
+ // abs(x) -> smax(x,sub(0,x))
+ if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::SMAX, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ return DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ }
+
+ // abs(x) -> umin(x,sub(0,x))
+ if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::UMIN, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
+ return DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ }
+
+ // 0 - abs(x) -> smin(x, sub(0,x))
+ if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::SMIN, VT)) {
+ Op = DAG.getFreeze(Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ return DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ }
+
+ // Only expand vector types if we have the appropriate vector operations.
+ if (VT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
+ (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return SDValue();
+
+ Op = DAG.getFreeze(Op);
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, Op,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
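+ // For example (illustrative, 8-bit x = -5): Shift = 0xFF, Xor = 0x04, and
+ // Xor - Shift = 4 - (-1) = 5 = abs(-5); for non-negative x, Shift is 0 and
+ // both values pass through unchanged.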
+
+ // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
+ if (!IsNegative)
+ return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
+
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+}
+
+SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = DAG.getFreeze(N->getOperand(0));
+ SDValue RHS = DAG.getFreeze(N->getOperand(1));
+ bool IsSigned = N->getOpcode() == ISD::ABDS;
+
+ // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
+ // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
+ unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
+ unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
+ if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
+ SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
+ SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
+ }
+
+ // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
+ if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
+ return DAG.getNode(ISD::OR, dl, VT,
+ DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
+ DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
+
+ // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
+ SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
+ return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
+ DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
+}
+
+SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+
+ if (!VT.isSimple())
+ return SDValue();
+
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i16:
+ // Use a rotate by 8. This can be further expanded if necessary.
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(0xFF00, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<8, dl, VT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL<<8, dl, VT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+
+ if (!VT.isSimple())
+ return SDValue();
+
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i16:
+ Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
+ Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Mask, EVL);
+ Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
+ Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
+ Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
+ Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
+ Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Mask, EVL);
+ Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
+ Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
+ }
+}
+
+SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
+ // pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
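+ // For example (illustrative, Sz == 8, no BSWAP needed): 0b10110001 becomes
+ // 0b00011011 after the i4 swap, 0b01001110 after the i2 swap, and
+ // 0b10001101 after the i1 swap, which is the bit reversal of the input.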
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
+
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ return Tmp;
+ }
+
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift = APInt::getOneBitSet(Sz, J);
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
+
+SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
+ assert(N->getOpcode() == ISD::VP_BITREVERSE);
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
+ // pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
+
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask4, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask2, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask1, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+ return Tmp;
+ }
+ return SDValue();
+}
+
+std::pair<SDValue, SDValue>
+TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
+ SDLoc SL(LD);
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ EVT DstVT = LD->getValueType(0);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ if (SrcVT.isScalableVector())
+ report_fatal_error("Cannot scalarize scalable vector loads");
+
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = DstVT.getScalarType();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!SrcEltVT.isByteSized()) {
+ unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
+ EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
+
+ unsigned NumSrcBits = SrcVT.getSizeInBits();
+ EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
+
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant(
+ APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
+
+ // Load the whole vector and avoid masking off the top bits as it makes
+ // the codegen worse.
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
+ LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
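+ // Shift element Idx down from its bit position within the loaded integer
+ // (mirrored on big-endian targets), mask off the neighbouring bits, and
+ // truncate to the source element type.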
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
+ LoadVT, SL, /*LegalTypes=*/false);
+ SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
+ SDValue Elt =
+ DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
+ SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
+
+ if (ExtType != ISD::NON_EXTLOAD) {
+ unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
+ Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
+ }
+
+ Vals.push_back(Scalar);
+ }
+
+ SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
+ return std::make_pair(Value, Load.getValue(1));
+ }
+
+ unsigned Stride = SrcEltVT.getSizeInBits() / 8;
+ assert(SrcEltVT.isByteSized());
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
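+ // Byte-sized elements: emit one scalar (ext)load per element, advancing
+ // the base pointer by the element's store size each iteration.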
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue ScalarLoad =
+ DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcEltVT, LD->getOriginalAlign(),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
+ SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
+
+ return std::make_pair(Value, NewChain);
+}
+
+SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ SDLoc SL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ if (StVT.isScalableVector())
+ report_fatal_error("Cannot scalarize scalable vector stores");
+
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!MemSclVT.isByteSized()) {
+ unsigned NumBits = StVT.getSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+
+ SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getVectorIdxConstant(Idx, SL));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
+ SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
+ SDValue ShiftedElt =
+ DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
+ CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
+ }
+
+ return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
+ }
+
+ // Store Stride in bytes
+ unsigned Stride = MemSclVT.getSizeInBits() / 8;
+ assert(Stride && "Zero stride!");
+ // Extract each of the elements from the original vector and save them into
+ // memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getVectorIdxConstant(Idx, SL));
+
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(
+ Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
+ MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
+
+ Stores.push_back(Store);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
+}
+
+std::pair<SDValue, SDValue>
+TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ auto &MF = DAG.getMachineFunction();
+
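+ // Floating-point and vector loads are expanded via a same-width integer
+ // load plus a bitcast when that integer load is available (scalarizing
+ // illegal vector loads), and otherwise by copying through an aligned
+ // stack slot below.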
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+ LoadedVT.isVector()) {
+ // Scalarize the load and let the individual components be handled.
+ return scalarizeVectorLoad(LD, DAG);
+ }
+
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+ LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
+
+ return std::make_pair(Result, newLoad.getValue(1));
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getStoreSize();
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ EVT PtrVT = Ptr.getValueType();
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+
+ // Do all but one copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset), MemVT,
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+ LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ return std::make_pair(Load, TF);
+ }
+
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
+ NumBits >>= 1;
+
+ Align Alignment = LD->getOriginalAlign();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (DAG.getDataLayout().isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
+ DAG.getDataLayout()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ return std::make_pair(Result, TF);
+}
+
+SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ Align Alignment = ST->getOriginalAlign();
+ auto &MF = DAG.getMachineFunction();
+ EVT StoreMemVT = ST->getMemoryVT();
+
+ SDLoc dl(ST);
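+ // Floating-point and vector stores are expanded via a bitcast to a
+ // same-width integer plus a plain store when that is available
+ // (scalarizing illegal vector stores), and otherwise by spilling to an
+ // aligned stack slot and copying out with integer loads and stores.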
+ if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (isTypeLegal(intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+ StoreMemVT.isVector()) {
+ // Scalarize the store and let the individual components be handled.
+ SDValue Result = scalarizeVectorStore(ST, DAG);
+ return Result;
+ }
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ Alignment, ST->getMemOperand()->getFlags());
+ return Result;
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ MVT RegVT = getRegisterType(
+ *DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned StoredBytes = StoreMemVT.getStoreSize();
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(
+ Chain, dl, Val, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
+
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->getOriginalAlign(),
+ ST->getMemOperand()->getFlags()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT LoadMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
+
+ Stores.push_back(
+ DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
+ ST->getOriginalAlign(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo()));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ return Result;
+ }
+
+ assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
+ unsigned NumBits = NewStoredVT.getFixedSizeInBits();
+ unsigned IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(
+ NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl,
+ DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
+ Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Store2 = DAG.getTruncStore(
+ Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ return Result;
+}
+
+SDValue
+TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
+ const SDLoc &DL, EVT DataVT,
+ SelectionDAG &DAG,
+ bool IsCompressedMemory) const {
+ SDValue Increment;
+ EVT AddrVT = Addr.getValueType();
+ EVT MaskVT = Mask.getValueType();
+ assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
+ "Incompatible types of Data and Mask");
+ if (IsCompressedMemory) {
+ if (DataVT.isScalableVector())
+ report_fatal_error(
+ "Cannot currently handle compressed memory with scalable vectors");
+ // Increment the pointer according to the number of '1's in the mask.
+ EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
+ SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
+ if (MaskIntVT.getSizeInBits() < 32) {
+ MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
+ MaskIntVT = MVT::i32;
+ }
+
+ // Count '1's with POPCNT.
+ Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
+ Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
+ // Scale is an element size in bytes.
+ SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
+ AddrVT);
+ Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+ } else if (DataVT.isScalableVector()) {
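+ // The store size of a scalable vector is only known as a multiple of
+ // vscale, so scale the known minimum size by vscale at run time.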
+ Increment = DAG.getVScale(DL, AddrVT,
+ APInt(AddrVT.getFixedSizeInBits(),
+ DataVT.getStoreSize().getKnownMinValue()));
+ } else
+ Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
+
+ return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
+}
+
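+ // Clamp Idx so that an access of SubEC elements starting at that index
+ // stays within the bounds of a VecVT vector.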
+static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
+ EVT VecVT, const SDLoc &dl,
+ ElementCount SubEC) {
+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+ "Cannot index a scalable vector within a fixed-width vector");
+
+ unsigned NElts = VecVT.getVectorMinNumElements();
+ unsigned NumSubElts = SubEC.getKnownMinValue();
+ EVT IdxVT = Idx.getValueType();
+
+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {
+ // If this is a constant index and we know the value plus the number of
+ // elements in the subvector minus one is less than the minimum number of
+ // elements then it's safe to return Idx.
+ if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
+ if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
+ return Idx;
+ SDValue VS =
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
+ unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
+ SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
+ DAG.getConstant(NumSubElts, dl, IdxVT));
+ return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
+ }
+ if (isPowerOf2_32(NElts) && NumSubElts == 1) {
+ APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
+ return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+ DAG.getConstant(Imm, dl, IdxVT));
+ }
+ unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
+ return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
+ DAG.getConstant(MaxIndex, dl, IdxVT));
+}
+
+SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
+ SDValue VecPtr, EVT VecVT,
+ SDValue Index) const {
+ return getVectorSubVecPointer(
+ DAG, VecPtr, VecVT,
+ EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
+ Index);
+}
+
+SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
+ SDValue VecPtr, EVT VecVT,
+ EVT SubVecVT,
+ SDValue Index) const {
+ SDLoc dl(Index);
+ // Make sure the index type is big enough to compute in.
+ Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
+
+ EVT EltVT = VecVT.getVectorElementType();
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
+ "Converting bits to bytes lost precision");
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorElementCount());
+
+ EVT IdxVT = Index.getValueType();
+ if (SubVecVT.isScalableVector())
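+ // A scalable sub-vector index is implicitly scaled by vscale, so make
+ // that scaling explicit before converting the index to a byte offset.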
+ Index =
+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
+
+ Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getConstant(EltSize, dl, IdxVT));
+ return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ // Access to the address of TLS variable xyz is lowered to a function call:
+ // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ SDLoc dl(GA);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+ Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+ StringRef EmuTlsVarName(NameString);
+ GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+ assert(EmuTlsVar && "Cannot find EmuTlsVar ");
+ Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+ Entry.Ty = VoidPtrType;
+ Args.push_back(Entry);
+
+ SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+ CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+ // calls. At least for X86 targets, maybe good for other targets too?
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setAdjustsStack(true); // Is this only for X86 target?
+ MFI.setHasCalls(true);
+
+ assert((GA->getOffset() == 0) &&
+ "Emulated TLS must have zero offset in GlobalAddressSDNode");
+ return CallResult.first;
+}
+
+SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
+ if (!isCtlzFast())
+ return SDValue();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc dl(Op);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isZero() && CC == ISD::SETEQ) {
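+ // (X == 0) is equivalent to (CTLZ(X) >> log2(bitwidth)): the shift
+ // yields 1 exactly when X is zero and 0 otherwise.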
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ }
+ return SDValue();
+}
+
+SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ EVT VT = Op0.getValueType();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ unsigned Opcode = Node->getOpcode();
+ SDLoc DL(Node);
+
+ // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
+ if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
+ getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ Op0 = DAG.getFreeze(Op0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
+ }
+
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ // TODO: Missing freeze(Op0)?
+ if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
+ }
+
+ // umax(x,y) -> add(x,usubsat(y,x))
+ // TODO: Missing freeze(Op0)?
+ if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::ADD, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
+ }
+
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
+ // Attempt to find an existing SETCC node that we can reuse.
+ // TODO: Do we need a generic doesSETCCNodeExist?
+ // TODO: Missing freeze(Op0)/freeze(Op1)?
+ auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
+ ISD::CondCode PrefCommuteCC,
+ ISD::CondCode AltCommuteCC) {
+ SDVTList BoolVTList = DAG.getVTList(BoolVT);
+ for (ISD::CondCode CC : {PrefCC, AltCC}) {
+ if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
+ {Op0, Op1, DAG.getCondCode(CC)})) {
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ }
+ }
+ for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
+ if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
+ {Op0, Op1, DAG.getCondCode(CC)})) {
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op1, Op0);
+ }
+ }
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ };
+
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ // -> Y = (A < B) ? B : A
+ // -> Y = (A >= B) ? A : B
+ // -> Y = (A <= B) ? B : A
+ switch (Opcode) {
+ case ISD::SMAX:
+ return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
+ case ISD::SMIN:
+ return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
+ case ISD::UMAX:
+ return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
+ case ISD::UMIN:
+ return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
+ }
+
+ llvm_unreachable("How did we get here?");
+}
+
+SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
+ // usub.sat(a, b) -> umax(a, b) - b
+ if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
+ SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
+ }
+
+ // uadd.sat(a, b) -> umin(a, ~b) + b
+ if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
+ SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
+ SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
+ return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
+ }
+
+ unsigned OverflowOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ OverflowOp = ISD::SADDO;
+ break;
+ case ISD::UADDSAT:
+ OverflowOp = ISD::UADDO;
+ break;
+ case ISD::SSUBSAT:
+ OverflowOp = ISD::SSUBO;
+ break;
+ case ISD::USUBSAT:
+ OverflowOp = ISD::USUBO;
+ break;
+ default:
+ llvm_unreachable("Expected method to receive signed or unsigned saturation "
+ "addition or subtraction node.");
+ }
+
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
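+ // Compute the raw sum/difference and an overflow flag with the matching
+ // [US]{ADD,SUB}O node; the saturated value is selected below whenever the
+ // overflow flag is set.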
+ unsigned BitWidth = LHS.getScalarValueSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
+
+ if (Opcode == ISD::UADDSAT) {
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS + RHS) | OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+ }
+ // Overflow ? 0xffff.... : (LHS + RHS)
+ return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
+ }
+
+ if (Opcode == ISD::USUBSAT) {
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS - RHS) & ~OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+ return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+ }
+ // Overflow ? 0 : (LHS - RHS)
+ return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
+ }
+
+ if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+
+ KnownBits KnownLHS = DAG.computeKnownBits(LHS);
+ KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+
+ // If either of the operand signs are known, then they are guaranteed to
+ // only saturate in one direction. If non-negative they will saturate
+ // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
+ //
+ // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
+ // sign of 'y' has to be flipped.
+
+ bool LHSIsNonNegative = KnownLHS.isNonNegative();
+ bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
+ : KnownRHS.isNegative();
+ if (LHSIsNonNegative || RHSIsNonNegative) {
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
+ }
+
+ bool LHSIsNegative = KnownLHS.isNegative();
+ bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
+ : KnownRHS.isNonNegative();
+ if (LHSIsNegative || RHSIsNegative) {
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
+ }
+ }
+
+ // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
+}
+
+SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ bool IsSigned = Opcode == ISD::SSHLSAT;
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ assert((Node->getOpcode() == ISD::SSHLSAT ||
+ Node->getOpcode() == ISD::USHLSAT) &&
+ "Expected a SHLSAT opcode");
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
+ // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
+
+ unsigned BW = VT.getScalarSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
+ SDValue Orig =
+ DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
+
+ SDValue SatVal;
+ if (IsSigned) {
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
+ SDValue Cond =
+ DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
+ SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
+ } else {
+ SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
+ }
+ SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
+ return DAG.getSelect(dl, VT, Cond, SatVal, Result);
+}
+
+SDValue
+TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
+ assert((Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::UMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT) &&
+ "Expected a fixed point multiplication opcode");
+
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ unsigned Scale = Node->getConstantOperandVal(2);
+ bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ unsigned VTSize = VT.getScalarSizeInBits();
+
+ if (!Scale) {
+ // [us]mul.fix(a, b, 0) -> mul(a, b)
+ if (!Saturating) {
+ if (isOperationLegalOrCustom(ISD::MUL, VT))
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
+ return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
+ }
+
+ assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+ "Expected scale to be less than the number of bits if signed or at "
+ "most the number of bits if unsigned.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ // Get the upper and lower bits of the result.
+ SDValue Lo, Hi;
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+ if (isOperationLegalOrCustom(LoHiOp, VT)) {
+ SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ Lo = Result.getValue(0);
+ Hi = Result.getValue(1);
+ } else if (isOperationLegalOrCustom(HiOp, VT)) {
+ Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+ } else if (VT.isVector()) {
+ return SDValue();
+ } else {
+ report_fatal_error("Unable to expand fixed point multiplication.");
+ }
+
+ if (Scale == VTSize)
+ // Result is just the top half since we'd be shifting by the width of the
+ // operand. Overflow is impossible, so this works for both UMULFIX and
+ // UMULFIXSAT.
+ return Hi;
+
+ // The result will need to be shifted right by the scale since both operands
+ // are scaled. The result is given to us in 2 halves, so we only want part of
+ // both in the result.
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ if (!Saturating)
+ return Result;
+
+ if (!Signed) {
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+ // widened multiplication) aren't all zeroes.
+
+ // Saturate to max if ((Hi >> Scale) != 0),
+ // which is the same as if (Hi > ((1 << Scale) - 1))
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETUGT);
+
+ return Result;
+ }
+
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+ // widened multiplication) aren't all ones or all zeroes.
+
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+ if (Scale == 0) {
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+ DAG.getConstant(VTSize - 1, dl, ShiftTy));
+ SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+ // Saturate to SatMin if the wide product is negative, and SatMax if the wide
+ // product is positive ...
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+ ISD::SETLT);
+ // ... but only if we overflowed.
+ return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+ }
+
+ // We handled Scale==0 above so all the bits to examine are in Hi.
+
+ // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+ // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+ // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+ // which is the same as if (Hi < (-1 << (Scale - 1)))
+ SDValue HighMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+ return Result;
+}
+
+SDValue
+TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ unsigned Scale, SelectionDAG &DAG) const {
+ assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
+ Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
+ "Expected a fixed point division opcode");
+
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // If there is enough room in the type to upscale the LHS or downscale the
+ // RHS before the division, we can perform it in this type without having to
+ // resize. For signed operations, the LHS headroom is the number of
+ // redundant sign bits, and for unsigned ones it is the number of zeroes.
+ // The headroom for the RHS is the number of trailing zeroes.
+ unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
+ : DAG.computeKnownBits(LHS).countMinLeadingZeros();
+ unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+
+ // For signed saturating operations, we need to be able to detect true integer
+ // division overflow; that is, when you have MIN / -EPS. However, this
+ // is undefined behavior and if we emit divisions that could take such
+ // values it may cause undesired behavior (arithmetic exceptions on x86, for
+ // example).
+ // Avoid this by requiring an extra bit so that we never get this case.
+ // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
+ // signed saturating division, we need to emit a whopping 32-bit division.
+ if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
+ return SDValue();
+
+ unsigned LHSShift = std::min(LHSLead, Scale);
+ unsigned RHSShift = Scale - LHSShift;
+
+ // At this point, we know that if we shift the LHS up by LHSShift and the
+ // RHS down by RHSShift, we can emit a regular division with a final scaling
+ // factor of Scale.
+
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (LHSShift)
+ LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
+ DAG.getConstant(LHSShift, dl, ShiftTy));
+ if (RHSShift)
+ RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
+ DAG.getConstant(RHSShift, dl, ShiftTy));
+
+ SDValue Quot;
+ if (Signed) {
+ // For signed operations, if the resulting quotient is negative and the
+ // remainder is nonzero, subtract 1 from the quotient to round towards
+ // negative infinity.
+ SDValue Rem;
+ // FIXME: Ideally we would always produce an SDIVREM here, but if the
+ // type isn't legal, SDIVREM cannot be expanded. There is no reason why
+ // we couldn't just form a libcall, but the type legalizer doesn't do it.
+ if (isTypeLegal(VT) &&
+ isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
+ Quot = DAG.getNode(ISD::SDIVREM, dl,
+ DAG.getVTList(VT, VT),
+ LHS, RHS);
+ Rem = Quot.getValue(1);
+ Quot = Quot.getValue(0);
+ } else {
+ Quot = DAG.getNode(ISD::SDIV, dl, VT,
+ LHS, RHS);
+ Rem = DAG.getNode(ISD::SREM, dl, VT,
+ LHS, RHS);
+ }
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
+ SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
+ SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
+ SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
+ SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
+ DAG.getConstant(1, dl, VT));
+ Quot = DAG.getSelect(dl, VT,
+ DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
+ Sub1, Quot);
+ } else
+ Quot = DAG.getNode(ISD::UDIV, dl, VT,
+ LHS, RHS);
+
+ return Quot;
+}
+
+void TargetLowering::expandUADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::UADDO;
+
+ // If UADDO_CARRY/USUBO_CARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
+ if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+ SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+ SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+ { LHS, RHS, CarryIn });
+ Result = SDValue(NodeCarry.getNode(), 0);
+ Overflow = SDValue(NodeCarry.getNode(), 1);
+ return;
+ }
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+ SDValue SetCC;
+ if (IsAdd && isOneConstant(RHS)) {
+ // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
+ // the live range of X. We assume comparing with 0 is cheap.
+ // The general case (X + C) < C is not necessarily beneficial. Although we
+ // reduce the live range of X, we may introduce the materialization of
+ // constant C.
+ SetCC =
+ DAG.getSetCC(dl, SetCCType, Result,
+ DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
+ } else if (IsAdd && isAllOnesConstant(RHS)) {
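+ // Pack the elements into one wide integer: truncate each element to its
+ // in-memory width, zero-extend into the wide integer, and shift it into
+ // its bit position (mirrored on big-endian targets) before OR'ing it in.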
+ // Special case: uaddo X, -1 overflows if X != 0.
+ SetCC =
+ DAG.getSetCC(dl, SetCCType, LHS,
+ DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
+ } else {
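+ // General case: unsigned overflow occurred iff the result wrapped
+ // around, i.e. the sum is less than LHS for an add, or greater than LHS
+ // for a sub.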
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ }
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+}
+
+void TargetLowering::expandSADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::SADDO;
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+
+ // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+ unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
+ if (isOperationLegal(OpcSat, LHS.getValueType())) {
+ SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
+ SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+ return;
+ }
+
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
+ SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+ SDValue ConditionRHS =
+ DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+ Overflow = DAG.getBoolExtOrTrunc(
+ DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+ ResultType, ResultType);
+}
+
+bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
+ SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+
+ // For power-of-two multiplications we can use a simpler shift expansion.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+ const APInt &C = RHSC->getAPIntValue();
+ // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+ if (C.isPowerOf2()) {
+ // smulo(x, signed_min) is the same as umulo(x, signed_min).
+ bool UseArithShift = isSigned && !C.isMinSignedValue();
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+ Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT,
+ DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+ dl, VT, Result, ShiftAmt),
+ LHS, ISD::SETNE);
+ return true;
+ }
+ }
+
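+ // General case: compute the full double-width product and check that the
+ // top half is the sign-extension (signed) or zero (unsigned) of the
+ // bottom half.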
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ if (VT.isVector())
+ WideVT =
+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
+
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (isTypeLegal(WideVT)) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
+ getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
+ } else {
+ if (VT.isVector())
+ return false;
+
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+ // The high part is obtained by SRA'ing all but one of the bits of the low
+ // part.
+ unsigned LoSize = VT.getFixedSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture and thus having to be split
+ // into two arguments.
+ SDValue Ret;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ CallOptions.setIsPostTypeLegalization(true);
+ if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+ } else {
+ SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+ }
+ assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+ "Ret value is a collection of constituent nodes holding result.");
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Same as above.
+ BottomHalf = Ret.getOperand(0);
+ TopHalf = Ret.getOperand(1);
+ } else {
+ BottomHalf = Ret.getOperand(1);
+ TopHalf = Ret.getOperand(0);
+ }
+ }
+
+ Result = BottomHalf;
+ if (isSigned) {
+ SDValue ShiftAmt = DAG.getConstant(
+ VT.getScalarSizeInBits() - 1, dl,
+ getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
+ } else {
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // Truncate the result if SetCC returns a larger type than needed.
+ EVT RType = Node->getValueType(1);
+ if (RType.bitsLT(Overflow.getValueType()))
+ Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
+
+ assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
+ "Unexpected result type for S/UMULO legalization");
+ return true;
+}
+
+SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding reductions for scalable vectors is undefined.");
+
+ // Try to use a shuffle reduction for power of two vectors.
+ if (VT.isPow2VectorType()) {
+ while (VT.getVectorNumElements() > 1) {
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
+ break;
+
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
+ Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+ VT = HalfVT;
+ }
+ }
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
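+ // Fall back to full scalarization: extract every element and fold them
+ // together with the scalar base opcode.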
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
+
+ SDValue Res = Ops[0];
+ for (unsigned i = 1; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
+
+ // Result type may be wider than element type.
+ if (EltVT != Node->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
+ return Res;
+}
+
+SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue AccOp = Node->getOperand(0);
+ SDValue VecOp = Node->getOperand(1);
+ SDNodeFlags Flags = Node->getFlags();
+
+ EVT VT = VecOp.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding reductions for scalable vectors is undefined.");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
+
+ unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
+
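+ // A sequential reduction must fold the elements strictly in order,
+ // starting from the accumulator operand.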
+ SDValue Res = AccOp;
+ for (unsigned i = 0; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
+
+ return Res;
+}
+
+bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ SDLoc dl(Node);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ SDValue Dividend = Node->getOperand(0);
+ SDValue Divisor = Node->getOperand(1);
+ if (isOperationLegalOrCustom(DivRemOpc, VT)) {
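+ // Prefer a combined DIVREM node and take its remainder result.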
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
+ return true;
+ }
+ if (isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
+ Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
+ return true;
+ }
+ return false;
+}
+
+SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
+ SelectionDAG &DAG) const {
+ bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ // DstVT is the result type, while SatVT is the size to which we saturate
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ unsigned SatWidth = SatVT.getScalarSizeInBits();
+ unsigned DstWidth = DstVT.getScalarSizeInBits();
+ assert(SatWidth <= DstWidth &&
+ "Expected saturation width smaller than result width");
+
+ // Determine minimum and maximum integer values and their corresponding
+ // floating-point values.
+ APInt MinInt, MaxInt;
+ if (IsSigned) {
+ MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
+ } else {
+ MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
+ }
+
+ // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
+ // libcall emission cannot handle this. Large result types will fail.
+ if (SrcVT == MVT::f16) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
+ SrcVT = Src.getValueType();
+ }
+
+ APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+ APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+
+ APFloat::opStatus MinStatus =
+ MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus =
+ MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+ !(MaxStatus & APFloat::opStatus::opInexact);
+
+ SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
+ SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
+
+ // If the integer bounds are exactly representable as floats and min/max are
+ // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
+ // of comparisons and selects.
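+  // For example, when saturating f32 to i8 both -128 and 127 are exactly
+  // representable, so the min+max path can be used; when saturating f32 to
+  // i64, 2^63-1 is not exactly representable (the conversion above reports
+  // opInexact) and we must fall back to the compare+select sequence.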
+ bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
+ isOperationLegal(ISD::FMAXNUM, SrcVT);
+ if (AreExactFloatBounds && MinMaxLegal) {
+ SDValue Clamped = Src;
+
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+ Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ // Convert clamped value to integer.
+ SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
+ dl, DstVT, Clamped);
+
+ // In the unsigned case we're done, because we mapped NaN to MinFloat,
+ // which will cast to zero.
+ if (!IsSigned)
+ return FpToInt;
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+ return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
+ }
+
+ SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
+ SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
+
+ // Result of direct conversion. The assumption here is that the operation is
+ // non-trapping and it's fine to apply it to an out-of-range value if we
+ // select it away later.
+ SDValue FpToInt =
+ DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
+
+ SDValue Select = FpToInt;
+
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
+ // MinInt if Src is NaN.
+ SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
+ Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
+ // If Src OGT MaxFloat, select MaxInt.
+ SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
+ Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
+
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
+ // is already zero.
+ if (!IsSigned)
+ return Select;
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+ return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
+}
+
+SDValue TargetLowering::expandVectorSplice(SDNode *Node,
+ SelectionDAG &DAG) const {
+ assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
+ assert(Node->getValueType(0).isScalableVector() &&
+ "Fixed length vector types expected to use SHUFFLE_VECTOR!");
+
+ EVT VT = Node->getValueType(0);
+ SDValue V1 = Node->getOperand(0);
+ SDValue V2 = Node->getOperand(1);
+ int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
+ SDLoc DL(Node);
+
+ // Expand through memory thusly:
+ // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
+ // Store V1, Ptr
+ // Store V2, Ptr + sizeof(V1)
+ // If (Imm < 0)
+ // TrailingElts = -Imm
+ // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
+ // else
+ // Ptr = Ptr + (Imm * sizeof(VT.Elt))
+ // Res = Load Ptr
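+  //
+  // Illustratively, splicing two <vscale x 4 x i32> vectors with Imm = -2
+  // loads from two elements before the end of V1, so the result is the last
+  // two elements of V1 followed by the leading elements of V2.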
+
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount() * 2);
+ SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
+ EVT PtrVT = StackPtr.getValueType();
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ // Store the lo part of CONCAT_VECTORS(V1, V2)
+ SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
+ // Store the hi part of CONCAT_VECTORS(V1, V2)
+ SDValue OffsetToV2 = DAG.getVScale(
+ DL, PtrVT,
+ APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
+ SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
+
+ if (Imm >= 0) {
+ // Load back the required element. getVectorElementPointer takes care of
+ // clamping the index if it's out-of-bounds.
+ StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
+ // Load the spliced result
+ return DAG.getLoad(VT, DL, StoreV2, StackPtr,
+ MachinePointerInfo::getUnknownStack(MF));
+ }
+
+ uint64_t TrailingElts = -Imm;
+
+ // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
+ TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
+ SDValue TrailingBytes =
+ DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
+
+ if (TrailingElts > VT.getVectorMinNumElements()) {
+ SDValue VLBytes =
+ DAG.getVScale(DL, PtrVT,
+ APInt(PtrVT.getFixedSizeInBits(),
+ VT.getStoreSize().getKnownMinValue()));
+ TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
+ }
+
+ // Calculate the start address of the spliced result.
+ StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
+
+ // Load the spliced result
+ return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
+ MachinePointerInfo::getUnknownStack(MF));
+}
+
+bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC, SDValue Mask,
+ SDValue EVL, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
+ assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
+ bool IsNonVP = !EVL;
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default:
+ llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
+ // Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
+ InvCC = getSetCCInverse(CCCode, OpVT);
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default:
+ llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE;
+ CC2 = ISD::SETUNE;
+ Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ [[fallthrough]];
+ case ISD::SETO:
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ;
+ CC2 = ISD::SETOEQ;
+ Opc = ISD::AND;
+ break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
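+      // Illustratively: SETONE x, y  ==>  (x SETOGT y) OR (x SETOLT y),
+      // and SETUEQ is the logical inverse of that result.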
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ [[fallthrough]];
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
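+        // Illustratively: SETUGT  ==>  (LHS SETGT RHS) OR  (LHS SETUO RHS),
+        //                 SETOGT  ==>  (LHS SETGT RHS) AND (LHS SETO  RHS).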
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+      // Fall through if this is an unsigned integer comparison.
+ [[fallthrough]];
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+      // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ if (IsNonVP) {
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
+ SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
+ }
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
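+      // For example, when SETUNE is legal, SETUO x, y becomes
+      //   (x SETUNE x) OR (y SETUNE y), i.e. "x is NaN or y is NaN".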
+ if (IsNonVP) {
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
+ SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
+ }
+ }
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
+ SetCC2.getValue(1));
+ if (IsNonVP)
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ else {
+ // Transform the binary opcode to the VP equivalent.
+ assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
+ Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
+ }
+ RHS = SDValue();
+ CC = SDValue();
+ return true;
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
new file mode 100644
index 000000000000..153fe77b8b4a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -0,0 +1,386 @@
+//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom lowering code required by the shadow-stack GC
+// strategy.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include <cassert>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "shadow-stack-gc-lowering"
+
+namespace {
+
+class ShadowStackGCLowering : public FunctionPass {
+  /// Head - The global linked-list head ("llvm_gc_root_chain") that contains
+  /// the chain of GC roots.
+ GlobalVariable *Head = nullptr;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ StructType *StackEntryTy = nullptr;
+ StructType *FrameMapTy = nullptr;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
+
+public:
+ static char ID;
+
+ ShadowStackGCLowering();
+
+ bool doInitialization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type *GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Type *Ty, Value *BasePtr, int Idx1,
+ const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Type *Ty, Value *BasePtr, int Idx1, int Idx2,
+ const char *Name);
+};
+
+} // end anonymous namespace
+
+char ShadowStackGCLowering::ID = 0;
+char &llvm::ShadowStackGCLoweringID = ShadowStackGCLowering::ID;
+
+INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
+ "Shadow Stack GC Lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
+ "Shadow Stack GC Lowering", false, false)
+
+FunctionPass *llvm::createShadowStackGCLoweringPass() {
+  return new ShadowStackGCLowering();
+}
+
+ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) {
+ initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
+}
+
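+/// GetFrameMap - Build the constant frame-map descriptor for function F.
+/// Conceptually the emitted internal global, named "__gc_<function>", has the
+/// layout
+///   { { i32 NumRoots, i32 NumMeta }, [NumMeta x i8*] Metadata }
+/// and the returned constant is a pointer to its first field.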
+Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant *, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)};
+
+ Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
+ StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage, FrameMap,
+ "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
+ return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices);
+}
+
+Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<Type *> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (const std::pair<CallInst *, AllocaInst *> &Root : Roots)
+ EltTys.push_back(Root.second->getAllocatedType());
+
+ return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGCLowering::doInitialization(Module &M) {
+ bool Active = false;
+ for (Function &F : M) {
+ if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
+ Active = true;
+ break;
+ }
+ }
+ if (!Active)
+ return false;
+
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type *> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
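+  //
+  // Each function with roots later gets a concrete entry type (see
+  // GetConcreteStackEntryType) that appends its root slots to this header,
+  // e.g. for a function foo with two i8* roots:
+  //   %gc_stackentry.foo = type { %gc_stackentry, i8*, i8* }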
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(
+ M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGCLowering::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGCLowering::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
+
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
+ CI,
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Type *Ty,
+ Value *BasePtr, int Idx,
+ int Idx2,
+ const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Type *Ty, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx)};
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+void ShadowStackGCLowering::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+/// runOnFunction - Insert code to maintain the shadow stack.
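+/// Conceptually the inserted code looks like (illustrative pseudo-IR):
+///   entry:
+///     %gc_frame = alloca %gc_stackentry.fn       ; header plus root slots
+///     ...store the frame map and the current @llvm_gc_root_chain head
+///        into %gc_frame...
+///     store %gc_frame, @llvm_gc_root_chain       ; push
+///   every return / unwind path:
+///     ...restore the saved link into @llvm_gc_root_chain...   ; pop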
+bool ShadowStackGCLowering::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use the shadow stack GC.
+ if (!F.hasGC() ||
+ F.getGC() != std::string("shadow-stack"))
+ return false;
+
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ std::optional<DomTreeUpdater> DTU;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry =
+ AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
+
+ AtEntry.SetInsertPointPastAllocas(&F);
+ IP = AtEntry.GetInsertPoint();
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead =
+ AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 1, "gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true,
+ DTU ? &*DTU : nullptr);
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 =
+ CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(),
+ EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (std::pair<CallInst *, AllocaInst *> &Root : Roots) {
+ Root.first->eraseFromParent();
+ Root.second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
new file mode 100644
index 000000000000..4b1d3637a746
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -0,0 +1,997 @@
+//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for safe points where the prologue and epilogue can be
+// inserted.
+// The safe point for the prologue (resp. epilogue) is called Save
+// (resp. Restore).
+// A point is safe for prologue (resp. epilogue) if and only if
+// it 1) dominates (resp. post-dominates) all the frame related operations and
+// between 2) two executions of the Save (resp. Restore) point there is an
+// execution of the Restore (resp. Save) point.
+//
+// For instance, the following points are safe:
+// for (int i = 0; i < 10; ++i) {
+// Save
+// ...
+// Restore
+// }
+// Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
+// And the following points are not:
+// for (int i = 0; i < 10; ++i) {
+// Save
+// ...
+// }
+// for (int i = 0; i < 10; ++i) {
+// ...
+// Restore
+// }
+// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
+//
+// This pass also ensures that the safe points are 3) cheaper than the regular
+// entry and exits blocks.
+//
+// Property #1 is ensured via the use of MachineDominatorTree and
+// MachinePostDominatorTree.
+// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
+// points must be in the same loop.
+// Property #3 is ensured via the MachineBlockFrequencyInfo.
+//
+// If this pass found points matching all these properties, then
+// MachineFrameInfo is updated with this information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "shrink-wrap"
+
+STATISTIC(NumFunc, "Number of functions");
+STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
+STATISTIC(NumCandidatesDropped,
+ "Number of shrink-wrapping candidates dropped because of frequency");
+
+static cl::opt<cl::boolOrDefault>
+EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+ cl::desc("enable the shrink-wrapping pass"));
+static cl::opt<bool> EnablePostShrinkWrapOpt(
+ "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
+ cl::desc("enable splitting of the restore block if possible"));
+
+namespace {
+
+/// Class to determine where the safe points to insert the
+/// prologue and epilogue are.
+/// Unlike the paper from Fred C. Chow, PLDI'88, that introduced the
+/// shrink-wrapping term for prologue/epilogue placement, this pass
+/// does not rely on expensive data-flow analysis. Instead we use the
+/// dominance properties and loop information to decide which points
+/// are safe for such insertion.
+class ShrinkWrap : public MachineFunctionPass {
+ /// Hold callee-saved information.
+ RegisterClassInfo RCI;
+ MachineDominatorTree *MDT = nullptr;
+ MachinePostDominatorTree *MPDT = nullptr;
+
+ /// Current safe point found for the prologue.
+ /// The prologue will be inserted before the first instruction
+ /// in this basic block.
+ MachineBasicBlock *Save = nullptr;
+
+ /// Current safe point found for the epilogue.
+ /// The epilogue will be inserted before the first terminator instruction
+ /// in this basic block.
+ MachineBasicBlock *Restore = nullptr;
+
+ /// Hold the information of the basic block frequency.
+ /// Use to check the profitability of the new points.
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+
+ /// Hold the loop information. Used to determine if Save and Restore
+ /// are in the same loop.
+ MachineLoopInfo *MLI = nullptr;
+
+ // Emit remarks.
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+
+ /// Frequency of the Entry block.
+ uint64_t EntryFreq = 0;
+
+ /// Current opcode for frame setup.
+ unsigned FrameSetupOpcode = ~0u;
+
+ /// Current opcode for frame destroy.
+ unsigned FrameDestroyOpcode = ~0u;
+
+ /// Stack pointer register, used by llvm.{savestack,restorestack}
+ Register SP;
+
+ /// Entry block.
+ const MachineBasicBlock *Entry = nullptr;
+
+ using SetOfRegs = SmallSetVector<unsigned, 16>;
+
+ /// Registers that need to be saved for the current function.
+ mutable SetOfRegs CurrentCSRs;
+
+ /// Current MachineFunction.
+ MachineFunction *MachineFunc = nullptr;
+
+  /// Is `true` for block numbers where a stack access or a computation of a
+  /// stack-relative address may occur on some CFG path reaching the block,
+  /// including in the block itself.
+ BitVector StackAddressUsedBlockInfo;
+
+ /// Check if \p MI uses or defines a callee-saved register or
+ /// a frame index. If this is the case, this means \p MI must happen
+ /// after Save and before Restore.
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const;
+
+ const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
+ if (CurrentCSRs.empty()) {
+ BitVector SavedRegs;
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
+
+ TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS);
+
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ CurrentCSRs.insert((unsigned)Reg);
+ }
+ return CurrentCSRs;
+ }
+
+ /// Update the Save and Restore points such that \p MBB is in
+ /// the region that is dominated by Save and post-dominated by Restore
+ /// and Save and Restore still match the safe point definition.
+ /// Such point may not exist and Save and/or Restore may be null after
+ /// this call.
+ void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
+
+ // Try to find safe point based on dominance and block frequency without
+ // any change in IR.
+ bool performShrinkWrapping(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS);
+
+ /// This function tries to split the restore point if doing so can shrink the
+ /// save point further. \return True if restore point is split.
+ bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS);
+
+ /// This function analyzes if the restore point can split to create a new
+ /// restore point. This function collects
+ /// 1. Any preds of current restore that are reachable by callee save/FI
+ /// blocks
+ /// - indicated by DirtyPreds
+ /// 2. Any preds of current restore that are not DirtyPreds - indicated by
+ /// CleanPreds
+ /// Both sets should be non-empty for considering restore point split.
+ bool checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS);
+
+ /// Initialize the pass for \p MF.
+ void init(MachineFunction &MF) {
+ RCI.runOnMachineFunction(MF);
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ Save = nullptr;
+ Restore = nullptr;
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+ EntryFreq = MBFI->getEntryFreq();
+ const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore();
+ Entry = &MF.front();
+ CurrentCSRs.clear();
+ MachineFunc = &MF;
+
+ ++NumFunc;
+ }
+
+ /// Check whether or not Save and Restore points are still interesting for
+ /// shrink-wrapping.
+ bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
+
+ /// Check if shrink wrapping is enabled for this target and function.
+ static bool isShrinkWrapEnabled(const MachineFunction &MF);
+
+public:
+ static char ID;
+
+ ShrinkWrap() : MachineFunctionPass(ID) {
+ initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override { return "Shrink Wrapping analysis"; }
+
+ /// Perform the shrink-wrapping analysis and update
+ /// the MachineFrameInfo attached to \p MF with the results.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char ShrinkWrap::ID = 0;
+
+char &llvm::ShrinkWrapID = ShrinkWrap::ID;
+
+INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
+
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const {
+  /// Check if \p Op is known to access an address not on the function's stack.
+ /// At the moment, accesses where the underlying object is a global, function
+ /// argument, or jump table are considered non-stack accesses. Note that the
+ /// caller's stack may get accessed when passing an argument via the stack,
+ /// but not the stack of the current function.
+ ///
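+  /// For example, a load from a global or through a plain pointer argument
+  /// (one without a byval-style copy) is treated as not touching this
+  /// function's stack, while a load through a pointer of unknown provenance
+  /// is conservatively treated as a possible stack access.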
+ auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
+ if (Op->getValue()) {
+ const Value *UO = getUnderlyingObject(Op->getValue());
+ if (!UO)
+ return false;
+ if (auto *Arg = dyn_cast<Argument>(UO))
+ return !Arg->hasPassPointeeByValueCopyAttr();
+ return isa<GlobalValue>(UO);
+ }
+ if (const PseudoSourceValue *PSV = Op->getPseudoValue())
+ return PSV->isJumpTable();
+ return false;
+ };
+ // Load/store operations may access the stack indirectly when we previously
+ // computed an address to a stack location.
+ if (StackAddressUsed && MI.mayLoadOrStore() &&
+ (MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
+ !all_of(MI.memoperands(), IsKnownNonStackPtr)))
+ return true;
+
+ if (MI.getOpcode() == FrameSetupOpcode ||
+ MI.getOpcode() == FrameDestroyOpcode) {
+ LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
+ return true;
+ }
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ for (const MachineOperand &MO : MI.operands()) {
+ bool UseOrDefCSR = false;
+ if (MO.isReg()) {
+ // Ignore instructions like DBG_VALUE which don't read/def the register.
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+ Register PhysReg = MO.getReg();
+ if (!PhysReg)
+ continue;
+ assert(PhysReg.isPhysical() && "Unallocated register?!");
+ // The stack pointer is not normally described as a callee-saved register
+ // in calling convention definitions, so we need to watch for it
+ // separately. An SP mentioned by a call instruction, we can ignore,
+ // though, as it's harmless and we do not want to effectively disable tail
+ // calls by forcing the restore point to post-dominate them.
+ // PPC's LR is also not normally described as a callee-saved register in
+ // calling convention definitions, so we need to watch for it, too. An LR
+ // mentioned implicitly by a return (or "branch to link register")
+ // instruction we can ignore, otherwise we may pessimize shrinkwrapping.
+ UseOrDefCSR =
+ (!MI.isCall() && PhysReg == SP) ||
+ RCI.getLastCalleeSavedAlias(PhysReg) ||
+ (!MI.isReturn() && TRI->isNonallocatableRegisterCalleeSave(PhysReg));
+ } else if (MO.isRegMask()) {
+ // Check if this regmask clobbers any of the CSRs.
+ for (unsigned Reg : getCurrentCSRs(RS)) {
+ if (MO.clobbersPhysReg(Reg)) {
+ UseOrDefCSR = true;
+ break;
+ }
+ }
+ }
+ // Skip FrameIndex operands in DBG_VALUE instructions.
+ if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) {
+ LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
+ << MO.isFI() << "): " << MI << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Helper function to find the immediate (post) dominator.
+template <typename ListOfBBs, typename DominanceAnalysis>
+static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
+ DominanceAnalysis &Dom, bool Strict = true) {
+ MachineBasicBlock *IDom = &Block;
+ for (MachineBasicBlock *BB : BBs) {
+ IDom = Dom.findNearestCommonDominator(IDom, BB);
+ if (!IDom)
+ break;
+ }
+ if (Strict && IDom == &Block)
+ return nullptr;
+ return IDom;
+}
+
+static bool isAnalyzableBB(const TargetInstrInfo &TII,
+ MachineBasicBlock &Entry) {
+ // Check if the block is analyzable.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
+}
+
+/// Determines if any predecessor of MBB is on a path from a block that has a
+/// use or def of CSRs/FI to MBB.
+/// ReachableByDirty: All blocks reachable from a block that has a use or def
+/// of CSR/FI.
+static bool
+hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ const MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *PredBB : MBB.predecessors())
+ if (ReachableByDirty.count(PredBB))
+ return true;
+ return false;
+}
+
+/// Derives the list of all the basic blocks reachable from MBB.
+static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
+ const MachineBasicBlock &MBB) {
+ SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
+ MBB.succ_end());
+ Visited.insert(&MBB);
+ while (!Worklist.empty()) {
+ MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
+ if (!Visited.insert(SuccMBB).second)
+ continue;
+ Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
+ }
+}
+
+/// Collect blocks reachable by use or def of CSRs/FI.
+static void collectBlocksReachableByDirty(
+ const DenseSet<const MachineBasicBlock *> &DirtyBBs,
+ DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
+ for (const MachineBasicBlock *MBB : DirtyBBs) {
+ if (ReachableByDirty.count(MBB))
+ continue;
+    // Mark all of its descendants as reachable.
+ markAllReachable(ReachableByDirty, *MBB);
+ }
+}
+
+/// \return true if there is a clean path from SavePoint to the original
+/// Restore.
+static bool
+isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
+ ArrayRef<MachineBasicBlock *> CleanPreds) {
+ DenseSet<const MachineBasicBlock *> Visited;
+ SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
+ CleanPreds.end());
+ while (!Worklist.empty()) {
+ MachineBasicBlock *CleanBB = Worklist.pop_back_val();
+ if (CleanBB == SavePoint)
+ return true;
+ if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
+ continue;
+ Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
+ }
+ return false;
+}
+
+/// This function updates the branches post restore point split.
+///
+/// Restore point has been split.
+/// Old restore point: MBB
+/// New restore point: NMBB
+/// Any basic block (say BBToUpdate) which previously fell through to MBB
+/// should
+/// 1. Fall through to NMBB iff NMBB is inserted immediately above MBB in the
+/// block layout OR
+/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
+static void updateTerminator(MachineBasicBlock *BBToUpdate,
+ MachineBasicBlock *NMBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc DL = BBToUpdate->findBranchDebugLoc();
+  // If NMBB isn't the new layout successor for BBToUpdate, insert an
+  // unconditional branch to it.
+ if (!BBToUpdate->isLayoutSuccessor(NMBB))
+ TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
+}
+
+/// This function splits the restore point and returns new restore point/BB.
+///
+/// DirtyPreds: Predecessors of \p MBB that are ReachableByDirty
+///
+/// Decision has been made to split the restore point.
+/// old restore point: \p MBB
+/// new restore point: \p NMBB
+/// This function makes the necessary block layout changes so that
+/// 1. \p NMBB points to \p MBB unconditionally
+/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB
+static MachineBasicBlock *
+tryToSplitRestore(MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = MBB->getParent();
+
+  // Get the list of DirtyPreds which have a fallthrough to MBB
+  // before the block layout change. This is just to ensure that if NMBB is
+  // inserted after MBB, then we create an unconditional branch from
+  // DirtyPred/CleanPred to NMBB.
+ SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == MBB)
+ MBBFallthrough.insert(BB);
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ // Insert this block at the end of the function. Inserting in between may
+ // interfere with control flow optimizer decisions.
+ MF->insert(MF->end(), NMBB);
+
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
+ NMBB->addLiveIn(LI.PhysReg);
+
+ TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
+
+ // After splitting, all predecessors of the restore point should be dirty
+ // blocks.
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
+
+ NMBB->addSuccessor(MBB);
+
+ for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
+ updateTerminator(BBToUpdate, NMBB, TII);
+
+ return NMBB;
+}
+
+/// This function undoes the restore point split done earlier.
+///
+/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
+///
+/// Restore point was split and the change needs to be unrolled. Make necessary
+/// changes to reset restore point from \p NMBB to \p MBB.
+static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
+ MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+  // For a BB, if NMBB is its fallthrough in the current layout, then in the
+  // new layout (a) BB should fall through to MBB, OR (b) BB should branch
+  // unconditionally to MBB.
+ SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == NMBB)
+ NMBBFallthrough.insert(BB);
+
+ NMBB->removeSuccessor(MBB);
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
+
+ NMBB->erase(NMBB->begin(), NMBB->end());
+ NMBB->eraseFromParent();
+
+ for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
+ updateTerminator(BBToUpdate, MBB, TII);
+}
+
+// A block is deemed fit for a restore point split iff there exist both
+// 1. DirtyPreds - preds of CurRestore reachable from a use or def of CSR/FI
+// 2. CleanPreds - preds of CurRestore that aren't DirtyPreds
+bool ShrinkWrap::checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS) {
+ for (const MachineInstr &MI : *CurRestore)
+ if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
+ return false;
+
+ for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
+ if (!isAnalyzableBB(*TII, *PredBB))
+ return false;
+
+ if (ReachableByDirty.count(PredBB))
+ DirtyPreds.push_back(PredBB);
+ else
+ CleanPreds.push_back(PredBB);
+ }
+
+ return !(CleanPreds.empty() || DirtyPreds.empty());
+}
+
+bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS) {
+ if (!EnablePostShrinkWrapOpt)
+ return false;
+
+ MachineBasicBlock *InitSave = nullptr;
+ MachineBasicBlock *InitRestore = nullptr;
+
+ if (HasCandidate) {
+ InitSave = Save;
+ InitRestore = Restore;
+ } else {
+ InitRestore = nullptr;
+ InitSave = &MF.front();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ return false;
+ if (MBB.isReturnBlock()) {
+ // Do not support multiple restore points.
+ if (InitRestore)
+ return false;
+ InitRestore = &MBB;
+ }
+ }
+ }
+
+ if (!InitSave || !InitRestore || InitRestore == InitSave ||
+ !MDT->dominates(InitSave, InitRestore) ||
+ !MPDT->dominates(InitRestore, InitSave))
+ return false;
+
+  // Bail out of the optimization if any basic block is the target of an
+  // INLINEASM_BR instruction.
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isInlineAsmBrIndirectTarget())
+ return false;
+
+ DenseSet<const MachineBasicBlock *> DirtyBBs;
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHPad()) {
+ DirtyBBs.insert(&MBB);
+ continue;
+ }
+ for (const MachineInstr &MI : MBB)
+ if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
+ DirtyBBs.insert(&MBB);
+ break;
+ }
+ }
+
+ // Find blocks reachable from the use or def of CSRs/FI.
+ DenseSet<const MachineBasicBlock *> ReachableByDirty;
+ collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineBasicBlock *, 2> DirtyPreds;
+ SmallVector<MachineBasicBlock *, 2> CleanPreds;
+ if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
+ CleanPreds, TII, RS))
+ return false;
+
+  // Try to find a new save point which dominates all dirty blocks.
+ MachineBasicBlock *NewSave =
+ FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
+
+  while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
+                     EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
+                     /* The entry block's frequency has been observed to be
+                        higher than a loop block's in some cases, so also
+                        reject any block inside a loop explicitly. */
+                     MLI->getLoopFor(NewSave)))
+ NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
+ false);
+
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ if (!NewSave || NewSave == InitSave ||
+ isSaveReachableThroughClean(NewSave, CleanPreds) ||
+ !TFI->canUseAsPrologue(*NewSave))
+ return false;
+
+ // Now we know that splitting a restore point can isolate the restore point
+ // from clean blocks and doing so can shrink the save point.
+ MachineBasicBlock *NewRestore =
+ tryToSplitRestore(InitRestore, DirtyPreds, TII);
+
+  // Make sure the new restore point is valid as an epilogue for the target.
+ if (!TFI->canUseAsEpilogue(*NewRestore)) {
+ rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
+ return false;
+ }
+
+ Save = NewSave;
+ Restore = NewRestore;
+
+ MDT->runOnMachineFunction(MF);
+ MPDT->runOnMachineFunction(MF);
+
+ assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
+ "Incorrect save or restore point due to dominance relations");
+ assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
+ "Unexpected save or restore point in a loop");
+ assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ "Incorrect save or restore point based on block frequency");
+ return true;
+}
+
+void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
+ RegScavenger *RS) {
+ // Get rid of the easy cases first.
+ if (!Save)
+ Save = &MBB;
+ else
+ Save = MDT->findNearestCommonDominator(Save, &MBB);
+ assert(Save);
+
+ if (!Restore)
+ Restore = &MBB;
+ else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it
+ // means the block never returns. If that's the
+ // case, we don't want to call
+ // `findNearestCommonDominator`, which will
+ // return `Restore`.
+ Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
+ else
+ Restore = nullptr; // Abort, we can't find a restore point in this case.
+
+ // Make sure we would be able to insert the restore code before the
+ // terminator.
+ if (Restore == &MBB) {
+ for (const MachineInstr &Terminator : MBB.terminators()) {
+ if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
+ continue;
+      // One of the terminators needs to happen before the restore point.
+ if (MBB.succ_empty()) {
+ Restore = nullptr; // Abort, we can't find a restore point in this case.
+ break;
+ }
+ // Look for a restore point that post-dominates all the successors.
+ // The immediate post-dominator is what we are looking for.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ break;
+ }
+ }
+
+ if (!Restore) {
+ LLVM_DEBUG(
+ dbgs() << "Restore point needs to be spanned on several blocks\n");
+ return;
+ }
+
+ // Make sure Save and Restore are suitable for shrink-wrapping:
+  // 1. all paths from Save need to lead to Restore before exiting.
+  // 2. all paths to Restore need to go through Save from Entry.
+ // We achieve that by making sure that:
+ // A. Save dominates Restore.
+ // B. Restore post-dominates Save.
+ // C. Save and Restore are in the same loop.
+ bool SaveDominatesRestore = false;
+ bool RestorePostDominatesSave = false;
+ while (Restore &&
+ (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
+ !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
+ // Post-dominance is not enough in loops to ensure that all uses/defs
+ // are after the prologue and before the epilogue at runtime.
+ // E.g.,
+ // while(1) {
+ // Save
+ // Restore
+ // if (...)
+ // break;
+ // use/def CSRs
+ // }
+ // All the uses/defs of CSRs are dominated by Save and post-dominated
+ // by Restore. However, the CSRs uses are still reachable after
+ // Restore and before Save are executed.
+ //
+ // For now, just push the restore/save points outside of loops.
+ // FIXME: Refine the criteria to still find interesting cases
+ // for loops.
+ MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ // Fix (A).
+ if (!SaveDominatesRestore) {
+ Save = MDT->findNearestCommonDominator(Save, Restore);
+ continue;
+ }
+ // Fix (B).
+ if (!RestorePostDominatesSave)
+ Restore = MPDT->findNearestCommonDominator(Restore, Save);
+
+ // Fix (C).
+ if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
+ // Push Save outside of this loop if immediate dominator is different
+ // from save block. If immediate dominator is not different, bail out.
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
+ break;
+ } else {
+ // If the loop does not exit, there is no point in looking
+ // for a post-dominator outside the loop.
+ SmallVector<MachineBasicBlock*, 4> ExitBlocks;
+ MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks);
+ // Push Restore outside of this loop.
+ // Look for the immediate post-dominator of the loop exits.
+ MachineBasicBlock *IPdom = Restore;
+ for (MachineBasicBlock *LoopExitBB: ExitBlocks) {
+ IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT);
+ if (!IPdom)
+ break;
+ }
+ // If the immediate post-dominator is not in a less nested loop,
+ // then we are stuck in a program with an infinite loop.
+ // In that case, we will not find a safe point, hence, bail out.
+ if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore))
+ Restore = IPdom;
+ else {
+ Restore = nullptr;
+ break;
+ }
+ }
+ }
+ }
+}
+
+static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
+ StringRef RemarkName, StringRef RemarkMessage,
+ const DiagnosticLocation &Loc,
+ const MachineBasicBlock *MBB) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkMissed(DEBUG_TYPE, RemarkName, Loc, MBB)
+ << RemarkMessage;
+ });
+
+ LLVM_DEBUG(dbgs() << RemarkMessage << '\n');
+ return false;
+}
+
+bool ShrinkWrap::performShrinkWrapping(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS) {
+ for (MachineBasicBlock *MBB : RPOT) {
+ LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
+
+ if (MBB->isEHFuncletEntry())
+ return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
+ "EH Funclets are not supported yet.",
+ MBB->front().getDebugLoc(), MBB);
+
+ if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
+ // Push the prologue and epilogue outside of the region that may throw (or
+ // jump out via inlineasm_br), by making sure that all the landing pads
+ // are at least at the boundary of the save and restore points. The
+ // problem is that a basic block can jump out from the middle in these
+ // cases, which we do not handle.
+ updateSaveRestorePoints(*MBB, RS);
+ if (!ArePointsInteresting()) {
+ LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
+ return false;
+ }
+ continue;
+ }
+
+ bool StackAddressUsed = false;
+ // Check if we found any stack accesses in the predecessors. We are not
+ // doing a full dataflow analysis here to keep things simple but just
+    // rely on a reverse post-order traversal (RPOT) to guarantee predecessors
+ // are already processed except for loops (and accept the conservative
+ // result for loops).
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (StackAddressUsedBlockInfo.test(Pred->getNumber())) {
+ StackAddressUsed = true;
+ break;
+ }
+ }
+
+ for (const MachineInstr &MI : *MBB) {
+ if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
+ // Save (resp. restore) point must dominate (resp. post dominate)
+ // MI. Look for the proper basic block for those.
+ updateSaveRestorePoints(*MBB, RS);
+ // If we are at a point where we cannot improve the placement of
+ // save/restore instructions, just give up.
+ if (!ArePointsInteresting()) {
+ LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ return false;
+ }
+ // No need to look for other instructions, this basic block
+ // will already be part of the handled region.
+ StackAddressUsed = true;
+ break;
+ }
+ }
+ StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
+ }
+ if (!ArePointsInteresting()) {
+ // If the points are not interesting at this point, then they must be null
+ // because it means we did not encounter any frame/CSR related code.
+ // Otherwise, we would have returned from the previous loop.
+ assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
+ LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
+ << '\n');
+
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
+ do {
+ LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
+ << printMBBReference(*Save) << ' '
+ << MBFI->getBlockFreq(Save).getFrequency()
+ << "\nRestore: " << printMBBReference(*Restore) << ' '
+ << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
+
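+ // Keep the current candidates only if both points are at most as frequent as
+ // the entry block and the target can emit the prologue in Save and the
+ // epilogue in Restore; otherwise hoist the offending point and try again.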
+ bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
+ if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
+ TFI->canUseAsEpilogue(*Restore)))
+ break;
+ LLVM_DEBUG(
+ dbgs() << "New points are too expensive or invalid for the target\n");
+ MachineBasicBlock *NewBB;
+ if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
+ break;
+ NewBB = Save;
+ } else {
+ // Restore is expensive.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ if (!Restore)
+ break;
+ NewBB = Restore;
+ }
+ updateSaveRestorePoints(*NewBB, RS);
+ } while (Save && Restore);
+
+ if (!ArePointsInteresting()) {
+ ++NumCandidatesDropped;
+ return false;
+ }
+ return true;
+}
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ init(MF);
+
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+ // If MF is irreducible, a block may be in a loop without
+ // MachineLoopInfo reporting it. I.e., we may use the
+ // post-dominance property in loops, which leads to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+ "Irreducible CFGs are not supported yet.",
+ MF.getFunction().getSubprogram(), &MF.front());
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
+ bool Changed = false;
+
+ StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
+ bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
+ StackAddressUsedBlockInfo.clear();
+ Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
+ if (!HasCandidate && !Changed)
+ return false;
+ if (!ArePointsInteresting())
+ return Changed;
+
+ LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
+ << printMBBReference(*Save) << ' '
+ << "\nRestore: " << printMBBReference(*Restore) << '\n');
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setSavePoint(Save);
+ MFI.setRestorePoint(Restore);
+ ++NumCandidates;
+ return Changed;
+}
+
+bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+ switch (EnableShrinkWrapOpt) {
+ case cl::BOU_UNSET:
+ return TFI->enableShrinkWrapping(MF) &&
+ // Windows with CFI has some limitations that make it impossible
+ // to use shrink-wrapping.
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ // Sanitizers look at the value of the stack at the location
+ // of the crash. Since a crash can happen anywhere, the
+ // frame must be lowered before anything else happen for the
+ // sanitizers to be able to get a correct stack frame.
+ !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) ||
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) ||
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) ||
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress));
+ // If EnableShrinkWrap is set, it takes precedence over whatever the
+ // target sets. The rationale is that we assume we want to test
+ // something related to shrink-wrapping.
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Invalid shrink-wrapping state");
+}
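For context, the typical beneficiary of this pass is a function with a cheap early exit: the fast path touches neither the frame nor a callee-saved register, so the save point can sink into the slow path and the restore point to its post-dominator. A minimal sketch of such a source function (hypothetical example, not part of this change):

    // Only the slow path needs a stack frame; shrink-wrapping lets the early
    // return skip the prologue/epilogue entirely.
    int getOrCompute(int Key, int *Cache) {
      if (Cache[Key] != 0)
        return Cache[Key];        // fast path: no frame or CSR use
      int Tmp[32];                // slow path: stack use forces a frame
      for (int I = 0; I != 32; ++I)
        Tmp[I] = Key + I;
      return Cache[Key] = Tmp[Key & 31];
    }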
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 000000000000..d09953e76a80
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,507 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "sjljehprepare"
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+class SjLjEHPrepare : public FunctionPass {
+ IntegerType *DataTy = nullptr;
+ Type *doubleUnderDataTy = nullptr;
+ Type *doubleUnderJBufTy = nullptr;
+ Type *FunctionContextTy = nullptr;
+ FunctionCallee RegisterFn;
+ FunctionCallee UnregisterFn;
+ Function *BuiltinSetupDispatchFn = nullptr;
+ Function *FrameAddrFn = nullptr;
+ Function *StackAddrFn = nullptr;
+ Function *StackRestoreFn = nullptr;
+ Function *LSDAAddrFn = nullptr;
+ Function *CallSiteFn = nullptr;
+ Function *FuncCtxFn = nullptr;
+ AllocaInst *FuncCtx = nullptr;
+ const TargetMachine *TM = nullptr;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {}
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {}
+ StringRef getPassName() const override {
+ return "SJLJ Exception Handling preparation";
+ }
+
+private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes);
+ void insertCallSiteStore(Instruction *I, int Number);
+};
+} // end anonymous namespace
+
+char SjLjEHPrepare::ID = 0;
+INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions",
+ false, false)
+
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
+ return new SjLjEHPrepare(TM);
+}
+
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPrepare::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ unsigned DataBits =
+ TM ? TM->getSjLjDataSize() : TargetMachine::DefaultSjLjDataSize;
+ DataTy = Type::getIntNTy(M.getContext(), DataBits);
+ doubleUnderDataTy = ArrayType::get(DataTy, 4);
+ doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5);
+ FunctionContextTy = StructType::get(VoidPtrTy, // __prev
+ DataTy, // call_site
+ doubleUnderDataTy, // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ doubleUnderJBufTy // __jbuf
+ );
+
+ return true;
+}
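+// For reference, the layout built above corresponds roughly to the C struct
+// that the SjLj runtime walks (the field names below are illustrative, not
+// taken from this change):
+//
+//   struct FunctionContext {
+//     void *prev;          // __prev: chain of registered contexts
+//     uintN_t call_site;   // index of the active call site (N = SjLj data size)
+//     uintN_t data[4];     // __data: exception pointer and selector
+//     void *personality;   // __personality
+//     void *lsda;          // __lsda
+//     void *jbuf[5];       // __jbuf: fp stored in [0], sp in [2], the rest is
+//                          // filled in by the target's setup_dispatch lowering
+//   };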
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
+
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite =
+ Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site");
+
+ // Insert a store of the call-site number
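+ // The store is volatile so later passes can neither delete it nor move it
+ // across the call; the unwinder reads this slot at runtime to select the
+ // landing pad.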
+ ConstantInt *CallSiteNoC = ConstantInt::get(DataTy, Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/);
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second)
+ return; // already been here.
+
+ df_iterator_default_set<BasicBlock*> Visited;
+
+ for (BasicBlock *B : inverse_depth_first_ext(BB, Visited))
+ LiveBBs.insert(B);
+}
+
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
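+/// For a landingpad of type { i8*, i32 }, an extractvalue of index 0 is
+/// rewritten to the reloaded exception pointer and index 1 to the truncated
+/// selector; any remaining uses get a rebuilt { ExnVal, SelVal } aggregate.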
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value *, 8> UseWorkList(LPI->users());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ auto *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI)
+ continue;
+ if (EVI->getNumIndices() != 1)
+ continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->use_empty())
+ EVI->eraseFromParent();
+ }
+
+ if (LPI->use_empty())
+ return;
+
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = PoisonValue::get(LPadType);
+ auto *SelI = cast<Instruction>(SelVal);
+ IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator()));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+}
+
+/// setupFunctionContext - Allocate the function context on the stack and fill
+/// it with all of the data that we know at this point.
+Value *SjLjEHPrepare::setupFunctionContext(Function &F,
+ ArrayRef<LandingPadInst *> LPads) {
+ BasicBlock *EntryBB = &F.front();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an alloca
+ // because the value needs to be added to the global context list.
+ auto &DL = F.getParent()->getDataLayout();
+ const Align Alignment = DL.getPrefTypeAlign(FunctionContextTy);
+ FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr,
+ Alignment, "fn_context", &EntryBB->front());
+
+ // Fill in the function context structure.
+ for (LandingPadInst *LPI : LPads) {
+ IRBuilder<> Builder(LPI->getParent(),
+ LPI->getParent()->getFirstInsertionPt());
+
+ // Reference the __data field.
+ Value *FCData =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
+
+ // The exception values come back in context->__data[0].
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 0, "exception_gep");
+ Value *ExnVal = Builder.CreateLoad(DataTy, ExceptionAddr, true, "exn_val");
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
+
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 1, "exn_selector_gep");
+ Value *SelVal =
+ Builder.CreateLoad(DataTy, SelectorAddr, true, "exn_selector_val");
+
+ // SelVal must be Int32Ty, so trunc it
+ SelVal = Builder.CreateTrunc(SelVal, Type::getInt32Ty(F.getContext()));
+
+ substituteLPadValues(LPI, ExnVal, SelVal);
+ }
+
+ // Personality function
+ IRBuilder<> Builder(EntryBB->getTerminator());
+ Value *PersonalityFn = F.getPersonalityFn();
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
+ FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
+ Builder.CreateStore(
+ Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()),
+ PersonalityFieldPtr, /*isVolatile=*/true);
+
+ // LSDA address
+ Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr");
+ Value *LSDAFieldPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep");
+ Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
+
+ return FuncCtx;
+}
+
+/// lowerIncomingArguments - To avoid having to handle incoming arguments
+/// specially, we lower each arg to a copy instruction in the entry block. This
+/// ensures that the argument value itself cannot be live out of the entry
+/// block.
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
+ BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsPt) &&
+ cast<AllocaInst>(AfterAllocaInsPt)->isStaticAlloca())
+ ++AfterAllocaInsPt;
+ assert(AfterAllocaInsPt != F.front().end());
+
+ for (auto &AI : F.args()) {
+ // Swift error really is a register that we model as memory -- instruction
+ // selection will perform mem-to-reg for us and spill/reload appropriately
+ // around calls that clobber it. There is no need to spill this
+ // value to the stack and doing so would not be allowed.
+ if (AI.isSwiftError())
+ continue;
+
+ Type *Ty = AI.getType();
+
+ // Use 'select i1 true, %arg, undef' to simulate a 'no-op' instruction.
+ Value *TrueValue = ConstantInt::getTrue(F.getContext());
+ Value *UndefValue = UndefValue::get(Ty);
+ Instruction *SI = SelectInst::Create(
+ TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt);
+ AI.replaceAllUsesWith(SI);
+
+ // Reset the operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, &AI);
+ }
+}
+
+/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
+/// edge and spill them.
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst *> Invokes) {
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : BB) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ if (Inst.use_empty())
+ continue;
+ if (Inst.hasOneUse() &&
+ cast<Instruction>(Inst.user_back())->getParent() == &BB &&
+ !isa<PHINode>(Inst.user_back()))
+ continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (auto *AI = dyn_cast<AllocaInst>(&Inst))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction *, 16> Users;
+ for (User *U : Inst.users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != &BB || isa<PHINode>(UI))
+ Users.push_back(UI);
+ }
+
+ // Find all of the blocks that this value is live in.
+ SmallPtrSet<BasicBlock *, 32> LiveBBs;
+ LiveBBs.insert(&BB);
+ while (!Users.empty()) {
+ Instruction *U = Users.pop_back_val();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (InvokeInst *Invoke : Invokes) {
+ BasicBlock *UnwindBlock = Invoke->getUnwindDest();
+ if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) {
+ LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around "
+ << UnwindBlock->getName() << "\n");
+ NeedsSpill = true;
+ break;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ DemoteRegToStack(Inst, true);
+ ++NumSpilled;
+ }
+ }
+ }
+
+ // Go through the landing pads and remove any PHIs there.
+ for (InvokeInst *Invoke : Invokes) {
+ BasicBlock *UnwindBlock = Invoke->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode *, 8> PHIsToDemote;
+ for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty())
+ continue;
+
+ // Demote the PHIs to the stack.
+ for (PHINode *PN : PHIsToDemote)
+ DemotePHIToStack(PN);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(&UnwindBlock->front());
+ }
+}
+
+/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
+ SmallVector<ReturnInst *, 16> Returns;
+ SmallVector<InvokeInst *, 16> Invokes;
+ SmallSetVector<LandingPadInst *, 16> LPads;
+
+ // Look through the terminators of the basic blocks to find invokes.
+ for (BasicBlock &BB : F)
+ if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
+ Invokes.push_back(II);
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
+ } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
+ Returns.push_back(RI);
+ }
+
+ if (Invokes.empty())
+ return false;
+
+ NumInvokes += Invokes.size();
+
+ lowerIncomingArguments(F);
+ lowerAcrossUnwindEdges(F, Invokes);
+
+ Value *FuncCtx =
+ setupFunctionContext(F, ArrayRef(LPads.begin(), LPads.end()));
+ BasicBlock *EntryBB = &F.front();
+ IRBuilder<> Builder(EntryBB->getTerminator());
+
+ // Get a reference to the jump buffer.
+ Value *JBufPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");
+
+ // Save the frame pointer.
+ Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
+ "jbuf_fp_gep");
+
+ Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+ Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
+
+ // Save the stack pointer.
+ Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
+ "jbuf_sp_gep");
+
+ Val = Builder.CreateCall(StackAddrFn, {}, "sp");
+ Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
+
+ // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
+ Builder.CreateCall(BuiltinSetupDispatchFn, {});
+
+ // Store a pointer to the function context so that the back-end will know
+ // where to look for it.
+ Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+ Builder.CreateCall(FuncCtxFn, FuncCtxArg);
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values.
+ for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+ insertCallSiteStore(Invokes[I], I + 1);
+
+ ConstantInt *CallSiteNum =
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ }
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block, as prior to then, no function context has been
+ // created for this function and any unexpected exceptions thrown will go
+ // directly to the caller's context, which is what we want anyway, so no need
+ // to do anything here.
+ for (BasicBlock &BB : F) {
+ if (&BB == &F.front())
+ continue;
+ for (Instruction &I : BB)
+ if (I.mayThrow())
+ insertCallSiteStore(&I, -1);
+ }
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
+ Register->setDoesNotThrow();
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (BasicBlock &BB : F) {
+ if (&BB == &F.front())
+ continue;
+ for (Instruction &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(&I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(&I);
+ new StoreInst(StackAddr, StackPtr, true, StackAddr->getNextNode());
+ }
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (ReturnInst *Return : Returns) {
+ Instruction *InsertPoint = Return;
+ if (CallInst *CI = Return->getParent()->getTerminatingMustTailCall())
+ InsertPoint = CI;
+ CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint);
+ }
+
+ return true;
+}
+
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ Module &M = *F.getParent();
+ RegisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy));
+ UnregisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy));
+ FrameAddrFn = Intrinsic::getDeclaration(
+ &M, Intrinsic::frameaddress,
+ {Type::getInt8PtrTy(M.getContext(),
+ M.getDataLayout().getAllocaAddrSpace())});
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetupDispatchFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+
+ bool Res = setupEntryBlockAndCallSites(F);
+ return Res;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 000000000000..47ee36971d0e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,272 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "slotindexes"
+
+char SlotIndexes::ID = 0;
+
+SlotIndexes::SlotIndexes() : MachineFunctionPass(ID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+}
+
+SlotIndexes::~SlotIndexes() {
+ // The indexList's nodes are all allocated in the BumpPtrAllocator.
+ indexList.clearAndLeakNodesUnsafely();
+}
+
+INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
+ "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ MBBRanges.clear();
+ idx2MBBMap.clear();
+ indexList.clear();
+ ileAllocator.Reset();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator. This
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+
+ // Check that the list contains only the sentinel.
+ assert(indexList.empty() && "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(MBBRanges.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
+
+ indexList.push_back(createEntry(nullptr, index));
+
+ // Iterate over the function.
+ for (MachineBasicBlock &MBB : *mf) {
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+
+ // Insert a store index for the instr.
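+ // Consecutive instructions are numbered SlotIndex::InstrDist apart, leaving
+ // gaps so that later insertions can usually be numbered locally without
+ // renumbering the rest of the list.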
+ indexList.push_back(createEntry(&MI, index += SlotIndex::InstrDist));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(std::make_pair(
+ &MI, SlotIndex(&indexList.back(), SlotIndex::Slot_Block)));
+ }
+
+ // We insert one blank instruction between basic blocks.
+ indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist));
+
+ MBBRanges[MBB.getNumber()].first = blockStartIndex;
+ MBBRanges[MBB.getNumber()].second = SlotIndex(&indexList.back(),
+ SlotIndex::Slot_Block);
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, &MBB));
+ }
+
+ // Sort the Idx2MBBMap
+ llvm::sort(idx2MBBMap, less_first());
+
+ LLVM_DEBUG(mf->print(dbgs(), this));
+
+ // And we're done!
+ return false;
+}
+
+void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI,
+ bool AllowBundled) {
+ assert((AllowBundled || !MI.isBundledWithPred()) &&
+ "Use removeSingleMachineInstrFromMaps() instead");
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+}
+
+void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+
+ // When removing the first instruction of a bundle, update the mapping to the
+ // next instruction.
+ if (MI.isBundledWithSucc()) {
+ // Only the first instruction of a bundle should have an index assigned.
+ assert(!MI.isBundledWithPred() && "Should be first bundle instruction");
+
+ MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
+ MachineInstr &NextMI = *Next;
+ MIEntry.setInstr(&NextMI);
+ mi2iMap.insert(std::make_pair(&NextMI, MIIndex));
+ return;
+ } else {
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+ }
+}
+
+// Renumber indexes locally after curItr was inserted, but failed to get a new
+// index.
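+// Bumping each following entry by half the normal spacing means the loop stops
+// as soon as the pre-existing indexes are large enough again, so an insertion
+// only renumbers a local window instead of the whole function.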
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
+
+ IndexList::iterator startItr = std::prev(curItr);
+ unsigned index = startItr->getIndex();
+ do {
+ curItr->setIndex(index += Space);
+ ++curItr;
+ // If the next index is bigger, we have caught up.
+ } while (curItr != indexList.end() && curItr->getIndex() <= index);
+
+ LLVM_DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex()
+ << '-' << index << " ***\n");
+ ++NumLocalRenum;
+}
+
+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) {
+ bool includeStart = (Begin == MBB->begin());
+ SlotIndex startIdx;
+ if (includeStart)
+ startIdx = getMBBStartIdx(MBB);
+ else
+ startIdx = getInstructionIndex(*--Begin);
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB);
+ else
+ endIdx = getInstructionIndex(*End);
+
+ // FIXME: Conceptually, this code is implementing an iterator on MBB that
+ // optionally includes an additional position prior to MBB->begin(), indicated
+ // by the includeStart flag. This is done so that we can iterate MIs in a MBB
+ // in parallel with SlotIndexes, but there should be a better way to do this.
+ IndexList::iterator ListB = startIdx.listEntry()->getIterator();
+ IndexList::iterator ListI = endIdx.listEntry()->getIterator();
+ MachineBasicBlock::iterator MBBI = End;
+ bool pastStart = false;
+ while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
+ assert(ListI->getIndex() >= startIdx.getIndex() &&
+ (includeStart || !pastStart) &&
+ "Decremented past the beginning of region to repair.");
+
+ MachineInstr *SlotMI = ListI->getInstr();
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
+ bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+
+ if (SlotMI == MI && !MBBIAtBegin) {
+ --ListI;
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else if (MI && !mi2iMap.contains(MI)) {
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else {
+ --ListI;
+ if (SlotMI)
+ removeMachineInstrFromMaps(*SlotMI);
+ }
+ }
+
+ // In theory this could be combined with the previous loop, but it is tricky
+ // to update the IndexList while we are iterating it.
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr &MI = *I;
+ if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
+ insertMachineInstrInMaps(MI);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SlotIndexes::dump() const {
+ for (const IndexListEntry &ILE : indexList) {
+ dbgs() << ILE.getIndex() << " ";
+
+ if (ILE.getInstr()) {
+ dbgs() << *ILE.getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "%bb." << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
+}
+#endif
+
+// Print a SlotIndex to a raw_ostream.
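+// The trailing letter encodes the sub-slot: 'B' = Slot_Block,
+// 'e' = Slot_EarlyClobber, 'r' = Slot_Register, 'd' = Slot_Dead.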
+void SlotIndex::print(raw_ostream &os) const {
+ if (isValid())
+ os << listEntry()->getIndex() << "Berd"[getSlot()];
+ else
+ os << "invalid";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Dump a SlotIndex to stderr.
+LLVM_DUMP_METHOD void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..91da5e49713c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,398 @@
+//===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
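+// Here V_n is the output value of node n (one of -1, 0, +1), B_n is its net
+// bias (the register-preferring frequencies minus the spill-preferring ones),
+// and F_b is the frequency of the transparent block b linking nodes n and m.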
+//
+//===----------------------------------------------------------------------===//
+
+#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spill-code-placement"
+
+char SpillPlacement::ID = 0;
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
+ "Spill Code Placement Analysis", true, true)
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+struct SpillPlacement::Node {
+ /// BiasN - Sum of blocks that prefer a spill.
+ BlockFrequency BiasN;
+
+ /// BiasP - Sum of blocks that prefer a register.
+ BlockFrequency BiasP;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always one of the values {-1, 0, 1}. A positive number means the
+ /// variable should go in a register through this bundle.
+ int Value;
+
+ using LinkVector = SmallVector<std::pair<BlockFrequency, unsigned>, 4>;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive block frequencies.
+ LinkVector Links;
+
+ /// SumLinkWeights - Cached sum of the weights of all links + Threshold.
+ BlockFrequency SumLinkWeights;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // We must spill if Bias < -sum(weights) or the MustSpill flag was set.
+ // BiasN is saturated when MustSpill is set; make sure this still returns
+ // true when the RHS saturates. Note that SumLinkWeights includes Threshold.
+ return BiasN >= BiasP + SumLinkWeights;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+ /// the CFG.
+ void clear(const BlockFrequency &Threshold) {
+ BiasN = BiasP = Value = 0;
+ SumLinkWeights = Threshold;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ void addLink(unsigned b, BlockFrequency w) {
+ // Update cached sum.
+ SumLinkWeights += w;
+
+ // There can be multiple links to the same bundle, add them up.
+ for (std::pair<BlockFrequency, unsigned> &L : Links)
+ if (L.second == b) {
+ L.first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node.
+ void addBias(BlockFrequency freq, BorderConstraint direction) {
+ switch (direction) {
+ default:
+ break;
+ case PrefReg:
+ BiasP += freq;
+ break;
+ case PrefSpill:
+ BiasN += freq;
+ break;
+ case MustSpill:
+ BiasN = BlockFrequency::getMaxFrequency();
+ break;
+ }
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[], const BlockFrequency &Threshold) {
+ // Compute the weighted sum of inputs.
+ BlockFrequency SumN = BiasN;
+ BlockFrequency SumP = BiasP;
+ for (std::pair<BlockFrequency, unsigned> &L : Links) {
+ if (nodes[L.second].Value == -1)
+ SumN += L.first;
+ else if (nodes[L.second].Value == 1)
+ SumP += L.first;
+ }
+
+ // Each weighted sum is going to be less than the total frequency of the
+ // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we
+ // will add a dead zone around 0 for two reasons:
+ //
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ //
+ bool Before = preferReg();
+ if (SumN >= SumP + Threshold)
+ Value = -1;
+ else if (SumP >= SumN + Threshold)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+
+ void getDissentingNeighbors(SparseSet<unsigned> &List,
+ const Node nodes[]) const {
+ for (const auto &Elt : Links) {
+ unsigned n = Elt.second;
+ // Neighbors that already have the same value are not going to
+ // change because of this node changing.
+ if (Value != nodes[n].Value)
+ List.insert(n);
+ }
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+ TodoList.clear();
+ TodoList.setUniverse(bundles->getNumBundles());
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequencies.resize(mf.getNumBlockIDs());
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ setThreshold(MBFI->getEntryFreq());
+ for (auto &I : mf) {
+ unsigned Num = I.getNumber();
+ BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
+ }
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = nullptr;
+ TodoList.clear();
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ TodoList.insert(n);
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear(Threshold);
+
+ // Very large bundles usually come from big switches, indirect branches,
+ // landing pads, or loops with many 'continue' statements. It is difficult to
+ // allocate registers when so many different blocks are involved.
+ //
+ // Give a small negative bias to large bundles such that a substantial
+ // fraction of the connected blocks need to be interested before we consider
+ // expanding the region through the bundle. This helps compile time by
+ // limiting the number of blocks visited and the number of links in the
+ // Hopfield network.
+ if (bundles->getBlocks(n).size() > 100) {
+ nodes[n].BiasP = 0;
+ nodes[n].BiasN = (MBFI->getEntryFreq() / 16);
+ }
+}
+
+/// Set the threshold for a given entry frequency.
+///
+/// Set the threshold relative to \c Entry. Since the threshold is used as a
+/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
+/// threshold.
+void SpillPlacement::setThreshold(const BlockFrequency &Entry) {
+ // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
+ // it. Divide by 2^13, rounding as appropriate.
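+ // For example, Entry == 2^14 gives (2^14 >> 13) == 2 with the rounding bit
+ // clear, i.e. Threshold == 2, while any Entry below 2^12 falls back to the
+ // minimum threshold of 1.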
+ uint64_t Freq = Entry.getFrequency();
+ uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
+ Threshold = std::max(UINT64_C(1), Scaled);
+}
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (const BlockConstraint &LB : LiveBlocks) {
+ BlockFrequency Freq = BlockFrequencies[LB.Number];
+
+ // Live-in to block?
+ if (LB.Entry != DontCare) {
+ unsigned ib = bundles->getBundle(LB.Number, false);
+ activate(ib);
+ nodes[ib].addBias(Freq, LB.Entry);
+ }
+
+ // Live-out from block?
+ if (LB.Exit != DontCare) {
+ unsigned ob = bundles->getBundle(LB.Number, true);
+ activate(ob);
+ nodes[ob].addBias(Freq, LB.Exit);
+ }
+ }
+}
+
+/// addPrefSpill - Same as addConstraints(PrefSpill)
+void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
+ for (unsigned B : Blocks) {
+ BlockFrequency Freq = BlockFrequencies[B];
+ if (Strong)
+ Freq += Freq;
+ unsigned ib = bundles->getBundle(B, false);
+ unsigned ob = bundles->getBundle(B, true);
+ activate(ib);
+ activate(ob);
+ nodes[ib].addBias(Freq, PrefSpill);
+ nodes[ob].addBias(Freq, PrefSpill);
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (unsigned Number : Links) {
+ unsigned ib = bundles->getBundle(Number, false);
+ unsigned ob = bundles->getBundle(Number, true);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ BlockFrequency Freq = BlockFrequencies[Number];
+ nodes[ib].addLink(ob, Freq);
+ nodes[ob].addLink(ib, Freq);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ RecentPositive.clear();
+ for (unsigned n : ActiveNodes->set_bits()) {
+ update(n);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+bool SpillPlacement::update(unsigned n) {
+ if (!nodes[n].update(nodes, Threshold))
+ return false;
+ nodes[n].getDissentingNeighbors(TodoList, nodes);
+ return true;
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+void SpillPlacement::iterate() {
+ // We do not need to push those nodes in the todolist.
+ // They have already been processed as part of the previous iteration.
+ RecentPositive.clear();
+
+ // Since the last iteration, the todolist has been augmented by calls
+ // to addConstraints, addLinks, and co.
+ // Update the network energy starting at this new frontier.
+ // The call to ::update will add the nodes that changed into the todolist.
+ unsigned Limit = bundles->getNumBundles() * 10;
+ while(Limit-- > 0 && !TodoList.empty()) {
+ unsigned n = TodoList.pop_back_val();
+ if (!update(n))
+ continue;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ RecentPositive.clear();
+ TodoList.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool
+SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (unsigned n : ActiveNodes->set_bits())
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = nullptr;
+ return Perfect;
+}
+
+void SpillPlacement::BlockConstraint::print(raw_ostream &OS) const {
+ auto toString = [](BorderConstraint C) -> StringRef {
+ switch(C) {
+ case DontCare: return "DontCare";
+ case PrefReg: return "PrefReg";
+ case PrefSpill: return "PrefSpill";
+ case PrefBoth: return "PrefBoth";
+ case MustSpill: return "MustSpill";
+ };
+ llvm_unreachable("uncovered switch");
+ };
+
+ dbgs() << "{" << Number << ", "
+ << toString(Entry) << ", "
+ << toString(Exit) << ", "
+ << (ChangesValue ? "changes" : "no change") << "}";
+}
+
+void SpillPlacement::BlockConstraint::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..bd37d85c6c0d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,172 @@
+//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
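+// A typical query from the register allocator looks roughly like this (a
+// sketch; the constraint and block arrays are assumed to be precomputed):
+//
+//   BitVector RegBundles;
+//   SpillPlacement &SP = getAnalysis<SpillPlacement>();
+//   SP.prepare(RegBundles);
+//   SP.addConstraints(LiveBlocks);  // entry/exit preference per block
+//   SP.addLinks(ThroughBlocks);     // transparent (live-through) blocks
+//   SP.scanActiveBundles();
+//   SP.iterate();
+//   bool Perfect = SP.finish();     // RegBundles now marks in-register bundles
+//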
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/BlockFrequency.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF = nullptr;
+ const EdgeBundles *bundles = nullptr;
+ const MachineLoopInfo *loops = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
+ Node *nodes = nullptr;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes = nullptr;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<BlockFrequency, 8> BlockFrequencies;
+
+ /// Decision threshold. A node gets the output value 0 if the weighted sum of
+ /// its inputs falls in the open interval (-Threshold;Threshold).
+ BlockFrequency Threshold;
+
+ /// List of nodes that need to be updated in ::iterate.
+ SparseSet<unsigned> TodoList;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID) {}
+ ~SpillPlacement() override { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ PrefBoth, ///< Block entry prefers both register and stack.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+
+ /// True when this block changes the value of the live range. This means
+ /// the block has a non-PHI def. When this is false, a live-in value on
+ /// the stack can be live-out on the stack without inserting a spill.
+ bool ChangesValue;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+ /// equivalent to calling addConstraints with identical BlockConstraints with
+ /// Entry = Exit = PrefSpill, and ChangesValue = false.
+ ///
+ /// @param Blocks Array of block numbers that prefer to spill in and out.
+ /// @param Strong When true, double the negative bias for these blocks.
+ void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+ /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+ /// iterate - Update the network iteratively until convergence, or new bundles
+ /// are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ BlockFrequency getBlockFrequency(unsigned Number) const {
+ return BlockFrequencies[Number];
+ }
+
+private:
+ bool runOnMachineFunction(MachineFunction &mf) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+
+ void activate(unsigned n);
+ void setThreshold(const BlockFrequency &Entry);
+
+ bool update(unsigned n);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 000000000000..eee54f09fbad
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1888 @@
+//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SplitKit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+
+//===----------------------------------------------------------------------===//
+// Last Insert Point Analysis
+//===----------------------------------------------------------------------===//
+
+InsertPointAnalysis::InsertPointAnalysis(const LiveIntervals &lis,
+ unsigned BBNum)
+ : LIS(lis), LastInsertPoint(BBNum) {}
+
+SlotIndex
+InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
+
+ SmallVector<const MachineBasicBlock *, 1> ExceptionalSuccessors;
+ bool EHPadSuccessor = false;
+ for (const MachineBasicBlock *SMBB : MBB.successors()) {
+ if (SMBB->isEHPad()) {
+ ExceptionalSuccessors.push_back(SMBB);
+ EHPadSuccessor = true;
+ } else if (SMBB->isInlineAsmBrIndirectTarget())
+ ExceptionalSuccessors.push_back(SMBB);
+ }
+
+ // Compute insert points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LIP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm == MBB.end())
+ LIP.first = MBBEnd;
+ else
+ LIP.first = LIS.getInstructionIndex(*FirstTerm);
+
+ // If there is a landing pad or inlineasm_br successor, also find the
+ // instruction. If there is no such instruction, we don't need to do
+ // anything special. We assume there cannot be multiple instructions that
+ // are Calls with EHPad successors or INLINEASM_BR in a block. Further, we
+ // assume that if there are any, they will be after any other call
+ // instructions in the block.
+ if (ExceptionalSuccessors.empty())
+ return LIP.first;
+ for (const MachineInstr &MI : llvm::reverse(MBB)) {
+ if ((EHPadSuccessor && MI.isCall()) ||
+ MI.getOpcode() == TargetOpcode::INLINEASM_BR) {
+ LIP.second = LIS.getInstructionIndex(MI);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last insert point
+ // back to the call that may throw.
+ if (!LIP.second)
+ return LIP.first;
+
+ if (none_of(ExceptionalSuccessors, [&](const MachineBasicBlock *EHPad) {
+ return LIS.isLiveInToMBB(CurLI, EHPad);
+ }))
+ return LIP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI.getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LIP.first;
+
+ // The def of a statepoint instruction is a gc relocation and it should be
+ // alive in the landing pad, so we cannot split the interval after the
+ // statepoint instruction.
+ if (SlotIndex::isSameInstr(VNI->def, LIP.second))
+ if (auto *I = LIS.getInstructionFromIndex(LIP.second))
+ if (I->getOpcode() == TargetOpcode::STATEPOINT)
+ return LIP.second;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LIP.second) && VNI->def < MBBEnd)
+ return LIP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow inserts before the call.
+ return LIP.second;
+}
+
+MachineBasicBlock::iterator
+InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB) {
+ SlotIndex LIP = getLastInsertPoint(CurLI, MBB);
+ if (LIP == LIS.getMBBEndIdx(&MBB))
+ return MBB.end();
+ return LIS.getInstructionFromIndex(LIP);
+}
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
+ TII(*MF.getSubtarget().getInstrInfo()), IPA(lis, MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = nullptr;
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
+void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (const VNInfo *VNI : CurLI->valnos)
+ if (!VNI->isPHIDef() && !VNI->isUnused())
+ UseSlots.push_back(VNI->def);
+
+  // Get use slots from the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg()))
+ if (!MO.isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ calcLiveBlockInfo();
+
+ LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+void SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return;
+
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
+ while (true) {
+ BlockInfo BI;
+ BI.MBB = &*MFI;
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+      // A range with no uses in the block should not end mid-block.
+ assert(LVI->end >= Stop && "range ends mid block with no uses");
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstInstr = *UseI;
+ assert(BI.FirstInstr >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastInstr = UseI[-1];
+ assert(BI.LastInstr < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // When not live in, the first use should be a def.
+ if (!BI.LiveIn) {
+ assert(LVI->start == LVI->valno->def && "Dangling Segment start");
+ assert(LVI->start == BI.FirstInstr && "First instr should be a def");
+ BI.FirstDef = BI.FirstInstr;
+ }
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastInstr = LastStop;
+ break;
+ }
+
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastInstr = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstInstr = BI.FirstDef = LVI->start;
+ }
+
+ // A Segment that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling Segment start");
+ if (!BI.FirstDef)
+ BI.FirstDef = LVI->start;
+ }
+
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
+ }
+
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start)->getIterator();
+ }
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
+ SlotIndex Stop = LIS.getMBBEndIdx(&*MFI);
+ while (true) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(&*MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
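+/// isOriginalEndpoint - Return true if a segment of the original
+/// (pre-splitting) live range starts or ends exactly at Idx. This is used to
+/// avoid isolating end points that were created by earlier splits.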
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ Register OrigReg = VRM.getOriginal(CurLI->reg());
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+  // The range does not contain Idx; the previous range must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ CurLI = li;
+ analyzeUses();
+}
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM,
+ MachineDominatorTree &MDT,
+ MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI)
+ : SA(SA), LIS(LIS), VRM(VRM), MRI(VRM.getMachineFunction().getRegInfo()),
+ MDT(MDT), TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()),
+ TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()),
+ MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {}
+
+void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
+ Edit = &LRE;
+ SpillMode = SM;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+
+ // Reset the LiveIntervalCalc instances needed for this spill mode.
+ LICalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ if (SpillMode)
+ LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+
+ Edit->anyRematerializable();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
+}
+#endif
+
+/// Find a subrange corresponding to the exact lane mask @p LM in the live
+/// interval @p LI. The interval @p LI is assumed to contain such a subrange.
+/// This function is used to find corresponding subranges between the
+/// original interval and the new intervals.
+template <typename T> auto &getSubrangeImpl(LaneBitmask LM, T &LI) {
+ for (auto &S : LI.subranges())
+ if (S.LaneMask == LM)
+ return S;
+ llvm_unreachable("SubRange for this mask not found");
+}
+
+LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI) {
+ return getSubrangeImpl(LM, LI);
+}
+
+const LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ const LiveInterval &LI) {
+ return getSubrangeImpl(LM, LI);
+}
+
+/// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
+/// in the live interval @p LI. The interval @p LI is assumed to contain such
+/// a subrange. This function is used to find corresponding subranges between
+/// the original interval and the new intervals.
+const LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM,
+ const LiveInterval &LI) {
+ for (const LiveInterval::SubRange &S : LI.subranges())
+ if ((S.LaneMask & LM) == LM)
+ return S;
+ llvm_unreachable("SubRange for this mask not found");
+}
+
+void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
+ if (!LI.hasSubRanges()) {
+ LI.createDeadDef(VNI);
+ return;
+ }
+
+ SlotIndex Def = VNI->def;
+ if (Original) {
+ // If we are transferring a def from the original interval, make sure
+ // to only update the subranges for which the original subranges had
+ // a def at this location.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ auto &PS = getSubRangeForMask(S.LaneMask, Edit->getParent());
+ VNInfo *PV = PS.getVNInfoAt(Def);
+ if (PV != nullptr && PV->def == Def)
+ S.createDeadDef(Def, LIS.getVNInfoAllocator());
+ }
+ } else {
+ // This is a new def: either from rematerialization, or from an inserted
+ // copy. Since rematerialization can regenerate a definition of a sub-
+ // register, we need to check which subranges need to be updated.
+ const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
+ assert(DefMI != nullptr);
+ LaneBitmask LM;
+ for (const MachineOperand &DefOp : DefMI->defs()) {
+ Register R = DefOp.getReg();
+ if (R != LI.reg())
+ continue;
+ if (unsigned SR = DefOp.getSubReg())
+ LM |= TRI.getSubRegIndexLaneMask(SR);
+ else {
+ LM = MRI.getMaxLaneMaskForVReg(R);
+ break;
+ }
+ }
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if ((S.LaneMask & LM).any())
+ S.createDeadDef(Def, LIS.getVNInfoAllocator());
+ }
+}
+
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx,
+ bool Original) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
+
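+  // If the interval has subranges, every value must be complex-mapped so the
+  // subranges can be kept up to date as liveness is transferred.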
+ bool Force = LI->hasSubRanges();
+ ValueForcePair FP(Force ? nullptr : VNI, Force);
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), FP));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped, and it is not
+ // forced. Keep it as a simple def without any liveness.
+ if (!Force && InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
+ addDeadDef(*LI, OldVNI, Original);
+
+ // No longer a simple mapping. Switch to a complex mapping. If the
+ // interval has subranges, make it a forced mapping.
+ InsP.first->second = ValueForcePair(nullptr, Force);
+ }
+
+  // This is a complex mapping; add liveness for VNI.
+ addDeadDef(*LI, VNI, Original);
+ return VNI;
+}
+
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI.id)];
+ VNInfo *VNI = VFP.getPointer();
+
+ // ParentVNI was either unmapped or already complex mapped. Either way, just
+ // set the force bit.
+ if (!VNI) {
+ VFP.setInt(true);
+ return;
+ }
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ addDeadDef(LIS.getInterval(Edit->get(RegIdx)), VNI, false);
+
+ // Mark as complex mapped, forced.
+ VFP = ValueForcePair(nullptr, true);
+}
+
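+/// Insert a COPY of the SubIdx lanes of FromReg into ToReg before
+/// InsertBefore. The first copy of a sequence gets its own slot index; later
+/// copies are bundled with their predecessor so the whole sequence maps to
+/// the single index Def, which is returned.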
+SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ bool FirstCopy = !Def.isValid();
+ MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+ .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
+ | getInternalReadRegState(!FirstCopy), SubIdx)
+ .addReg(FromReg, 0, SubIdx);
+
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ if (FirstCopy) {
+ Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ } else {
+ CopyMI->bundleWithPred();
+ }
+ return Def;
+}
+
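+/// Copy the LaneMask lanes of FromReg into ToReg before InsertBefore. A
+/// single full-register COPY is used when the mask covers the whole register;
+/// otherwise a bundled sequence of subregister COPYs is built and the
+/// subranges of the destination interval are updated. Returns the SlotIndex
+/// of the new definition.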
+SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
+ LaneBitmask LaneMask, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+ // The full vreg is copied.
+ MachineInstr *CopyMI =
+ BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+ return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ }
+
+ // Only a subset of lanes needs to be copied. The following is a simple
+ // heuristic to construct a sequence of COPYs. We could add a target
+ // specific callback if this turns out to be suboptimal.
+ LiveInterval &DestLI = LIS.getInterval(Edit->get(RegIdx));
+
+  // First pass: Try to find a perfectly matching subregister index. If none
+  // exists, find the one covering the most lane mask bits.
+ const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+ assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+
+ SmallVector<unsigned, 8> SubIndexes;
+
+ // Abort if we cannot possibly implement the COPY with the given indexes.
+ if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))
+ report_fatal_error("Impossible to implement partial COPY");
+
+ SlotIndex Def;
+ for (unsigned BestIdx : SubIndexes) {
+ Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+ DestLI, Late, Def);
+ }
+
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ DestLI.refineSubRanges(
+ Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
+
+ return Def;
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
+ SlotIndex UseIdx, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ SlotIndex Def;
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ Register Original = VRM.getOriginal(Edit->get(RegIdx));
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+
+ Register Reg = LI->reg();
+ bool DidRemat = false;
+ if (OrigVNI) {
+ LiveRangeEdit::Remat RM(ParentVNI);
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+ if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
+ ++NumRemats;
+ DidRemat = true;
+ }
+ }
+ if (!DidRemat) {
+ LaneBitmask LaneMask;
+ if (OrigLI.hasSubRanges()) {
+ LaneMask = LaneBitmask::getNone();
+ for (LiveInterval::SubRange &S : OrigLI.subranges()) {
+ if (S.liveAt(UseIdx))
+ LaneMask |= S.LaneMask;
+ }
+ } else {
+ LaneMask = LaneBitmask::getAll();
+ }
+
+ if (LaneMask.none()) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::IMPLICIT_DEF);
+ MachineInstr *ImplicitDef = BuildMI(MBB, I, DebugLoc(), Desc, Reg);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ Def = Indexes.insertMachineInstrInMaps(*ImplicitDef, Late).getRegSlot();
+ } else {
+ ++NumCopies;
+ Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
+ }
+ }
+
+ // Define the value in Reg.
+ return defValue(RegIdx, ParentVNI, Def, false);
+}
+
+/// Create a new virtual register and live interval.
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->createEmptyInterval();
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->createEmptyInterval();
+ return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ LLVM_DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
+}
+
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ LLVM_DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ LLVM_DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ std::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ LLVM_DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", "
+ << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ SlotIndex LSP = SA.getLastSplitPoint(&MBB);
+ if (LSP < Last) {
+    // The use after LSP may be a def, in which case the ParentVNI we just
+    // selected starts at that def. For this case to exist, the def must be
+    // part of a tied def/use pair (otherwise we would have split the
+    // distinct live ranges into individual live intervals), so we can fold
+    // the def into the VNI of the use and keep the tied def/use pair in the
+    // resulting interval.
+ Last = LSP;
+ ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ // undef use --> undef tied def
+ LLVM_DEBUG(dbgs() << ": tied use not live\n");
+ return End;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ SA.getLastSplitPointIter(&MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ LLVM_DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before useIntv");
+ LLVM_DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ LLVM_DEBUG(dump());
+}
+
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ LLVM_DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+
+ // The interval must be live beyond the instruction at Idx.
+ SlotIndex Boundary = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << ": not live\n");
+ return Boundary.getNextSlot();
+ }
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
+ assert(MI && "No instruction at index");
+
+ // In spill mode, make live ranges as short as possible by inserting the copy
+ // before MI. This is only possible if that instruction doesn't redefine the
+ // value. The inserted COPY is not a kill, and we don't need to recompute
+ // the source live range. The spiller also won't try to hoist this copy.
+ if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
+ MI->readsVirtualRegister(Edit->getReg())) {
+ forceRecompute(0, *ParentVNI);
+ defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return Idx;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
+ std::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ LLVM_DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ LLVM_DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", "
+ << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ LLVM_DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ LLVM_DEBUG(dump());
+ return VNI->def;
+}
+
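+/// Return true if MI has a def of Reg that is tied to one of its use
+/// operands, i.e. MI reads Reg through a tied def/use pair.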
+static bool hasTiedUseOf(MachineInstr &MI, unsigned Reg) {
+ return any_of(MI.defs(), [Reg](const MachineOperand &MO) {
+ return MO.isReg() && MO.isTied() && MO.getReg() == Reg;
+ });
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by LICalc.extend().
+ if (ParentVNI)
+ forceRecompute(0, *ParentVNI);
+
+ // If the last use is tied to a def, we can't mark it as live for the
+ // interval which includes only the use. That would cause the tied pair
+ // to end up in two different intervals.
+ if (auto *MI = LIS.getInstructionFromIndex(End))
+ if (hasTiedUseOf(*MI, Edit->getReg())) {
+ LLVM_DEBUG(dbgs() << "skip overlap due to tied def at end\n");
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ LLVM_DEBUG(dump());
+}
+
+//===----------------------------------------------------------------------===//
+// Spill modes
+//===----------------------------------------------------------------------===//
+
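+/// removeBackCopies - Remove the back-copy values in Copies from the
+/// complement interval, erase the corresponding COPY instructions, and trim
+/// or recompute any RegAssign entry that was killed at an erased copy.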
+void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ LLVM_DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ RegAssignMap::iterator AssignI;
+ AssignI.setMap(RegAssign);
+
+ for (const VNInfo *C : Copies) {
+ SlotIndex Def = C->def;
+ MachineInstr *MI = LIS.getInstructionFromIndex(Def);
+ assert(MI && "No instruction for back-copy");
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ bool AtBegin;
+ do AtBegin = MBBI == MBB->begin();
+ while (!AtBegin && (--MBBI)->isDebugOrPseudoInstr());
+
+ LLVM_DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LIS.removeVRegDefAt(*LI, Def);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+
+ // Adjust RegAssign if a register assignment is killed at Def. We want to
+ // avoid calculating the live range of the source register if possible.
+ AssignI.find(Def.getPrevSlot());
+ if (!AssignI.valid() || AssignI.start() >= Def)
+ continue;
+ // If MI doesn't kill the assigned register, just leave it.
+ if (AssignI.stop() != Def)
+ continue;
+ unsigned RegIdx = AssignI.value();
+      // We could have hoisted a back-copy right after another back-copy. As a
+      // result, MBBI points to a copy instruction which is actually dead now.
+      // We cannot set the stop to MBBI's index because it would equal the
+      // start, and the interval map does not support that.
+ SlotIndex Kill =
+ AtBegin ? SlotIndex() : LIS.getInstructionIndex(*MBBI).getRegSlot();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg()) ||
+ Kill <= AssignI.start()) {
+ LLVM_DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx
+ << '\n');
+ forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def));
+ } else {
+ LLVM_DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ AssignI.setStop(Kill);
+ }
+ }
+}
+
+MachineBasicBlock*
+SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB) {
+ if (MBB == DefMBB)
+ return MBB;
+ assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def.");
+
+ const MachineLoopInfo &Loops = SA.Loops;
+ const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB);
+ MachineDomTreeNode *DefDomNode = MDT[DefMBB];
+
+ // Best candidate so far.
+ MachineBasicBlock *BestMBB = MBB;
+ unsigned BestDepth = std::numeric_limits<unsigned>::max();
+
+ while (true) {
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
+
+    // MBB isn't in a loop, so it doesn't get any better. All dominators have
+    // a higher frequency by definition.
+ if (!Loop) {
+ LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB)
+ << " dominates " << printMBBReference(*MBB)
+ << " at depth 0\n");
+ return MBB;
+ }
+
+ // We'll never be able to exit the DefLoop.
+ if (Loop == DefLoop) {
+ LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB)
+ << " dominates " << printMBBReference(*MBB)
+ << " in the same loop\n");
+ return MBB;
+ }
+
+ // Least busy dominator seen so far.
+ unsigned Depth = Loop->getLoopDepth();
+ if (Depth < BestDepth) {
+ BestMBB = MBB;
+ BestDepth = Depth;
+ LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB)
+ << " dominates " << printMBBReference(*MBB)
+ << " at depth " << Depth << '\n');
+ }
+
+ // Leave loop by going to the immediate dominator of the loop header.
+ // This is a bigger stride than simply walking up the dominator tree.
+ MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom();
+
+ // Too far up the dominator tree?
+ if (!IDom || !MDT.dominates(DefDomNode, IDom))
+ return BestMBB;
+
+ MBB = IDom->getBlock();
+ }
+}
+
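+/// For each parent value in NotToHoistSet, find back-copies that are
+/// dominated by another back-copy of the same value and add them to
+/// BackCopies so they can be removed.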
+void SplitEditor::computeRedundantBackCopies(
+ DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ const LiveInterval *Parent = &Edit->getParent();
+ SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
+ SmallPtrSet<VNInfo *, 8> DominatedVNIs;
+
+ // Aggregate VNIs having the same value as ParentVNI.
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ EqualVNs[ParentVNI->id].insert(VNI);
+ }
+
+  // For each ParentVNI's aggregated VNIs, collect the dominated, i.e.
+  // redundant, VNIs into BackCopies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ const VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ if (!NotToHoistSet.count(ParentVNI->id))
+ continue;
+ SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
+ SmallPtrSetIterator<VNInfo *> It2 = It1;
+ for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
+ It2 = It1;
+ for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
+ if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
+ continue;
+
+ MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
+ MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
+ if (MBB1 == MBB2) {
+ DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
+ } else if (MDT.dominates(MBB1, MBB2)) {
+ DominatedVNIs.insert(*It2);
+ } else if (MDT.dominates(MBB2, MBB1)) {
+ DominatedVNIs.insert(*It1);
+ }
+ }
+ }
+ if (!DominatedVNIs.empty()) {
+ forceRecompute(0, *ParentVNI);
+ append_range(BackCopies, DominatedVNIs);
+ DominatedVNIs.clear();
+ }
+ }
+}
+
+/// For SM_Size mode, find a common dominator for all the back-copies of the
+/// same ParentVNI and hoist the back-copies to the dominator BB.
+/// For SM_Speed mode, if the common dominator is hot and hoisting is not
+/// beneficial, simply remove the dominated back-copies of the same ParentVNI.
+void SplitEditor::hoistCopies() {
+ // Get the complement interval, always RegIdx 0.
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ const LiveInterval *Parent = &Edit->getParent();
+
+ // Track the nearest common dominator for all back-copies for each ParentVNI,
+ // indexed by ParentVNI->id.
+ using DomPair = std::pair<MachineBasicBlock *, SlotIndex>;
+ SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+ // The total cost of all the back-copies for each ParentVNI.
+ SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
+  // The set of ParentVNI->ids for which hoisting back-copies is not
+  // beneficial in speed mode.
+ DenseSet<unsigned> NotToHoistSet;
+
+ // Find the nearest common dominator for parent values with multiple
+ // back-copies. If a single back-copy dominates, put it in DomPair.second.
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ assert(ParentVNI && "Parent not live at complement def");
+
+ // Don't hoist remats. The complement is probably going to disappear
+ // completely anyway.
+ if (Edit->didRematerialize(ParentVNI))
+ continue;
+
+ MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+
+ DomPair &Dom = NearestDom[ParentVNI->id];
+
+ // Keep directly defined parent values. This is either a PHI or an
+ // instruction in the complement range. All other copies of ParentVNI
+ // should be eliminated.
+ if (VNI->def == ParentVNI->def) {
+ LLVM_DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ Dom = DomPair(ValMBB, VNI->def);
+ continue;
+ }
+ // Skip the singly mapped values. There is nothing to gain from hoisting a
+ // single back-copy.
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
+ LLVM_DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ continue;
+ }
+
+ if (!Dom.first) {
+ // First time we see ParentVNI. VNI dominates itself.
+ Dom = DomPair(ValMBB, VNI->def);
+ } else if (Dom.first == ValMBB) {
+ // Two defs in the same block. Pick the earlier def.
+ if (!Dom.second.isValid() || VNI->def < Dom.second)
+ Dom.second = VNI->def;
+ } else {
+ // Different basic blocks. Check if one dominates.
+ MachineBasicBlock *Near =
+ MDT.findNearestCommonDominator(Dom.first, ValMBB);
+ if (Near == ValMBB)
+ // Def ValMBB dominates.
+ Dom = DomPair(ValMBB, VNI->def);
+ else if (Near != Dom.first)
+        // Neither dominates. Hoist to the common dominator; a new def is
+        // needed.
+ Dom = DomPair(Near, SlotIndex());
+ Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
+ }
+
+ LLVM_DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@'
+ << VNI->def << " for parent " << ParentVNI->id << '@'
+ << ParentVNI->def << " hoist to "
+ << printMBBReference(*Dom.first) << ' ' << Dom.second
+ << '\n');
+ }
+
+ // Insert the hoisted copies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ DomPair &Dom = NearestDom[i];
+ if (!Dom.first || Dom.second.isValid())
+ continue;
+ // This value needs a hoisted copy inserted at the end of Dom.first.
+ const VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
+ // Get a less loopy dominator than Dom.first.
+ Dom.first = findShallowDominator(Dom.first, DefMBB);
+ if (SpillMode == SM_Speed &&
+ MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
+ NotToHoistSet.insert(ParentVNI->id);
+ continue;
+ }
+ SlotIndex LSP = SA.getLastSplitPoint(Dom.first);
+ if (LSP <= ParentVNI->def) {
+ NotToHoistSet.insert(ParentVNI->id);
+ continue;
+ }
+ Dom.second = defFromParent(0, ParentVNI, LSP, *Dom.first,
+ SA.getLastSplitPointIter(Dom.first))->def;
+ }
+
+ // Remove redundant back-copies that are now known to be dominated by another
+ // def with the same value.
+ SmallVector<VNInfo*, 8> BackCopies;
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ const DomPair &Dom = NearestDom[ParentVNI->id];
+ if (!Dom.first || Dom.second == VNI->def ||
+ NotToHoistSet.count(ParentVNI->id))
+ continue;
+ BackCopies.push_back(VNI);
+ forceRecompute(0, *ParentVNI);
+ }
+
+ // If it is not beneficial to hoist all the BackCopies, simply remove
+ // redundant BackCopies in speed mode.
+ if (SpillMode == SM_Speed && !NotToHoistSet.empty())
+ computeRedundantBackCopies(NotToHoistSet, BackCopies);
+
+ removeBackCopies(BackCopies);
+}
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need LICalc.extend().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (const LiveRange::Segment &S : Edit->getParent()) {
+ LLVM_DEBUG(dbgs() << " blit " << S << ':');
+ VNInfo *ParentVNI = S.valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = S.start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = S.end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ LLVM_DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx << '('
+ << printReg(Edit->get(RegIdx)) << ')');
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+
+ // Check for a simply defined value that can be blitted directly.
+ ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
+ if (VNInfo *VNI = VFP.getPointer()) {
+ LLVM_DEBUG(dbgs() << ':' << VNI->id);
+ LI.addSegment(LiveInterval::Segment(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip values with forced recomputation.
+ if (VFP.getInt()) {
+ LLVM_DEBUG(dbgs() << "(recalc)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ LiveIntervalCalc &LIC = getLICalc(RegIdx);
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
+ SlotIndex BlockStart, BlockEnd;
+ std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped value");
+ LLVM_DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End)
+ LIC.setLiveOutValue(&*MBB, VNI);
+
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ LLVM_DEBUG(dbgs() << ">" << printMBBReference(*MBB));
+ BlockEnd = LIS.getMBBEndIdx(&*MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd)
+ LIC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
+ } else {
+ // This block needs a live-in value. The last block covered may not
+ // be live-out.
+ if (End < BlockEnd)
+ LIC.addLiveInBlock(LI, MDT[&*MBB], End);
+ else {
+ // Live-through, and we don't know the value.
+ LIC.addLiveInBlock(LI, MDT[&*MBB]);
+ LIC.setLiveOutValue(&*MBB, nullptr);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != S.end);
+ LLVM_DEBUG(dbgs() << '\n');
+ }
+
+ LICalc[0].calculateValues();
+ if (SpillMode)
+ LICalc[1].calculateValues();
+
+ return Skipped;
+}
+
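+/// removeDeadSegment - Return true if LR has no segment containing Def, or if
+/// the segment containing Def is a dead def (it ends at Def's dead slot), in
+/// which case the dead segment is also removed. Return false if the value
+/// defined at Def is live past its dead slot.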
+static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) {
+ const LiveRange::Segment *Seg = LR.getSegmentContaining(Def);
+ if (Seg == nullptr)
+ return true;
+ if (Seg->end != Def.getDeadSlot())
+ return false;
+ // This is a dead PHI. Remove it.
+ LR.removeSegment(*Seg, true);
+ return true;
+}
+
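+/// Extend LR to the end of each predecessor of B where the parent value,
+/// restricted to the lane mask LM, is live out. This supplies the incoming
+/// values for the PHI def at the start of B.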
+void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
+ LiveRange &LR, LaneBitmask LM,
+ ArrayRef<SlotIndex> Undefs) {
+ for (MachineBasicBlock *P : B.predecessors()) {
+ SlotIndex End = LIS.getMBBEndIdx(P);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ const LiveInterval &PLI = Edit->getParent();
+ // Need the cast because the inputs to ?: would otherwise be deemed
+ // "incompatible": SubRange vs LiveInterval.
+ const LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
+ : static_cast<const LiveRange &>(PLI);
+ if (PSR.liveAt(LastUse))
+ LIC.extend(LR, End, /*PhysReg=*/0, Undefs);
+ }
+}
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+
+ // Visit each PHI def slot in the parent live interval. If the def is dead,
+ // remove it. Otherwise, extend the live interval to reach the end indexes
+ // of all predecessor blocks.
+
+ const LiveInterval &ParentLI = Edit->getParent();
+ for (const VNInfo *V : ParentLI.valnos) {
+ if (V->isUnused() || !V->isPHIDef())
+ continue;
+
+ unsigned RegIdx = RegAssign.lookup(V->def);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ LiveIntervalCalc &LIC = getLICalc(RegIdx);
+ MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
+ if (!removeDeadSegment(V->def, LI))
+ extendPHIRange(B, LIC, LI, LaneBitmask::getAll(), /*Undefs=*/{});
+ }
+
+ SmallVector<SlotIndex, 4> Undefs;
+ LiveIntervalCalc SubLIC;
+
+ for (const LiveInterval::SubRange &PS : ParentLI.subranges()) {
+ for (const VNInfo *V : PS.valnos) {
+ if (V->isUnused() || !V->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(V->def);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ LiveInterval::SubRange &S = getSubRangeForMaskExact(PS.LaneMask, LI);
+ if (removeDeadSegment(V->def, S))
+ continue;
+
+ MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
+ SubLIC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ Undefs.clear();
+ LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes());
+ extendPHIRange(B, SubLIC, S, PS.LaneMask, Undefs);
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
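+  // A use of Edit->getReg() whose subrange liveness extension is deferred
+  // until all operands of the register have been rewritten (see the subrange
+  // comment further down).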
+ struct ExtPoint {
+ ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N)
+ : MO(O), RegIdx(R), Next(N) {}
+
+ MachineOperand MO;
+ unsigned RegIdx;
+ SlotIndex Next;
+ };
+
+ SmallVector<ExtPoint,4> ExtPoints;
+
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI.reg_operands(Edit->getReg()))) {
+ MachineInstr *MI = MO.getParent();
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
+ if (MI->isDebugValue()) {
+ LLVM_DEBUG(dbgs() << "Zapping " << *MI);
+ MO.setReg(0);
+ continue;
+ }
+
+ // <undef> operands don't really read the register, so it doesn't matter
+ // which register we choose. When the use operand is tied to a def, we must
+ // use the same register as the def, so just do that always.
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ if (MO.isDef() || MO.isUndef())
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ MO.setReg(LI.reg());
+ LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent())
+ << '\t' << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges || MO.isUndef())
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+ // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ if (!Edit->getParent().liveAt(Idx.getPrevSlot()))
+ continue;
+ } else {
+ assert(MO.isUse());
+ bool IsEarlyClobber = false;
+ if (MO.isTied()) {
+        // We want to extend the live range to the `e` slot rather than the
+        // `r` slot if the tied def is an early clobber, because the `e` slot
+        // is already contained in the live range of the early-clobber
+        // tied-def operand. For example:
+        //   0  %0 = ...
+        //   16 early-clobber %0 = Op %0 (tied-def 0), ...
+        //   32 ... = Op %0
+        // Before the extension:
+        //   %0 = [0r, 0d) [16e, 32d)
+        // Here we want to extend [0r, 0d) up to 16e, not 16r: if we used 16r,
+        // nothing would be extended because that point is already contained
+        // in [16e, 32d).
+ unsigned OpIdx = MO.getOperandNo();
+ unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx);
+ const MachineOperand &DefOp = MI->getOperand(DefOpIdx);
+ IsEarlyClobber = DefOp.isEarlyClobber();
+ }
+
+ Idx = Idx.getRegSlot(IsEarlyClobber);
+ }
+
+ SlotIndex Next = Idx;
+ if (LI.hasSubRanges()) {
+ // We have to delay extending subranges until we have seen all operands
+ // defining the register. This is because a <def,read-undef> operand
+ // will create an "undef" point, and we cannot extend any subranges
+ // until all of them have been accounted for.
+ if (MO.isUse())
+ ExtPoints.push_back(ExtPoint(MO, RegIdx, Next));
+ } else {
+ LiveIntervalCalc &LIC = getLICalc(RegIdx);
+ LIC.extend(LI, Next, 0, ArrayRef<SlotIndex>());
+ }
+ }
+
+ for (ExtPoint &EP : ExtPoints) {
+ LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx));
+ assert(LI.hasSubRanges());
+
+ LiveIntervalCalc SubLIC;
+ Register Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
+ LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LM).none())
+ continue;
+      // The problem here is that the new register may have been created for
+      // a partially defined original register. For example:
+ // %0:subreg_hireg<def,read-undef> = ...
+ // ...
+ // %1 = COPY %0
+ if (S.empty())
+ continue;
+ SubLIC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ SmallVector<SlotIndex, 4> Undefs;
+ LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes());
+ SubLIC.extend(S, EP.Next, 0, Undefs);
+ }
+ }
+
+ for (Register R : *Edit) {
+ LiveInterval &LI = LIS.getInterval(R);
+ if (!LI.hasSubRanges())
+ continue;
+ LI.clear();
+ LI.removeEmptySubRanges();
+ LIS.constructMainRangeFromSubranges(LI);
+ }
+}
+
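+/// deleteRematVictims - Find defs in the new intervals whose value was
+/// rematerialized everywhere, leaving only a dead def behind. Mark the
+/// register dead on the defining instruction and queue the instruction for
+/// deletion once all of its defs are dead.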
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (const Register &R : *Edit) {
+ LiveInterval *LI = &LIS.getInterval(R);
+ for (const LiveRange::Segment &S : LI->segments) {
+ // Dead defs end at the dead slot.
+ if (S.end != S.valno->def.getDeadSlot())
+ continue;
+ if (S.valno->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg(), &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
+ }
+ }
+
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead, std::nullopt);
+}
+
+void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) {
+ // Fast-path for common case.
+ if (!ParentVNI.isPHIDef()) {
+ for (unsigned I = 0, E = Edit->size(); I != E; ++I)
+ forceRecompute(I, ParentVNI);
+ return;
+ }
+
+ // Trace value through phis.
+ SmallPtrSet<const VNInfo *, 8> Visited; ///< whether VNI was/is in worklist.
+ SmallVector<const VNInfo *, 4> WorkList;
+ Visited.insert(&ParentVNI);
+ WorkList.push_back(&ParentVNI);
+
+ const LiveInterval &ParentLI = Edit->getParent();
+ const SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ do {
+ const VNInfo &VNI = *WorkList.back();
+ WorkList.pop_back();
+ for (unsigned I = 0, E = Edit->size(); I != E; ++I)
+ forceRecompute(I, VNI);
+ if (!VNI.isPHIDef())
+ continue;
+
+ MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(VNI.def);
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
+ VNInfo *PredVNI = ParentLI.getVNInfoBefore(PredEnd);
+ assert(PredVNI && "Value available in PhiVNI predecessor");
+ if (Visited.insert(PredVNI).second)
+ WorkList.push_back(PredVNI);
+ }
+ } while(!WorkList.empty());
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (const VNInfo *ParentVNI : Edit->getParent().valnos) {
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ defValue(RegIdx, ParentVNI, ParentVNI->def, true);
+
+ // Force rematted values to be recomputed everywhere.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ forceRecomputeVNI(*ParentVNI);
+ }
+
+ // Hoist back-copies to the complement interval when in spill mode.
+ switch (SpillMode) {
+ case SM_Partition:
+ // Leave all back-copies as is.
+ break;
+ case SM_Size:
+ case SM_Speed:
+ // hoistCopies will behave differently between size and speed.
+ hoistCopies();
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (Register Reg : *Edit) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LI.removeEmptySubRanges();
+ LI.RenumberValues();
+ }
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ auto Seq = llvm::seq<unsigned>(0, Edit->size());
+ LRMap->assign(Seq.begin(), Seq.end());
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+    // Don't use iterators; they are invalidated by create() below.
+ Register VReg = Edit->get(i);
+ LiveInterval &LI = LIS.getInterval(VReg);
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(LI, SplitLIs);
+ Register Original = VRM.getOriginal(VReg);
+ for (LiveInterval *SplitLI : SplitLIs)
+ VRM.setIsSplitFromReg(SplitLI->reg(), Original);
+
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), VRAI);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
+}
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
+ bool SingleInstrs) const {
+ // Always split for multiple instructions.
+ if (!BI.isOneInstr())
+ return true;
+ // Don't split for single instructions unless explicitly requested.
+ if (!SingleInstrs)
+ return false;
+ // Splitting a live-through range always makes progress.
+ if (BI.LiveIn && BI.LiveOut)
+ return true;
+ // No point in isolating a copy. It has no register class constraints.
+ if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ return false;
+ // Finally, don't isolate an end point that was created by earlier splits.
+ return isOriginalEndpoint(BI.FirstInstr);
+}
+
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB);
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastInstr));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastInstr);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support global live range splitting, where a global
+// algorithm decides which interval to use for each CFG edge. They insert
+// split points and color intervals in basic blocks while avoiding
+// interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
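+//
+// As a rough usage sketch (not code from this file; SA, LIS, VRM, MDT, MBFI,
+// VRAI and LRE stand for the usual analyses and a LiveRangeEdit, and UseIdx /
+// LastUseIdx are placeholder slot indexes), a client typically drives the
+// editor like this:
+//
+//   SplitEditor SE(SA, LIS, VRM, MDT, MBFI, VRAI);
+//   SE.reset(LRE, SplitEditor::SM_Partition);
+//   SE.openIntv();
+//   SlotIndex Start = SE.enterIntvBefore(UseIdx);
+//   SE.useIntv(Start, SE.leaveIntvAfter(LastUseIdx));
+//   SE.finish();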
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ LLVM_DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block");
+ assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf");
+ assert((!EnterAfter || EnterAfter >= Start) && "Interference before block");
+
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+
+ if (!IntvOut) {
+ LLVM_DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ LLVM_DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ LLVM_DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+ assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf");
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ LLVM_DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx;
+ if (LeaveBefore && LeaveBefore < LSP) {
+ Idx = enterIntvBefore(LeaveBefore);
+ useIntv(Idx, Stop);
+ } else {
+ Idx = enterIntvAtEnd(*MBB);
+ }
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';'
+ << Stop << "), uses " << BI.FirstInstr << '-'
+ << BI.LastInstr << ", reg-in " << IntvIn
+ << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
+ LLVM_DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastInstr);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB);
+
+ if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastInstr < LSP) {
+ LLVM_DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ LLVM_DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ LLVM_DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastInstr < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastInstr);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastInstr);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';'
+ << Stop << "), uses " << BI.FirstInstr << '-'
+ << BI.LastInstr << ", reg-out " << IntvOut
+ << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB);
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
+ LLVM_DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstInstr, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
+ LLVM_DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ LLVM_DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
+ useIntv(From, Idx);
+}
+
+void SplitAnalysis::BlockInfo::print(raw_ostream &OS) const {
+ OS << "{" << printMBBReference(*MBB) << ", "
+ << "uses " << FirstInstr << " to " << LastInstr << ", "
+ << "1st def " << FirstDef << ", "
+ << (LiveIn ? "live in" : "dead in") << ", "
+ << (LiveOut ? "live out" : "dead out") << "}";
+}
+
+void SplitAnalysis::BlockInfo::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 000000000000..f764ffd4750c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,557 @@
+//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H
+#define LLVM_LIB_CODEGEN_SPLITKIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Compiler.h"
+#include <utility>
+
+namespace llvm {
+
+class LiveInterval;
+class LiveRange;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineBlockFrequencyInfo;
+class MachineDominatorTree;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VirtRegAuxInfo;
+
+/// Determines the latest safe point in a block in which we can insert a split,
+/// spill, or other instruction related to CurLI.
+class LLVM_LIBRARY_VISIBILITY InsertPointAnalysis {
+private:
+ const LiveIntervals &LIS;
+
+ /// Last legal insert point in each basic block in the current function.
+ /// The first entry is the first terminator, the second entry is the
+ /// last valid point to insert a split or spill for a variable that is
+ /// live into a landing pad or inlineasm_br successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastInsertPoint;
+
+ SlotIndex computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB);
+
+public:
+ InsertPointAnalysis(const LiveIntervals &lis, unsigned BBNum);
+
+  /// Return the base index of the last valid insert point for \p CurLI in \p MBB.
+ SlotIndex getLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ // Inline the common simple case.
+ if (LastInsertPoint[Num].first.isValid() &&
+ !LastInsertPoint[Num].second.isValid())
+ return LastInsertPoint[Num].first;
+ return computeLastInsertPoint(CurLI, MBB);
+ }
+
+  /// Returns the last insert point as an iterator for \p CurLI in \p MBB.
+ MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB);
+
+  /// Return the base index of the first insert point in \p MBB.
+ SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) {
+ SlotIndex Res = LIS.getMBBStartIdx(&MBB);
+ if (!MBB.empty()) {
+ MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MII != MBB.end())
+ Res = LIS.getInstructionIndex(*MII);
+ }
+ return Res;
+ }
+
+};
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
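+///
+/// A hypothetical sketch of driving the analysis (VRM, LIS, MLI, CurLI and
+/// Candidates are placeholders owned by the caller, not part of this header):
+/// \code
+///   SplitAnalysis SA(VRM, LIS, MLI);
+///   SA.analyze(&CurLI);
+///   SmallVector<SplitAnalysis::BlockInfo, 8> Candidates;
+///   for (const SplitAnalysis::BlockInfo &BI : SA.getUseBlocks())
+///     if (SA.shouldSplitSingleBlock(BI, /*SingleInstrs=*/false))
+///       Candidates.push_back(BI);  // blocks worth isolating via SplitEditor
+/// \endcode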
+class LLVM_LIBRARY_VISIBILITY SplitAnalysis {
+public:
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstInstr; ///< First instr accessing current reg.
+ SlotIndex LastInstr; ///< Last instr accessing current reg.
+ SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex().
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstInstr, LastInstr);
+ }
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+ };
+
+private:
+ // Current live interval.
+ const LiveInterval *CurLI = nullptr;
+
+ /// Insert Point Analysis.
+ InsertPointAnalysis IPA;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks = 0u;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks = 0u;
+
+  // Summarize statistics by counting instructions using CurLI.
+ void analyzeUses();
+
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ void calcLiveBlockInfo();
+
+public:
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+  /// This includes both use and def operands, at most one entry per instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
+ using BlockPtrSet = SmallPtrSet<const MachineBasicBlock *, 16>;
+
+ /// shouldSplitSingleBlock - Returns true if it would help to create a local
+ /// live range for the instructions in BI. There is normally no benefit to
+ /// creating a live range for a single instruction, but it does enable
+ /// register class inflation if the instruction has a restricted register
+ /// class.
+ ///
+ /// @param BI The block to be isolated.
+ /// @param SingleInstrs True when single instructions should be isolated.
+ bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
+
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ return IPA.getLastInsertPoint(*CurLI, *MF.getBlockNumbered(Num));
+ }
+
+ SlotIndex getLastSplitPoint(MachineBasicBlock *BB) {
+ return IPA.getLastInsertPoint(*CurLI, *BB);
+ }
+
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
+ return IPA.getLastInsertPointIter(*CurLI, *BB);
+ }
+
+ SlotIndex getFirstSplitPoint(unsigned Num) {
+ return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
+ }
+};
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Repeat from the openIntv step for each additional interval.
+/// - Rewrite instructions with finish().
+///
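+/// A hypothetical end-to-end sketch of this protocol (the SplitAnalysis,
+/// LiveRangeEdit, and SlotIndex values below are placeholders supplied by the
+/// caller, not part of this header):
+///
+/// \code
+///   SplitEditor SE(SA, LIS, VRM, MDT, MBFI, VRAI);
+///   SE.reset(LREdit);                    // LREdit: LiveRangeEdit for the parent
+///   SE.openIntv();                       // interval 1; 0 is the complement
+///   SlotIndex Enter = SE.enterIntvBefore(FirstUseIdx);
+///   SlotIndex Leave = SE.leaveIntvAfter(LastUseIdx);
+///   SE.useIntv(Enter, Leave);
+///   SE.finish();                         // build complement, rewrite uses
+/// \endcode
+///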
+class LLVM_LIBRARY_VISIBILITY SplitEditor {
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+ VirtRegAuxInfo &VRAI;
+
+public:
+ /// ComplementSpillMode - Select how the complement live range should be
+ /// created. SplitEditor automatically creates interval 0 to contain
+ /// anything that isn't added to another interval. This complement interval
+ /// can get quite complicated, and it can sometimes be an advantage to allow
+ /// it to overlap the other intervals. If it is going to spill anyway, no
+ /// registers are wasted by keeping a value in two places at the same time.
+ enum ComplementSpillMode {
+    /// SM_Partition (default) - Try to create the complement interval so it
+ /// doesn't overlap any other intervals, and the original interval is
+ /// partitioned. This may require a large number of back copies and extra
+ /// PHI-defs. Only segments marked with overlapIntv will be overlapping.
+ SM_Partition,
+
+ /// SM_Size - Overlap intervals to minimize the number of inserted COPY
+ /// instructions. Copies to the complement interval are hoisted to their
+ /// common dominator, so only one COPY is required per value in the
+ /// complement interval. This also means that no extra PHI-defs need to be
+ /// inserted in the complement interval.
+ SM_Size,
+
+ /// SM_Speed - Overlap intervals to minimize the expected execution
+ /// frequency of the inserted copies. This is very similar to SM_Size, but
+ /// the complement interval may get some extra PHI-defs.
+ SM_Speed
+ };
+
+private:
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit = nullptr;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx = 0;
+
+ /// The current spill mode, selected by reset().
+ ComplementSpillMode SpillMode = SM_Partition;
+
+ using RegAssignMap = IntervalMap<SlotIndex, unsigned>;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ using ValueForcePair = PointerIntPair<VNInfo *, 1>;
+ using ValueMap = DenseMap<std::pair<unsigned, unsigned>, ValueForcePair>;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. (Null, false) - the value is mapped to multiple values in
+ /// Edit.get(RegIdx). Each value is represented by a minimal live range at
+ /// its def. The full live range can be inferred exactly from the range
+ /// of RegIdx in RegAssign.
+ /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and
+ /// the live range must be recomputed using ::extend().
+ /// 4. (VNI, false) The value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ /// LICalc - Cache for computing live ranges and SSA update. Each instance
+ /// can only handle non-overlapping live ranges, so use a separate
+ /// LiveIntervalCalc instance for the complement interval when in spill mode.
+ LiveIntervalCalc LICalc[2];
+
+ /// getLICalc - Return the LICalc to use for RegIdx. In spill mode, the
+ /// complement interval can overlap the other intervals, so it gets its own
+ /// LICalc instance. When not in spill mode, all intervals can share one.
+ LiveIntervalCalc &getLICalc(unsigned RegIdx) {
+ return LICalc[SpillMode != SM_Partition && RegIdx != 0];
+ }
+
+ /// Add a segment to the interval LI for the value number VNI. If LI has
+ /// subranges, corresponding segments will be added to them as well, but
+  /// with newly created value numbers. If Original is true, a dead def will
+  /// only be added to a subrange of LI if the corresponding subrange of the
+ /// original interval has a def at this index. Otherwise, all subranges
+ /// of LI will be updated.
+ void addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original);
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map. The value being defined may either come from rematerialization
+ /// (or an inserted copy), or it may be coming from the original interval.
+ /// The parameter Original should be true in the latter case, otherwise
+ /// it should be false.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx,
+ bool Original);
+
+ /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+ /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+ /// This is used for values whose live range doesn't match RegAssign exactly.
+ /// They could have rematerialized, or back-copies may have been moved.
+ void forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI);
+
+ /// Calls forceRecompute() on any affected regidx and on ParentVNI
+ /// predecessors in case of a phi definition.
+ void forceRecomputeVNI(const VNInfo &ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
+ SlotIndex UseIdx, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+  /// removeBackCopies - Remove the copy instructions that define the values
+ /// in the vector in the complement interval.
+ void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
+
+  /// findShallowDominator - Returns the least busy dominator of MBB that is
+ /// also dominated by DefMBB. Busy is measured by loop depth.
+ MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB);
+
+ /// Find out all the backCopies dominated by others.
+ void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
+ SmallVectorImpl<VNInfo *> &BackCopies);
+
+ /// Hoist back-copies to the complement interval. It tries to hoist all
+ /// the back-copies to one BB if it is beneficial, or else simply remove
+ /// redundant backcopies dominated by others.
+ void hoistCopies();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// Live range @p LR corresponding to the lane Mask @p LM has a live
+ /// PHI def at the beginning of block @p B. Extend the range @p LR of
+ /// all predecessor values that reach this def. If @p LR is a subrange,
+ /// the array @p Undefs is the set of all locations where it is undefined
+ /// via <def,read-undef> in other subranges for the same register.
+ void extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
+ LiveRange &LR, LaneBitmask LM,
+ ArrayRef<SlotIndex> Undefs);
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
+
+ /// Add a copy instruction copying \p FromReg to \p ToReg before
+ /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
+ /// necessary to construct a sequence of copies to cover it exactly.
+ SlotIndex buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ bool Late, unsigned RegIdx);
+
+ SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg,
+ MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals.
+ SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM,
+ MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI,
+ VirtRegAuxInfo &VRAI);
+
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
+
+ /// Create a new virtual register and live interval.
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in MBB should use OpenLI.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use OpenLI.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+  /// overlapIntv - Indicate that all instructions in range should use the open
+  /// interval, unless End has a tied-def use of the register, in which case
+  /// the complement interval is used instead. The complement interval is left
+  /// live over the range as well.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
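+  /// A hypothetical sketch of the intended call pattern (the SlotIndex values
+  /// here are placeholders; compare the use in splitRegInBlock):
+  /// \code
+  ///   SlotIndex Start = leaveIntvBefore(LastSplitPoint);
+  ///   overlapIntv(Start, LastUseIdx);
+  /// \endcode
+  ///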
+ void overlapIntv(SlotIndex Start, SlotIndex End);
+
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = nullptr);
+
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
+
+ // ===--- High level methods ---===
+
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SPLITKIT_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 000000000000..66b9086e1d88
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,1379 @@
+//===- StackColoring.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+// offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-coloring"
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user. This option
+/// is treated as overriding LifetimeStartOnFirstUse below.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
+
+/// Enable enhanced dataflow scheme for lifetime analysis (treat first
+/// use of stack slot as start of slot lifetime, as opposed to looking
+/// for LIFETIME_START marker). See "Implementation notes" below for
+/// more info.
+static cl::opt<bool>
+LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
+ cl::init(true), cl::Hidden,
+ cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
+
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region.");
+
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+// void bar(char *, int);
+// void foo(bool var) {
+// A: {
+// char z[4096];
+// bar(z, 0);
+// }
+//
+// char *p;
+// char x[4096];
+// char y[4096];
+// if (var) {
+// p = x;
+// } else {
+// bar(y, 1);
+// p = y + 1024;
+// }
+// B:
+// bar(p, 2);
+// }
+//
+// Naively-compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` are used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So at no time are two
+// of the stack slots used together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+// void foo(bool var) { // equivalent
+// char x[4096];
+// char *p;
+// bar(x, 0);
+// if (var) {
+// p = x;
+// } else {
+// bar(x, 1);
+// p = x + 1024;
+// }
+// bar(p, 2);
+// }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
+//
+// Implementation Notes:
+// ---------------------
+//
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter 2 calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+// lifetime intrinsic refers to that stack slot, in which case
+// it is marked as *in-scope*.
+// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+// the stack slot is overwritten with `undef`.
+// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+// L4) on function exit, all stack slots are marked as *out-of-scope*.
+// L5) `lifetime.end` is a no-op when called on a slot that is already
+// *out-of-scope*.
+// L6) memory accesses to *out-of-scope* stack slots are UB.
+// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+// are invalidated, unless the slot is "degenerate". This is used to
+// justify not marking slots as in-use until the pointer to them is
+// used, but feels a bit hacky in the presence of things like LICM. See
+// the "Degenerate Slots" section for more details.
+//
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has a live and non-undef content
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference precisely, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between 2 stack slots, only *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
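+//
+// As a minimal illustrative sketch of that check (these names are
+// placeholders, not the ones used by this pass): given each slot's live
+// interval and the points where it may start being in-use, two slots can
+// only interfere if some start point of one lies inside a live segment of
+// the other:
+//
+//   static bool slotsInterfere(const LiveInterval &S, ArrayRef<SlotIndex> SStarts,
+//                              const LiveInterval &T, ArrayRef<SlotIndex> TStarts) {
+//     for (SlotIndex Idx : SStarts)
+//       if (T.liveAt(Idx))    // S starts while T is in-use.
+//         return true;
+//     for (SlotIndex Idx : TStarts)
+//       if (S.liveAt(Idx))    // T starts while S is in-use.
+//         return true;
+//     return false;
+//   }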
+//
+// Exact first use:
+// ----------------
+//
+// Consider the following motivating example:
+//
+// int foo() {
+// char b1[1024], b2[1024];
+// if (...) {
+// char b3[1024];
+// <uses of b1, b3>;
+// return x;
+// } else {
+// char b4[1024], b5[1024];
+// <uses of b2, b4, b5>;
+// return y;
+// }
+// }
+//
+// In the code above, "b3" and "b4" are declared in distinct lexical
+// scopes, meaning that it is easy to prove that they can share the
+// same stack slot. Variables "b1" and "b2" are declared in the same
+// scope, meaning that from a lexical point of view, their lifetimes
+// overlap. From a control-flow point of view, however, the two
+// variables are accessed in disjoint regions of the CFG, thus it
+// should be possible for them to share the same stack slot. An ideal
+// stack allocation for the function above would look like:
+//
+// slot 0: b1, b2
+// slot 1: b3, b4
+// slot 2: b5
+//
+// Achieving this allocation is tricky, however, due to the way
+// lifetime markers are inserted. Here is a simplified view of the
+// control flow graph for the code above:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------ block 2 -------+
+// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
+// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
+// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
+// +-----------------------+ +-----------------------+
+// \. /.
+// +------ block 3 -------+
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +-----------------------+
+//
+// If we create live intervals for the variables above strictly based
+// on the lifetime markers, we'll get the set of intervals on the
+// left. If we ignore the lifetime start markers and instead treat a
+// variable's lifetime as beginning with the first reference to the
+// var, then we get the intervals on the right.
+//
+// LIFETIME_START First Use
+// b1: [0,9] [3,4] [8,9]
+// b2: [0,9] [6,9]
+// b3: [2,4] [3,4]
+// b4: [5,7] [6,7]
+// b5: [5,7] [6,7]
+//
+// For the intervals on the left, the best we can do is overlap two
+// variables (b3 and b4, for example); this gives us a stack size of
+// 4*1024 bytes, not ideal. When treating first-use as the start of a
+// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+// byte stack (better).
+//
+// Degenerate Slots:
+// -----------------
+//
+// Relying entirely on first-use of stack slots is problematic,
+// however, due to the fact that optimizations can sometimes migrate
+// uses of a variable outside of its lifetime start/end region. Here
+// is an example:
+//
+// int bar() {
+// char b1[1024], b2[1024];
+// if (...) {
+// <uses of b2>
+// return y;
+// } else {
+// <uses of b1>
+// while (...) {
+// char b3[1024];
+// <uses of b3>
+// }
+// }
+// }
+//
+// Before optimization, the control flow graph for the code above
+// might look like the following:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------- block 2 -------+
+// 2| <uses of b2> | 3| <uses of b1> |
+// +-----------------------+ +-----------------------+
+// | |
+// | +------- block 3 -------+ <-\.
+// | 4| <while condition> | |
+// | +-----------------------+ |
+// | / | |
+// | / +------- block 4 -------+
+// \ / 5| LIFETIME_START b3 | |
+// \ / 6| <uses of b3> | |
+// \ / 7| LIFETIME_END b3 | |
+// \ | +------------------------+ |
+// \ | \ /
+// +------ block 5 -----+ \---------------
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +---------------------+
+//
+// During optimization, however, it can happen that an instruction
+// computing an address in "b3" (for example, a loop-invariant GEP) is
+// hoisted up out of the loop from block 4 to block 2. [Note that
+// this is not an actual load from the stack, only an instruction that
+// computes the address to be loaded]. If this happens, there is now a
+// path leading from the first use of b3 to the return instruction
+// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
+// now larger than if we were computing live intervals strictly based
+// on lifetime markers. In the example above, this lengthened lifetime
+// would mean that it would appear illegal to overlap b3 with b2.
+//
+// To deal with such cases, the code in ::collectMarkers() below
+// tries to identify "degenerate" slots -- those slots where on a single
+// forward pass through the CFG we encounter a first reference to slot
+// K before we hit the slot K lifetime start marker. For such slots,
+// we fall back on using the lifetime start marker as the beginning of
+// the variable's lifetime. NB: with this implementation, slots can
+// appear degenerate in cases where there is unstructured control flow:
+//
+// if (q) goto mid;
+// if (x > 9) {
+// int b[100];
+// memcpy(&b[0], ...);
+// mid: b[k] = ...;
+// abc(&b);
+// }
+//
+// If, in the RPO ordering chosen to walk the CFG, we happen to visit the b[k]
+// use before visiting the memcpy block (which contains the lifetime start
+// for "b"), then it will appear that 'b' has a degenerate lifetime.
+//
+// Handle Windows Exception with LifetimeStartOnFirstUse:
+// -------------------------------------------------------
+//
+// There was a bug when using LifetimeStartOnFirstUse on win32:
+// class Type1 {
+// ...
+// ~Type1(){ write memory;}
+// }
+// ...
+// try{
+// Type1 V
+// ...
+// } catch (Type2 X){
+// ...
+// }
+// For variable X in catch(X), we put the pointer slot pX=&(&X) into
+// ConservativeSlots to prevent using LifetimeStartOnFirstUse, because pX may
+// be merged with object V, whose destructor may run after pX has been
+// implicitly written. All of this happens inside the C++ EH runtime
+// (through CxxThrowException), so it cannot easily be checked at the IR level.
+//
+// The load of pX, which has no obvious corresponding write in the IR, is
+// usually the first load MI in the EH pad, for example:
+// bb.x.catch.i (landing-pad, ehfunclet-entry):
+// ; predecessors: %bb...
+// successors: %bb...
+// %n:gr32 = MOV32rm %stack.pX ...
+// ...
+// The Type2** %stack.pX will only be written in EH runtime libs, so we
+// check the StoreSlots to screen it out.
+
+namespace {
+
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+ MachineFrameInfo *MFI = nullptr;
+ MachineFunction *MF = nullptr;
+
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+    /// Which slots BEGIN in each basic block.
+ BitVector Begin;
+
+    /// Which slots END in each basic block.
+ BitVector End;
+
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+ /// Maps active slots (per bit) for each basic block.
+ using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
+ LivenessMap BlockLiveness;
+
+  /// Maps basic blocks to their serial numbers.
+ DenseMap<const MachineBasicBlock *, int> BasicBlocks;
+
+  /// Maps serial numbers back to basic blocks.
+ SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
+
+  /// Maps slots to their use interval. Outside of this interval, slot
+ /// values are either dead or `undef` and they will not be written to.
+ SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+
+ /// Maps slots to the points where they can become in-use.
+ SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
+
+ /// VNInfo is used for the construction of LiveIntervals.
+ VNInfo::Allocator VNInfoAllocator;
+
+ /// SlotIndex analysis object.
+ SlotIndexes *Indexes = nullptr;
+
+ /// The list of lifetime markers found. These markers are to be removed
+ /// once the coloring is done.
+ SmallVector<MachineInstr*, 8> Markers;
+
+ /// Record the FI slots for which we have seen some sort of
+ /// lifetime marker (either start or end).
+ BitVector InterestingSlots;
+
+ /// FI slots that need to be handled conservatively (for these
+ /// slots lifetime-start-on-first-use is disabled).
+ BitVector ConservativeSlots;
+
+ /// Record the FI slots referenced by a 'may write to memory'.
+ BitVector StoreSlots;
+
+ /// Number of iterations taken during data flow analysis.
+ unsigned NumIterations;
+
+public:
+ static char ID;
+
+ StackColoring() : MachineFunctionPass(ID) {
+ initializeStackColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &Func) override;
+
+private:
+ /// Used in collectMarkers
+ using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
+
+ /// Debug.
+ void dump() const;
+ void dumpIntervals() const;
+ void dumpBB(MachineBasicBlock *MBB) const;
+ void dumpBV(const char *tag, const BitVector &BV) const;
+
+ /// Removes all of the lifetime marker instructions from the function.
+ /// \returns true if any markers were removed.
+ bool removeAllMarkers();
+
+ /// Scan the machine function and find all of the lifetime markers.
+ /// Record the findings in the BEGIN and END vectors.
+ /// \returns the number of markers found.
+ unsigned collectMarkers(unsigned NumSlot);
+
+ /// Perform the dataflow calculation and calculate the lifetime for each of
+  /// the slots, based on the BEGIN/END vectors. Set the LiveIn and LiveOut
+  /// bit vectors that represent which stack slots are live coming into and
+  /// out of blocks.
+ void calculateLocalLiveness();
+
+ /// Returns TRUE if we're using the first-use-begins-lifetime method for
+ /// this slot (if FALSE, then the start marker is treated as start of lifetime).
+ bool applyFirstUse(int Slot) {
+ if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
+ return false;
+ if (ConservativeSlots.test(Slot))
+ return false;
+ return true;
+ }
+
+ /// Examines the specified instruction and returns TRUE if the instruction
+ /// represents the start or end of an interesting lifetime. The slot or slots
+ /// starting or ending are added to the vector "slots" and "isStart" is set
+ /// accordingly.
+ /// \returns True if inst contains a lifetime start or end
+ bool isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart);
+
+ /// Construct the LiveIntervals for the slots.
+ void calculateLiveIntervals(unsigned NumSlots);
+
+ /// Go over the machine function and change instructions which use stack
+ /// slots to use the joint slots.
+ void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+ /// The input program may contain instructions which are not inside lifetime
+ /// markers. This can happen due to a bug in the compiler or due to a bug in
+ /// user code (for example, returning a reference to a local variable).
+ /// This procedure checks all of the instructions in the function and
+ /// invalidates lifetime ranges which do not contain all of the instructions
+ /// which access that frame slot.
+ void removeInvalidSlotRanges();
+
+ /// Map entries which point to other entries to their destination.
+ /// A->B->C becomes A->C.
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
+ "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
+ "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
+ const BitVector &BV) const {
+ dbgs() << tag << " : { ";
+ for (unsigned I = 0, E = BV.size(); I != E; ++I)
+ dbgs() << BV.test(I) << " ";
+ dbgs() << "}\n";
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
+ LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
+
+ dumpBV("BEGIN", BlockInfo.Begin);
+ dumpBV("END", BlockInfo.End);
+ dumpBV("LIVE_IN", BlockInfo.LiveIn);
+ dumpBV("LIVE_OUT", BlockInfo.LiveOut);
+}
+
+LLVM_DUMP_METHOD void StackColoring::dump() const {
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+ dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
+ << MBB->getName() << "]\n";
+ dumpBB(MBB);
+ }
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
+ for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
+ dbgs() << "Interval[" << I << "]:\n";
+ Intervals[I]->dump();
+ }
+}
+#endif
+
+static inline int getStartOrEndSlot(const MachineInstr &MI)
+{
+ assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
+ "Expected LIFETIME_START or LIFETIME_END op");
+ const MachineOperand &MO = MI.getOperand(0);
+ int Slot = MO.getIndex();
+ if (Slot >= 0)
+ return Slot;
+ return -1;
+}
+
+// At the moment the only way to end a variable lifetime is with
+// a LIFETIME_END op (which can't also contain a start). If things
+// change and the IR allows for a single inst that both begins
+// and ends lifetime(s), this interface will need to be reworked.
+bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart) {
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ int Slot = getStartOrEndSlot(MI);
+ if (Slot < 0)
+ return false;
+ if (!InterestingSlots.test(Slot))
+ return false;
+ slots.push_back(Slot);
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ isStart = false;
+ return true;
+ }
+ if (!applyFirstUse(Slot)) {
+ isStart = true;
+ return true;
+ }
+ } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
+ if (!MI.isDebugInstr()) {
+ bool found = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int Slot = MO.getIndex();
+ if (Slot<0)
+ continue;
+ if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
+ slots.push_back(Slot);
+ found = true;
+ }
+ }
+ if (found) {
+ isStart = true;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+ unsigned MarkersFound = 0;
+ BlockBitVecMap SeenStartMap;
+ InterestingSlots.clear();
+ InterestingSlots.resize(NumSlot);
+ ConservativeSlots.clear();
+ ConservativeSlots.resize(NumSlot);
+ StoreSlots.clear();
+ StoreSlots.resize(NumSlot);
+
+ // number of start and end lifetime ops for each slot
+ SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
+ SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
+ SmallVector<int, 8> NumLoadInCatchPad(NumSlot, 0);
+
+ // Step 1: collect markers and populate the "InterestingSlots"
+ // and "ConservativeSlots" sets.
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+ // Compute the set of slots for which we've seen a START marker but have
+ // not yet seen an END marker at this point in the walk (e.g. on entry
+ // to this bb).
+ BitVector BetweenStartEnd;
+ BetweenStartEnd.resize(NumSlot);
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
+ if (I != SeenStartMap.end()) {
+ BetweenStartEnd |= I->second;
+ }
+ }
+
+ // Walk the instructions in the block to look for start/end ops.
+ for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ int Slot = getStartOrEndSlot(MI);
+ if (Slot < 0)
+ continue;
+ InterestingSlots.set(Slot);
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
+ BetweenStartEnd.set(Slot);
+ NumStartLifetimes[Slot] += 1;
+ } else {
+ BetweenStartEnd.reset(Slot);
+ NumEndLifetimes[Slot] += 1;
+ }
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ LLVM_DEBUG(dbgs() << "Found a lifetime ");
+ LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
+ ? "start"
+ : "end"));
+ LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
+ LLVM_DEBUG(dbgs()
+ << " with allocation: " << Allocation->getName() << "\n");
+ }
+ Markers.push_back(&MI);
+ MarkersFound += 1;
+ } else {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int Slot = MO.getIndex();
+ if (Slot < 0)
+ continue;
+ if (! BetweenStartEnd.test(Slot)) {
+ ConservativeSlots.set(Slot);
+ }
+          // Here we check StoreSlots to screen out the catch pointer. For more
+          // information, please refer to "Handle Windows Exception with
+ // LifetimeStartOnFirstUse" at the head of this file.
+ if (MI.mayStore())
+ StoreSlots.set(Slot);
+ if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad())
+ NumLoadInCatchPad[Slot] += 1;
+ }
+ }
+ }
+ BitVector &SeenStart = SeenStartMap[MBB];
+ SeenStart |= BetweenStartEnd;
+ }
+ if (!MarkersFound) {
+ return 0;
+ }
+
+ // 1) PR27903: slots with multiple start or end lifetime ops are not
+ // safe to enable for "lifetime-start-on-first-use".
+ // 2) And also not safe for variable X in catch(X) in windows.
+ for (unsigned slot = 0; slot < NumSlot; ++slot) {
+ if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 ||
+ (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot)))
+ ConservativeSlots.set(slot);
+ }
+ LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
+
+ // Step 2: compute begin/end sets for each block
+
+ // NOTE: We use a depth-first iteration to ensure that we obtain a
+ // deterministic numbering.
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+ // Assign a serial number to this basic block.
+ BasicBlocks[MBB] = BasicBlockNumbering.size();
+ BasicBlockNumbering.push_back(MBB);
+
+ // Keep a reference to avoid repeated lookups.
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
+
+ BlockInfo.Begin.resize(NumSlot);
+ BlockInfo.End.resize(NumSlot);
+
+ SmallVector<int, 4> slots;
+ for (MachineInstr &MI : *MBB) {
+ bool isStart = false;
+ slots.clear();
+ if (isLifetimeStartOrEnd(MI, slots, isStart)) {
+ if (!isStart) {
+ assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
+ int Slot = slots[0];
+ if (BlockInfo.Begin.test(Slot)) {
+ BlockInfo.Begin.reset(Slot);
+ }
+ BlockInfo.End.set(Slot);
+ } else {
+ for (auto Slot : slots) {
+ LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
+ LLVM_DEBUG(dbgs()
+ << " at " << printMBBReference(*MBB) << " index ");
+ LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ LLVM_DEBUG(dbgs()
+ << " with allocation: " << Allocation->getName());
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ if (BlockInfo.End.test(Slot)) {
+ BlockInfo.End.reset(Slot);
+ }
+ BlockInfo.Begin.set(Slot);
+ }
+ }
+ }
+ }
+ }
+
+ // Update statistics.
+ NumMarkerSeen += MarkersFound;
+ return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+ unsigned NumIters = 0;
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ ++NumIters;
+
+ for (const MachineBasicBlock *BB : BasicBlockNumbering) {
+ // Use an iterator to avoid repeated lookups.
+ LivenessMap::iterator BI = BlockLiveness.find(BB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ BlockLifetimeInfo &BlockInfo = BI->second;
+
+ // Compute LiveIn by unioning together the LiveOut sets of all preds.
+ BitVector LocalLiveIn;
+ for (MachineBasicBlock *Pred : BB->predecessors()) {
+ LivenessMap::const_iterator I = BlockLiveness.find(Pred);
+ // PR37130: transformations prior to stack coloring can
+ // sometimes leave behind statically unreachable blocks; these
+ // can be safely skipped here.
+ if (I != BlockLiveness.end())
+ LocalLiveIn |= I->second.LiveOut;
+ }
+
+ // Compute LiveOut by subtracting out lifetimes that end in this
+ // block, then adding in lifetimes that begin in this block. If
+ // we have both BEGIN and END markers in the same basic block
+ // then we know that the BEGIN marker comes after the END,
+ // because we already handle the case where the BEGIN comes
+ // before the END when collecting the markers (and building the
+ // BEGIN/END vectors).
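+      // In dataflow terms:
+      //   LiveIn(BB)  = union of LiveOut(Pred) over all predecessors of BB
+      //   LiveOut(BB) = (LiveIn(BB) - End(BB)) | Begin(BB)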
+ BitVector LocalLiveOut = LocalLiveIn;
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveOut |= BlockInfo.Begin;
+
+ // Update block LiveIn set, noting whether it has changed.
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+ changed = true;
+ BlockInfo.LiveIn |= LocalLiveIn;
+ }
+
+ // Update block LiveOut set, noting whether it has changed.
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+ changed = true;
+ BlockInfo.LiveOut |= LocalLiveOut;
+ }
+ }
+ } // while changed.
+
+ NumIterations = NumIters;
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+ SmallVector<SlotIndex, 16> Starts;
+ SmallVector<bool, 16> DefinitelyInUse;
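+  // Starts[Slot] holds the index at which the currently open segment for Slot
+  // began (invalid when no segment is open); DefinitelyInUse[Slot] records
+  // whether a start index for Slot has already been pushed onto LiveStarts
+  // within the current block.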
+
+ // For each block, find which slots are active within this block
+ // and update the live intervals.
+ for (const MachineBasicBlock &MBB : *MF) {
+ Starts.clear();
+ Starts.resize(NumSlots);
+ DefinitelyInUse.clear();
+ DefinitelyInUse.resize(NumSlots);
+
+ // Start the interval of the slots that we previously found to be 'in-use'.
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+ for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+ pos = MBBLiveness.LiveIn.find_next(pos)) {
+ Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+ }
+
+ // Create the interval for the basic blocks containing lifetime begin/end.
+ for (const MachineInstr &MI : MBB) {
+ SmallVector<int, 4> slots;
+ bool IsStart = false;
+ if (!isLifetimeStartOrEnd(MI, slots, IsStart))
+ continue;
+ SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+ for (auto Slot : slots) {
+ if (IsStart) {
+          // If a slot is already definitely in use, we don't have to record
+          // a new start index because one already exists.
+ if (!DefinitelyInUse[Slot]) {
+ LiveStarts[Slot].push_back(ThisIndex);
+ DefinitelyInUse[Slot] = true;
+ }
+ if (!Starts[Slot].isValid())
+ Starts[Slot] = ThisIndex;
+ } else {
+ if (Starts[Slot].isValid()) {
+ VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+ Intervals[Slot]->addSegment(
+ LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+ Starts[Slot] = SlotIndex(); // Invalidate the start index
+ DefinitelyInUse[Slot] = false;
+ }
+ }
+ }
+ }
+
+ // Finish up started segments
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ if (!Starts[i].isValid())
+ continue;
+
+ SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+ VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+ Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
+ }
+ }
+}
+
+bool StackColoring::removeAllMarkers() {
+ unsigned Count = 0;
+ for (MachineInstr *MI : Markers) {
+ MI->eraseFromParent();
+ Count++;
+ }
+ Markers.clear();
+
+ LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
+ return Count;
+}
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+ unsigned FixedInstr = 0;
+ unsigned FixedMemOp = 0;
+ unsigned FixedDbg = 0;
+
+ // Remap debug information that refers to stack slots.
+ for (auto &VI : MF->getVariableDbgInfo()) {
+ if (!VI.Var || !VI.inStackSlot())
+ continue;
+ int Slot = VI.getStackSlot();
+ if (SlotRemap.count(Slot)) {
+ LLVM_DEBUG(dbgs() << "Remapping debug info for ["
+ << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
+ VI.updateStackSlot(SlotRemap[Slot]);
+ FixedDbg++;
+ }
+ }
+
+ // Keep a list of *allocas* which need to be remapped.
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+
+  // Keep a list of allocas which have been affected by the remap.
+ SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
+
+ for (const std::pair<int, int> &SI : SlotRemap) {
+ const AllocaInst *From = MFI->getObjectAllocation(SI.first);
+ const AllocaInst *To = MFI->getObjectAllocation(SI.second);
+ assert(To && From && "Invalid allocation object");
+ Allocas[From] = To;
+
+    // If From comes before To, it's possible that there is a use of From
+    // between them; moving To before From ensures that To still dominates
+    // all of From's former uses after the replaceAllUsesWith below.
+ if (From->comesBefore(To))
+ const_cast<AllocaInst*>(To)->moveBefore(const_cast<AllocaInst*>(From));
+
+ // AA might be used later for instruction scheduling, and we need it to be
+    // able to deduce the correct aliasing relationships between pointers
+ // derived from the alloca being remapped and the target of that remapping.
+ // The only safe way, without directly informing AA about the remapping
+ // somehow, is to directly update the IR to reflect the change being made
+ // here.
+ Instruction *Inst = const_cast<AllocaInst *>(To);
+ if (From->getType() != To->getType()) {
+ BitCastInst *Cast = new BitCastInst(Inst, From->getType());
+ Cast->insertAfter(Inst);
+ Inst = Cast;
+ }
+
+ // We keep both slots to maintain AliasAnalysis metadata later.
+ MergedAllocas.insert(From);
+ MergedAllocas.insert(To);
+
+ // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
+ // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
+ // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
+ MachineFrameInfo::SSPLayoutKind FromKind
+ = MFI->getObjectSSPLayout(SI.first);
+ MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second);
+ if (FromKind != MachineFrameInfo::SSPLK_None &&
+ (ToKind == MachineFrameInfo::SSPLK_None ||
+ (ToKind != MachineFrameInfo::SSPLK_LargeArray &&
+ FromKind != MachineFrameInfo::SSPLK_AddrOf)))
+ MFI->setObjectSSPLayout(SI.second, FromKind);
+
+ // The new alloca might not be valid in a llvm.dbg.declare for this
+ // variable, so undef out the use to make the verifier happy.
+ AllocaInst *FromAI = const_cast<AllocaInst *>(From);
+ if (FromAI->isUsedByMetadata())
+ ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
+ for (auto &Use : FromAI->uses()) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
+ if (BCI->isUsedByMetadata())
+ ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
+ }
+
+ // Note that this will not replace uses in MMOs (which we'll update below),
+ // or anywhere else (which is why we won't delete the original
+ // instruction).
+ FromAI->replaceAllUsesWith(Inst);
+ }
+
+ // Remap all instructions to the new stack slots.
+ std::vector<std::vector<MachineMemOperand *>> SSRefs(
+ MFI->getObjectIndexEnd());
+ for (MachineBasicBlock &BB : *MF)
+ for (MachineInstr &I : BB) {
+ // Skip lifetime markers. We'll remove them soon.
+ if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
+ I.getOpcode() == TargetOpcode::LIFETIME_END)
+ continue;
+
+ // Update the MachineMemOperand to use the new alloca.
+ for (MachineMemOperand *MMO : I.memoperands()) {
+ // We've replaced IR-level uses of the remapped allocas, so we only
+ // need to replace direct uses here.
+ const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
+ if (!AI)
+ continue;
+
+ if (!Allocas.count(AI))
+ continue;
+
+ MMO->setValue(Allocas[AI]);
+ FixedMemOp++;
+ }
+
+ // Update all of the machine instruction operands.
+ for (MachineOperand &MO : I.operands()) {
+ if (!MO.isFI())
+ continue;
+ int FromSlot = MO.getIndex();
+
+ // Don't touch arguments.
+ if (FromSlot<0)
+ continue;
+
+ // Only look at mapped slots.
+ if (!SlotRemap.count(FromSlot))
+ continue;
+
+ // In a debug build, check that the instruction that we are modifying is
+ // inside the expected live range. If the instruction is not inside
+ // the calculated range then it means that the alloca usage moved
+ // outside of the lifetime markers, or that the user has a bug.
+        // NOTE: Alloca address calculations which happen outside the lifetime
+        //       zone are okay, despite the fact that we don't have a good way
+        //       to validate all of the uses of the calculation.
+#ifndef NDEBUG
+ bool TouchesMemory = I.mayLoadOrStore();
+ // If we *don't* protect the user from escaped allocas, don't bother
+ // validating the instructions.
+ if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ const LiveInterval *Interval = &*Intervals[FromSlot];
+ assert(Interval->find(Index) != Interval->end() &&
+ "Found instruction usage outside of live range.");
+ }
+#endif
+
+ // Fix the machine instructions.
+ int ToSlot = SlotRemap[FromSlot];
+ MO.setIndex(ToSlot);
+ FixedInstr++;
+ }
+
+ // We adjust AliasAnalysis information for merged stack slots.
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
+ bool ReplaceMemOps = false;
+ for (MachineMemOperand *MMO : I.memoperands()) {
+ // Collect MachineMemOperands which reference
+ // FixedStackPseudoSourceValues with old frame indices.
+ if (const auto *FSV = dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MMO->getPseudoValue())) {
+ int FI = FSV->getFrameIndex();
+ auto To = SlotRemap.find(FI);
+ if (To != SlotRemap.end())
+ SSRefs[FI].push_back(MMO);
+ }
+
+        // If this memory location may refer to a slot that is remapped here,
+        // we remove its AA information.
+ bool MayHaveConflictingAAMD = false;
+ if (MMO->getAAInfo()) {
+ if (const Value *MMOV = MMO->getValue()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjectsForCodeGen(MMOV, Objs);
+
+ if (Objs.empty())
+ MayHaveConflictingAAMD = true;
+ else
+ for (Value *V : Objs) {
+              // If this memory location comes from a known stack slot
+              // that is not remapped, we continue checking.
+              // Otherwise, we need to invalidate AA information.
+ const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
+ if (AI && MergedAllocas.count(AI)) {
+ MayHaveConflictingAAMD = true;
+ break;
+ }
+ }
+ }
+ }
+ if (MayHaveConflictingAAMD) {
+ NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
+ ReplaceMemOps = true;
+ } else {
+ NewMMOs.push_back(MMO);
+ }
+ }
+
+ // If any memory operand is updated, set memory references of
+ // this instruction.
+ if (ReplaceMemOps)
+ I.setMemRefs(*MF, NewMMOs);
+ }
+
+ // Rewrite MachineMemOperands that reference old frame indices.
+ for (auto E : enumerate(SSRefs))
+ if (!E.value().empty()) {
+ const PseudoSourceValue *NewSV =
+ MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
+ for (MachineMemOperand *Ref : E.value())
+ Ref->setValue(NewSV);
+ }
+
+ // Update the location of C++ catch objects for the MSVC personality routine.
+ if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
+ for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
+ for (WinEHHandlerType &H : TBME.HandlerArray)
+ if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
+ SlotRemap.count(H.CatchObj.FrameIndex))
+ H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
+
+ LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
+ LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
+ LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
+ (void) FixedMemOp;
+ (void) FixedDbg;
+ (void) FixedInstr;
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+ for (MachineBasicBlock &BB : *MF)
+ for (MachineInstr &I : BB) {
+ if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
+ I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
+ continue;
+
+      // Some intervals are suspicious! In some cases we find address
+      // calculations outside of the lifetime zone, but no actual memory
+      // read or write. Memory accesses outside of the lifetime zone are a
+      // clear violation, but address calculations are okay. This can happen
+      // when GEPs are hoisted outside of the lifetime zone.
+      // So here we only check instructions which can read or write memory.
+ if (!I.mayLoad() && !I.mayStore())
+ continue;
+
+ // Check all of the machine operands.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isFI())
+ continue;
+
+ int Slot = MO.getIndex();
+
+ if (Slot<0)
+ continue;
+
+ if (Intervals[Slot]->empty())
+ continue;
+
+ // Check that the used slot is inside the calculated lifetime range.
+ // If it is not, warn about it and invalidate the range.
+ LiveInterval *Interval = &*Intervals[Slot];
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ if (Interval->find(Index) == Interval->end()) {
+ Interval->clear();
+ LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
+ EscapedAllocas++;
+ }
+ }
+ }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+ unsigned NumSlots) {
+ // Expunge slot remap map.
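+  // For example, the chain {1 -> 2, 2 -> 3} is collapsed to {1 -> 3, 2 -> 3},
+  // so that every remapped slot points directly at a slot that is not itself
+  // remapped.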
+ for (unsigned i=0; i < NumSlots; ++i) {
+    // If we are remapping slot i, follow the remap chain to its final target.
+ if (SlotRemap.count(i)) {
+ int Target = SlotRemap[i];
+ // As long as our target is mapped to something else, follow it.
+ while (SlotRemap.count(Target)) {
+ Target = SlotRemap[Target];
+ SlotRemap[i] = Target;
+ }
+ }
+ }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+ LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: " << Func.getName() << '\n');
+ MF = &Func;
+ MFI = &MF->getFrameInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ BlockLiveness.clear();
+ BasicBlocks.clear();
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
+ LiveStarts.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+
+ // If there are no stack slots then there are no markers to remove.
+ if (!NumSlots)
+ return false;
+
+ SmallVector<int, 8> SortedSlots;
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
+ LiveStarts.resize(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+ unsigned TotalSize = 0;
+ LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
+ << " slots\n");
+ LLVM_DEBUG(dbgs() << "Slot structure:\n");
+
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+ LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
+ << " bytes.\n");
+ TotalSize += MFI->getObjectSize(i);
+ }
+
+ LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
+
+  // Don't continue if there are not enough lifetime markers, the stack is too
+  // small, or we were told not to optimize the slots.
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
+ skipFunction(Func.getFunction())) {
+ LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
+ return removeAllMarkers();
+ }
+
+ for (unsigned i=0; i < NumSlots; ++i) {
+ std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
+ LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+ Intervals.push_back(std::move(LI));
+ SortedSlots.push_back(i);
+ }
+
+ // Calculate the liveness of each block.
+ calculateLocalLiveness();
+ LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
+ LLVM_DEBUG(dump());
+
+ // Propagate the liveness information.
+ calculateLiveIntervals(NumSlots);
+ LLVM_DEBUG(dumpIntervals());
+
+ // Search for allocas which are used outside of the declared lifetime
+ // markers.
+ if (ProtectFromEscapedAllocas)
+ removeInvalidSlotRanges();
+
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
+
+ // Do not bother looking at empty intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
+
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and look
+ // for disjoint slots. When you find disjoint slots, merge the smaller one
+ // into the bigger one and update the live interval. Remove the small alloca
+ // and continue.
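+  // For example, if a 32-byte slot and an 8-byte slot have disjoint live
+  // ranges, the 8-byte slot is merged into the 32-byte slot and the frame
+  // shrinks by 8 bytes.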
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ // Use stable sort to guarantee deterministic code generation.
+ llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
+    // We use -1 to denote an uninteresting slot. Place these slots at the end.
+ if (LHS == -1)
+ return false;
+ if (RHS == -1)
+ return true;
+ // Sort according to size.
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ });
+
+ for (auto &s : LiveStarts)
+ llvm::sort(s);
+
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
+ continue;
+
+ for (unsigned J=I+1; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
+
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
+
+ // Objects with different stack IDs cannot be merged.
+ if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
+ continue;
+
+ LiveInterval *First = &*Intervals[FirstSlot];
+ LiveInterval *Second = &*Intervals[SecondSlot];
+ auto &FirstS = LiveStarts[FirstSlot];
+ auto &SecondS = LiveStarts[SecondSlot];
+ assert(!First->empty() && !Second->empty() && "Found an empty range");
+
+ // Merge disjoint slots. This is a little bit tricky - see the
+ // Implementation Notes section for an explanation.
+ if (!First->isLiveAtIndexes(SecondS) &&
+ !Second->isLiveAtIndexes(FirstS)) {
+ Changed = true;
+ First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+ int OldSize = FirstS.size();
+ FirstS.append(SecondS.begin(), SecondS.end());
+ auto Mid = FirstS.begin() + OldSize;
+ std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+          LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slot #"
+                            << SecondSlot << " together.\n");
+ Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
+ MFI->getObjectAlign(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots+=1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
+ }
+ }
+ }// While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+  LLVM_DEBUG(dbgs() << "Merged " << RemovedSlots << " slots. Saved "
+ << ReducedSize << " bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap);
+
+ return removeAllMarkers();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
new file mode 100644
index 000000000000..5d3903ed84ce
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -0,0 +1,254 @@
+//===-- StackFrameLayoutAnalysisPass.cpp
+//------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// StackFrameLayoutAnalysisPass implementation. Outputs information about the
+// layout of the stack frame, using the remarks interface. On the CLI it prints
+// a textual representation of the stack frame. When possible it prints the
+// values that occupy a stack slot using any available debug information. Since
+// output is remarks based, it is also available in a machine readable file
+// format, such as YAML.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-frame-layout"
+
+namespace {
+
+/// StackFrameLayoutAnalysisPass - This is a pass to dump the stack frame of a
+/// MachineFunction.
+///
+struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
+ using SlotDbgMap = SmallDenseMap<int, SetVector<const DILocalVariable *>>;
+ static char ID;
+
+ enum SlotType {
+ Spill, // a Spill slot
+ StackProtector, // Stack Protector slot
+    Variable, // a slot used to store local data (could be a temporary)
+ Invalid // It's an error for a slot to have this type
+ };
+
+ struct SlotData {
+ int Slot;
+ int Size;
+ int Align;
+ int Offset;
+ SlotType SlotTy;
+
+ SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx)
+ : Slot(Idx), Size(MFI.getObjectSize(Idx)),
+ Align(MFI.getObjectAlign(Idx).value()),
+ Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid) {
+ if (MFI.isSpillSlotObjectIndex(Idx))
+ SlotTy = SlotType::Spill;
+ else if (Idx == MFI.getStackProtectorIndex())
+ SlotTy = SlotType::StackProtector;
+ else
+ SlotTy = SlotType::Variable;
+ }
+
+    // We use this to sort in reverse order, so that the layout is displayed
+    // correctly.
+ bool operator<(const SlotData &Rhs) const { return Offset > Rhs.Offset; }
+ };
+
+ StackFrameLayoutAnalysisPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Stack Frame Layout Analysis";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // TODO: We should implement a similar filter for remarks:
+ // -Rpass-func-filter=<regex>
+ if (!isFunctionInPrintList(MF.getName()))
+ return false;
+
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(DEBUG_TYPE))
+ return false;
+
+ MachineOptimizationRemarkAnalysis Rem(DEBUG_TYPE, "StackLayout",
+ MF.getFunction().getSubprogram(),
+ &MF.front());
+ Rem << ("\nFunction: " + MF.getName()).str();
+ emitStackFrameLayoutRemarks(MF, Rem);
+ getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE().emit(Rem);
+ return false;
+ }
+
+ std::string getTypeString(SlotType Ty) {
+ switch (Ty) {
+ case SlotType::Spill:
+ return "Spill";
+ case SlotType::StackProtector:
+ return "Protector";
+ case SlotType::Variable:
+ return "Variable";
+ default:
+ llvm_unreachable("bad slot type for stack layout");
+ }
+ }
+
+ void emitStackSlotRemark(const MachineFunction &MF, const SlotData &D,
+ MachineOptimizationRemarkAnalysis &Rem) {
+ // To make it easy to understand the stack layout from the CLI, we want to
+ // print each slot like the following:
+ //
+ // Offset: [SP+8], Type: Spill, Align: 8, Size: 16
+ // foo @ /path/to/file.c:25
+ // bar @ /path/to/file.c:35
+ //
+ // Which prints the size, alignment, and offset from the SP at function
+ // entry.
+ //
+ // But we also want the machine readable remarks data to be nicely
+ // organized. So we print some additional data as strings for the CLI
+ // output, but maintain more structured data for the YAML.
+ //
+ // For example we store the Offset in YAML as:
+ // ...
+ // - Offset: -8
+ //
+ // But we print it to the CLI as
+ // Offset: [SP-8]
+
+ // Negative offsets will print a leading `-`, so only add `+`
+ std::string Prefix =
+ formatv("\nOffset: [SP{0}", (D.Offset < 0) ? "" : "+").str();
+ Rem << Prefix << ore::NV("Offset", D.Offset)
+ << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
+ << ", Align: " << ore::NV("Align", D.Align)
+ << ", Size: " << ore::NV("Size", D.Size);
+ }
+
+ void emitSourceLocRemark(const MachineFunction &MF, const DILocalVariable *N,
+ MachineOptimizationRemarkAnalysis &Rem) {
+ std::string Loc =
+ formatv("{0} @ {1}:{2}", N->getName(), N->getFilename(), N->getLine())
+ .str();
+ Rem << "\n " << ore::NV("DataLoc", Loc);
+ }
+
+ void emitStackFrameLayoutRemarks(MachineFunction &MF,
+ MachineOptimizationRemarkAnalysis &Rem) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects())
+ return;
+
+ // ValOffset is the offset to the local area from the SP at function entry.
+ // To display the true offset from SP, we need to subtract ValOffset from
+ // MFI's ObjectOffset.
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
+ const int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ LLVM_DEBUG(dbgs() << "getStackProtectorIndex =="
+ << MFI.getStackProtectorIndex() << "\n");
+
+ std::vector<SlotData> SlotInfo;
+
+ const unsigned int NumObj = MFI.getNumObjects();
+ SlotInfo.reserve(NumObj);
+ // initialize slot info
+ for (int Idx = MFI.getObjectIndexBegin(), EndIdx = MFI.getObjectIndexEnd();
+ Idx != EndIdx; ++Idx) {
+ if (MFI.isDeadObjectIndex(Idx))
+ continue;
+ SlotInfo.emplace_back(MFI, ValOffset, Idx);
+ }
+
+    // Sort the slots to match the actual layout in memory.
+ llvm::sort(SlotInfo);
+
+ SlotDbgMap SlotMap = genSlotDbgMapping(MF);
+
+ for (const SlotData &Info : SlotInfo) {
+ emitStackSlotRemark(MF, Info, Rem);
+ for (const DILocalVariable *N : SlotMap[Info.Slot])
+ emitSourceLocRemark(MF, N, Rem);
+ }
+ }
+
+ // We need to generate a mapping of slots to the values that are stored to
+ // them. This information is lost by the time we need to print out the frame,
+ // so we reconstruct it here by walking the CFG, and generating the mapping.
+ SlotDbgMap genSlotDbgMapping(MachineFunction &MF) {
+ SlotDbgMap SlotDebugMap;
+
+ // add variables to the map
+ for (MachineFunction::VariableDbgInfo &DI :
+ MF.getInStackSlotVariableDbgInfo())
+ SlotDebugMap[DI.getStackSlot()].insert(DI.Var);
+
+ // Then add all the spills that have debug data
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineMemOperand *MO : MI.memoperands()) {
+ if (!MO->isStore())
+ continue;
+ auto *FI = dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MO->getPseudoValue());
+ if (!FI)
+ continue;
+ int FrameIdx = FI->getFrameIndex();
+ SmallVector<MachineInstr *> Dbg;
+ MI.collectDebugValues(Dbg);
+
+ for (MachineInstr *MI : Dbg)
+ SlotDebugMap[FrameIdx].insert(MI->getDebugVariable());
+ }
+ }
+ }
+
+ return SlotDebugMap;
+ }
+};
+
+char StackFrameLayoutAnalysisPass::ID = 0;
+} // namespace
+
+char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisPass::ID;
+INITIALIZE_PASS(StackFrameLayoutAnalysisPass, "stack-frame-layout",
+ "Stack Frame Layout", false, false)
+
+namespace llvm {
+/// Returns a newly-created StackFrameLayout pass.
+MachineFunctionPass *createStackFrameLayoutAnalysisPass() {
+ return new StackFrameLayoutAnalysisPass();
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
new file mode 100644
index 000000000000..778ac1f5701c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -0,0 +1,171 @@
+//===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StackMap Liveness analysis pass. The pass calculates
+// the liveness for each basic block in a function and attaches the register
+// live-out information to a stackmap or patchpoint intrinsic if present.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stackmaps"
+
+static cl::opt<bool> EnablePatchPointLiveness(
+ "enable-patchpoint-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+
+STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
+STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
+STATISTIC(NumBBsVisited, "Number of basic blocks visited");
+STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
+STATISTIC(NumStackMaps, "Number of StackMaps visited");
+
+namespace {
+/// This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a patchpoint
+/// intrinsic if present.
+///
+/// This pass can be disabled via the -enable-patchpoint-liveness=false flag.
+/// The pass skips functions that don't have any patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsic to function.
+class StackMapLiveness : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI = nullptr;
+ LivePhysRegs LiveRegs;
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ StackMapLiveness();
+
+ /// Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// Performs the actual liveness calculation for the function.
+ bool calculateLiveness(MachineFunction &MF);
+
+ /// Add the current register live set to the instruction.
+ void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI);
+
+ /// Create a register mask and initialize it with the registers from
+ /// the register live set.
+ uint32_t *createRegisterMask(MachineFunction &MF) const;
+};
+} // namespace
+
+char StackMapLiveness::ID = 0;
+char &llvm::StackMapLivenessID = StackMapLiveness::ID;
+INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
+ "StackMap Liveness Analysis", false, false)
+
+/// Default construct and initialize the pass.
+StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) {
+ initializeStackMapLivenessPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We preserve all information.
+ AU.setPreservesAll();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Calculate the liveness information for the given machine function.
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
+ if (!EnablePatchPointLiveness)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
+ << MF.getName() << " **********\n");
+ TRI = MF.getSubtarget().getRegisterInfo();
+ ++NumStackMapFuncVisited;
+
+ // Skip this function if there are no patchpoints to process.
+ if (!MF.getFrameInfo().hasPatchPoint()) {
+ ++NumStackMapFuncSkipped;
+ return false;
+ }
+ return calculateLiveness(MF);
+}
+
+/// Performs the actual liveness calculation for the function.
+bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
+ bool HasChanged = false;
+ // For all basic blocks in the function.
+ for (auto &MBB : MF) {
+ LLVM_DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
+ LiveRegs.init(*TRI);
+ // FIXME: This should probably be addLiveOuts().
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ bool HasStackMap = false;
+ // Reverse iterate over all instructions and add the current live register
+ // set to an instruction if we encounter a patchpoint instruction.
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ addLiveOutSetToMI(MF, MI);
+ HasChanged = true;
+ HasStackMap = true;
+ ++NumStackMaps;
+ }
+ LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << MI);
+ LiveRegs.stepBackward(MI);
+ }
+ ++NumBBsVisited;
+ if (!HasStackMap)
+ ++NumBBsHaveNoStackmap;
+ }
+ return HasChanged;
+}
+
+/// Add the current register live set to the instruction.
+void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF,
+ MachineInstr &MI) {
+ uint32_t *Mask = createRegisterMask(MF);
+ MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask);
+ MI.addOperand(MF, MO);
+}
+
+/// Create a register mask and initialize it with the registers from the
+/// register live set.
+uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const {
+ // The mask is owned and cleaned up by the Machine Function.
+ uint32_t *Mask = MF.allocateRegMask();
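+  // One bit per physical register: register Reg maps to bit (Reg % 32) of the
+  // 32-bit word at index (Reg / 32).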
+ for (auto Reg : LiveRegs)
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+
+ // Give the target a chance to adjust the mask.
+ TRI->adjustStackMapLiveOutMask(Mask);
+
+ return Mask;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
new file mode 100644
index 000000000000..f9115e434878
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -0,0 +1,760 @@
+//===- StackMaps.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stackmaps"
+
+static cl::opt<int> StackMapVersion(
+ "stackmap-version", cl::init(3), cl::Hidden,
+ cl::desc("Specify the stackmap encoding version (default = 3)"));
+
+const char *StackMaps::WSMP = "Stack Maps: ";
+
+static uint64_t getConstMetaVal(const MachineInstr &MI, unsigned Idx) {
+ assert(MI.getOperand(Idx).isImm() &&
+ MI.getOperand(Idx).getImm() == StackMaps::ConstantOp);
+ const auto &MO = MI.getOperand(Idx + 1);
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+StackMapOpers::StackMapOpers(const MachineInstr *MI)
+ : MI(MI) {
+ assert(getVarIdx() <= MI->getNumOperands() &&
+ "invalid stackmap definition");
+}
+
+PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
+ : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ !MI->getOperand(0).isImplicit()) {
+#ifndef NDEBUG
+ unsigned CheckStartIdx = 0, e = MI->getNumOperands();
+ while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
+ MI->getOperand(CheckStartIdx).isDef() &&
+ !MI->getOperand(CheckStartIdx).isImplicit())
+ ++CheckStartIdx;
+
+ assert(getMetaIdx() == CheckStartIdx &&
+ "Unexpected additional definition in Patchpoint intrinsic.");
+#endif
+}
+
+unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
+ if (!StartIdx)
+ StartIdx = getVarIdx();
+
+ // Find the next scratch register (implicit def and early clobber)
+ unsigned ScratchIdx = StartIdx, e = MI->getNumOperands();
+ while (ScratchIdx < e &&
+ !(MI->getOperand(ScratchIdx).isReg() &&
+ MI->getOperand(ScratchIdx).isDef() &&
+ MI->getOperand(ScratchIdx).isImplicit() &&
+ MI->getOperand(ScratchIdx).isEarlyClobber()))
+ ++ScratchIdx;
+
+ assert(ScratchIdx != e && "No scratch register available");
+ return ScratchIdx;
+}
+
+unsigned StatepointOpers::getNumGcMapEntriesIdx() {
+ // Take index of num of allocas and skip all allocas records.
+ unsigned CurIdx = getNumAllocaIdx();
+ unsigned NumAllocas = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumAllocas--)
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+unsigned StatepointOpers::getNumAllocaIdx() {
+ // Take index of num of gc ptrs and skip all gc ptr records.
+ unsigned CurIdx = getNumGCPtrIdx();
+ unsigned NumGCPtrs = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumGCPtrs--)
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+unsigned StatepointOpers::getNumGCPtrIdx() {
+ // Take index of num of deopt args and skip all deopt records.
+ unsigned CurIdx = getNumDeoptArgsIdx();
+ unsigned NumDeoptArgs = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumDeoptArgs--) {
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ }
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+int StatepointOpers::getFirstGCPtrIdx() {
+ unsigned NumGCPtrsIdx = getNumGCPtrIdx();
+ unsigned NumGCPtrs = getConstMetaVal(*MI, NumGCPtrsIdx - 1);
+ if (NumGCPtrs == 0)
+ return -1;
+ ++NumGCPtrsIdx; // skip <num gc ptrs>
+ assert(NumGCPtrsIdx < MI->getNumOperands());
+ return (int)NumGCPtrsIdx;
+}
+
+unsigned StatepointOpers::getGCPointerMap(
+ SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap) {
+ unsigned CurIdx = getNumGcMapEntriesIdx();
+ unsigned GCMapSize = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ for (unsigned N = 0; N < GCMapSize; ++N) {
+ unsigned B = MI->getOperand(CurIdx++).getImm();
+ unsigned D = MI->getOperand(CurIdx++).getImm();
+ GCMap.push_back(std::make_pair(B, D));
+ }
+
+ return GCMapSize;
+}
+
+bool StatepointOpers::isFoldableReg(Register Reg) const {
+ unsigned FoldableAreaStart = getVarIdx();
+ for (const MachineOperand &MO : MI->uses()) {
+ if (MO.getOperandNo() >= FoldableAreaStart)
+ break;
+ if (MO.isReg() && MO.getReg() == Reg)
+ return false;
+ }
+ return true;
+}
+
+bool StatepointOpers::isFoldableReg(const MachineInstr *MI, Register Reg) {
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ return StatepointOpers(MI).isFoldableReg(Reg);
+}
+
+StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
+ if (StackMapVersion != 3)
+ llvm_unreachable("Unsupported stackmap version!");
+}
+
+unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
+ assert(CurIdx < MI->getNumOperands() && "Bad meta arg index");
+ const auto &MO = MI->getOperand(CurIdx);
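+  // A meta operand is a marker immediate followed by its payload:
+  // DirectMemRefOp is followed by <Reg, Offset>, IndirectMemRefOp by
+  // <Size, Reg, Offset>, and ConstantOp by a single immediate.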
+ if (MO.isImm()) {
+ switch (MO.getImm()) {
+ default:
+ llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp:
+ CurIdx += 2;
+ break;
+ case StackMaps::IndirectMemRefOp:
+ CurIdx += 3;
+ break;
+ case StackMaps::ConstantOp:
+ ++CurIdx;
+ break;
+ }
+ }
+ ++CurIdx;
+ assert(CurIdx < MI->getNumOperands() && "points past operand list");
+ return CurIdx;
+}
+
+/// Go up the super-register chain until we hit a valid dwarf register number.
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
+ int RegNum;
+ for (MCPhysReg SR : TRI->superregs_inclusive(Reg)) {
+ RegNum = TRI->getDwarfRegNum(SR, false);
+ if (RegNum >= 0)
+ break;
+ }
+
+ assert(RegNum >= 0 && "Invalid Dwarf register number.");
+ return (unsigned)RegNum;
+}
+
+MachineInstr::const_mop_iterator
+StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE, LocationVec &Locs,
+ LiveOutVec &LiveOuts) const {
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
+ if (MOI->isImm()) {
+ switch (MOI->getImm()) {
+ default:
+ llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp: {
+ auto &DL = AP.MF->getDataLayout();
+
+ unsigned Size = DL.getPointerSizeInBits();
+ assert((Size % 8) == 0 && "Need pointer size in bytes.");
+ Size /= 8;
+ Register Reg = (++MOI)->getReg();
+ int64_t Imm = (++MOI)->getImm();
+ Locs.emplace_back(StackMaps::Location::Direct, Size,
+ getDwarfRegNum(Reg, TRI), Imm);
+ break;
+ }
+ case StackMaps::IndirectMemRefOp: {
+ int64_t Size = (++MOI)->getImm();
+ assert(Size > 0 && "Need a valid size for indirect memory locations.");
+ Register Reg = (++MOI)->getReg();
+ int64_t Imm = (++MOI)->getImm();
+ Locs.emplace_back(StackMaps::Location::Indirect, Size,
+ getDwarfRegNum(Reg, TRI), Imm);
+ break;
+ }
+ case StackMaps::ConstantOp: {
+ ++MOI;
+ assert(MOI->isImm() && "Expected constant operand.");
+ int64_t Imm = MOI->getImm();
+ Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, Imm);
+ break;
+ }
+ }
+ return ++MOI;
+ }
+
+ // The physical register number will ultimately be encoded as a DWARF regno.
+ // The stack map also records the size of a spill slot that can hold the
+ // register content. (The runtime can track the actual size of the data type
+ // if it needs to.)
+ if (MOI->isReg()) {
+ // Skip implicit registers (this includes our scratch registers)
+ if (MOI->isImplicit())
+ return ++MOI;
+
+ if (MOI->isUndef()) {
+ // Record `undef` register as constant. Use same value as ISel uses.
+ Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, 0xFEFEFEFE);
+ return ++MOI;
+ }
+
+ assert(MOI->getReg().isPhysical() &&
+ "Virtreg operands should have been rewritten before now.");
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
+ assert(!MOI->getSubReg() && "Physical subreg still around.");
+
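+    // If the operand names a sub-register, record the byte offset of that
+    // sub-register within the register the DWARF number refers to.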
+ unsigned Offset = 0;
+ unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI);
+ unsigned LLVMRegNum = *TRI->getLLVMRegNum(DwarfRegNum, false);
+ unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg());
+ if (SubRegIdx)
+ Offset = TRI->getSubRegIdxOffset(SubRegIdx);
+
+ Locs.emplace_back(Location::Register, TRI->getSpillSize(*RC),
+ DwarfRegNum, Offset);
+ return ++MOI;
+ }
+
+ if (MOI->isRegLiveOut())
+ LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut());
+
+ return ++MOI;
+}
+
+void StackMaps::print(raw_ostream &OS) {
+ const TargetRegisterInfo *TRI =
+ AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr;
+ OS << WSMP << "callsites:\n";
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
+
+ OS << WSMP << "callsite " << CSI.ID << "\n";
+ OS << WSMP << " has " << CSLocs.size() << " locations\n";
+
+ unsigned Idx = 0;
+ for (const auto &Loc : CSLocs) {
+ OS << WSMP << "\t\tLoc " << Idx << ": ";
+ switch (Loc.Type) {
+ case Location::Unprocessed:
+ OS << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ OS << "Register ";
+ if (TRI)
+ OS << printReg(Loc.Reg, TRI);
+ else
+ OS << Loc.Reg;
+ break;
+ case Location::Direct:
+ OS << "Direct ";
+ if (TRI)
+ OS << printReg(Loc.Reg, TRI);
+ else
+ OS << Loc.Reg;
+ if (Loc.Offset)
+ OS << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ OS << "Indirect ";
+ if (TRI)
+ OS << printReg(Loc.Reg, TRI);
+ else
+ OS << Loc.Reg;
+ OS << "+" << Loc.Offset;
+ break;
+ case Location::Constant:
+ OS << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ OS << "Constant Index " << Loc.Offset;
+ break;
+ }
+ OS << "\t[encoding: .byte " << Loc.Type << ", .byte 0"
+ << ", .short " << Loc.Size << ", .short " << Loc.Reg << ", .short 0"
+ << ", .int " << Loc.Offset << "]\n";
+ Idx++;
+ }
+
+ OS << WSMP << "\thas " << LiveOuts.size() << " live-out registers\n";
+
+ Idx = 0;
+ for (const auto &LO : LiveOuts) {
+ OS << WSMP << "\t\tLO " << Idx << ": ";
+ if (TRI)
+ OS << printReg(LO.Reg, TRI);
+ else
+ OS << LO.Reg;
+ OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte "
+ << LO.Size << "]\n";
+ Idx++;
+ }
+ }
+}
+
+/// Create a live-out register record for the given register Reg.
+StackMaps::LiveOutReg
+StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
+ unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI);
+ unsigned Size = TRI->getSpillSize(*TRI->getMinimalPhysRegClass(Reg));
+ return LiveOutReg(Reg, DwarfRegNum, Size);
+}
+
+/// Parse the register live-out mask and return a vector of live-out registers
+/// that need to be recorded in the stackmap.
+StackMaps::LiveOutVec
+StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
+ assert(Mask && "No register mask specified");
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
+ LiveOutVec LiveOuts;
+
+ // Create a LiveOutReg for each bit that is set in the register mask.
+ for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg)
+ if ((Mask[Reg / 32] >> (Reg % 32)) & 1)
+ LiveOuts.push_back(createLiveOutReg(Reg, TRI));
+
+ // We don't need to keep track of a register if its super-register is already
+ // in the list. Merge entries that refer to the same dwarf register and use
+ // the maximum size that needs to be spilled.
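+  // After sorting, entries sharing a dwarf register number are adjacent; the
+  // nested loop below folds each such run into one entry and zeroes the Reg
+  // field of the rest so they can be erased afterwards.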
+
+ llvm::sort(LiveOuts, [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
+
+ for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
+ for (auto *II = std::next(I); II != E; ++II) {
+ if (I->DwarfRegNum != II->DwarfRegNum) {
+ // Skip all the now invalid entries.
+ I = --II;
+ break;
+ }
+ I->Size = std::max(I->Size, II->Size);
+ if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
+ I->Reg = II->Reg;
+ II->Reg = 0; // mark for deletion.
+ }
+ }
+
+ llvm::erase_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; });
+
+ return LiveOuts;
+}
+
+// See statepoint MI format description in StatepointOpers' class comment
+// in include/llvm/CodeGen/StackMaps.h
+void StackMaps::parseStatepointOpers(const MachineInstr &MI,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ LocationVec &Locations,
+ LiveOutVec &LiveOuts) {
+ LLVM_DEBUG(dbgs() << "record statepoint : " << MI << "\n");
+ StatepointOpers SO(&MI);
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // CC
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Flags
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Num Deopts
+
+ // Record Deopt Args.
+ unsigned NumDeoptArgs = Locations.back().Offset;
+ assert(Locations.back().Type == Location::Constant);
+ assert(NumDeoptArgs == SO.getNumDeoptArgs());
+
+ while (NumDeoptArgs--)
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+
+ // Record gc base/derived pairs
+ assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+ ++MOI;
+ assert(MOI->isImm());
+ unsigned NumGCPointers = MOI->getImm();
+ ++MOI;
+ if (NumGCPointers) {
+ // Map logical index of GC ptr to MI operand index.
+ SmallVector<unsigned, 8> GCPtrIndices;
+ unsigned GCPtrIdx = (unsigned)SO.getFirstGCPtrIdx();
+ assert((int)GCPtrIdx != -1);
+ assert(MOI - MI.operands_begin() == GCPtrIdx + 0LL);
+ while (NumGCPointers--) {
+ GCPtrIndices.push_back(GCPtrIdx);
+ GCPtrIdx = StackMaps::getNextMetaArgIdx(&MI, GCPtrIdx);
+ }
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> GCPairs;
+ unsigned NumGCPairs = SO.getGCPointerMap(GCPairs);
+ (void)NumGCPairs;
+ LLVM_DEBUG(dbgs() << "NumGCPairs = " << NumGCPairs << "\n");
+
+ auto MOB = MI.operands_begin();
+ for (auto &P : GCPairs) {
+ assert(P.first < GCPtrIndices.size() && "base pointer index not found");
+ assert(P.second < GCPtrIndices.size() &&
+ "derived pointer index not found");
+ unsigned BaseIdx = GCPtrIndices[P.first];
+ unsigned DerivedIdx = GCPtrIndices[P.second];
+ LLVM_DEBUG(dbgs() << "Base : " << BaseIdx << " Derived : " << DerivedIdx
+ << "\n");
+ (void)parseOperand(MOB + BaseIdx, MOE, Locations, LiveOuts);
+ (void)parseOperand(MOB + DerivedIdx, MOE, Locations, LiveOuts);
+ }
+
+ MOI = MOB + GCPtrIdx;
+ }
+
+ // Record gc allocas
+ assert(MOI < MOE);
+ assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+ ++MOI;
+ unsigned NumAllocas = MOI->getImm();
+ ++MOI;
+ while (NumAllocas--) {
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+ assert(MOI < MOE);
+ }
+}
+
+void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
+ const MachineInstr &MI, uint64_t ID,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ bool recordResult) {
+ MCContext &OutContext = AP.OutStreamer->getContext();
+
+ LocationVec Locations;
+ LiveOutVec LiveOuts;
+
+ if (recordResult) {
+ assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value.");
+ parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), Locations,
+ LiveOuts);
+ }
+
+ // Parse operands.
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT)
+ parseStatepointOpers(MI, MOI, MOE, Locations, LiveOuts);
+ else
+ while (MOI != MOE)
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+
+ // Move large constants into the constant pool.
+ for (auto &Loc : Locations) {
+ // Constants are encoded as sign-extended integers.
+ // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool.
+ if (Loc.Type == Location::Constant && !isInt<32>(Loc.Offset)) {
+ Loc.Type = Location::ConstantIndex;
+ // ConstPool is intentionally a MapVector of 'uint64_t's (as
+ // opposed to 'int64_t's). We should never be in a situation
+ // where we have to insert either the tombstone or the empty
+ // keys into a map, and for a DenseMap<uint64_t, T> these are
+ // (uint64_t)0 and (uint64_t)-1. They can be and are
+ // represented using 32 bit integers.
+ assert((uint64_t)Loc.Offset != DenseMapInfo<uint64_t>::getEmptyKey() &&
+ (uint64_t)Loc.Offset !=
+ DenseMapInfo<uint64_t>::getTombstoneKey() &&
+ "empty and tombstone keys should fit in 32 bits!");
+ auto Result = ConstPool.insert(std::make_pair(Loc.Offset, Loc.Offset));
+ Loc.Offset = Result.first - ConstPool.begin();
+ }
+ }
+
+ // Create an expression to calculate the offset of the callsite from function
+ // entry.
+ const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(&MILabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
+ std::move(LiveOuts));
+
+ // Record the stack size of the current function and update callsite count.
+ const MachineFrameInfo &MFI = AP.MF->getFrameInfo();
+ const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
+ bool HasDynamicFrameSize =
+ MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(*(AP.MF));
+ uint64_t FrameSize = HasDynamicFrameSize ? UINT64_MAX : MFI.getStackSize();
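+  // UINT64_MAX is recorded as a sentinel frame size when the frame has
+  // variable-sized objects or requires stack realignment, since its true size
+  // is not a compile-time constant.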
+
+ auto CurrentIt = FnInfos.find(AP.CurrentFnSym);
+ if (CurrentIt != FnInfos.end())
+ CurrentIt->second.RecordCount++;
+ else
+ FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize)));
+}
+
+void StackMaps::recordStackMap(const MCSymbol &L, const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
+
+ StackMapOpers opers(&MI);
+ const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm();
+ recordStackMapOpers(L, MI, ID, std::next(MI.operands_begin(),
+ opers.getVarIdx()),
+ MI.operands_end());
+}
+
+void StackMaps::recordPatchPoint(const MCSymbol &L, const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
+
+ PatchPointOpers opers(&MI);
+ const int64_t ID = opers.getID();
+ auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
+ recordStackMapOpers(L, MI, ID, MOI, MI.operands_end(),
+ opers.isAnyReg() && opers.hasDef());
+
+#ifndef NDEBUG
+ // verify anyregcc
+ auto &Locations = CSInfos.back().Locations;
+ if (opers.isAnyReg()) {
+ unsigned NArgs = opers.getNumCallArgs();
+ for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i)
+ assert(Locations[i].Type == Location::Register &&
+ "anyreg arg must be in reg.");
+ }
+#endif
+}
+
+void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
+
+ StatepointOpers opers(&MI);
+ const unsigned StartIdx = opers.getVarIdx();
+ recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx,
+ MI.operands_end(), false);
+}
+
+/// Emit the stackmap header.
+///
+/// Header {
+/// uint8 : Stack Map Version (currently 3)
+/// uint8 : Reserved (expected to be 0)
+/// uint16 : Reserved (expected to be 0)
+/// }
+/// uint32 : NumFunctions
+/// uint32 : NumConstants
+/// uint32 : NumRecords
+void StackMaps::emitStackmapHeader(MCStreamer &OS) {
+ // Header.
+ OS.emitIntValue(StackMapVersion, 1); // Version.
+ OS.emitIntValue(0, 1); // Reserved.
+ OS.emitInt16(0); // Reserved.
+
+ // Num functions.
+ LLVM_DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');
+ OS.emitInt32(FnInfos.size());
+ // Num constants.
+ LLVM_DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
+ OS.emitInt32(ConstPool.size());
+ // Num callsites.
+ LLVM_DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
+ OS.emitInt32(CSInfos.size());
+}
+
+/// Emit the function frame record for each function.
+///
+/// StkSizeRecord[NumFunctions] {
+/// uint64 : Function Address
+/// uint64 : Stack Size
+/// uint64 : Record Count
+/// }
+void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
+ // Function Frame records.
+ LLVM_DEBUG(dbgs() << WSMP << "functions:\n");
+ for (auto const &FR : FnInfos) {
+ LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first
+ << " frame size: " << FR.second.StackSize
+ << " callsite count: " << FR.second.RecordCount << '\n');
+ OS.emitSymbolValue(FR.first, 8);
+ OS.emitIntValue(FR.second.StackSize, 8);
+ OS.emitIntValue(FR.second.RecordCount, 8);
+ }
+}
+
+/// Emit the constant pool.
+///
+/// int64 : Constants[NumConstants]
+void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
+ // Constant pool entries.
+ LLVM_DEBUG(dbgs() << WSMP << "constants:\n");
+ for (const auto &ConstEntry : ConstPool) {
+ LLVM_DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
+ OS.emitIntValue(ConstEntry.second, 8);
+ }
+}
+
+/// Emit the callsite info for each callsite.
+///
+/// StkMapRecord[NumRecords] {
+/// uint64 : PatchPoint ID
+/// uint32 : Instruction Offset
+/// uint16 : Reserved (record flags)
+/// uint16 : NumLocations
+/// Location[NumLocations] {
+/// uint8 : Register | Direct | Indirect | Constant | ConstantIndex
+/// uint8 : Size in Bytes
+/// uint16 : Dwarf RegNum
+/// int32 : Offset
+/// }
+/// uint16 : Padding
+/// uint16 : NumLiveOuts
+/// LiveOuts[NumLiveOuts] {
+/// uint16 : Dwarf RegNum
+/// uint8 : Reserved
+/// uint8 : Size in Bytes
+/// }
+/// uint32 : Padding (only if required to align to 8 byte)
+/// }
+///
+/// Location Encoding, Type, Value:
+/// 0x1, Register, Reg (value in register)
+/// 0x2, Direct, Reg + Offset (frame index)
+/// 0x3, Indirect, [Reg + Offset] (spilled value)
+/// 0x4, Constant, Offset (small constant)
+/// 0x5, ConstIndex, Constants[Offset] (large constant)
+void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
+ LLVM_DEBUG(print(dbgs()));
+ // Callsite entries.
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
+
+ // Verify stack map entry. It's better to communicate a problem to the
+ // runtime than crash in case of in-process compilation. Currently, we do
+ // simple overflow checks, but we may eventually communicate other
+ // compilation errors this way.
+ if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) {
+ OS.emitIntValue(UINT64_MAX, 8); // Invalid ID.
+ OS.emitValue(CSI.CSOffsetExpr, 4);
+ OS.emitInt16(0); // Reserved.
+ OS.emitInt16(0); // 0 locations.
+ OS.emitInt16(0); // padding.
+ OS.emitInt16(0); // 0 live-out registers.
+ OS.emitInt32(0); // padding.
+ continue;
+ }
+
+ OS.emitIntValue(CSI.ID, 8);
+ OS.emitValue(CSI.CSOffsetExpr, 4);
+
+ // Reserved for flags.
+ OS.emitInt16(0);
+ OS.emitInt16(CSLocs.size());
+
+ for (const auto &Loc : CSLocs) {
+ OS.emitIntValue(Loc.Type, 1);
+ OS.emitIntValue(0, 1); // Reserved
+ OS.emitInt16(Loc.Size);
+ OS.emitInt16(Loc.Reg);
+ OS.emitInt16(0); // Reserved
+ OS.emitInt32(Loc.Offset);
+ }
+
+      // Emit alignment to 8 bytes.
+ OS.emitValueToAlignment(Align(8));
+
+ // Num live-out registers and padding to align to 4 byte.
+ OS.emitInt16(0);
+ OS.emitInt16(LiveOuts.size());
+
+ for (const auto &LO : LiveOuts) {
+ OS.emitInt16(LO.DwarfRegNum);
+ OS.emitIntValue(0, 1);
+ OS.emitIntValue(LO.Size, 1);
+ }
+ // Emit alignment to 8 byte.
+ OS.emitValueToAlignment(Align(8));
+ }
+}
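
For reference when reading the emission loop above, each Location record occupies 12 bytes and each LiveOut record 4 bytes before the trailing 8-byte alignment. The following is a minimal, illustrative C++ sketch of those layouts; the struct names are my own and not LLVM types, and on common ABIs these structs happen to have no padding (checked by the static_asserts), though a robust consumer should still read field by field with the target's endianness.

#include <cstdint>

// Illustrative mirror of the 12 bytes emitted per location above
// (Type, Reserved, Size, DwarfRegNum, Reserved, Offset).
struct StackMapLocation {
  uint8_t Type;        // 0x1 Register ... 0x5 ConstIndex
  uint8_t Reserved0;   // expected to be 0
  uint16_t SizeInBytes;
  uint16_t DwarfRegNum;
  uint16_t Reserved1;  // expected to be 0
  int32_t Offset;      // interpretation depends on Type (see table above)
};

// Illustrative mirror of the 4 bytes emitted per live-out register above.
struct StackMapLiveOut {
  uint16_t DwarfRegNum;
  uint8_t Reserved;
  uint8_t SizeInBytes;
};

static_assert(sizeof(StackMapLocation) == 12, "no padding on common ABIs");
static_assert(sizeof(StackMapLiveOut) == 4, "no padding on common ABIs");
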
+
+/// Serialize the stackmap data.
+void StackMaps::serializeToStackMapSection() {
+ (void)WSMP;
+ // Bail out if there's no stack map data.
+ assert((!CSInfos.empty() || ConstPool.empty()) &&
+ "Expected empty constant pool too!");
+ assert((!CSInfos.empty() || FnInfos.empty()) &&
+ "Expected empty function record too!");
+ if (CSInfos.empty())
+ return;
+
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCStreamer &OS = *AP.OutStreamer;
+
+ // Create the section.
+ MCSection *StackMapSection =
+ OutContext.getObjectFileInfo()->getStackMapSection();
+ OS.switchSection(StackMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
+
+ // Serialize data.
+ LLVM_DEBUG(dbgs() << "********** Stack Map Output **********\n");
+ emitStackmapHeader(OS);
+ emitFunctionFrameRecords(OS);
+ emitConstantPoolEntries(OS);
+ emitCallsiteEntries(OS);
+ OS.addBlankLine();
+
+ // Clean up.
+ CSInfos.clear();
+ ConstPool.clear();
+}
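
Putting the pieces of this file together, the section payload is simply the three counts followed by the three arrays, in the order the emit* helpers are called above. The sketch below summarizes that order and mirrors the 24-byte function frame record; the struct name is illustrative rather than an LLVM type, and the version/reserved header bytes that precede the counts are emitted earlier in emitStackmapHeader (not shown in this hunk), so they are only assumed here.

#include <cstdint>

// Payload order written by serializeToStackMapSection via the helpers above:
//   uint32 NumFunctions, uint32 NumConstants, uint32 NumRecords,
//   StkSizeRecord[NumFunctions], int64 Constants[NumConstants],
//   StkMapRecord[NumRecords] (variable-length, 8-byte aligned).
// Illustrative mirror of the function frame record.
struct StkSizeRecord {
  uint64_t FunctionAddress; // OS.emitSymbolValue(FR.first, 8)
  uint64_t StackSize;       // FR.second.StackSize
  uint64_t RecordCount;     // FR.second.RecordCount
};
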
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 000000000000..387b653f8815
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,660 @@
+//===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-protector"
+
+STATISTIC(NumFunProtected, "Number of functions protected");
+STATISTIC(NumAddrTaken, "Number of local variables that have their address"
+ " taken.");
+
+static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
+ cl::init(true), cl::Hidden);
+static cl::opt<bool> DisableCheckNoReturn("disable-check-noreturn-call",
+ cl::init(false), cl::Hidden);
+
+char StackProtector::ID = 0;
+
+StackProtector::StackProtector() : FunctionPass(ID) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
+ "Insert stack protectors", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE,
+ "Insert stack protectors", false, true)
+
+FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); }
+
+void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ Trip = TM->getTargetTriple();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ HasPrologue = false;
+ HasIRCheck = false;
+
+ SSPBufferSize = Fn.getFnAttributeAsParsedInteger(
+ "stack-protector-buffer-size", DefaultSSPBufferSize);
+ if (!requiresStackProtector(F, &Layout))
+ return false;
+
+ // TODO(etienneb): Functions with funclets are not correctly supported now.
+ // Do nothing if this is a funclet-based personality.
+ if (Fn.hasPersonalityFn()) {
+ EHPersonality Personality = classifyEHPersonality(Fn.getPersonalityFn());
+ if (isFuncletEHPersonality(Personality))
+ return false;
+ }
+
+ ++NumFunProtected;
+ bool Changed = InsertStackProtectors();
+#ifdef EXPENSIVE_CHECKS
+ assert((!DTU ||
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)) &&
+ "Failed to maintain validity of domtree!");
+#endif
+ DTU.reset();
+ return Changed;
+}
+
+/// \param [out] IsLarge is set to true if a protectable array is found and
+/// it is "large" ( >= ssp-buffer-size). In the case of a structure with
+/// multiple arrays, this gets set if any of them is large.
+static bool ContainsProtectableArray(Type *Ty, Module *M, unsigned SSPBufferSize,
+ bool &IsLarge, bool Strong,
+ bool InStruct) {
+ if (!Ty)
+ return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ // However, in strong mode any array, regardless of type and size,
+ // triggers a protector.
+ if (!Strong && (InStruct || !Triple(M->getTargetTriple()).isOSDarwin()))
+ return false;
+ }
+
+ // If an array has at least SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (SSPBufferSize <= M->getDataLayout().getTypeAllocSize(AT)) {
+ IsLarge = true;
+ return true;
+ }
+
+ if (Strong)
+ // Require a protector for all arrays in strong mode
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST)
+ return false;
+
+ bool NeedsProtector = false;
+ for (Type *ET : ST->elements())
+ if (ContainsProtectableArray(ET, M, SSPBufferSize, IsLarge, Strong, true)) {
+ // If the element is a protectable array and is large (>= SSPBufferSize)
+ // then we are done. If the protectable array is not large, then
+ // keep looking in case a subsequent element is a large array.
+ if (IsLarge)
+ return true;
+ NeedsProtector = true;
+ }
+
+ return NeedsProtector;
+}
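
As a concrete illustration of this heuristic, consider the hypothetical locals below (assuming they are lowered to ordinary allocas of the corresponding types, and that the stack-protector-buffer-size threshold is no larger than 64 bytes); the comments restate how ContainsProtectableArray would classify each one.

// Hypothetical source-level locals and how ContainsProtectableArray would
// classify their alloca'd types (assuming a buffer-size threshold <= 64).
void example() {
  char big[64];              // char array >= threshold: protected, IsLarge.
  char tiny[2];              // small char array: protected only under Strong.
  int nums[64];              // non-char array: protected under Strong
                             // (and, when large, on Darwin targets).
  struct { char s[64]; } w;  // aggregate with a large char buffer: protected.
  (void)big; (void)tiny; (void)nums; (void)w;
}
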
+
+/// Check whether a stack allocation has its address taken.
+static bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize,
+ Module *M,
+ SmallPtrSet<const PHINode *, 16> &VisitedPHIs) {
+ const DataLayout &DL = M->getDataLayout();
+ for (const User *U : AI->users()) {
+ const auto *I = cast<Instruction>(U);
+ // If this instruction accesses memory make sure it doesn't access beyond
+ // the bounds of the allocated object.
+ std::optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
+ if (MemLoc && MemLoc->Size.hasValue() &&
+ !TypeSize::isKnownGE(AllocSize,
+ TypeSize::getFixed(MemLoc->Size.getValue())))
+ return true;
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+ if (AI == cast<StoreInst>(I)->getValueOperand())
+ return true;
+ break;
+ case Instruction::AtomicCmpXchg:
+ // cmpxchg conceptually includes both a load and store from the same
+ // location. So, like store, the value being stored is what matters.
+ if (AI == cast<AtomicCmpXchgInst>(I)->getNewValOperand())
+ return true;
+ break;
+ case Instruction::PtrToInt:
+ if (AI == cast<PtrToIntInst>(I)->getOperand(0))
+ return true;
+ break;
+ case Instruction::Call: {
+ // Ignore intrinsics that do not become real instructions.
+ // TODO: Narrow this to intrinsics that have store-like effects.
+ const auto *CI = cast<CallInst>(I);
+ if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
+ return true;
+ break;
+ }
+ case Instruction::Invoke:
+ return true;
+ case Instruction::GetElementPtr: {
+ // If the GEP offset is out-of-bounds, or is non-constant and so has to be
+ // assumed to be potentially out-of-bounds, then any memory access that
+ // would use it could also be out-of-bounds meaning stack protection is
+ // required.
+ const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(I->getType());
+ APInt Offset(IndexSize, 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return true;
+ TypeSize OffsetSize = TypeSize::Fixed(Offset.getLimitedValue());
+ if (!TypeSize::isKnownGT(AllocSize, OffsetSize))
+ return true;
+ // Adjust AllocSize to be the space remaining after this offset.
+ // We can't subtract a fixed size from a scalable one, so in that case
+ // assume the scalable value is of minimum size.
+ TypeSize NewAllocSize =
+ TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
+ if (HasAddressTaken(I, NewAllocSize, M, VisitedPHIs))
+ return true;
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ if (HasAddressTaken(I, AllocSize, M, VisitedPHIs))
+ return true;
+ break;
+ case Instruction::PHI: {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ const auto *PN = cast<PHINode>(I);
+ if (VisitedPHIs.insert(PN).second)
+ if (HasAddressTaken(PN, AllocSize, M, VisitedPHIs))
+ return true;
+ break;
+ }
+ case Instruction::Load:
+ case Instruction::AtomicRMW:
+ case Instruction::Ret:
+ // These instructions take an address operand, but have load-like or
+ // other innocuous behavior that should not trigger a stack protector.
+ // atomicrmw conceptually has both load and store semantics, but the
+ // value being stored must be integer; so if a pointer is being stored,
+ // we'll catch it in the PtrToInt case above.
+ break;
+ default:
+ // Conservatively return true for any instruction that takes an address
+ // operand, but is not handled above.
+ return true;
+ }
+ }
+ return false;
+}
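
For intuition about what this helper flags (it is only consulted under the strong heuristic in requiresStackProtector below), here is a hypothetical source-level example; the analysis itself walks the IR users of the alloca, so the comments describe the lowered form assumed for each local.

// Hypothetical illustration of HasAddressTaken: the alloca's address must be
// stored, passed to a real call, converted to an integer, etc.; plain loads
// and stores through the variable itself do not count.
void sink(int *);                // assumed external function
void example() {
  int a = 0, b = 0, c = 0;
  int *p = &a;                   // address of 'a' is stored: address taken.
  sink(&b);                      // address of 'b' passed to a call: taken.
  c = 42;                        // 'c' is only accessed directly: not taken.
  (void)p; (void)c;
}
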
+
+/// Search for the first call to the llvm.stackprotector intrinsic and return it
+/// if present.
+static const CallInst *findStackProtectorIntrinsic(Function &F) {
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::stackprotector)
+ return II;
+ return nullptr;
+}
+
+/// Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and strong (sspstrong).
+/// The standard heuristic will add a guard variable to functions that
+/// call alloca with either a variable size or a size >= SSPBufferSize,
+/// functions with character buffers larger than SSPBufferSize, and functions
+/// with aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic will add a guard variable to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
+bool StackProtector::requiresStackProtector(Function *F, SSPLayoutMap *Layout) {
+ Module *M = F->getParent();
+ bool Strong = false;
+ bool NeedsProtector = false;
+
+ // The set of PHI nodes visited when determining if a variable's reference has
+ // been taken. This set is maintained to ensure we don't visit the same PHI
+ // node multiple times.
+ SmallPtrSet<const PHINode *, 16> VisitedPHIs;
+
+ unsigned SSPBufferSize = F->getFnAttributeAsParsedInteger(
+ "stack-protector-buffer-size", DefaultSSPBufferSize);
+
+ if (F->hasFnAttribute(Attribute::SafeStack))
+ return false;
+
+ // We are constructing the OptimizationRemarkEmitter on the fly rather than
+ // using the analysis pass to avoid building DominatorTree and LoopInfo which
+ // are not available this late in the IR pipeline.
+ OptimizationRemarkEmitter ORE(F);
+
+ if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ if (!Layout)
+ return true;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a function attribute or command-line switch";
+ });
+ NeedsProtector = true;
+ Strong = true; // Use the same heuristic as strong to determine SSPLayout
+ } else if (F->hasFnAttribute(Attribute::StackProtectStrong))
+ Strong = true;
+ else if (!F->hasFnAttribute(Attribute::StackProtect))
+ return false;
+
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (AI->isArrayAllocation()) {
+ auto RemarkBuilder = [&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
+ &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a call to alloca or use of a variable length "
+ "array";
+ };
+ if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
+ // A call to alloca with size >= SSPBufferSize requires
+ // stack protectors.
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_LargeArray));
+ ORE.emit(RemarkBuilder);
+ NeedsProtector = true;
+ } else if (Strong) {
+ // Require protectors for all alloca calls in strong mode.
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_SmallArray));
+ ORE.emit(RemarkBuilder);
+ NeedsProtector = true;
+ }
+ } else {
+ // A call to alloca with a variable size requires protectors.
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_LargeArray));
+ ORE.emit(RemarkBuilder);
+ NeedsProtector = true;
+ }
+ continue;
+ }
+
+ bool IsLarge = false;
+ if (ContainsProtectableArray(AI->getAllocatedType(), M, SSPBufferSize,
+ IsLarge, Strong, false)) {
+ if (!Layout)
+ return true;
+ Layout->insert(std::make_pair(
+ AI, IsLarge ? MachineFrameInfo::SSPLK_LargeArray
+ : MachineFrameInfo::SSPLK_SmallArray));
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a stack allocated buffer or struct containing a "
+ "buffer";
+ });
+ NeedsProtector = true;
+ continue;
+ }
+
+ if (Strong &&
+ HasAddressTaken(
+ AI, M->getDataLayout().getTypeAllocSize(AI->getAllocatedType()),
+ M, VisitedPHIs)) {
+ ++NumAddrTaken;
+ if (!Layout)
+ return true;
+ Layout->insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",
+ &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to the address of a local variable being taken";
+ });
+ NeedsProtector = true;
+ }
+ // Clear any PHIs that we visited, to make sure we examine all uses of
+ // any subsequent allocas that we look at.
+ VisitedPHIs.clear();
+ }
+ }
+ }
+
+ return NeedsProtector;
+}
+
+/// Create a stack guard loading and populate whether SelectionDAG SSP is
+/// supported.
+static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
+ IRBuilder<> &B,
+ bool *SupportsSelectionDAGSP = nullptr) {
+ Value *Guard = TLI->getIRStackGuard(B);
+ StringRef GuardMode = M->getStackProtectorGuard();
+ if ((GuardMode == "tls" || GuardMode.empty()) && Guard)
+ return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
+
+ // Use SelectionDAG SSP handling, since there isn't an IR guard.
+ //
+ // This is somewhat awkward, because we optionally report here whether a
+ // SelectionDAG stack protector should be used. The reason is that this bit
+ // is strictly defined as !TLI->getIRStackGuard(B), and getIRStackGuard may
+ // mutate the IR. There is no way to compute the bit without mutating the
+ // IR, so it has to be computed at exactly this point.
+ //
+ // We could have defined a new function TLI::supportsSelectionDAGSP(), but
+ // that would put more burden on the backends' overriding work, especially
+ // when it conveys the same information getIRStackGuard() already gives.
+ if (SupportsSelectionDAGSP)
+ *SupportsSelectionDAGSP = true;
+ TLI->insertSSPDeclarations(*M);
+ return B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard));
+}
+
+/// Insert code into the entry block that stores the stack guard
+/// variable onto the stack:
+///
+/// entry:
+/// StackGuardSlot = alloca i8*
+/// StackGuard = <stack guard>
+/// call void @llvm.stackprotector(StackGuard, StackGuardSlot)
+///
+/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
+/// node.
+static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc,
+ const TargetLoweringBase *TLI, AllocaInst *&AI) {
+ bool SupportsSelectionDAGSP = false;
+ IRBuilder<> B(&F->getEntryBlock().front());
+ PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext());
+ AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
+
+ Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
+ B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ {GuardSlot, AI});
+ return SupportsSelectionDAGSP;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ // If the target wants to XOR the frame pointer into the guard value, it's
+ // impossible to emit the check in IR, so the target *must* support stack
+ // protection in SDAG.
+ bool SupportsSelectionDAGSP =
+ TLI->useStackGuardXorFP() ||
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+ BasicBlock *FailBB = nullptr;
+
+ for (BasicBlock &BB : llvm::make_early_inc_range(*F)) {
+ // This is the stack-protector auto-generated check BB; skip it.
+ if (&BB == FailBB)
+ continue;
+ Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (!CheckLoc && !DisableCheckNoReturn)
+ for (auto &Inst : BB)
+ if (auto *CB = dyn_cast<CallBase>(&Inst))
+ // Do stack check before noreturn calls that aren't nounwind (e.g:
+ // __cxa_throw).
+ if (CB->doesNotReturn() && !CB->doesNotThrow()) {
+ CheckLoc = CB;
+ break;
+ }
+
+ if (!CheckLoc)
+ continue;
+
+ // Generate prologue instrumentation if not already generated.
+ if (!HasPrologue) {
+ HasPrologue = true;
+ SupportsSelectionDAGSP &= CreatePrologue(F, M, CheckLoc, TLI, AI);
+ }
+
+ // SelectionDAG based code generation. Nothing else needs to be done here.
+ // The epilogue instrumentation is postponed to SelectionDAG.
+ if (SupportsSelectionDAGSP)
+ break;
+
+ // Find the stack guard slot if the prologue was not created by this pass
+ // itself via a previous call to CreatePrologue().
+ if (!AI) {
+ const CallInst *SPCall = findStackProtectorIntrinsic(*F);
+ assert(SPCall && "Call to llvm.stackprotector is missing");
+ AI = cast<AllocaInst>(SPCall->getArgOperand(1));
+ }
+
+ // Set HasIRCheck to true, so that SelectionDAG will not generate its own
+ // version. SelectionDAG calls 'shouldEmitSDCheck' to check whether
+ // instrumentation has already been generated.
+ HasIRCheck = true;
+
+ // If we're instrumenting a block with a tail call, the check has to be
+ // inserted before the call rather than between it and the return. The
+ // verifier guarantees that a tail call is either directly before the
+ // return or with a single correct bitcast of the return value in between so
+ // we don't need to worry about many situations here.
+ Instruction *Prev = CheckLoc->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall())
+ CheckLoc = Prev;
+ else if (Prev) {
+ Prev = Prev->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall())
+ CheckLoc = Prev;
+ }
+
+ // Generate epilogue instrumentation. The epilogue instrumentation can be
+ // function-based or inlined depending on which mechanism the target is
+ // providing.
+ if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ // Generate the function-based epilogue instrumentation.
+ // The target provides a guard check function, generate a call to it.
+ IRBuilder<> B(CheckLoc);
+ LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
+ CallInst *Call = B.CreateCall(GuardCheck, {Guard});
+ Call->setAttributes(GuardCheck->getAttributes());
+ Call->setCallingConv(GuardCheck->getCallingConv());
+ } else {
+ // Generate the epilogue with inline instrumentation.
+ // If we do not support SelectionDAG based calls, generate IR level
+ // calls.
+ //
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = <stack guard>
+ // %2 = load StackGuardSlot
+ // %3 = icmp ne i1 %1, %2
+ // br i1 %3, label %CallStackCheckFailBlk, label %SP_return
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Create the FailBB. We duplicate the BB every time since the MI tail
+ // merge pass will merge together all of the various BB into one including
+ // fail BB generated by the stack protector pseudo instruction.
+ if (!FailBB)
+ FailBB = CreateFailBB();
+
+ IRBuilder<> B(CheckLoc);
+ Value *Guard = getStackGuard(TLI, M, B);
+ LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
+ auto *Cmp = cast<ICmpInst>(B.CreateICmpNE(Guard, LI2));
+ auto SuccessProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(false);
+ MDNode *Weights = MDBuilder(F->getContext())
+ .createBranchWeights(FailureProb.getNumerator(),
+ SuccessProb.getNumerator());
+
+ SplitBlockAndInsertIfThen(Cmp, CheckLoc,
+ /*Unreachable=*/false, Weights,
+ DTU ? &*DTU : nullptr,
+ /*LI=*/nullptr, /*ThenBlock=*/FailBB);
+
+ auto *BI = cast<BranchInst>(Cmp->getParent()->getTerminator());
+ BasicBlock *NewBB = BI->getSuccessor(1);
+ NewBB->setName("SP_return");
+ NewBB->moveAfter(&BB);
+
+ Cmp->setPredicate(Cmp->getInversePredicate());
+ BI->swapSuccessors();
+ }
+ }
+
+ // Return if we didn't modify any basic blocks. i.e., there are no return
+ // statements in the function.
+ return HasPrologue;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ LLVMContext &Context = F->getContext();
+ BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
+ IRBuilder<> B(FailBB);
+ if (F->getSubprogram())
+ B.SetCurrentDebugLocation(
+ DILocation::get(Context, 0, 0, F->getSubprogram()));
+ FunctionCallee StackChkFail;
+ SmallVector<Value *, 1> Args;
+ if (Trip.isOSOpenBSD()) {
+ StackChkFail = M->getOrInsertFunction("__stack_smash_handler",
+ Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context));
+ Args.push_back(B.CreateGlobalStringPtr(F->getName(), "SSH"));
+ } else {
+ StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
+ }
+ cast<Function>(StackChkFail.getCallee())->addFnAttr(Attribute::NoReturn);
+ B.CreateCall(StackChkFail, Args);
+ B.CreateUnreachable();
+ return FailBB;
+}
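
The failure path therefore resolves to one of two runtime entry points. In C++ terms their expected shapes are roughly the following; this is a sketch of the contract implied by the code above (the pass marks the callee noreturn and follows the call with unreachable), not a declaration copied from any particular C library.

// Runtime hooks targeted by CreateFailBB (sketch of the implied contract).
extern "C" {
[[noreturn]] void __stack_chk_fail(void);              // default target
[[noreturn]] void __stack_smash_handler(const char *); // OpenBSD; receives the
                                                       // function name string
}
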
+
+bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
+ return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator());
+}
+
+void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const {
+ if (Layout.empty())
+ return;
+
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ const AllocaInst *AI = MFI.getObjectAllocation(I);
+ if (!AI)
+ continue;
+
+ SSPLayoutMap::const_iterator LI = Layout.find(AI);
+ if (LI == Layout.end())
+ continue;
+
+ MFI.setObjectSSPLayout(I, LI->second);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 000000000000..6d933ab12041
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,550 @@
+//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-slot-coloring"
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+
+ class StackSlotColoring : public MachineFunctionPass {
+ LiveStacks *LS = nullptr;
+ MachineFrameInfo *MFI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of MachineMemOperands for each spill slot.
+ // MachineMemOperands can be shared between instructions, so we need
+ // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not
+ // become FI0 -> FI1 -> FI2.
+ SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<Align, 16> OrigAlignments;
+
+ // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+ // FIXME: This assumes PEI locates spill slots with smaller indices
+ // closest to the stack pointer / frame pointer. Therefore, a smaller
+ // index == a better color. This is per stack ID.
+ SmallVector<BitVector, 2> AllColors;
+
+ // NextColor - Next "color" that's not yet used. This is per stack ID.
+ SmallVector<int, 2> NextColors = { -1 };
+
+ // UsedColors - "Colors" that have been assigned. This is per stack ID
+ SmallVector<BitVector, 2> UsedColors;
+
+ // Join all intervals sharing one color into a single LiveIntervalUnion to
+ // speedup range overlap test.
+ class ColorAssignmentInfo {
+ // Single liverange (used to avoid creation of LiveIntervalUnion).
+ LiveInterval *SingleLI = nullptr;
+ // LiveIntervalUnion to perform overlap test.
+ LiveIntervalUnion *LIU = nullptr;
+ // LiveIntervalUnion's constructor takes an allocator parameter, so we
+ // construct it lazily with placement new into LIUPad ("dirty magic").
+ uint8_t LIUPad[sizeof(LiveIntervalUnion)];
+
+ public:
+ ~ColorAssignmentInfo() {
+ if (LIU)
+ LIU->~LiveIntervalUnion(); // Dirty magic again.
+ }
+
+ // Return true if LiveInterval overlaps with any
+ // intervals that have already been assigned to this color.
+ bool overlaps(LiveInterval *LI) const {
+ if (LIU)
+ return LiveIntervalUnion::Query(*LI, *LIU).checkInterference();
+ return SingleLI ? SingleLI->overlaps(*LI) : false;
+ }
+
+ // Add new LiveInterval to this color.
+ void add(LiveInterval *LI, LiveIntervalUnion::Allocator &Alloc) {
+ assert(!overlaps(LI));
+ if (LIU) {
+ LIU->unify(*LI, *LI);
+ } else if (SingleLI) {
+ LIU = new (LIUPad) LiveIntervalUnion(Alloc);
+ LIU->unify(*SingleLI, *SingleLI);
+ LIU->unify(*LI, *LI);
+ SingleLI = nullptr;
+ } else
+ SingleLI = LI;
+ }
+ };
+
+ LiveIntervalUnion::Allocator LIUAlloc;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<ColorAssignmentInfo, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+
+ StackSlotColoring() : MachineFunctionPass(ID) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
+ MachineFunction &MF);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
+ "Stack Slot Coloring", false, false)
+
+namespace {
+
+// IntervalSorter - Comparison predicate that sort live intervals by
+// their weight.
+struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight() > RHS->weight();
+ }
+};
+
+} // end anonymous namespace
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ if (!MI.isDebugInstr())
+ li.incrementWeight(
+ LiveIntervals::getSpillWeight(false, true, MBFI, MI));
+ }
+ for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(),
+ EE = MI.memoperands_end();
+ MMOI != EE; ++MMOI) {
+ MachineMemOperand *MMO = *MMOI;
+ if (const FixedStackPseudoSourceValue *FSV =
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MMO->getPseudoValue())) {
+ int FI = FSV->getFrameIndex();
+ if (FI >= 0)
+ SSRefs[FI].push_back(MMO);
+ }
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+
+ // There is always at least one stack ID.
+ AllColors.resize(1);
+ UsedColors.resize(1);
+
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors[0].resize(LastFI);
+ UsedColors[0].resize(LastFI);
+ Assignments.resize(LastFI);
+
+ using Pair = std::iterator_traits<LiveStacks::iterator>::value_type;
+
+ SmallVector<Pair *, 16> Intervals;
+
+ Intervals.reserve(LS->getNumIntervals());
+ for (auto &I : *LS)
+ Intervals.push_back(&I);
+ llvm::sort(Intervals,
+ [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
+
+ // Gather all spill slots into a list.
+ LLVM_DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (auto *I : Intervals) {
+ LiveInterval &li = I->second;
+ LLVM_DEBUG(li.dump());
+ int FI = Register::stackSlot2Index(li.reg());
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlign(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+
+ auto StackID = MFI->getStackID(FI);
+ if (StackID != 0) {
+ AllColors.resize(StackID + 1);
+ UsedColors.resize(StackID + 1);
+ AllColors[StackID].resize(LastFI);
+ UsedColors[StackID].resize(LastFI);
+ }
+
+ AllColors[StackID].set(FI);
+ }
+ LLVM_DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ llvm::stable_sort(SSIntervals, IntervalSorter());
+
+ NextColors.resize(AllColors.size());
+
+ // Get first "color".
+ for (unsigned I = 0, E = AllColors.size(); I != E; ++I)
+ NextColors[I] = AllColors[I].find_first();
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ int FI = Register::stackSlot2Index(li->reg());
+ uint8_t StackID = MFI->getStackID(FI);
+
+ if (!DisableSharing) {
+
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors[StackID].find_first();
+ while (Color != -1) {
+ if (!Assignments[Color].overlaps(li)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors[StackID].find_next(Color);
+ }
+ }
+
+ if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) {
+ LLVM_DEBUG(dbgs() << "cannot share FIs with different stack IDs\n");
+ Share = false;
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColors[StackID] != -1 && "No more spill slots?");
+ Color = NextColors[StackID];
+ UsedColors[StackID].set(Color);
+ NextColors[StackID] = AllColors[StackID].find_next(NextColors[StackID]);
+ }
+
+ assert(MFI->getStackID(Color) == MFI->getStackID(FI));
+
+ // Record the assignment.
+ Assignments[Color].add(li, LIUAlloc);
+ LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ Align Alignment = OrigAlignments[FI];
+ if (!Share || Alignment > MFI->getObjectAlign(Color))
+ MFI->setObjectAlignment(Color, Alignment);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
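
The assignment strategy above is essentially first-fit over intervals sorted by weight: reuse an already-used color whose assigned intervals do not overlap the candidate, otherwise open the next fresh color. A self-contained toy restatement of that idea follows; it ignores stack IDs, sizes and alignments, and uses a naive pairwise overlap test in place of LiveIntervalUnion, so it is a sketch of the approach rather than the pass itself.

#include <algorithm>
#include <cstddef>
#include <vector>

// Toy model: an interval is [Start, End) with a spill weight.
struct ToyInterval { unsigned Start, End; float Weight; };

static bool overlaps(const ToyInterval &A, const ToyInterval &B) {
  return A.Start < B.End && B.Start < A.End;
}

// Returns Colors[i] = slot assigned to Intervals[i], first-fit by weight.
std::vector<unsigned> colorSlots(std::vector<ToyInterval> Intervals) {
  std::vector<size_t> Order(Intervals.size());
  for (size_t i = 0; i != Order.size(); ++i) Order[i] = i;
  std::stable_sort(Order.begin(), Order.end(), [&](size_t L, size_t R) {
    return Intervals[L].Weight > Intervals[R].Weight; // heaviest first
  });

  std::vector<std::vector<ToyInterval>> Assigned; // intervals per color
  std::vector<unsigned> Colors(Intervals.size());
  for (size_t Idx : Order) {
    unsigned Color = 0;
    for (; Color != Assigned.size(); ++Color)
      if (std::none_of(Assigned[Color].begin(), Assigned[Color].end(),
                       [&](const ToyInterval &I) {
                         return overlaps(I, Intervals[Idx]);
                       }))
        break;                     // reuse this color
    if (Color == Assigned.size())
      Assigned.emplace_back();     // open a fresh color
    Assigned[Color].push_back(Intervals[Idx]);
    Colors[Idx] = Color;
  }
  return Colors;
}
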
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (LiveInterval *li : SSIntervals) {
+ int SS = Register::stackSlot2Index(li->reg());
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight();
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (LiveInterval *li : SSIntervals) {
+ int SS = Register::stackSlot2Index(li->reg());
+ li->setWeight(SlotWeights[SS]);
+ }
+ // Sort them by new weight.
+ llvm::stable_sort(SSIntervals, IntervalSorter());
+
+#ifndef NDEBUG
+ for (LiveInterval *li : SSIntervals)
+ LLVM_DEBUG(li->dump());
+ LLVM_DEBUG(dbgs() << '\n');
+#endif
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MachineMemOperands.
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS))
+ continue;
+
+ const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI);
+ SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
+ RefMMOs[i]->setValue(NewSV);
+ }
+
+ // Rewrite all MO_FrameIndex operands. Look for dead stores.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB)
+ RewriteInstruction(MI, SlotMapping, MF);
+ RemoveDeadStores(&MBB);
+ }
+
+ // Delete unused stack slots.
+ for (int StackID = 0, E = AllColors.size(); StackID != E; ++StackID) {
+ int NextColor = NextColors[StackID];
+ while (NextColor != -1) {
+ LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors[StackID].find_next(NextColor);
+ }
+ }
+
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
+ SmallVectorImpl<int> &SlotMapping,
+ MachineFunction &MF) {
+ // Update the operands.
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int OldFI = MO.getIndex();
+ if (OldFI < 0)
+ continue;
+ int NewFI = SlotMapping[OldFI];
+ if (NewFI == -1 || NewFI == OldFI)
+ continue;
+
+ assert(MFI->getStackID(OldFI) == MFI->getStackID(NewFI));
+ MO.setIndex(NewFI);
+ }
+
+ // The MachineMemOperands have already been updated.
+}
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+ int FirstSS, SecondSS;
+ if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
+ FirstSS != -1) {
+ ++NumDead;
+ changed = true;
+ toErase.push_back(&*I);
+ continue;
+ }
+
+ MachineBasicBlock::iterator NextMI = std::next(I);
+ MachineBasicBlock::iterator ProbableLoadMI = I;
+
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ unsigned LoadSize = 0;
+ unsigned StoreSize = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))
+ continue;
+ // Skip the ...pseudo debugging... instructions between a load and store.
+ while ((NextMI != E) && NextMI->isDebugInstr()) {
+ ++NextMI;
+ ++I;
+ }
+ if (NextMI == E) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
+ continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
+ LoadSize != StoreSize)
+ continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
+ ++NumDead;
+ toErase.push_back(&*ProbableLoadMI);
+ }
+
+ toErase.push_back(&*NextMI);
+ ++I;
+ }
+
+ for (MachineInstr *MI : toErase)
+ MI->eraseFromParent();
+
+ return changed;
+}
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MFI = &MF.getFrameInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ LS = &getAnalysis<LiveStacks>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
+
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.exposesReturnsTwice())
+ return false;
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ for (int &Next : NextColors)
+ Next = -1;
+
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ Assignments.clear();
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
new file mode 100644
index 000000000000..83a7063de112
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -0,0 +1,311 @@
+//===-- SwiftErrorValueTracking.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+Register SwiftErrorValueTracking::getOrCreateVReg(const MachineBasicBlock *MBB,
+ const Value *Val) {
+ auto Key = std::make_pair(MBB, Val);
+ auto It = VRegDefMap.find(Key);
+ // If this is the first use of this swifterror value in this basic block,
+ // create a new virtual register.
+ // After we have processed all basic blocks, we will satisfy this "upwards exposed
+ // use" by inserting a copy or phi at the beginning of this block.
+ if (It == VRegDefMap.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefMap[Key] = VReg;
+ VRegUpwardsUse[Key] = VReg;
+ return VReg;
+ } else
+ return It->second;
+}
+
+void SwiftErrorValueTracking::setCurrentVReg(const MachineBasicBlock *MBB,
+ const Value *Val, Register VReg) {
+ VRegDefMap[std::make_pair(MBB, Val)] = VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegDefAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefUses[Key] = VReg;
+ setCurrentVReg(MBB, Val, VReg);
+ return VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegUseAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ Register VReg = getOrCreateVReg(MBB, Val);
+ VRegDefUses[Key] = VReg;
+ return VReg;
+}
+
+/// Set up SwiftErrorVals by going through the function. If the function has
+/// a swifterror argument, it will be the first entry.
+void SwiftErrorValueTracking::setFunction(MachineFunction &mf) {
+ MF = &mf;
+ Fn = &MF->getFunction();
+ TLI = MF->getSubtarget().getTargetLowering();
+ TII = MF->getSubtarget().getInstrInfo();
+
+ if (!TLI->supportSwiftError())
+ return;
+
+ SwiftErrorVals.clear();
+ VRegDefMap.clear();
+ VRegUpwardsUse.clear();
+ VRegDefUses.clear();
+ SwiftErrorArg = nullptr;
+
+ // Check if function has a swifterror argument.
+ bool HaveSeenSwiftErrorArg = false;
+ for (Function::const_arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+ AI != AE; ++AI)
+ if (AI->hasSwiftErrorAttr()) {
+ assert(!HaveSeenSwiftErrorArg &&
+ "Must have only one swifterror parameter");
+ (void)HaveSeenSwiftErrorArg; // silence warning.
+ HaveSeenSwiftErrorArg = true;
+ SwiftErrorArg = &*AI;
+ SwiftErrorVals.push_back(&*AI);
+ }
+
+ for (const auto &LLVMBB : *Fn)
+ for (const auto &Inst : LLVMBB) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+ if (Alloca->isSwiftError())
+ SwiftErrorVals.push_back(Alloca);
+ }
+}
+
+bool SwiftErrorValueTracking::createEntriesInEntryBlock(DebugLoc DbgLoc) {
+ if (!TLI->supportSwiftError())
+ return false;
+
+ // We only need to do this when we have swifterror parameter or swifterror
+ // alloc.
+ if (SwiftErrorVals.empty())
+ return false;
+
+ MachineBasicBlock *MBB = &*MF->begin();
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ bool Inserted = false;
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (SwiftErrorArg && SwiftErrorArg == SwiftErrorVal)
+ continue;
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DbgLoc,
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+
+ setCurrentVReg(MBB, SwiftErrorVal, VReg);
+ Inserted = true;
+ }
+
+ return Inserted;
+}
+
+/// Propagate swifterror values through the machine function CFG.
+void SwiftErrorValueTracking::propagateVRegs() {
+ if (!TLI->supportSwiftError())
+ return;
+
+ // We only need to do this when we have swifterror parameter or swifterror
+ // alloc.
+ if (SwiftErrorVals.empty())
+ return;
+
+ // For each machine basic block in reverse post order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // For each swifterror value in the function.
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ auto Key = std::make_pair(MBB, SwiftErrorVal);
+ auto UUseIt = VRegUpwardsUse.find(Key);
+ auto VRegDefIt = VRegDefMap.find(Key);
+ bool UpwardsUse = UUseIt != VRegUpwardsUse.end();
+ Register UUseVReg = UpwardsUse ? UUseIt->second : Register();
+ bool DownwardDef = VRegDefIt != VRegDefMap.end();
+ assert(!(UpwardsUse && !DownwardDef) &&
+ "We can't have an upwards use but no downwards def");
+
+ // If there is no upwards exposed use and an entry for the swifterror in
+ // the def map for this value we don't need to do anything: We already
+ // have a downward def for this basic block.
+ if (!UpwardsUse && DownwardDef)
+ continue;
+
+ // Otherwise we either have an upwards exposed use vreg that we need to
+ // materialize or need to forward the downward def from predecessors.
+
+ // Check whether we have a single vreg def from all predecessors.
+ // Otherwise we need a phi.
+ SmallVector<std::pair<MachineBasicBlock *, Register>, 4> VRegs;
+ SmallSet<const MachineBasicBlock *, 8> Visited;
+ for (auto *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+ VRegs.push_back(std::make_pair(
+ Pred, getOrCreateVReg(Pred, SwiftErrorVal)));
+ if (Pred != MBB)
+ continue;
+ // We have a self-edge.
+ // If there was no upwards use in this basic block, there is now one: the
+ // phi needs to use itself.
+ if (!UpwardsUse) {
+ UpwardsUse = true;
+ UUseIt = VRegUpwardsUse.find(Key);
+ assert(UUseIt != VRegUpwardsUse.end());
+ UUseVReg = UUseIt->second;
+ }
+ }
+
+ // We need a phi node if we have more than one predecessor with different
+ // downward defs.
+ bool needPHI =
+ VRegs.size() >= 1 &&
+ llvm::any_of(
+ VRegs,
+ [&](const std::pair<const MachineBasicBlock *, Register> &V)
+ -> bool { return V.second != VRegs[0].second; });
+
+ // If there is no upwards exposed use and we don't need a phi, just
+ // forward the swifterror vreg from the predecessor(s).
+ if (!UpwardsUse && !needPHI) {
+ assert(!VRegs.empty() &&
+ "No predecessors? The entry block should bail out earlier");
+ // Just forward the swifterror vreg from the predecessor(s).
+ setCurrentVReg(MBB, SwiftErrorVal, VRegs[0].second);
+ continue;
+ }
+
+ auto DLoc = isa<Instruction>(SwiftErrorVal)
+ ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ : DebugLoc();
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+
+ // If we don't need a phi create a copy to the upward exposed vreg.
+ if (!needPHI) {
+ assert(UpwardsUse);
+ assert(!VRegs.empty() &&
+ "No predecessors? Is the Calling Convention correct?");
+ Register DestReg = UUseVReg;
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+ DestReg)
+ .addReg(VRegs[0].second);
+ continue;
+ }
+
+ // We need a phi: if there is an upwards exposed use we already have a
+ // destination virtual register number otherwise we generate a new one.
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register PHIVReg =
+ UpwardsUse ? UUseVReg : MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder PHI =
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (auto BBRegPair : VRegs) {
+ PHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+ }
+
+ // We did not have a definition in this block before: store the phi's vreg
+ // as this block downward exposed def.
+ if (!UpwardsUse)
+ setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
+ }
+ }
+}
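
The core decision in the loop above, forwarding a single predecessor vreg versus placing a PHI because the predecessors disagree, boils down to a small predicate. A toy restatement follows; the name and types are illustrative, not LLVM API.

#include <vector>

// A PHI is needed only when the incoming swifterror vregs differ between
// predecessors; with zero or one distinct value a plain forward/copy suffices.
bool needsPhi(const std::vector<unsigned> &PredVRegs) {
  for (unsigned VReg : PredVRegs)   // loop body never runs for an empty list
    if (VReg != PredVRegs.front())
      return true;
  return false;
}
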
+
+void SwiftErrorValueTracking::preassignVRegs(
+ MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
+ if (!TLI->supportSwiftError() || SwiftErrorVals.empty())
+ return;
+
+ // Iterate over instructions and assign vregs to swifterror defs and uses.
+ for (auto It = Begin; It != End; ++It) {
+ if (auto *CB = dyn_cast<CallBase>(&*It)) {
+ // A call-site with a swifterror argument is both use and def.
+ const Value *SwiftErrorAddr = nullptr;
+ for (const auto &Arg : CB->args()) {
+ if (!Arg->isSwiftError())
+ continue;
+ // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+ SwiftErrorAddr = &*Arg;
+ assert(SwiftErrorAddr->isSwiftError() &&
+ "Must have a swifterror value argument");
+ getOrCreateVRegUseAt(&*It, MBB, SwiftErrorAddr);
+ }
+ if (!SwiftErrorAddr)
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A load is a use.
+ } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+ const Value *V = LI->getOperand(0);
+ if (!V->isSwiftError())
+ continue;
+
+ getOrCreateVRegUseAt(LI, MBB, V);
+
+ // A store is a def.
+ } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+ const Value *SwiftErrorAddr = SI->getOperand(1);
+ if (!SwiftErrorAddr->isSwiftError())
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A return in a swifterror-returning function is a use.
+ } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+ const Function *F = R->getParent()->getParent();
+ if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ continue;
+
+ getOrCreateVRegUseAt(R, MBB, SwiftErrorArg);
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
new file mode 100644
index 000000000000..36a02d5beb4b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -0,0 +1,494 @@
+//===- SwitchLoweringUtils.cpp - Switch Lowering --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains switch inst lowering optimizations and utilities for
+// codegen, so that it can be used for both SelectionDAG and GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using namespace SwitchCG;
+
+uint64_t SwitchCG::getJumpTableRange(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last) {
+ assert(Last >= First);
+ const APInt &LowCase = Clusters[First].Low->getValue();
+ const APInt &HighCase = Clusters[Last].High->getValue();
+ assert(LowCase.getBitWidth() == HighCase.getBitWidth());
+
+ // FIXME: A range of consecutive cases has 100% density, but only requires one
+ // comparison to lower. We should discriminate against such consecutive ranges
+ // in jump tables.
+ return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
+}
+
+uint64_t
+SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last) {
+ assert(Last >= First);
+ assert(TotalCases[Last] >= TotalCases[First]);
+ uint64_t NumCases =
+ TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
+ return NumCases;
+}
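
As a concrete (made-up) example of these two quantities: clusters covering the case values 1-2, 5, and 9-10 give NumCases = 5 but Range = 10, so a jump table over them would be only 50% dense. The sketch below recomputes both numbers by hand rather than through the cluster/TotalCases machinery used by the pass.

#include <cassert>
#include <cstdint>

int main() {
  struct Cluster { int64_t Low, High; };
  const Cluster Clusters[] = {{1, 2}, {5, 5}, {9, 10}};
  uint64_t NumCases = 0;
  for (const Cluster &C : Clusters)
    NumCases += uint64_t(C.High - C.Low) + 1;           // 2 + 1 + 2 = 5
  uint64_t Range =
      uint64_t(Clusters[2].High - Clusters[0].Low) + 1; // 10 - 1 + 1 = 10
  assert(NumCases == 5 && Range == 10);                 // 50% dense
  (void)NumCases; (void)Range;
  return 0;
}
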
+
+void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+#ifndef NDEBUG
+ // Clusters must be non-empty, sorted, and only contain Range clusters.
+ assert(!Clusters.empty());
+ for (CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range);
+ for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
+ assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ assert(TLI && "TLI not set!");
+ if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
+ return;
+
+ const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
+ const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
+
+ // Bail if not enough cases.
+ const int64_t N = Clusters.size();
+ if (N < 2 || N < MinJumpTableEntries)
+ return;
+
+ // Accumulated number of cases in each cluster and those prior to it.
+ SmallVector<unsigned, 8> TotalCases(N);
+ for (unsigned i = 0; i < N; ++i) {
+ const APInt &Hi = Clusters[i].High->getValue();
+ const APInt &Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
+
+ uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ // Cheap case: the whole range may be suitable for jump table.
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
+ CaseCluster JTCluster;
+ if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ Clusters[0] = JTCluster;
+ Clusters.resize(1);
+ return;
+ }
+ }
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // Split Clusters into minimum number of dense partitions. The algorithm uses
+ // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
+ // for the Case Statement'" (1994), but builds the MinPartitions array in
+ // reverse order to make it easier to reconstruct the partitions in ascending
+ // order. In the choice between two optimal partitionings, it picks the one
+ // which yields more jump tables.
+
+ // MinPartitions[i] is the minimum number of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+ // PartitionsScore[i] is used to break ties when choosing between two
+ // partitionings resulting in the same number of partitions.
+ SmallVector<unsigned, 8> PartitionsScore(N);
+ // For PartitionsScore, a small number of comparisons is considered as good as
+ // a jump table and a single comparison is considered better than a jump
+ // table.
+ enum PartitionScores : unsigned {
+ NoTable = 0,
+ Table = 1,
+ FewCases = 1,
+ SingleCase = 2
+ };
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+ PartitionsScore[N - 1] = PartitionScores::SingleCase;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; i--) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+ PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
+
+ // Search for a solution that results in fewer partitions.
+ for (int64_t j = N - 1; j > i; j--) {
+ // Try building a partition from Clusters[i..j].
+ Range = getJumpTableRange(Clusters, i, j);
+ NumCases = getJumpTableNumCases(TotalCases, i, j);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+ int64_t NumEntries = j - i + 1;
+
+ if (NumEntries == 1)
+ Score += PartitionScores::SingleCase;
+ else if (NumEntries <= SmallNumberOfEntries)
+ Score += PartitionScores::FewCases;
+ else if (NumEntries >= MinJumpTableEntries)
+ Score += PartitionScores::Table;
+
+ // If this leads to fewer partitions, or to the same number of
+ // partitions with better score, it is a better partitioning.
+ if (NumPartitions < MinPartitions[i] ||
+ (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ PartitionsScore[i] = Score;
+ }
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing some with jump tables in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(Last >= First);
+ assert(DstIndex <= First);
+ unsigned NumClusters = Last - First + 1;
+
+ CaseCluster JTCluster;
+ if (NumClusters >= MinJumpTableEntries &&
+ buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+ Clusters[DstIndex++] = JTCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
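+
+// Illustrative walk-through of findJumpTables with made-up values: given five
+// single-case clusters for {0, 1, 2, 3, 1000}, each targeting a distinct
+// block, and a target with MinJumpTableEntries = 4, the full range 0..1000 is
+// far too sparse, but the inner j-loop finds that Clusters[0..3] form a fully
+// dense partition, so MinPartitions[0] = 2 with LastElement[0] = 3 and the
+// final pass emits one jump-table cluster for 0..3 plus a plain range cluster
+// for {1000}.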
+
+bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB,
+ CaseCluster &JTCluster) {
+ assert(First <= Last);
+
+ auto Prob = BranchProbability::getZero();
+ unsigned NumCmps = 0;
+ std::vector<MachineBasicBlock*> Table;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
+ for (unsigned I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Prob += Clusters[I].Prob;
+ const APInt &Low = Clusters[I].Low->getValue();
+ const APInt &High = Clusters[I].High->getValue();
+ NumCmps += (Low == High) ? 1 : 2;
+ if (I != First) {
+ // Fill the gap between this and the previous cluster.
+ const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
+ assert(PreviousHigh.slt(Low));
+ uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
+ for (uint64_t J = 0; J < Gap; J++)
+ Table.push_back(DefaultMBB);
+ }
+ uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
+ for (uint64_t J = 0; J < ClusterSize; ++J)
+ Table.push_back(Clusters[I].MBB);
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
+ }
+
+ unsigned NumDests = JTProbs.size();
+ if (TLI->isSuitableForBitTests(NumDests, NumCmps,
+ Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), *DL)) {
+ // Clusters[First..Last] should be lowered as bit tests instead.
+ return false;
+ }
+
+ // Create the MBB that will load from and jump through the table.
+ // Note: We create it here, but it's not inserted into the function yet.
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *JumpTableMBB =
+ CurMF->CreateMachineBasicBlock(SI->getParent());
+
+ // Add successors. Note: use table order for determinism.
+ SmallPtrSet<MachineBasicBlock *, 8> Done;
+ for (MachineBasicBlock *Succ : Table) {
+ if (Done.count(Succ))
+ continue;
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
+ Done.insert(Succ);
+ }
+ JumpTableMBB->normalizeSuccProbs();
+
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI->getJumpTableEncoding())
+ ->createJumpTableIndex(Table);
+
+ // Set up the jump table info.
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
+ JumpTableHeader JTH(Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), SI->getCondition(),
+ nullptr, false);
+ JTCases.emplace_back(std::move(JTH), std::move(JT));
+
+ JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
+ JTCases.size() - 1, Prob);
+ return true;
+}
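+
+// Illustrative example with made-up clusters: if buildJumpTable is invoked for
+// {1 -> A, 2 -> A, 5 -> B}, the table built above becomes
+// [A, A, Default, Default, B]; the hole 3..4 is filled with DefaultMBB, and
+// JTProbs[A] accumulates the probability of both clusters targeting A before
+// the successors are added in table order.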
+
+void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
+ const SwitchInst *SI) {
+ // Partition Clusters into as few subsets as possible, where each subset has a
+ // range that fits in a machine word and has <= 3 unique destinations.
+
+#ifndef NDEBUG
+ // Clusters must be sorted and contain Range or JumpTable clusters.
+ assert(!Clusters.empty());
+ assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
+ for (const CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
+ for (unsigned i = 1; i < Clusters.size(); ++i)
+ assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ EVT PTy = TLI->getPointerTy(*DL);
+ if (!TLI->isOperationLegal(ISD::SHL, PTy))
+ return;
+
+ int BitWidth = PTy.getSizeInBits();
+ const int64_t N = Clusters.size();
+
+ // MinPartitions[i] is the minimum number of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+
+ // FIXME: This might not be the best algorithm for finding bit test clusters.
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; --i) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+
+ // Search for a solution that results in fewer partitions.
+ // Note: the search is limited by BitWidth, reducing time complexity.
+ for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
+ // Try building a partition from Clusters[i..j].
+
+ // Check the range.
+ if (!TLI->rangeFitsInWord(Clusters[i].Low->getValue(),
+ Clusters[j].High->getValue(), *DL))
+ continue;
+
+ // Check the number of destinations and cluster types.
+ // FIXME: This works, but doesn't seem very efficient.
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ size_t NumClusters = Last - First + 1;
+ std::memmove(&Clusters[DstIndex], &Clusters[First],
+ sizeof(Clusters[0]) * NumClusters);
+ DstIndex += NumClusters;
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ CaseCluster &BTCluster) {
+ assert(First <= Last);
+ if (First == Last)
+ return false;
+
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ unsigned NumCmps = 0;
+ for (int64_t I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Dests.set(Clusters[I].MBB->getNumber());
+ NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ }
+ unsigned NumDests = Dests.count();
+
+ APInt Low = Clusters[First].Low->getValue();
+ APInt High = Clusters[Last].High->getValue();
+ assert(Low.slt(High));
+
+ if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+ return false;
+
+ APInt LowBound;
+ APInt CmpRange;
+
+ const int BitWidth = TLI->getPointerTy(*DL).getSizeInBits();
+ assert(TLI->rangeFitsInWord(Low, High, *DL) &&
+ "Case range must fit in bit mask!");
+
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values fit in a word without having
+ // to subtract minValue. In this case, we can optimize away the subtraction.
+ LowBound = APInt::getZero(Low.getBitWidth());
+ CmpRange = High;
+ ContiguousRange = false;
+ } else {
+ LowBound = Low;
+ CmpRange = High - Low;
+ }
+
+ CaseBitsVector CBV;
+ auto TotalProb = BranchProbability::getZero();
+ for (unsigned i = First; i <= Last; ++i) {
+ // Find the CaseBits for this destination.
+ unsigned j;
+ for (j = 0; j < CBV.size(); ++j)
+ if (CBV[j].BB == Clusters[i].MBB)
+ break;
+ if (j == CBV.size())
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
+ CaseBits *CB = &CBV[j];
+
+ // Update Mask, Bits and ExtraProb.
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
+ uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
+ assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
+ CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
+ CB->Bits += Hi - Lo + 1;
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
+ }
+
+ BitTestInfo BTI;
+ llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
+ // Sort by probability first, number of bits second, bit mask third.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
+ if (a.Bits != b.Bits)
+ return a.Bits > b.Bits;
+ return a.Mask < b.Mask;
+ });
+
+ for (auto &CB : CBV) {
+ MachineBasicBlock *BitTestBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
+ }
+ BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
+
+ BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
+ BitTestCases.size() - 1, TotalProb);
+ return true;
+}
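+
+// Illustrative example of the mask arithmetic above, with made-up numbers: for
+// a 64-bit pointer type and a cluster covering the values 4..6, LowBound is 0
+// (the small-values fast path), so Lo = 4, Hi = 6 and
+// (-1ULL >> (63 - (Hi - Lo))) << Lo == 0b111 << 4 == 0x70, i.e. bits 4..6 of
+// CB->Mask are set and CB->Bits grows by 3.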
+
+void SwitchCG::sortAndRangeify(CaseClusterVector &Clusters) {
+#ifndef NDEBUG
+ for (const CaseCluster &CC : Clusters)
+ assert(CC.Low == CC.High && "Input clusters must be single-case");
+#endif
+
+ llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Merge adjacent clusters with the same destination.
+ const unsigned N = Clusters.size();
+ unsigned DstIndex = 0;
+ for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
+ CaseCluster &CC = Clusters[SrcIndex];
+ const ConstantInt *CaseVal = CC.Low;
+ MachineBasicBlock *Succ = CC.MBB;
+
+ if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
+ (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
+ // If this case has the same successor and is a neighbour, merge it into
+ // the previous cluster.
+ Clusters[DstIndex - 1].High = CaseVal;
+ Clusters[DstIndex - 1].Prob += CC.Prob;
+ } else {
+ std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
+ sizeof(Clusters[SrcIndex]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
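+
+// Illustrative example with a made-up input: the single-case clusters
+// {2 -> A, 1 -> A, 3 -> B, 4 -> B} are first sorted by case value and then
+// merged into the two range clusters [1,2] -> A and [3,4] -> B, each merged
+// cluster's probability being the sum of its members'.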
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 000000000000..bf3d2088e196
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,102 @@
+//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass duplicates basic blocks ending in unconditional branches
+/// into the tails of their predecessors, using the TailDuplicator utility
+/// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tailduplication"
+
+namespace {
+
+class TailDuplicateBase : public MachineFunctionPass {
+ TailDuplicator Duplicator;
+ std::unique_ptr<MBFIWrapper> MBFIW;
+ bool PreRegAlloc;
+public:
+ TailDuplicateBase(char &PassID, bool PreRegAlloc)
+ : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+class TailDuplicate : public TailDuplicateBase {
+public:
+ static char ID;
+ TailDuplicate() : TailDuplicateBase(ID, false) {
+ initializeTailDuplicatePass(*PassRegistry::getPassRegistry());
+ }
+};
+
+class EarlyTailDuplicate : public TailDuplicateBase {
+public:
+ static char ID;
+ EarlyTailDuplicate() : TailDuplicateBase(ID, true) {
+ initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry());
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::NoPHIs);
+ }
+};
+
+} // end anonymous namespace
+
+char TailDuplicate::ID;
+char EarlyTailDuplicate::ID;
+
+char &llvm::TailDuplicateID = TailDuplicate::ID;
+char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID;
+
+INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false)
+INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication",
+ "Early Tail Duplication", false, false)
+
+bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ if (MBFI)
+ MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
+ /*LayoutMode=*/false);
+
+ bool MadeChange = false;
+ while (Duplicator.tailDuplicateBlocks())
+ MadeChange = true;
+
+ return MadeChange;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
new file mode 100644
index 000000000000..5ed67bd0a121
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -0,0 +1,1071 @@
+//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility class duplicates basic blocks ending in unconditional branches
+// into the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tailduplication"
+
+STATISTIC(NumTails, "Number of tails duplicated");
+STATISTIC(NumTailDups, "Number of tail duplicated blocks");
+STATISTIC(NumTailDupAdded,
+ "Number of instructions added due to tail duplication");
+STATISTIC(NumTailDupRemoved,
+ "Number of instructions removed due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs, "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDuplicateSize(
+ "tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
+ cl::Hidden);
+
+static cl::opt<unsigned> TailDupIndirectBranchSize(
+ "tail-dup-indirect-size",
+ cl::desc("Maximum instructions to consider tail duplicating blocks that "
+ "end with indirect branches."), cl::init(20),
+ cl::Hidden);
+
+static cl::opt<bool>
+ TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
+ cl::Hidden);
+
+void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
+ const MachineBranchProbabilityInfo *MBPIin,
+ MBFIWrapper *MBFIin,
+ ProfileSummaryInfo *PSIin,
+ bool LayoutModeIn, unsigned TailDupSizeIn) {
+ MF = &MFin;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ MMI = &MF->getMMI();
+ MBPI = MBPIin;
+ MBFI = MBFIin;
+ PSI = PSIin;
+ TailDupSize = TailDupSizeIn;
+
+ assert(MBPI != nullptr && "Machine Branch Probability Info required");
+
+ LayoutMode = LayoutModeIn;
+ this->PreRegAlloc = PreRegAlloc;
+}
+
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineBasicBlock &MBB : llvm::drop_begin(MF)) {
+ SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(),
+ MBB.pred_end());
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (MI != MBB.end()) {
+ if (!MI->isPHI())
+ break;
+ for (MachineBasicBlock *PredBB : Preds) {
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
+ << *MI;
+ dbgs() << " missing input from predecessor "
+ << printMBBReference(*PredBB) << '\n';
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB)
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor "
+ << printMBBReference(*PHIBB) << '\n';
+ llvm_unreachable(nullptr);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
+ << *MI;
+ dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n';
+ llvm_unreachable(nullptr);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// Tail duplicate the block and cleanup.
+/// \p IsSimple - return value of isSimpleBB
+/// \p MBB - block to be duplicated
+/// \p ForcedLayoutPred - If non-null, treat this block as the layout
+/// predecessor, instead of using the ordering in MF
+/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
+/// all Preds that received a copy of \p MBB.
+/// \p RemovalCallback - if non-null, called just before MBB is deleted.
+bool TailDuplicator::tailDuplicateAndUpdate(
+ bool IsSimple, MachineBasicBlock *MBB,
+ MachineBasicBlock *ForcedLayoutPred,
+ SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
+ function_ref<void(MachineBasicBlock *)> *RemovalCallback,
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock *, 8> TDBBs;
+ SmallVector<MachineInstr *, 16> Copies;
+ if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred,
+ TDBBs, Copies, CandidatePtr))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr *, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(*MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumTailDupRemoved += MBB->size();
+ removeDeadBlock(MBB, RemovalCallback);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = nullptr;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<Register, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
+ MachineBasicBlock *SrcBB = J.first;
+ Register SrcReg = J.second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ SmallVector<MachineOperand *> DebugUses;
+ // Rewrite uses that are outside of the original def's block.
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(VReg))) {
+ MachineInstr *UseMI = UseMO.getParent();
+ // Rewrite debug uses last so that they can take advantage of any
+ // register mappings introduced by other users in its BB, since we
+ // cannot create new register definitions specifically for the debug
+ // instruction (as debug instructions should not affect CodeGen).
+ if (UseMI->isDebugValue()) {
+ DebugUses.push_back(&UseMO);
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ for (auto *UseMO : DebugUses) {
+ MachineInstr *UseMI = UseMO->getParent();
+ UseMO->setReg(
+ SSAUpdate.GetValueInMiddleOfBlock(UseMI->getParent(), true));
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ Register Dst = Copy->getOperand(0).getReg();
+ Register Src = Copy->getOperand(1).getReg();
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ if (DuplicatedPreds)
+ *DuplicatedPreds = std::move(TDBBs);
+
+ return true;
+}
+
+/// Look for small blocks that are unconditionally branched to and do not fall
+/// through. Tail-duplicate their instructions into their predecessors to
+/// eliminate (dynamic) branches.
+bool TailDuplicator::tailDuplicateBlocks() {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(*MF, true);
+ }
+
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(*MF))) {
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(&MBB);
+
+ if (!shouldTailDuplicate(IsSimple, MBB))
+ continue;
+
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(*MF, false);
+
+ return MadeChange;
+}
+
+static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+ if (UseMI.isDebugValue())
+ continue;
+ if (UseMI.getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i + 1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<Register> *UsedByPhi) {
+ for (const auto &MI : BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ Register SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// Add a definition and source virtual registers pair for SSA update.
+void TailDuplicator::addSSAUpdateEntry(Register OrigReg, Register NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<Register, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
+/// source register that's contributed by PredBB and update SSA update map.
+void TailDuplicator::processPHI(
+ MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+ DenseMap<Register, RegSubRegPair> &LocalVRMap,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &Copies,
+ const DenseSet<Register> &RegsUsedByPhi, bool Remove) {
+ Register DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ Register SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
+
+ // Insert a copy from source to the end of the block. The def register is the
+ // available value liveout of the block.
+ Register NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ addSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->removeOperand(SrcOpIdx + 1);
+ MI->removeOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1 && !TailBB->hasAddressTaken())
+ MI->eraseFromParent();
+ else if (MI->getNumOperands() == 1)
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+}
+
+/// Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicator::duplicateInstruction(
+ MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+ DenseMap<Register, RegSubRegPair> &LocalVRMap,
+ const DenseSet<Register> &UsedByPhi) {
+ // Allow duplication of CFI instructions.
+ if (MI->isCFIInstruction()) {
+ BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
+ TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(MI->getOperand(0).getCFIIndex())
+ .setMIFlags(MI->getFlags());
+ return;
+ }
+ MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
+ if (PreRegAlloc) {
+ for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ Register NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ addSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ auto VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end()) {
+ // Need to make sure that the register class of the mapped register
+ // will satisfy the constraints of the class of the register being
+ // replaced.
+ auto *OrigRC = MRI->getRegClass(Reg);
+ auto *MappedRC = MRI->getRegClass(VI->second.Reg);
+ const TargetRegisterClass *ConstrRC;
+ if (VI->second.SubReg != 0) {
+ ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
+ VI->second.SubReg);
+ if (ConstrRC) {
+ // The actual constraining (as in "find appropriate new class")
+ // is done by getMatchingSuperRegClass, so now we only need to
+ // change the class of the mapped register.
+ MRI->setRegClass(VI->second.Reg, ConstrRC);
+ }
+ } else {
+ // For mapped registers that do not have sub-registers, simply
+ // restrict their class to match the original one.
+
+ // We don't want debug instructions affecting the resulting code so
+ // if we're cloning a debug instruction then just use MappedRC
+ // rather than constraining the register class further.
+ ConstrRC = NewMI.isDebugInstr()
+ ? MappedRC
+ : MRI->constrainRegClass(VI->second.Reg, OrigRC);
+ }
+
+ if (ConstrRC) {
+ // If the class constraining succeeded, we can simply replace
+ // the old register with the mapped one.
+ MO.setReg(VI->second.Reg);
+ // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
+ // sub-register, we need to compose the sub-register indices.
+ MO.setSubReg(
+ TRI->composeSubRegIndices(VI->second.SubReg, MO.getSubReg()));
+ } else {
+ // The direct replacement is not possible, due to failing register
+ // class constraints. An explicit COPY is necessary. Create one
+ // that can be reused.
+ Register NewReg = MRI->createVirtualRegister(OrigRC);
+ BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VI->second.Reg, 0, VI->second.SubReg);
+ LocalVRMap.erase(VI);
+ LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ MO.setReg(NewReg);
+ // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
+ // is equivalent to the whole register Reg. Hence, Reg:subreg
+ // is same as NewReg:subreg, so keep the sub-register index
+ // unchanged.
+ }
+ // Clear any kill flags from this operand. The new register could
+ // have uses after this one, so kills are not valid here.
+ MO.setIsKill(false);
+ }
+ }
+ }
+ }
+}
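+
+// Illustrative example with hypothetical vregs: if a PHI in the tail block
+// mapped %1 -> %5:sub_32 in LocalVRMap and a cloned instruction uses %1, the
+// code above either constrains %5's register class and composes the
+// sub-register indices onto the use, or, when no matching class exists, emits
+// "%6 = COPY %5.sub_32" in PredBB and rewrites the use to the plain %6.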
+
+/// After FromBB is tail duplicated into its predecessor blocks, the successors
+/// have gained new predecessors. Update the PHI instructions in them
+/// accordingly.
+void TailDuplicator::updateSuccessorsPHIs(
+ MachineBasicBlock *FromBB, bool isDead,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallSetVector<MachineBasicBlock *, 8> &Succs) {
+ for (MachineBasicBlock *SuccBB : Succs) {
+ for (MachineInstr &MI : *SuccBB) {
+ if (!MI.isPHI())
+ break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), MI);
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i + 1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = MI.getOperand(Idx);
+ Register Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or an earlier pass have fixed this?
+ for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
+ MachineOperand &MO = MI.getOperand(i + 1);
+ if (MO.getMBB() == FromBB) {
+ MI.removeOperand(i + 1);
+ MI.removeOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive removeOperand calls.
+
+ DenseMap<Register, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
+ MachineBasicBlock *SrcBB = J.first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correctly
+ // recompute SSA. In that case, avoid adding a dummy extra argument
+ // to this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ Register SrcReg = J.second;
+ if (Idx != 0) {
+ MI.getOperand(Idx).setReg(SrcReg);
+ MI.getOperand(Idx + 1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(SrcReg).addMBB(SrcBB);
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (MachineBasicBlock *SrcBB : TDBBs) {
+ if (Idx != 0) {
+ MI.getOperand(Idx).setReg(Reg);
+ MI.getOperand(Idx + 1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(Reg).addMBB(SrcBB);
+ }
+ }
+ }
+ if (Idx != 0) {
+ MI.removeOperand(Idx + 1);
+ MI.removeOperand(Idx);
+ }
+ }
+ }
+}
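+
+// Illustrative example with hypothetical blocks: when FromBB becomes dead and
+// a successor PHI held the operand pair "(%v, %FromBB)", that slot is reused
+// for the first duplicated predecessor and further predecessors are appended,
+// e.g. yielding "(%v1, %P1), (%v2, %P2)" without paying a removeOperand call
+// per entry.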
+
+/// Determine if it is profitable to duplicate this block.
+bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // When doing tail-duplication during layout, the block ordering is in flux,
+ // so canFallThrough returns a result based on incorrect information and
+ // should just be ignored.
+ if (!LayoutMode && TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ bool OptForSize = MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
+ if (TailDupSize == 0)
+ MaxDuplicateCount = TailDuplicateSize;
+ else
+ MaxDuplicateCount = TailDupSize;
+ if (OptForSize)
+ MaxDuplicateCount = 1;
+
+ // If the block to be duplicated ends in an unanalyzable fallthrough, don't
+ // duplicate it.
+ // A similar check is necessary in MachineBlockPlacement to make sure pairs of
+ // blocks with unanalyzable fallthrough get laid out contiguously.
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
+ TailBB.canFallThrough())
+ return false;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = TailDupIndirectBranchSize;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineInstr &MI : TailBB) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ // CFI instructions are marked as non-duplicable, because Darwin compact
+ // unwind info emission can't handle multiple prologue setups. In the case of
+ // DWARF, allow them to be duplicated, so that their existence doesn't prevent
+ // tail duplication of basic blocks that would otherwise be duplicated.
+ if (MI.isNotDuplicable() &&
+ (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
+ !MI.isCFIInstruction()))
+ return false;
+
+ // Convergent instructions can be duplicated only if doing so doesn't add
+ // new control dependencies, which is exactly what duplicating them here
+ // would do.
+ if (MI.isConvergent())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && MI.isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation, so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && MI.isCall())
+ return false;
+
+ // TailDuplicator::appendCopies will erroneously place COPYs after
+ // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
+ // bug that was fixed in f7a53d82c090.
+ // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
+ // for the COPY when replacing PHIs.
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return false;
+
+ if (MI.isBundle())
+ InstrCount += MI.getBundleSize();
+ else if (!MI.isPHI() && !MI.isMetaInstruction())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ // Check if any of the successors of TailBB has a PHI node in which the
+ // value corresponding to TailBB uses a subregister.
+ // If a phi node uses a register paired with a subregister, the actual
+ // "value type" of the phi may differ from the type of the register without
+ // any subregisters. Due to a bug, tail duplication may add a new operand
+ // without a necessary subregister, producing invalid code. This is
+ // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
+ // Disable tail duplication for this case for now, until the problem is
+ // fixed.
+ for (auto *SB : TailBB.successors()) {
+ for (auto &I : *SB) {
+ if (!I.isPHI())
+ break;
+ unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
+ assert(Idx != 0);
+ MachineOperand &PU = I.getOperand(Idx);
+ if (PU.getSubReg() != 0)
+ return false;
+ }
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
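+
+// Illustrative example under the default thresholds: with -tail-dup-size left
+// at 2 and no optimize-for-size constraint, a tail block consisting of one ADD
+// plus an unconditional branch stays within MaxDuplicateCount, whereas a
+// pre-RA block ending in an indirect branch is measured against the larger
+// -tail-dup-indirect-size limit of 20 instead.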
+
+/// True if this BB has only one unconditional jump.
+bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(true);
+ if (I == TailBB->end())
+ return true;
+ return I->isUnconditionalBranch();
+}
+
+static bool bothUsedInPHI(const MachineBasicBlock &A,
+ const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) {
+ for (MachineBasicBlock *BB : A.successors())
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+
+ return false;
+}
+
+bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ for (MachineBasicBlock *PredBB : BB.predecessors()) {
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
+bool TailDuplicator::duplicateSimpleBB(
+ MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ const DenseSet<Register> &UsedByPhi) {
+ SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors());
+ bool Changed = false;
+ for (MachineBasicBlock *PredBB : Preds) {
+ if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
+ continue;
+
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = PredBB->getNextNode();
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = nullptr;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = nullptr;
+ if (PredTBB == NextBB && PredFBB == nullptr)
+ PredTBB = nullptr;
+
+ auto DL = PredBB->findBranchDebugLoc();
+ TII->removeBranch(*PredBB);
+
+ if (!PredBB->isSuccessor(NewTarget))
+ PredBB->replaceSuccessor(TailBB, NewTarget);
+ else {
+ PredBB->removeSuccessor(TailBB, true);
+ assert(PredBB->succ_size() <= 1);
+ }
+
+ if (PredTBB)
+ TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB) {
+ // EH edges are ignored by analyzeBranch.
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
+ return false;
+ if (!PredCond.empty())
+ return false;
+ // FIXME: This is overly conservative; it may be ok to relax this in the
+ // future under more specific conditions. If TailBB is an INLINEASM_BR
+ // indirect target, we need to see if the edge from PredBB to TailBB is from
+ // an INLINEASM_BR in PredBB, and then also if that edge was from the
+ // indirect target list, fallthrough/default target, or potentially both. If
+ // it's both, TailDuplicator::tailDuplicate will remove the edge, corrupting
+ // the successor list in PredBB and predecessor list in TailBB.
+ if (TailBB->isInlineAsmBrIndirectTarget())
+ return false;
+ return true;
+}
+
+/// If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+/// \p IsSimple result of isSimpleBB
+/// \p TailBB Block to be duplicated.
+/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
+/// instead of the previous block in MF's order.
+/// \p TDBBs A vector to keep track of all blocks tail-duplicated
+/// into.
+/// \p Copies A vector of copy instructions inserted. Used later to
+/// walk all the inserted copies and remove redundant ones.
+bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
+ MachineBasicBlock *ForcedLayoutPred,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallVectorImpl<MachineInstr *> &Copies,
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) {
+ LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
+ << '\n');
+
+ bool ShouldUpdateTerminators = TailBB->canFallThrough();
+
+ DenseSet<Register> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds;
+ if (CandidatePtr)
+ Preds.insert(CandidatePtr->begin(), CandidatePtr->end());
+ else
+ Preds.insert(TailBB->pred_begin(), TailBB->pred_end());
+
+ for (MachineBasicBlock *PredBB : Preds) {
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+
+ if (!canTailDuplicate(TailBB, PredBB))
+ continue;
+
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ // If a profile is available, findDuplicateCandidates can choose a better
+ // fall-through predecessor.
+ if (!(MF->getFunction().hasProfileData() && LayoutMode)) {
+ bool IsLayoutSuccessor = false;
+ if (ForcedLayoutPred)
+ IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
+ else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ IsLayoutSuccessor = true;
+ if (IsLayoutSuccessor)
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->removeBranch(*PredBB);
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+ for (MachineInstr &MI : llvm::make_early_inc_range(*TailBB)) {
+ if (MI.isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
+ }
+ }
+ appendCopies(PredBB, CopyInfos, Copies);
+
+ NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock *Succ : TailBB->successors())
+ PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
+
+ // Update branches in pred to jump to tail's layout successor if needed.
+ if (ShouldUpdateTerminators)
+ PredBB->updateTerminator(TailBB->getNextNode());
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = ForcedLayoutPred;
+ if (!PrevBB)
+ PrevBB = &*std::prev(TailBB->getIterator());
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // analyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ // Layout preds are not always CFG preds. Check.
+ *PrevBB->succ_begin() == TailBB &&
+ !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
+ PriorCond.empty() &&
+ (!PriorTBB || PriorTBB == TailBB) &&
+ TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ // There may be a branch to the layout successor. This is unlikely but it
+ // happens. The correct thing to do is to remove the branch before
+ // duplicating the instructions in all cases.
+ bool RemovedBranches = TII->removeBranch(*PrevBB) != 0;
+
+ // If there are still tail instructions, abort the merge
+ if (PrevBB->getFirstTerminator() == PrevBB->end()) {
+ if (PreRegAlloc) {
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
+ true);
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ appendCopies(PrevBB, CopyInfos, Copies);
+ } else {
+ TII->removeBranch(*PrevBB);
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+
+ // Update branches in PrevBB based on Tail's layout successor.
+ if (ShouldUpdateTerminators)
+ PrevBB->updateTerminator(TailBB->getNextNode());
+
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
+ "contains terminator instructions");
+ // Return early if no changes were made
+ if (!Changed)
+ return RemovedBranches;
+ }
+ Changed |= RemovedBranches;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+ // Handle the nasty case in which we duplicated a block that is part of a
+ // loop into some but not all of its predecessors. For example:
+ //    1 -> 2 <-> 3                 |
+ //          \                      |
+ //           \---> rest            |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ //    12 -> 3 <-> 2 -> rest        |
+ //             \             /     |
+ //              \----->-----/      |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (MachineBasicBlock *PredBB : Preds) {
+ if (is_contained(TDBBs, PredBB))
+ continue;
+
+ // EH edges
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+ // Process PHI instructions first.
+ for (MachineInstr &MI : make_early_inc_range(TailBB->phis())) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ appendCopies(PredBB, CopyInfos, Copies);
+ }
+
+ return Changed;
+}
+
+/// At the end of the block \p MBB generate COPY instructions between registers
+/// described by \p CopyInfos. Append resulting instructions to \p Copies.
+void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
+ SmallVectorImpl<MachineInstr*> &Copies) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
+ for (auto &CI : CopyInfos) {
+ auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
+ .addReg(CI.second.Reg, 0, CI.second.SubReg);
+ Copies.push_back(C);
+ }
+}
+
+/// Remove the specified dead machine basic block from the function, updating
+/// the CFG.
+void TailDuplicator::removeDeadBlock(
+ MachineBasicBlock *MBB,
+ function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // Update the call site info.
+ for (const MachineInstr &MI : *MBB)
+ if (MI.shouldUpdateCallSiteInfo())
+ MF->eraseCallSiteInfo(&MI);
+
+ if (RemovalCallback)
+ (*RemovalCallback)(MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end() - 1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
new file mode 100644
index 000000000000..48a2094f5d45
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -0,0 +1,168 @@
+//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() = default;
+
+bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
+ assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
+ MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !MF.getFunction().hasFnAttribute(Attribute::UWTable));
+ return false;
+}
+
+bool TargetFrameLowering::enableCFIFixup(MachineFunction &MF) const {
+ return MF.needsFrameMoves() &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+}
+
+/// Returns the displacement from the frame register to the stack
+/// frame of the specified index, along with the frame register used
+/// (in output arg FrameReg). This is the default implementation which
+/// is overridden for some targets.
+StackOffset
+TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() +
+ MFI.getOffsetAdjustment());
+}
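+
+// For illustration: with an object offset of -8, a stack size of 64, a local
+// area offset of 0, and no offset adjustment, the default implementation
+// returns a fixed displacement of 56 bytes from the frame register.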
+
+bool TargetFrameLowering::needsFrameIndexResolution(
+ const MachineFunction &MF) const {
+ return MF.getFrameInfo().hasStackObjects();
+}
+
+void TargetFrameLowering::getCalleeSaves(const MachineFunction &MF,
+ BitVector &CalleeSaves) const {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ CalleeSaves.resize(TRI.getNumRegs());
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ CalleeSaves.set(Info.getReg());
+}
+
+void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // Resize before the early returns. Some backends expect that
+ // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
+ // saved registers.
+ SavedRegs.resize(TRI.getNumRegs());
+
+ // When interprocedural register allocation is enabled caller saved registers
+ // are preferred over callee saved registers.
+ if (MF.getTarget().Options.EnableIPRA &&
+ isSafeForNoCSROpt(MF.getFunction()) &&
+ isProfitableForNoCSROpt(MF.getFunction()))
+ return;
+
+ // Get the callee saved register list...
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+
+ // Early exit if there are no callee saved registers.
+ if (!CSRegs || CSRegs[0] == 0)
+ return;
+
+ // In Naked functions we aren't going to save any registers.
+ if (MF.getFunction().hasFnAttribute(Attribute::Naked))
+ return;
+
+ // Noreturn+nounwind functions never restore CSR, so no saves are needed.
+ // Purely noreturn functions may still return through throws, so those must
+ // save CSR for caller exception handlers.
+ //
+ // If the function uses longjmp to break out of its current path of
+ // execution we do not need the CSR spills either: setjmp stores all CSRs
+ // it was called with into the jmp_buf, which longjmp then restores.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
+ MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !MF.getFunction().hasFnAttribute(Attribute::UWTable) &&
+ enableCalleeSaveSkip(MF))
+ return;
+
+ // Functions which call __builtin_unwind_init get all their registers saved.
+ bool CallsUnwindInit = MF.callsUnwindInit();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (CallsUnwindInit || MRI.isPhysRegModified(Reg))
+ SavedRegs.set(Reg);
+ }
+}
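+
+// For illustration, on a hypothetical target whose callee-saved list is
+// {r4, r5, r6}: a function that clobbers only r5 ends up with SavedRegs
+// containing just r5, while a function that calls __builtin_unwind_init has
+// all three bits set regardless of which registers it actually modifies.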
+
+bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const {
+ if (!hasFP(MF))
+ return false;
+
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ return RegInfo->useFPForScavengingIndex(MF) &&
+ !RegInfo->hasStackRealignment(MF);
+}
+
+bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
+ if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
+ !F.hasFnAttribute(Attribute::NoRecurse))
+ return false;
+ // Function should not be optimized as tail call.
+ for (const User *U : F.users())
+ if (auto *CB = dyn_cast<CallBase>(U))
+ if (CB->isTailCall())
+ return false;
+ return true;
+}
+
+int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+ llvm_unreachable("getInitialCFAOffset() not implemented!");
+}
+
+Register
+TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
+ llvm_unreachable("getInitialCFARegister() not implemented!");
+}
+
+TargetFrameLowering::DwarfFrameBase
+TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF)}};
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..b29404b42519
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -0,0 +1,1726 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+TargetInstrInfo::~TargetInstrInfo() = default;
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ if (OpNum >= MCID.getNumOperands())
+ return nullptr;
+
+ short RegClass = MCID.operands()[OpNum].RegClass;
+ if (MCID.operands()[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(MF, RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return nullptr;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// insertNoops - Insert noops into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const {
+ for (unsigned i = 0; i < Quantity; ++i)
+ insertNoop(MBB, MI);
+}
+
+static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
+ return strncmp(Str, MAI.getCommentString().data(),
+ MAI.getCommentString().size()) == 0;
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+/// We implement a special case of the .space directive which takes only a
+/// single integer argument in base 10 that is the size in bytes. This is a
+/// restricted form of the GAS directive in that we only interpret
+/// simple--i.e. not a logical or arithmetic expression--size values without
+/// the optional fill value. This is primarily used for creating arbitrary
+/// sized inline asm blocks for testing purposes.
+unsigned TargetInstrInfo::getInlineAsmLength(
+ const char *Str,
+ const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
+ // Count the number of instructions in the asm.
+ bool AtInsnStart = true;
+ unsigned Length = 0;
+ const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0) {
+ AtInsnStart = true;
+ } else if (isAsmComment(Str, MAI)) {
+ // Stop counting as an instruction after a comment until the next
+ // separator.
+ AtInsnStart = false;
+ }
+
+ if (AtInsnStart && !isSpace(static_cast<unsigned char>(*Str))) {
+ unsigned AddLength = MaxInstLength;
+ if (strncmp(Str, ".space", 6) == 0) {
+ char *EStr;
+ int SpaceSize;
+ SpaceSize = strtol(Str + 6, &EStr, 10);
+ SpaceSize = SpaceSize < 0 ? 0 : SpaceSize;
+ while (*EStr != '\n' && isSpace(static_cast<unsigned char>(*EStr)))
+ ++EStr;
+ if (*EStr == '\0' || *EStr == '\n' ||
+ isAsmComment(EStr, MAI)) // Successfully parsed .space argument
+ AddLength = SpaceSize;
+ }
+ Length += AddLength;
+ AtInsnStart = false;
+ }
+ }
+
+ return Length;
+}
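+
+// Worked example (illustrative): assuming MaxInstLength is 4, the separator
+// string is "\n", and the comment string is "#", the inline asm
+//   "add r0, r0, r1\n.space 8\n# just a note"
+// is measured as 4 bytes for the instruction plus 8 bytes for the .space
+// payload; the comment line contributes nothing, giving a total of 12.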
+
+/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Save off the debug loc before erasing the instruction.
+ DebugLoc DL = Tail->getDebugLoc();
+
+ // Update call site info and remove all the dead instructions
+ // from the end of MBB.
+ while (Tail != MBB->end()) {
+ auto MI = Tail++;
+ if (MI->shouldUpdateCallSiteInfo())
+ MBB->getParent()->eraseCallSiteInfo(&*MI);
+ MBB->erase(MI);
+ }
+
+ // If NewDest isn't immediately after MBB in layout order, insert a branch
+ // to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ insertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
+ MBB->addSuccessor(NewDest);
+}
+
+MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI, unsigned Idx1,
+ unsigned Idx2) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI.getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return nullptr;
+
+ unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
+ unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
+ assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
+ CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
+ "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
+ assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+
+ Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+ Register Reg1 = MI.getOperand(Idx1).getReg();
+ Register Reg2 = MI.getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
+ bool Reg1IsKill = MI.getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI.getOperand(Idx2).isKill();
+ bool Reg1IsUndef = MI.getOperand(Idx1).isUndef();
+ bool Reg2IsUndef = MI.getOperand(Idx2).isUndef();
+ bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead();
+ bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
+ // Avoid calling isRenamable for virtual registers since we assert that
+ // renamable property is only queried/set for physical registers.
+ bool Reg1IsRenamable =
+ Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false;
+ bool Reg2IsRenamable =
+ Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false;
+ // If destination is tied to either of the commuted source register, then
+ // it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ Reg2IsKill = false;
+ Reg0 = Reg2;
+ SubReg0 = SubReg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
+ SubReg0 = SubReg1;
+ }
+
+ MachineInstr *CommutedMI = nullptr;
+ if (NewMI) {
+ // Create a new instruction.
+ MachineFunction &MF = *MI.getMF();
+ CommutedMI = MF.CloneMachineInstr(&MI);
+ } else {
+ CommutedMI = &MI;
+ }
+
+ if (HasDef) {
+ CommutedMI->getOperand(0).setReg(Reg0);
+ CommutedMI->getOperand(0).setSubReg(SubReg0);
+ }
+ CommutedMI->getOperand(Idx2).setReg(Reg1);
+ CommutedMI->getOperand(Idx1).setReg(Reg2);
+ CommutedMI->getOperand(Idx2).setSubReg(SubReg1);
+ CommutedMI->getOperand(Idx1).setSubReg(SubReg2);
+ CommutedMI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ CommutedMI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ CommutedMI->getOperand(Idx2).setIsUndef(Reg1IsUndef);
+ CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
+ CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
+ CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
+ // Avoid calling setIsRenamable for virtual registers since we assert that
+ // renamable property is only queried/set for physical registers.
+ if (Reg1.isPhysical())
+ CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable);
+ if (Reg2.isPhysical())
+ CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);
+ return CommutedMI;
+}
+
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose
+ // any commutable operand, which is done in findCommutedOpIndices() method
+ // called below.
+ if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) &&
+ !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
+ assert(MI.isCommutable() &&
+ "Precondition violation: MI must be commutable.");
+ return nullptr;
+ }
+ return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
+bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
+ unsigned &ResultIdx2,
+ unsigned CommutableOpIdx1,
+ unsigned CommutableOpIdx2) {
+ if (ResultIdx1 == CommuteAnyOperandIndex &&
+ ResultIdx2 == CommuteAnyOperandIndex) {
+ ResultIdx1 = CommutableOpIdx1;
+ ResultIdx2 = CommutableOpIdx2;
+ } else if (ResultIdx1 == CommuteAnyOperandIndex) {
+ if (ResultIdx2 == CommutableOpIdx1)
+ ResultIdx1 = CommutableOpIdx2;
+ else if (ResultIdx2 == CommutableOpIdx2)
+ ResultIdx1 = CommutableOpIdx1;
+ else
+ return false;
+ } else if (ResultIdx2 == CommuteAnyOperandIndex) {
+ if (ResultIdx1 == CommutableOpIdx1)
+ ResultIdx2 = CommutableOpIdx2;
+ else if (ResultIdx1 == CommutableOpIdx2)
+ ResultIdx2 = CommutableOpIdx1;
+ else
+ return false;
+ } else
+ // Check that the result operand indices match the given commutable
+ // operand indices.
+ return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) ||
+ (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1);
+
+ return true;
+}
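+
+// For example, with CommutableOpIdx1 = 1 and CommutableOpIdx2 = 2:
+//   (CommuteAnyOperandIndex, CommuteAnyOperandIndex) resolves to (1, 2),
+//   (1, CommuteAnyOperandIndex) resolves to (1, 2),
+//   (2, 1) is accepted as-is, and
+//   (1, 3) is rejected because operand 3 is not one of the commutable pair.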
+
+bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI.isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ unsigned CommutableOpIdx1 = MCID.getNumDefs();
+ unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+
+ if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
+ if (!MI.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI.isBranch() && !MI.isBarrier())
+ return true;
+ if (!MI.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool TargetInstrInfo::PredicateInstruction(
+ MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
+ bool MadeChange = false;
+
+ assert(!MI.isBundle() &&
+ "TargetInstrInfo::PredicateInstruction() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MI.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (MCID.operands()[i].isPredicate()) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
+bool TargetInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
+ for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
+ oe = MI.memoperands_end();
+ o != oe; ++o) {
+ if ((*o)->isLoad() &&
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
+ }
+ return Accesses.size() != StartSize;
+}
+
+bool TargetInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
+ for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
+ oe = MI.memoperands_end();
+ o != oe; ++o) {
+ if ((*o)->isStore() &&
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
+ }
+ return Accesses.size() != StartSize;
+}
+
+bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
+ unsigned SubIdx, unsigned &Size,
+ unsigned &Offset,
+ const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!SubIdx) {
+ Size = TRI->getSpillSize(*RC);
+ Offset = 0;
+ return true;
+ }
+ unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
+ // Convert bit size to byte size.
+ if (BitSize % 8)
+ return false;
+
+ int BitOffset = TRI->getSubRegIdxOffset(SubIdx);
+ if (BitOffset < 0 || BitOffset % 8)
+ return false;
+
+ Size = BitSize / 8;
+ Offset = (unsigned)BitOffset / 8;
+
+ assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range");
+
+ if (!MF.getDataLayout().isLittleEndian()) {
+ Offset = TRI->getSpillSize(*RC) - (Offset + Size);
+ }
+ return true;
+}
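+
+// For example, a register class spilled as 16 bytes with a sub-register of
+// 64 bits at bit offset 64 yields Size = 8 and Offset = 8 on a little-endian
+// target; on a big-endian target the offset is flipped to 16 - (8 + 8) = 0.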
+
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, unsigned SubIdx,
+ const MachineInstr &Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
+bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
+ const MachineInstr &MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
+ assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
+ MachineFunction &MF = *MBB.getParent();
+ return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
+ unsigned FoldIdx) {
+ assert(MI.isCopy() && "MI must be a COPY instruction");
+ if (MI.getNumOperands() != 2)
+ return nullptr;
+ assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI.getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return nullptr;
+
+ Register FoldReg = FoldOp.getReg();
+ Register LiveReg = LiveOp.getReg();
+
+ assert(FoldReg.isVirtual() && "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (LiveOp.getReg().isPhysical())
+ return RC->contains(LiveOp.getReg()) ? RC : nullptr;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return nullptr;
+}
+
+MCInst TargetInstrInfo::getNop() const { llvm_unreachable("Not implemented"); }
+
+std::pair<unsigned, unsigned>
+TargetInstrInfo::getPatchpointUnfoldableRange(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::STACKMAP:
+ // StackMapLiveValues are foldable
+ return std::make_pair(0, StackMapOpers(&MI).getVarIdx());
+ case TargetOpcode::PATCHPOINT:
+ // For PatchPoint, the call args are not foldable (even if reported in the
+ // stackmap e.g. via anyregcc).
+ return std::make_pair(0, PatchPointOpers(&MI).getVarIdx());
+ case TargetOpcode::STATEPOINT:
+ // For statepoints, fold deopt and gc arguments, but not call arguments.
+ return std::make_pair(MI.getNumDefs(), StatepointOpers(&MI).getVarIdx());
+ default:
+ llvm_unreachable("unexpected stackmap opcode");
+ }
+}
+
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
+ const TargetInstrInfo &TII) {
+ unsigned StartIdx = 0;
+ unsigned NumDefs = 0;
+ // getPatchpointUnfoldableRange asserts if MI is not a stackmap, patchpoint,
+ // or statepoint.
+ std::tie(NumDefs, StartIdx) = TII.getPatchpointUnfoldableRange(MI);
+
+ unsigned DefToFoldIdx = MI.getNumOperands();
+
+ // Return nullptr if any operands requested for folding are not foldable (not
+ // part of the stackmap's live values).
+ for (unsigned Op : Ops) {
+ if (Op < NumDefs) {
+ assert(DefToFoldIdx == MI.getNumOperands() && "Folding multiple defs");
+ DefToFoldIdx = Op;
+ } else if (Op < StartIdx) {
+ return nullptr;
+ }
+ if (MI.getOperand(Op).isTied())
+ return nullptr;
+ }
+
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+ // The return value, the metadata, and the function arguments are not
+ // folded; copy them unchanged, skipping a def that is itself being folded.
+ for (unsigned i = 0; i < StartIdx; ++i)
+ if (i != DefToFoldIdx)
+ MIB.add(MI.getOperand(i));
+
+ for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ unsigned TiedTo = e;
+ (void)MI.isRegTiedToDefOperand(i, &TiedTo);
+
+ if (is_contained(Ops, i)) {
+ assert(TiedTo == e && "Cannot fold tied operands");
+ unsigned SpillSize;
+ unsigned SpillOffset;
+ // Compute the spill slot size and offset.
+ const TargetRegisterClass *RC =
+ MF.getRegInfo().getRegClass(MO.getReg());
+ bool Valid =
+ TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
+ if (!Valid)
+ report_fatal_error("cannot spill patchpoint subregister operand");
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(SpillSize);
+ MIB.addFrameIndex(FrameIndex);
+ MIB.addImm(SpillOffset);
+ } else {
+ MIB.add(MO);
+ if (TiedTo < e) {
+ assert(TiedTo < NumDefs && "Bad tied operand");
+ if (TiedTo > DefToFoldIdx)
+ --TiedTo;
+ NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1);
+ }
+ }
+ }
+ return NewMI;
+}
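+
+// For illustration: folding a live-value register operand of a STACKMAP into
+// frame index FI replaces that single operand with the four operands
+//   StackMaps::IndirectMemRefOp, <spill size>, FI, <spill offset>
+// so the stackmap records the value's stack location instead of a register.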
+
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FI,
+ LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
+ auto Flags = MachineMemOperand::MONone;
+ for (unsigned OpIdx : Ops)
+ Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
+ : MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ // If we're not folding a load into a subreg, the size of the load is the
+ // size of the spill slot. But if we are, we need to figure out what the
+ // actual load size is.
+ int64_t MemSize = 0;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ if (Flags & MachineMemOperand::MOStore) {
+ MemSize = MFI.getObjectSize(FI);
+ } else {
+ for (unsigned OpIdx : Ops) {
+ int64_t OpSize = MFI.getObjectSize(FI);
+
+ if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) {
+ unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
+ if (SubRegSize > 0 && !(SubRegSize % 8))
+ OpSize = SubRegSize / 8;
+ }
+
+ MemSize = std::max(MemSize, OpSize);
+ }
+ }
+
+ assert(MemSize && "Did not expect a zero-sized stack slot");
+
+ MachineInstr *NewMI = nullptr;
+
+ if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) {
+ // Fold stackmap/patchpoint.
+ NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
+ if (NewMI)
+ MBB->insert(MI, NewMI);
+ } else {
+ // Ask the target to do the actual folding.
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
+ }
+
+ if (NewMI) {
+ NewMI->setMemRefs(MF, MI.memoperands());
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->mayLoad()) &&
+ "Folded a use to a non-load!");
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+ Flags, MemSize, MFI.getObjectAlign(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ // The pass "x86 speculative load hardening" always attaches symbols to
+ // call instructions. Copy any instruction symbols from the old instruction.
+ NewMI->cloneInstrSymbols(MF, MI);
+
+ return NewMI;
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI.isCopy() || Ops.size() != 1)
+ return nullptr;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return nullptr;
+
+ const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
+ Register());
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register());
+ return &*--Pos;
+}
+
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned OpIdx : Ops)
+ assert(MI.getOperand(OpIdx).isUse() && "Folding load into def!");
+#endif
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = nullptr;
+ int FrameIndex = 0;
+
+ if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) &&
+ isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ // Fold stackmap/patchpoint.
+ NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+ if (NewMI)
+ NewMI = &*MBB.insert(MI, NewMI);
+ } else {
+ // Ask the target to do the actual folding.
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
+ }
+
+ if (!NewMI)
+ return nullptr;
+
+ // Copy the memoperands from the load to the folded instruction.
+ if (MI.memoperands_empty()) {
+ NewMI->setMemRefs(MF, LoadMI.memoperands());
+ } else {
+ // Handle the rare case of folding multiple loads.
+ NewMI->setMemRefs(MF, MI.memoperands());
+ for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
+ E = LoadMI.memoperands_end();
+ I != E; ++I) {
+ NewMI->addMemOperand(MF, *I);
+ }
+ }
+ return NewMI;
+}
+
+/// transferImplicitOperands - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit
+/// operands from MI to the replacement instruction.
+static void transferImplicitOperands(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ Register DstReg = MI->getOperand(0).getReg();
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ CopyMI->addOperand(MO);
+
+ // Be conservative about preserving kills when subregister defs are
+ // involved. If there was an implicit kill of a super-register overlapping
+ // the copy result, we would otherwise kill the sub-registers that previous
+ // copies defined.
+
+ if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
+ CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
+ }
+}
+
+void TargetInstrInfo::lowerCopy(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) const {
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(get(TargetOpcode::KILL));
+ return;
+ }
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
+ if (IdentityCopy || SrcMO.isUndef()) {
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(get(TargetOpcode::KILL));
+ return;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return;
+ }
+
+ copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(),
+ SrcMO.getReg(), SrcMO.isKill());
+
+ if (MI->getNumOperands() > 2)
+ transferImplicitOperands(MI, TRI);
+ MI->eraseFromParent();
+ return;
+}
+
+bool TargetInstrInfo::hasReassociableOperands(
+ const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions for the operands that we will
+ // reassociate.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && Op1.getReg().isVirtual())
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && Op2.getReg().isVirtual())
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And at least one operand must be defined in MBB.
+ return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
+}
+
+bool TargetInstrInfo::areOpcodesEqualOrInverse(unsigned Opcode1,
+ unsigned Opcode2) const {
+ return Opcode1 == Opcode2 || getInverseOpcode(Opcode1) == Opcode2;
+}
+
+bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
+ bool &Commuted) const {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned Opcode = Inst.getOpcode();
+
+ // If only one operand has the same or inverse opcode and it's the second
+ // source operand, the operands must be commuted.
+ Commuted = !areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
+ areOpcodesEqualOrInverse(Opcode, MI2->getOpcode());
+ if (Commuted)
+ std::swap(MI1, MI2);
+
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must also be associative/commutative or be the
+ // inverse of such an operation (this can be different even for
+ // instructions with the same opcode if traits like fast-math-flags are
+ // included).
+ // 3. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 4. The previous instruction's result must only be used by Inst.
+ return areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
+ (isAssociativeAndCommutative(*MI1) ||
+ isAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
+ hasReassociableOperands(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
+}
+
+// 1. The operation must be associative and commutative or be the inverse of
+// such an operation.
+// 2. The instruction must have virtual register definitions for its
+// operands in the same basic block.
+// 3. The instruction must have a reassociable sibling.
+bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
+ bool &Commuted) const {
+ return (isAssociativeAndCommutative(Inst) ||
+ isAssociativeAndCommutative(Inst, /* Invert */ true)) &&
+ hasReassociableOperands(Inst, Inst.getParent()) &&
+ hasReassociableSibling(Inst, Commuted);
+}
+
+// The concept of the reassociation pass is that these operations can benefit
+// from this kind of transformation:
+//
+// A = ? op ?
+// B = A op X (Prev)
+// C = B op Y (Root)
+// -->
+// A = ? op ?
+// B = X op Y
+// C = A op B
+//
+// breaking the dependency between A and B, allowing them to be executed in
+// parallel (or back-to-back in a pipeline) instead of depending on each other.
+
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+// instruction is known to not increase the critical path, then don't match
+// that pattern.
+bool TargetInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
+ bool Commute;
+ if (isReassociationCandidate(Root, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Return true when a code sequence can improve loop throughput.
+bool
+TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+ return false;
+}
+
+std::pair<unsigned, unsigned>
+TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
+ const MachineInstr &Root,
+ const MachineInstr &Prev) const {
+ bool AssocCommutRoot = isAssociativeAndCommutative(Root);
+ bool AssocCommutPrev = isAssociativeAndCommutative(Prev);
+
+ // Early exit if both opcodes are associative and commutative. It's a trivial
+ // reassociation when we only change operands order. In this case opcodes are
+ // not required to have inverse versions.
+ if (AssocCommutRoot && AssocCommutPrev) {
+ assert(Root.getOpcode() == Prev.getOpcode() && "Expected to be equal");
+ return std::make_pair(Root.getOpcode(), Root.getOpcode());
+ }
+
+ // At least one instruction is not associative or commutative.
+ // Since we have matched one of the reassociation patterns, we expect that the
+ // instructions' opcodes are equal or one of them is the inversion of the
+ // other.
+ assert(areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()) &&
+ "Incorrectly matched pattern");
+ unsigned AssocCommutOpcode = Root.getOpcode();
+ unsigned InverseOpcode = *getInverseOpcode(Root.getOpcode());
+ if (!AssocCommutRoot)
+ std::swap(AssocCommutOpcode, InverseOpcode);
+
+ // The transformation rule (`+` is any associative and commutative binary
+ // operation, `-` is the inverse):
+ // REASSOC_AX_BY:
+ // (A + X) + Y => A + (X + Y)
+ // (A + X) - Y => A + (X - Y)
+ // (A - X) + Y => A - (X - Y)
+ // (A - X) - Y => A - (X + Y)
+ // REASSOC_XA_BY:
+ // (X + A) + Y => (X + Y) + A
+ // (X + A) - Y => (X - Y) + A
+ // (X - A) + Y => (X + Y) - A
+ // (X - A) - Y => (X - Y) - A
+ // REASSOC_AX_YB:
+ // Y + (A + X) => (Y + X) + A
+ // Y - (A + X) => (Y - X) - A
+ // Y + (A - X) => (Y - X) + A
+ // Y - (A - X) => (Y + X) - A
+ // REASSOC_XA_YB:
+ // Y + (X + A) => (Y + X) + A
+ // Y - (X + A) => (Y - X) - A
+ // Y + (X - A) => (Y + X) - A
+ // Y - (X - A) => (Y - X) + A
+ switch (Pattern) {
+ default:
+ llvm_unreachable("Unexpected pattern");
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ break;
+ }
+ llvm_unreachable("Unhandled combination");
+}
+
+// Return a pair of boolean flags showing if the new root and new prev operands
+// must be swapped. See visual example of the rule in
+// TargetInstrInfo::getReassociationOpcodes.
+static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
+ switch (Pattern) {
+ default:
+ llvm_unreachable("Unexpected pattern");
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ return {false, false};
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ return {true, false};
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ return {true, true};
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ return {true, true};
+ }
+}
+
+/// Attempt the reassociation transformation to reduce critical path length.
+/// See the above comments before getMachineCombinerPatterns().
+void TargetInstrInfo::reassociateOps(
+ MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ int Row;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break;
+ case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break;
+ case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break;
+ case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break;
+ default: llvm_unreachable("unexpected MachineCombinerPattern");
+ }
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ Register RegA = OpA.getReg();
+ Register RegB = OpB.getReg();
+ Register RegX = OpX.getReg();
+ Register RegY = OpY.getReg();
+ Register RegC = OpC.getReg();
+
+ if (RegA.isVirtual())
+ MRI.constrainRegClass(RegA, RC);
+ if (RegB.isVirtual())
+ MRI.constrainRegClass(RegB, RC);
+ if (RegX.isVirtual())
+ MRI.constrainRegClass(RegX, RC);
+ if (RegY.isVirtual())
+ MRI.constrainRegClass(RegY, RC);
+ if (RegC.isVirtual())
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ Register NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev);
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+ bool KillNewVR = true;
+
+ auto [SwapRootOperands, SwapPrevOperands] = mustSwapOperands(Pattern);
+
+ if (SwapPrevOperands) {
+ std::swap(RegX, RegY);
+ std::swap(KillX, KillY);
+ }
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY))
+ .setMIFlags(Prev.getFlags());
+
+ if (SwapRootOperands) {
+ std::swap(RegA, NewVR);
+ std::swap(KillA, KillNewVR);
+ }
+
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(KillNewVR))
+ .setMIFlags(Root.getFlags());
+
+ setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
+
+ // Record new instructions for insertion and old instructions for deletion.
+ InsInstrs.push_back(MIB1);
+ InsInstrs.push_back(MIB2);
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+
+ // We transformed:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+ // Into:
+ // B = X op Y (MIB1)
+ // C = A op B (MIB2)
+ // C has the same value as before, B doesn't; as such, keep the debug number
+ // of C but not of B.
+ if (unsigned OldRootNum = Root.peekDebugInstrNum())
+ MIB2.getInstr()->setDebugInstrNum(OldRootNum);
+}
+
+void TargetInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ break;
+ default:
+ llvm_unreachable("Unknown pattern for machine combiner");
+ }
+
+ // Don't reassociate if Prev and Root are in different blocks.
+ if (Prev->getParent() != Root.getParent())
+ return;
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+}
+
+MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
+ return MachineTraceStrategy::TS_MinInstrCount;
+}
+
+bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
+ const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getMF();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
+ return false;
+ Register DefReg = MI.getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register which cannot be
+ // moved safely.
+ if (DefReg.isVirtual() && MI.getOperand(0).getSubReg() &&
+ MI.readsVirtualRegister(DefReg))
+ return false;
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo().isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI.isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (Reg.isPhysical()) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.isConstantPhysReg(Reg))
+ return false;
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
+
+int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
+ const MachineFunction *MF = MI.getMF();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
+
+ if (!isFrameInstr(MI))
+ return 0;
+
+ int SPAdj = TFI->alignSPAdjust(getFrameSize(MI));
+
+ if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
+ SPAdj = -SPAdj;
+
+ return SPAdj;
+}
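+
+// For example, on a target whose stack grows down, a 16-byte call frame setup
+// pseudo (e.g. ADJCALLSTACKDOWN) yields an adjustment of +16 and the matching
+// call frame destroy pseudo yields -16, both after alignSPAdjust.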
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Terminators and labels can't be scheduled around.
+ if (MI.isTerminator() || MI.isPosition())
+ return true;
+
+ // INLINEASM_BR can jump to another block
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+ return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
+
+// Default implementation of getMemOperandWithOffset.
+bool TargetInstrInfo::getMemOperandWithOffset(
+ const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
+ bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ unsigned Width;
+ if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
+ Width, TRI) ||
+ BaseOps.size() != 1)
+ return false;
+ BaseOp = BaseOps.front();
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr &MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI.getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
+ const MachineInstr &DefMI) const {
+ if (DefMI.isTransient())
+ return 0;
+ if (DefMI.mayLoad())
+ return SchedModel.LoadLatency;
+ if (isHighLatencyDef(DefMI.getOpcode()))
+ return SchedModel.HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr &) const {
+ return 0;
+}
+
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI.mayLoad() ? 2 : 1;
+
+ return ItinData->getStageLatency(MI.getDesc().getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr &DefMI,
+ unsigned DefIdx) const {
+ const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+std::optional<ParamLoadedValue>
+TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const {
+ const MachineFunction *MF = MI.getMF();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
+ int64_t Offset;
+ bool OffsetIsScalable;
+
+ // To simplify the sub-register handling, verify that we only need to
+ // consider physical registers.
+ assert(MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
+ if (auto DestSrc = isCopyInstr(MI)) {
+ Register DestReg = DestSrc->Destination->getReg();
+
+ // If the copy destination is the forwarding reg, describe the forwarding
+ // reg using the copy source as the backup location. Example:
+ //
+ // x0 = MOV x7
+ // call callee(x0) ; x0 described as x7
+ if (Reg == DestReg)
+ return ParamLoadedValue(*DestSrc->Source, Expr);
+
+ // If the target's hook couldn't describe this copy, give up.
+ return std::nullopt;
+ } else if (auto RegImm = isAddImmediate(MI, Reg)) {
+ Register SrcReg = RegImm->Reg;
+ Offset = RegImm->Imm;
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset);
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+ } else if (MI.hasOneMemOperand()) {
+ // Only describe memory which provably does not escape the function. As
+ // described in llvm.org/PR43343, escaped memory may be clobbered by the
+ // callee (or by another thread).
+ const auto &TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const MachineMemOperand *MMO = MI.memoperands()[0];
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+
+ // If the address points to "special" memory (e.g. a spill slot), it's
+ // sufficient to check that it isn't aliased by any high-level IR value.
+ if (!PSV || PSV->mayAlias(&MFI))
+ return std::nullopt;
+
+ const MachineOperand *BaseOp;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
+ TRI))
+ return std::nullopt;
+
+ // FIXME: Scalable offsets are not yet handled in the offset code below.
+ if (OffsetIsScalable)
+ return std::nullopt;
+
+ // TODO: Can currently only handle mem instructions with a single define.
+ // An example from the x86 target:
+ // ...
+ // DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax, implicit-def $rdx
+ // ...
+ //
+ if (MI.getNumExplicitDefs() != 1)
+ return std::nullopt;
+
+ // TODO: In what way do we need to take Reg into consideration here?
+
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ Ops.push_back(dwarf::DW_OP_deref_size);
+ Ops.push_back(MMO->getSize());
+ Expr = DIExpression::prependOpcodes(Expr, Ops);
+ return ParamLoadedValue(*BaseOp, Expr);
+ }
+
+ return std::nullopt;
+}
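+
+// For illustration, in the same pseudo-notation as above:
+//
+//   x0 = ADD x7, 16
+//   call callee(x0)   ; x0 described as x7 + 16 (DW_OP_plus_uconst 16)
+//
+// A parameter loaded from a non-escaping stack slot is instead described as a
+// dereference of the base register at the given offset (DW_OP_deref_size).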
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const {
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ unsigned UseClass = UseMI.getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+bool TargetInstrInfo::getRegSequenceInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert((MI.isRegSequence() ||
+ MI.isRegSequenceLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isRegSequence())
+ return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
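+ // e.g. with the illustrative operands above, InputRegs receives
+ // (v0, no subreg, sub0) and (v1, no subreg, sub1): each source register
+ // paired with the sub-register index of the lane it defines.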
+ assert(DefIdx == 0 && "REG_SEQUENCE only has one def");
+ for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
+ OpIdx += 2) {
+ const MachineOperand &MOReg = MI.getOperand(OpIdx);
+ if (MOReg.isUndef())
+ continue;
+ const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ // Record Reg:SubReg, SubIdx.
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(),
+ (unsigned)MOSubIdx.getImm()));
+ }
+ return true;
+}
+
+bool TargetInstrInfo::getExtractSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert((MI.isExtractSubreg() ||
+ MI.isExtractSubregLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isExtractSubreg())
+ return getExtractSubregLikeInputs(MI, DefIdx, InputReg);
+
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0.sub1, sub0.
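+ // e.g. with the illustrative operands above, InputReg ends up as
+ // {Reg = v0, SubReg = sub1, SubIdx = sub0}.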
+ assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
+ const MachineOperand &MOReg = MI.getOperand(1);
+ if (MOReg.isUndef())
+ return false;
+ const MachineOperand &MOSubIdx = MI.getOperand(2);
+ assert(MOSubIdx.isImm() &&
+ "The subindex of the extract_subreg is not an immediate");
+
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
+
+bool TargetInstrInfo::getInsertSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const {
+ assert((MI.isInsertSubreg() ||
+ MI.isInsertSubregLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isInsertSubreg())
+ return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub0.
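+ // e.g. with the illustrative operands above, BaseReg is filled in from v0
+ // and InsertedReg from v1 together with the insertion index sub0.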
+ assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ if (MOInsertedReg.isUndef())
+ return false;
+ const MachineOperand &MOSubIdx = MI.getOperand(3);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
+
+// Returns a MIRPrinter comment for this machine operand.
+std::string TargetInstrInfo::createMIROperandComment(
+ const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const {
+
+ if (!MI.isInlineAsm())
+ return "";
+
+ std::string Flags;
+ raw_string_ostream OS(Flags);
+
+ if (OpIdx == InlineAsm::MIOp_ExtraInfo) {
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
+ unsigned ExtraInfo = Op.getImm();
+ bool First = true;
+ for (StringRef Info : InlineAsm::getExtraInfoNames(ExtraInfo)) {
+ if (!First)
+ OS << " ";
+ First = false;
+ OS << Info;
+ }
+
+ return OS.str();
+ }
+
+ int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0 || (unsigned)FlagIdx != OpIdx)
+ return "";
+
+ assert(Op.isImm() && "Expected flag operand to be an immediate");
+ // Pretty print the inline asm operand descriptor.
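+ // For illustration only: a register-class-constrained def may be rendered
+ // roughly as "regdef:GR32" and a tied register use roughly as
+ // "reguse tiedto:$0"; the exact strings depend on InlineAsm::getKindName
+ // and the target's register-class names.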
+ unsigned Flag = Op.getImm();
+ unsigned Kind = InlineAsm::getKind(Flag);
+ OS << InlineAsm::getKindName(Kind);
+
+ unsigned RCID = 0;
+ if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
+ } else
+ OS << ":RC" << RCID;
+ }
+
+ if (InlineAsm::isMemKind(Flag)) {
+ unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ OS << ":" << InlineAsm::getMemConstraintName(MCID);
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ return OS.str();
+}
+
+TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default;
+
+void TargetInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ outliner::Candidate &FirstCand = Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-features"));
+ if (ParentFn.hasFnAttribute("target-cpu"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-cpu"));
+
+ // Set nounwind, so we don't generate eh_frame.
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
+ }))
+ F.addFnAttr(Attribute::NoUnwind);
+}
+
+outliner::InstrType TargetInstrInfo::getOutliningType(
+ MachineBasicBlock::iterator &MIT, unsigned Flags) const {
+ MachineInstr &MI = *MIT;
+
+ // NOTE: MI.isMetaInstruction() will match CFI_INSTRUCTION, but some targets
+ // have support for outlining those. Special-case that here.
+ if (MI.isCFIInstruction())
+ // Just go right to the target implementation.
+ return getOutliningTypeImpl(MIT, Flags);
+
+ // Be conservative about inline assembly.
+ if (MI.isInlineAsm())
+ return outliner::InstrType::Illegal;
+
+ // Labels generally can't safely be outlined.
+ if (MI.isLabel())
+ return outliner::InstrType::Illegal;
+
+ // Don't let debug instructions impact analysis.
+ if (MI.isDebugInstr())
+ return outliner::InstrType::Invisible;
+
+ // Some other special cases.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::LIFETIME_START:
+ case TargetOpcode::LIFETIME_END:
+ return outliner::InstrType::Invisible;
+ default:
+ break;
+ }
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+ // If this is a branch to another block, we can't outline it.
+ if (!MI.getParent()->succ_empty())
+ return outliner::InstrType::Illegal;
+
+ // Don't outline if the branch is not unconditional.
+ if (isPredicated(MI))
+ return outliner::InstrType::Illegal;
+ }
+
+ // Make sure none of the operands of this instruction do anything that
+ // might break if they're moved outside their current function.
+ // This includes MachineBasicBlock references, BlockAddresses,
+ // Constant pool indices and jump table indices.
+ //
+ // A quick note on MO_TargetIndex:
+ // This doesn't seem to be used in any of the architectures that the
+ // MachineOutliner supports, yet nearly all of them still filtered it out.
+ // The one exception was RISC-V, where MO_TargetIndex isn't used either.
+ // As such, this check is removed both here and in the target-specific
+ // implementations. Instead, we assert to make sure this doesn't
+ // catch anyone off-guard somewhere down the line.
+ for (const MachineOperand &MOP : MI.operands()) {
+ // If you hit this assertion, please remove it and adjust
+ // `getOutliningTypeImpl` for your target appropriately if necessary.
+ // Adding the assertion back to other supported architectures
+ // would be nice too :)
+ assert(!MOP.isTargetIndex() && "This isn't used quite yet!");
+
+ // CFI instructions should already have been filtered out at this point.
+ assert(!MOP.isCFIIndex() && "CFI instructions handled elsewhere!");
+
+ // PrologEpilogInserter should've already run at this point.
+ assert(!MOP.isFI() && "FrameIndex instructions should be gone by now!");
+
+ if (MOP.isMBB() || MOP.isBlockAddress() || MOP.isCPI() || MOP.isJTI())
+ return outliner::InstrType::Illegal;
+ }
+
+ // If we don't know, delegate to the target-specific hook.
+ return getOutliningTypeImpl(MIT, Flags);
+}
+
+bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ // Some instrumentations create a special TargetOpcode at the start which
+ // expands to a special code sequence that must be present.
+ auto First = MBB.getFirstNonDebugInstr();
+ if (First == MBB.end())
+ return true;
+
+ if (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+ First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER)
+ return false;
+
+ // Some instrumentations create special pseudo-instructions at or just before
+ // the end that must be present.
+ auto Last = MBB.getLastNonDebugInstr();
+ if (Last->getOpcode() == TargetOpcode::PATCHABLE_RET ||
+ Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
+ return false;
+
+ if (Last != First && Last->isReturn()) {
+ --Last;
+ if (Last->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_EXIT ||
+ Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
+ return false;
+ }
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 000000000000..10c54560da5a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,2405 @@
+//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+static cl::opt<bool> JumpIsExpensiveOverride(
+ "jump-is-expensive", cl::init(false),
+ cl::desc("Do not create extra branches to split comparison logic."),
+ cl::Hidden);
+
+static cl::opt<unsigned> MinimumJumpTableEntries
+ ("min-jump-table-entries", cl::init(4), cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table."));
+
+static cl::opt<unsigned> MaximumJumpTableSize
+ ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
+ cl::desc("Set maximum size of jump tables."));
+
+/// Minimum jump table density for normal functions.
+static cl::opt<unsigned>
+ JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "a normal function"));
+
+/// Minimum jump table density for -Os or -Oz functions.
+static cl::opt<unsigned> OptsizeJumpTableDensity(
+ "optsize-jump-table-density", cl::init(40), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "an optsize function"));
+
+// FIXME: This option only exists to test whether strict FP operations are
+// processed correctly, by preventing strict FP operations from being mutated
+// into normal FP operations during development. Once the backend supports
+// strict FP operations, this option will be meaningless.
+static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
+ cl::desc("Don't mutate strict-float node to a legalize node"),
+ cl::init(false), cl::Hidden);
+
+static bool darwinHasSinCos(const Triple &TT) {
+ assert(TT.isOSDarwin() && "should be called with darwin triple");
+ // Don't bother with 32 bit x86.
+ if (TT.getArch() == Triple::x86)
+ return false;
+ // macOS < 10.9 has no sincos_stret.
+ if (TT.isMacOSX())
+ return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit();
+ // iOS < 7.0 has no sincos_stret.
+ if (TT.isiOS())
+ return !TT.isOSVersionLT(7, 0);
+ // Any other Darwin platform, such as watchOS or tvOS, is new enough.
+ return true;
+}
+
+void TargetLoweringBase::InitLibcalls(const Triple &TT) {
+#define HANDLE_LIBCALL(code, name) \
+ setLibcallName(RTLIB::code, name);
+#include "llvm/IR/RuntimeLibcalls.def"
+#undef HANDLE_LIBCALL
+ // Initialize calling conventions to their default.
+ for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
+ setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
+
+ // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
+ if (TT.isPPC()) {
+ setLibcallName(RTLIB::ADD_F128, "__addkf3");
+ setLibcallName(RTLIB::SUB_F128, "__subkf3");
+ setLibcallName(RTLIB::MUL_F128, "__mulkf3");
+ setLibcallName(RTLIB::DIV_F128, "__divkf3");
+ setLibcallName(RTLIB::POWI_F128, "__powikf2");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+ setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+ setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
+ setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
+ setLibcallName(RTLIB::UNE_F128, "__nekf2");
+ setLibcallName(RTLIB::OGE_F128, "__gekf2");
+ setLibcallName(RTLIB::OLT_F128, "__ltkf2");
+ setLibcallName(RTLIB::OLE_F128, "__lekf2");
+ setLibcallName(RTLIB::OGT_F128, "__gtkf2");
+ setLibcallName(RTLIB::UO_F128, "__unordkf2");
+ }
+
+ // A few names are different on particular architectures or environments.
+ if (TT.isOSDarwin()) {
+ // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
+ // of the gnueabi-style __gnu_*_ieee.
+ // FIXME: What about other targets?
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+
+ // Some Darwin versions have an optimized __bzero/bzero function.
+ switch (TT.getArch()) {
+ case Triple::x86:
+ case Triple::x86_64:
+ if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
+ setLibcallName(RTLIB::BZERO, "__bzero");
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_32:
+ setLibcallName(RTLIB::BZERO, "bzero");
+ break;
+ default:
+ break;
+ }
+
+ if (darwinHasSinCos(TT)) {
+ setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
+ setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
+ if (TT.isWatchABI()) {
+ setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
+ CallingConv::ARM_AAPCS_VFP);
+ setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
+ CallingConv::ARM_AAPCS_VFP);
+ }
+ }
+ } else {
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
+ }
+
+ if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
+ (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ setLibcallName(RTLIB::SINCOS_F80, "sincosl");
+ setLibcallName(RTLIB::SINCOS_F128, "sincosl");
+ setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
+ }
+
+ if (TT.isPS()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ }
+
+ if (TT.isOSOpenBSD()) {
+ setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
+ }
+
+ if (TT.isOSWindows() && !TT.isOSCygMing()) {
+ setLibcallName(RTLIB::LDEXP_F32, nullptr);
+ setLibcallName(RTLIB::LDEXP_F80, nullptr);
+ setLibcallName(RTLIB::LDEXP_F128, nullptr);
+ setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);
+
+ setLibcallName(RTLIB::FREXP_F32, nullptr);
+ setLibcallName(RTLIB::FREXP_F80, nullptr);
+ setLibcallName(RTLIB::FREXP_F128, nullptr);
+ setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
+ }
+}
+
+/// GetFPLibCall - Helper to return the right libcall for the given floating
+/// point type, or UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_F16_F32;
+ if (RetVT == MVT::f64)
+ return FPEXT_F16_F64;
+ if (RetVT == MVT::f80)
+ return FPEXT_F16_F80;
+ if (RetVT == MVT::f128)
+ return FPEXT_F16_F128;
+ } else if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ if (RetVT == MVT::ppcf128)
+ return FPEXT_F32_PPCF128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
+ else if (RetVT == MVT::ppcf128)
+ return FPEXT_F64_PPCF128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F80_F128;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_F16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F16;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F16;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F16;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F16;
+ } else if (RetVT == MVT::bf16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_BF16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_BF16;
+ } else if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ } else if (RetVT == MVT::f80) {
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F80;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F16_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F16_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F16_I128;
+ } else if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F16_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F16_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F16_I128;
+ } else if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I32_F16;
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I64_F16;
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I128_F16;
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I32_F16;
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I64_F16;
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I128_F16;
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
+ return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128,
+ POWI_PPCF128);
+}
+
+RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
+ return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
+ LDEXP_PPCF128);
+}
+
+RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
+ return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
+ FREXP_PPCF128);
+}
+
+RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
+ MVT VT) {
+ unsigned ModeN, ModelN;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ModeN = 0;
+ break;
+ case MVT::i16:
+ ModeN = 1;
+ break;
+ case MVT::i32:
+ ModeN = 2;
+ break;
+ case MVT::i64:
+ ModeN = 3;
+ break;
+ case MVT::i128:
+ ModeN = 4;
+ break;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+
+ switch (Order) {
+ case AtomicOrdering::Monotonic:
+ ModelN = 0;
+ break;
+ case AtomicOrdering::Acquire:
+ ModelN = 1;
+ break;
+ case AtomicOrdering::Release:
+ ModelN = 2;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ ModelN = 3;
+ break;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+
+#define LCALLS(A, B) \
+ { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL5(A) \
+ LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
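+ // Expanding the macros above as an example: an i32 (ModeN == 2) acquire
+ // (ModelN == 1) ISD::ATOMIC_SWAP resolves to OUTLINE_ATOMIC_SWP4_ACQ.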
+ switch (Opc) {
+ case ISD::ATOMIC_CMP_SWAP: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_SWAP: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_ADD: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_OR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_CLR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_XOR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
+ return LC[ModeN][ModelN];
+ }
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+#undef LCALLS
+#undef LCALL5
+}
+
+RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
+#define OP_TO_LIBCALL(Name, Enum) \
+ case Name: \
+ switch (VT.SimpleTy) { \
+ default: \
+ return UNKNOWN_LIBCALL; \
+ case MVT::i8: \
+ return Enum##_1; \
+ case MVT::i16: \
+ return Enum##_2; \
+ case MVT::i32: \
+ return Enum##_4; \
+ case MVT::i64: \
+ return Enum##_8; \
+ case MVT::i128: \
+ return Enum##_16; \
+ }
+
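+ // For example, ISD::ATOMIC_LOAD_ADD on MVT::i32 maps to
+ // SYNC_FETCH_AND_ADD_4 (typically lowered to __sync_fetch_and_add_4).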
+ switch (Opc) {
+ OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
+ OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
+ }
+
+#undef OP_TO_LIBCALL
+
+ return UNKNOWN_LIBCALL;
+}
+
+RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
+RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
+RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
+}
+
+/// NOTE: The TargetMachine owns TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
+ initActions();
+
+ // Perform these initializations only once.
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
+ MaxLoadsPerMemcmp = 8;
+ MaxGluedStoresPerMemcpy = 0;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
+ MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
+ HasMultipleConditionRegisters = false;
+ HasExtractBitsInsn = false;
+ JumpIsExpensive = JumpIsExpensiveOverride;
+ PredictableSelectIsExpensive = false;
+ EnableExtLdPromotion = false;
+ StackPointerRegisterToSaveRestore = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanFloatContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ GatherAllAliasesMaxDepth = 18;
+ IsStrictFPEnabled = DisableStrictNodeMutation;
+ MaxBytesForAlignment = 0;
+ // TODO: the default will be switched to 0 in the next commit, along
+ // with the Target-specific changes necessary.
+ MaxAtomicSizeInBitsSupported = 1024;
+
+ // Assume that even with libcalls, no target supports wider than 128 bit
+ // division.
+ MaxDivRemBitWidthSupported = 128;
+
+ MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
+
+ MinCmpXchgSizeInBits = 0;
+ SupportsUnalignedAtomics = false;
+
+ std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);
+
+ InitLibcalls(TM.getTargetTriple());
+ InitCmpLibcallCCs(CmpLibcallCCs);
+}
+
+void TargetLoweringBase::initActions() {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+ std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
+ std::fill(std::begin(TargetDAGCombineArray),
+ std::end(TargetDAGCombineArray), 0);
+
+ // We're somewhat special-casing MVT::i2 and MVT::i4. Ideally we would
+ // remove this, and targets would individually set these types if not legal.
+ for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
+ force_iteration_on_noniterable_enum)) {
+ for (MVT VT : {MVT::i2, MVT::i4})
+ OpActions[(unsigned)VT.SimpleTy][NT] = Expand;
+ }
+ for (MVT AVT : MVT::all_valuetypes()) {
+ for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) {
+ setTruncStoreAction(AVT, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand);
+ }
+ }
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ for (MVT VT : {MVT::i2, MVT::i4}) {
+ setIndexedLoadAction(IM, VT, Expand);
+ setIndexedStoreAction(IM, VT, Expand);
+ setIndexedMaskedLoadAction(IM, VT, Expand);
+ setIndexedMaskedStoreAction(IM, VT, Expand);
+ }
+ }
+
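+ // Promote ATOMIC_SWAP on floating-point types to the integer type of the
+ // same bit width, when one exists (e.g. an f32 swap is handled as i32).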
+ for (MVT VT : MVT::fp_valuetypes()) {
+ MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
+ if (IntVT.isValid()) {
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
+ AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
+ }
+ }
+
+ // Set default actions for various operations.
+ for (MVT VT : MVT::all_valuetypes()) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, VT, Expand);
+ setIndexedStoreAction(IM, VT, Expand);
+ setIndexedMaskedLoadAction(IM, VT, Expand);
+ setIndexedMaskedStoreAction(IM, VT, Expand);
+ }
+
+ // Most backends expect to see the node which just returns the value loaded.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
+
+ // These operations default to expand.
+ setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS,
+ ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
+ ISD::FMINIMUM, ISD::FMAXIMUM,
+ ISD::FMAD, ISD::SMIN,
+ ISD::SMAX, ISD::UMIN,
+ ISD::UMAX, ISD::ABS,
+ ISD::FSHL, ISD::FSHR,
+ ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT,
+ ISD::SSHLSAT, ISD::USHLSAT,
+ ISD::SMULFIX, ISD::SMULFIXSAT,
+ ISD::UMULFIX, ISD::UMULFIXSAT,
+ ISD::SDIVFIX, ISD::SDIVFIXSAT,
+ ISD::UDIVFIX, ISD::UDIVFIXSAT,
+ ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
+ ISD::IS_FPCLASS},
+ VT, Expand);
+
+ // Overflow operations default to expand
+ setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO,
+ ISD::SMULO, ISD::UMULO},
+ VT, Expand);
+
+ // Carry-using overflow operations default to expand.
+ setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY,
+ ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
+ VT, Expand);
+
+ // ADDC/ADDE/SUBC/SUBE default to expand.
+ setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
+ Expand);
+
+ // Halving adds
+ setOperationAction(
+ {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
+ Expand);
+
+ // Absolute difference
+ setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
+
+ // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Expand);
+
+ setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(
+ {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP},
+ VT, Expand);
+
+ // These operations default to expand for vector types.
+ if (VT.isVector())
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR},
+ VT, Expand);
+
+ // Constrained floating-point operations default to expand.
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
+#include "llvm/IR/ConstrainedOps.def"
+
+ // For most targets @llvm.get.dynamic.area.offset just returns 0.
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
+
+ // Vector reduction default to expand.
+ setOperationAction(
+ {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD,
+ ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
+ ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
+ VT, Expand);
+
+ // Named vector shuffles default to expand.
+ setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
+
+ // VP operations default to expand.
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
+ setOperationAction(ISD::SDOPC, VT, Expand);
+#include "llvm/IR/VPIntrinsics.def"
+
+ // FP environment operations default to expand.
+ setOperationAction(ISD::GET_FPENV, VT, Expand);
+ setOperationAction(ISD::SET_FPENV, VT, Expand);
+ setOperationAction(ISD::RESET_FPENV, VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // Most targets also ignore the @llvm.readcyclecounter intrinsic.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP,
+ {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
+ Expand);
+
+ // These library functions default to expand.
+ setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
+ ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND,
+ ISD::LRINT, ISD::LLRINT},
+ {MVT::f32, MVT::f64, MVT::f128}, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ // On most systems, DEBUGTRAP and TRAP are no different. The "Expand"
+ // here informs the DAG Legalizer to replace DEBUGTRAP with TRAP.
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
+
+ setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
+ setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);
+}
+
+MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT) const {
+ return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
+}
+
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
+ bool LegalTypes) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ MVT ShiftVT =
+ LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL);
+ // If any possible shift value won't fit in the preferred type, just use
+ // something safe. Assume it will be legalized when the shift is expanded.
+ if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
+ ShiftVT = MVT::i32;
+ assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
+ "ShiftVT is still too small!");
+ return ShiftVT;
+}
+
+bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
+
+bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
+}
+
+void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
+ // If the command-line option was specified, ignore this request.
+ if (!JumpIsExpensiveOverride.getNumOccurrences())
+ JumpIsExpensive = isExpensive;
+}
+
+TargetLoweringBase::LegalizeKind
+TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
+ // If this is a simple type, use the ComputeRegisterProp mechanism.
+ if (VT.isSimple()) {
+ MVT SVT = VT.getSimpleVT();
+ assert((unsigned)SVT.SimpleTy < std::size(TransformToType));
+ MVT NVT = TransformToType[SVT.SimpleTy];
+ LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
+
+ assert((LA == TypeLegal || LA == TypeSoftenFloat ||
+ LA == TypeSoftPromoteHalf ||
+ (NVT.isVector() ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
+ "Promote may not follow Expand or Promote");
+
+ if (LA == TypeSplitVector)
+ return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context));
+ if (LA == TypeScalarizeVector)
+ return LegalizeKind(LA, SVT.getVectorElementType());
+ return LegalizeKind(LA, NVT);
+ }
+
+ // Handle Extended Scalar Types.
+ if (!VT.isVector()) {
+ assert(VT.isInteger() && "Float types must be simple");
+ unsigned BitSize = VT.getSizeInBits();
+ // First promote to a power-of-two size, then expand if necessary.
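+ // e.g. an extended type such as i33 is rounded up and promoted to i64
+ // here, while an already power-of-two type such as i256 skips this and
+ // is expanded into two i128 halves below.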
+ if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
+ EVT NVT = VT.getRoundIntegerType(Context);
+ assert(NVT != VT && "Unable to round integer VT");
+ LegalizeKind NextStep = getTypeConversion(Context, NVT);
+ // Avoid multi-step promotion.
+ if (NextStep.first == TypePromoteInteger)
+ return NextStep;
+ // Return rounded integer type.
+ return LegalizeKind(TypePromoteInteger, NVT);
+ }
+
+ return LegalizeKind(TypeExpandInteger,
+ EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
+ }
+
+ // Handle vector types.
+ ElementCount NumElts = VT.getVectorElementCount();
+ EVT EltVT = VT.getVectorElementType();
+
+ // Vectors with only one element are always scalarized.
+ if (NumElts.isScalar())
+ return LegalizeKind(TypeScalarizeVector, EltVT);
+
+ // Try to widen vector elements until the element type is a power of two and
+ // promote it to a legal type later on, for example:
+ // <3 x i8> -> <4 x i8> -> <4 x i32>
+ if (EltVT.isInteger()) {
+ // Vectors with a number of elements that is not a power of two are always
+ // widened, for example <3 x i8> -> <4 x i8>.
+ if (!VT.isPow2VectorType()) {
+ NumElts = NumElts.coefficientNextPowerOf2();
+ EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Examine the element type.
+ LegalizeKind LK = getTypeConversion(Context, EltVT);
+
+ // If type is to be expanded, split the vector.
+ // <4 x i140> -> <2 x i140>
+ if (LK.first == TypeExpandInteger) {
+ if (VT.getVectorElementCount().isScalable())
+ return LegalizeKind(TypeScalarizeScalableVector, EltVT);
+ return LegalizeKind(TypeSplitVector,
+ VT.getHalfNumVectorElementsVT(Context));
+ }
+
+ // Promote the integer element types until a legal vector type is found
+ // or until the element integer type is too big. If a legal type was not
+ // found, fall back to the usual mechanism of widening/splitting the
+ // vector.
+ EVT OldEltVT = EltVT;
+ while (true) {
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
+ .getRoundIntegerType(Context);
+
+ // Stop trying when getting a non-simple element type.
+ // Note that vector elements may be greater than legal vector element
+ // types. Example: X86 XMM registers hold 64-bit elements on 32-bit
+ // systems.
+ if (!EltVT.isSimple())
+ break;
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ // Found a legal promoted vector type.
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
+ return LegalizeKind(TypePromoteInteger,
+ EVT::getVectorVT(Context, EltVT, NumElts));
+ }
+
+ // Reset the type to the unexpanded type if we did not find a legal vector
+ // type with a promoted vector element type.
+ EltVT = OldEltVT;
+ }
+
+ // Try to widen the vector until a legal type is found.
+ // If there is no wider legal type, split the vector.
+ while (true) {
+ // Round up to the next power of 2.
+ NumElts = NumElts.coefficientNextPowerOf2();
+
+ // If there is no simple vector type with this many elements then there
+ // cannot be a larger legal vector type. Note that this assumes that
+ // there are no skipped intermediate vector types in the simple types.
+ if (!EltVT.isSimple())
+ break;
+ MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ if (LargerVector == MVT())
+ break;
+
+ // If this type is legal then widen the vector.
+ if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
+ return LegalizeKind(TypeWidenVector, LargerVector);
+ }
+
+ // Widen odd vectors to next power of two.
+ if (!VT.isPow2VectorType()) {
+ EVT NVT = VT.getPow2VectorType(Context);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ if (VT.getVectorElementCount() == ElementCount::getScalable(1))
+ return LegalizeKind(TypeScalarizeScalableVector, EltVT);
+
+ // Vectors with illegal element types are expanded.
+ EVT NVT = EVT::getVectorVT(Context, EltVT,
+ VT.getVectorElementCount().divideCoefficientBy(2));
+ return LegalizeKind(TypeSplitVector, NVT);
+}
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT,
+ TargetLoweringBase *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ ElementCount EC = VT.getVectorElementCount();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // Scalable vectors cannot be scalarized, so splitting or widening is
+ // required.
+ if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
+ llvm_unreachable(
+ "Splitting or widening of non-power-of-2 MVTs is not implemented.");
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now.
+ // Ideally we could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(EC.getKnownMinValue())) {
+ // Split EC to unit size (scalable property is preserved).
+ NumVectorRegs = EC.getKnownMinValue();
+ EC = ElementCount::getFixed(1);
+ }
+
+ // Divide the input until we get to a supported size. This will
+ // always end up with an EC that represents a scalar or a scalable
+ // scalar.
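+ // e.g. if <8 x i32> is not legal for the target but <4 x i32> is, this
+ // loop stops with EC == 4 and NumVectorRegs == 2.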
+ while (EC.getKnownMinValue() > 1 &&
+ !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
+ EC = EC.divideCoefficientBy(2);
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, EC);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);
+
+ MVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC) const {
+ for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
+ if (isTypeLegal(*I))
+ return true;
+ return false;
+}
+
+/// Replace/modify any TargetFrameIndex operands with a target-dependent
+/// sequence of memory operands that is recognized by PrologEpilogInserter.
+MachineBasicBlock *
+TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
+ MachineBasicBlock *MBB) const {
+ MachineInstr *MI = &InitialMI;
+ MachineFunction &MF = *MI->getMF();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // We're handling multiple types of operands here:
+ // PATCHPOINT MetaArgs - live-in, read only, direct
+ // STATEPOINT Deopt Spill - live-through, read only, indirect
+ // STATEPOINT Deopt Alloca - live-through, read only, direct
+ // (We're currently conservative and mark the deopt slots read/write in
+ // practice.)
+ // STATEPOINT GC Spill - live-through, read/write, indirect
+ // STATEPOINT GC Alloca - live-through, read/write, direct
+ // The live-in vs live-through is handled already (the live through ones are
+ // all stack slots), but we need to handle the different types of stackmap
+ // operands and memory effects here.
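+ // For illustration, a statepoint spill-slot frame index is rewritten below
+ // into (IndirectMemRefOp, object size, frame index, 0), while a direct
+ // reference becomes (DirectMemRefOp, frame index, 0).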
+
+ if (llvm::none_of(MI->operands(),
+ [](MachineOperand &Operand) { return Operand.isFI(); }))
+ return MBB;
+
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
+
+ // Inherit previous memory operands.
+ MIB.cloneMemRefs(*MI);
+
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI()) {
+ // Index of the Def operand this Use is tied to.
+ // Since Defs come before Uses, if a Use is tied, the index of
+ // its Def must be smaller than the index of that Use.
+ // Also, Defs preserve their position in the new MI.
+ unsigned TiedTo = i;
+ if (MO.isReg() && MO.isTied())
+ TiedTo = MI->findTiedOperandIdx(i);
+ MIB.add(MO);
+ if (TiedTo < i)
+ MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
+ continue;
+ }
+
+ // foldMemoryOperand builds a new MI after replacing a single FI operand
+ // with the canonical set of five x86 addressing-mode operands.
+ int FI = MO.getIndex();
+
+ // Add frame index operands recognized by stackmaps.cpp
+ if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
+ // indirect-mem-ref tag, size, #FI, offset.
+ // Used for spills inserted by StatepointLowering. This codepath is not
+ // used for patchpoints/stackmaps at all; for those, spilling is done via
+ // the foldMemoryOperand callback only.
+ assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(MFI.getObjectSize(FI));
+ MIB.add(MO);
+ MIB.addImm(0);
+ } else {
+ // direct-mem-ref tag, #FI, offset.
+ // Used by patchpoint, and direct alloca arguments to statepoints
+ MIB.addImm(StackMaps::DirectMemRefOp);
+ MIB.add(MO);
+ MIB.addImm(0);
+ }
+
+ assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
+
+ // Add a new memory operand for this FI.
+ assert(MFI.getObjectOffset(FI) != -1);
+
+ // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP and
+ // PATCHPOINT should be updated to do the same. (TODO)
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
+ auto Flags = MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags,
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
+ MIB->addMemOperand(MF, MMO);
+ }
+ }
+ MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+ MI->eraseFromParent();
+ return MBB;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+// This function is in TargetLowering because it uses RegClassForVT which would
+// need to be moved to TargetRegisterInfo and would necessitate moving
+// isTypeLegal over as well - a massive change that would just require
+// TargetLowering having a TargetRegisterInfo class member that it would use.
+std::pair<const TargetRegisterClass *, uint8_t>
+TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
+ const TargetRegisterClass *BestRC = RC;
+ for (unsigned i : SuperRegRC.set_bits()) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
+ continue;
+ if (!isLegalRC(*TRI, *SuperRC))
+ continue;
+ BestRC = SuperRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties(
+ const TargetRegisterInfo *TRI) {
+ static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
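+ // e.g. if i64 is the largest legal integer type, i128 is recorded as
+ // needing twice as many registers and is expanded in terms of i64.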
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
+
+ // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // The ppcf128 type is really two f64s.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ } else {
+ NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::ppcf128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
+ }
+ }
+
+ // Decide how to handle f128. If the target does not have native f128 support,
+ // expand it to i128 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f80. If the target does not have native f80 support,
+ // expand it to i96 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f80)) {
+ NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f80] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native f32 support,
+ // expand it to i32 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f32)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f16. If the target does not have native f16 support,
+ // promote it to f32, because there are no f16 library calls (except for
+ // conversions).
+ if (!isTypeLegal(MVT::f16)) {
+ // Allow targets to control how we legalize half.
+ if (softPromoteHalfType()) {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
+ } else {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
+ }
+ }
+
+ // Decide how to handle bf16. If the target does not have native bf16 support,
+ // promote it to f32, because there are no bf16 library calls (except for
+ // converting from f32 to bf16).
+ if (!isTypeLegal(MVT::bf16)) {
+ NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::bf16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::bf16, TypeSoftPromoteHalf);
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType) i;
+ if (isTypeLegal(VT))
+ continue;
+
+ MVT EltVT = VT.getVectorElementType();
+ ElementCount EC = VT.getVectorElementCount();
+ bool IsLegalWiderType = false;
+ bool IsScalable = VT.isScalableVector();
+ LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+ switch (PreferredAction) {
+ case TypePromoteInteger: {
+ MVT::SimpleValueType EndVT = IsScalable ?
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
+ // Try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall through to the widen-vector method.
+ for (unsigned nVT = i + 1;
+ (MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() &&
+ SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType)
+ break;
+ [[fallthrough]];
+ }
+
+ case TypeWidenVector:
+ if (isPowerOf2_32(EC.getKnownMinValue())) {
+ // Try to widen the vector.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.isScalableVector() == IsScalable &&
+ SVT.getVectorElementCount().getKnownMinValue() >
+ EC.getKnownMinValue() &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType)
+ break;
+ } else {
+ // Only widen to the next power of 2 to keep consistency with EVT.
+ MVT NVT = VT.getPow2VectorType();
+ if (isTypeLegal(NVT)) {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ RegisterTypeForVT[i] = NVT;
+ NumRegistersForVT[i] = 1;
+ break;
+ }
+ }
+ [[fallthrough]];
+
+ case TypeSplitVector:
+ case TypeScalarizeVector: {
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+ NumIntermediates, RegisterVT, this);
+ NumRegistersForVT[i] = NumRegisters;
+ assert(NumRegistersForVT[i] == NumRegisters &&
+ "NumRegistersForVT size cannot represent NumRegisters!");
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ if (PreferredAction == TypeScalarizeVector)
+ ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+ else if (PreferredAction == TypeSplitVector)
+ ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ else if (EC.getKnownMinValue() > 1)
+ ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ else
+ ValueTypeActions.setTypeAction(VT, EC.isScalable()
+ ? TypeScalarizeScalableVector
+ : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown vector legalization action!");
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // A representative register class is the largest (meaning one which is
+ // not a sub-register class / subreg register class) legal register class for
+ // a group of value types. For example, on i386 the representative class for
+ // i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(DL).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
+ EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ ElementCount EltCnt = VT.getVectorElementCount();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (!EltCnt.isScalar() &&
+ (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // Scalable vectors cannot be scalarized, so handle the legalisation of the
+ // types as is done elsewhere in SelectionDAG.
+ if (EltCnt.isScalable()) {
+ LegalizeKind LK;
+ EVT PartVT = VT;
+ do {
+ // Iterate until we've found a legal (part) type to hold VT.
+ LK = getTypeConversion(Context, PartVT);
+ PartVT = LK.second;
+ } while (LK.first != TypeLegal);
+
+ if (!PartVT.isVector()) {
+ report_fatal_error(
+ "Don't know how to legalize this scalable vector type");
+ }
+
+ NumIntermediates =
+ divideCeil(VT.getVectorElementCount().getKnownMinValue(),
+ PartVT.getVectorElementCount().getKnownMinValue());
+ IntermediateVT = PartVT;
+ RegisterVT = getRegisterType(Context, IntermediateVT);
+ return NumIntermediates;
+ }
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
+ // we could break down into LHS/RHS like LegalizeDAG does.
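+ // For illustration: assuming f32 is legal but <1 x f32> is not, a v3f32
+ // value is broken down here into 3 separate f32 parts.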
+ if (!isPowerOf2_32(EltCnt.getKnownMinValue())) {
+ NumVectorRegs = EltCnt.getKnownMinValue();
+ EltCnt = ElementCount::getFixed(1);
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (EltCnt.getKnownMinValue() > 1 &&
+ !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
+ EltCnt = EltCnt.divideCoefficientBy(2);
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+
+ if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
+ TypeSize NewVTSize = NewVT.getSizeInBits();
+ // Convert sizes such as i33 to i64.
+ if (!llvm::has_single_bit<uint32_t>(NewVTSize.getKnownMinValue()))
+ NewVTSize = NewVTSize.coefficientNextPowerOf2();
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+ }
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
+ uint64_t NumCases,
+ uint64_t Range,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const {
+ // FIXME: This function checks the maximum table size and density, but the
+ // minimum size is not checked. It would be nice if the minimum size check
+ // were also folded into this function. Currently, the minimum size check is
+ // performed in findJumpTable() in SelectionDAGBuilder and
+ // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+ const bool OptForSize =
+ SI->getParent()->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
+ const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
+ const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
+
+ // Check whether the number of cases is small enough and
+ // the range is dense enough for a jump table.
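+ // Worked example with hypothetical numbers: with MinDensity = 40, a switch
+ // with 4 cases spread over a range of 101 values gives 4 * 100 = 400, which
+ // is below 101 * 40 = 4040, so the switch is too sparse for a jump table.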
+ return (OptForSize || Range <= MaxJumpTableSize) &&
+ (NumCases * 100 >= Range * MinDensity);
+}
+
+MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context,
+ EVT ConditionVT) const {
+ return getRegisterType(Context, ConditionVT);
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
+ AttributeList attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI, const DataLayout &DL) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr.hasRetAttr(Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr.hasRetAttr(Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
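+ // For illustration: an i8 return value marked 'signext' is widened here to
+ // at least the target's register type for i32.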
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts =
+ TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
+ MVT PartVT =
+ TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasRetAttr(Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr.hasRetAttr(Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasRetAttr(Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
+ return DL.getABITypeAlign(Ty).value();
+}
+
+bool TargetLoweringBase::allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
+ // Check if the specified alignment is sufficient based on the data layout.
+ // TODO: While using the data layout works in practice, a better solution
+ // would be to implement this check directly (make this a virtual function).
+ // For example, the ABI alignment may change based on software platform while
+ // this function should only be affected by hardware implementation.
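+ // For illustration: an i32 access with 2-byte alignment on a target whose
+ // ABI alignment for i32 is 4 bytes falls through to
+ // allowsMisalignedMemoryAccesses() below.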
+ Type *Ty = VT.getTypeForEVT(Context);
+ if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) {
+ // Assume that an access that meets the ABI-specified alignment is fast.
+ if (Fast != nullptr)
+ *Fast = 1;
+ return true;
+ }
+
+ // This is a misaligned access.
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO, unsigned *Fast) const {
+ return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
+ MMO.getAlign(), MMO.getFlags(), Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags,
+ unsigned *Fast) const {
+ return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
+ Flags, Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO,
+ unsigned *Fast) const {
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
+ MMO.getFlags(), Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, LLT Ty,
+ const MachineMemOperand &MMO,
+ unsigned *Fast) const {
+ EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
+ MMO.getFlags(), Fast);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case CallBr: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case CleanupRet: return 0;
+ case CatchRet: return 0;
+ case CatchPad: return 0;
+ case CatchSwitch: return 0;
+ case CleanupPad: return 0;
+ case FNeg: return ISD::FNEG;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case AddrSpaceCast: return ISD::ADDRSPACECAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ case Freeze: return ISD::FREEZE;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+Value *
+TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
+ bool UseTLS) const {
+ // compiler-rt provides a variable with a magic name. Targets that do not
+ // link with compiler-rt may also provide such a variable.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+ auto UnsafeStackPtr =
+ dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));
+
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+
+ if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
+ // The global variable is not defined yet, define it ourselves.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
+ UnsafeStackPtr = new GlobalVariable(
+ *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
+ } else {
+ // The variable exists, check its type and attributes.
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
+ }
+ return UnsafeStackPtr;
+}
+
+Value *
+TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
+ if (!TM.getTargetTriple().isAndroid())
+ return getDefaultSafeStackPointerLocation(IRB, true);
+
+ // Android provides a libc function to retrieve the address of the current
+ // thread's unsafe stack pointer.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+ FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
+ StackPtrTy->getPointerTo(0));
+ return IRB.CreateCall(Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS, Instruction *I) const {
+ // The default implementation supports a conservative RISC-style addressing
+ // mode: r+r and r+i.
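+ // For example, 'reg', 'reg + 1234', 'reg + reg', and '2*reg' are accepted,
+ // while 'reg + reg + 8' and '4*reg' are rejected.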
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default: // Don't allow n * r
+ return false;
+ }
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Protector
+//===----------------------------------------------------------------------===//
+
+// For OpenBSD, return its special guard variable. Otherwise return nullptr,
+// so that SelectionDAG handles SSP.
+Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const {
+ if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
+ Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
+ PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
+ Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy);
+ if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C))
+ G->setVisibility(GlobalValue::HiddenVisibility);
+ return C;
+ }
+ return nullptr;
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
+ if (!M.getNamedValue("__stack_chk_guard")) {
+ auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
+ GlobalVariable::ExternalLinkage, nullptr,
+ "__stack_chk_guard");
+
+ // FreeBSD has "__stack_chk_guard" defined externally in libc.so
+ if (M.getDirectAccessExternalData() &&
+ !TM.getTargetTriple().isWindowsGNUEnvironment() &&
+ !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) &&
+ !TM.getTargetTriple().isOSDarwin())
+ GV->setDSOLocal(true);
+ }
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
+ return M.getNamedValue("__stack_chk_guard");
+}
+
+Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+ return nullptr;
+}
+
+unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
+ return MinimumJumpTableEntries;
+}
+
+void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
+ MinimumJumpTableEntries = Val;
+}
+
+unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
+ return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
+}
+
+unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
+ return MaximumJumpTableSize;
+}
+
+void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
+ MaximumJumpTableSize = Val;
+}
+
+bool TargetLoweringBase::isJumpTableRelative() const {
+ return getTargetMachine().isPositionIndependent();
+}
+
+Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
+ if (TM.Options.LoopAlignment)
+ return Align(TM.Options.LoopAlignment);
+ return PrefLoopAlignment;
+}
+
+unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment(
+ MachineBasicBlock *MBB) const {
+ return MaxBytesForAlignment;
+}
+
+//===----------------------------------------------------------------------===//
+// Reciprocal Estimates
+//===----------------------------------------------------------------------===//
+
+/// Get the reciprocal estimate attribute string for a function that will
+/// override the target defaults.
+static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return F.getFnAttribute("reciprocal-estimates").getValueAsString();
+}
+
+/// Construct a string for the given reciprocal operation of the given type.
+/// This string should match the corresponding option to the front-end's
+/// "-mrecip" flag assuming those strings have been passed through in an
+/// attribute string. For example, "vec-divf" for a division of a vXf32.
+static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
+ std::string Name = VT.isVector() ? "vec-" : "";
+
+ Name += IsSqrt ? "sqrt" : "div";
+
+ // TODO: Handle other float types?
+ if (VT.getScalarType() == MVT::f64) {
+ Name += "d";
+ } else if (VT.getScalarType() == MVT::f16) {
+ Name += "h";
+ } else {
+ assert(VT.getScalarType() == MVT::f32 &&
+ "Unexpected FP type for reciprocal estimate");
+ Name += "f";
+ }
+
+ return Name;
+}
+
+/// Return the character position and value (a single numeric character) of a
+/// customized refinement operation in the input string if it exists. Return
+/// false if there is no customized refinement step count.
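+/// For example, given the input "vec-sqrtf:3", Position is set to 9 and Value
+/// to 3.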
+static bool parseRefinementStep(StringRef In, size_t &Position,
+ uint8_t &Value) {
+ const char RefStepToken = ':';
+ Position = In.find(RefStepToken);
+ if (Position == StringRef::npos)
+ return false;
+
+ StringRef RefStepString = In.substr(Position + 1);
+ // Allow exactly one numeric character for the additional refinement
+ // step parameter.
+ if (RefStepString.size() == 1) {
+ char RefStepChar = RefStepString[0];
+ if (isDigit(RefStepChar)) {
+ Value = RefStepChar - '0';
+ return true;
+ }
+ }
+ report_fatal_error("Invalid refinement step for -recip.");
+}
+
+/// For the input attribute string, return one of the ReciprocalEstimate enum
+/// status values (enabled, disabled, or not specified) for this operation on
+/// the specified data type.
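+/// For illustration, a hypothetical attribute string "sqrtf:2,!divd" enables
+/// f32 square-root estimates (the ":2" refinement count is parsed separately)
+/// and disables f64 division estimates.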
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ Override.split(OverrideVector, ',');
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "none", or "default" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(Override, RefPos, RefSteps)) {
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ }
+
+ // All reciprocal types are enabled.
+ if (Override == "all")
+ return TargetLoweringBase::ReciprocalEstimate::Enabled;
+
+ // All reciprocal types are disabled.
+ if (Override == "none")
+ return TargetLoweringBase::ReciprocalEstimate::Disabled;
+
+ // Target defaults for enablement are used.
+ if (Override == "default")
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTNameNoSize.pop_back();
+ static const char DisabledPrefix = '!';
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(RecipType, RefPos, RefSteps))
+ RecipType = RecipType.substr(0, RefPos);
+
+ // Ignore the disablement token for string matching.
+ bool IsDisabled = RecipType[0] == DisabledPrefix;
+ if (IsDisabled)
+ RecipType = RecipType.substr(1);
+
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
+ : TargetLoweringBase::ReciprocalEstimate::Enabled;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// For the input attribute string, return the customized refinement step count
+/// for this operation on the specified data type. If the step count does not
+/// exist, return the ReciprocalEstimate enum value for unspecified.
+static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ Override.split(OverrideVector, ',');
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "default", or "none" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(Override, RefPos, RefSteps))
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ assert(Override != "none" &&
+ "Disabled reciprocals, but specifed refinement steps?");
+
+ // If this is a general override, return the specified number of steps.
+ if (Override == "all" || Override == "default")
+ return RefSteps;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTNameNoSize.pop_back();
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(RecipType, RefPos, RefSteps))
+ continue;
+
+ RecipType = RecipType.substr(0, RefPos);
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return RefSteps;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getDivRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
+}
+
+bool TargetLoweringBase::isLoadBitCastBeneficial(
+ EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
+ // Single-element vectors are scalarized, so we should generally avoid having
+ // any memory operations on such types, as they would get scalarized too.
+ if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() &&
+ BitcastVT.getVectorNumElements() == 1)
+ return false;
+
+ // Don't do this if we could do an indexed load on the original type, but not
+ // on the new one.
+ if (!LoadVT.isSimple() || !BitcastVT.isSimple())
+ return true;
+
+ MVT LoadMVT = LoadVT.getSimpleVT();
+
+ // Don't bother doing this if it's just going to be promoted again later, as
+ // doing so might interfere with other combines.
+ if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
+ getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
+ return false;
+
+ unsigned Fast = 0;
+ return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
+ MMO, &Fast) &&
+ Fast;
+}
+
+void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
+ MF.getRegInfo().freezeReservedRegs(MF);
+}
+
+MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
+ const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC,
+ const TargetLibraryInfo *LibInfo) const {
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
+ if (LI.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+
+ if (LI.hasMetadata(LLVMContext::MD_nontemporal))
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (LI.hasMetadata(LLVMContext::MD_invariant_load))
+ Flags |= MachineMemOperand::MOInvariant;
+
+ if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
+ LI.getAlign(), DL, &LI, AC,
+ /*DT=*/nullptr, LibInfo))
+ Flags |= MachineMemOperand::MODereferenceable;
+
+ Flags |= getTargetMMOFlags(LI);
+ return Flags;
+}
+
+MachineMemOperand::Flags
+TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI,
+ const DataLayout &DL) const {
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
+
+ if (SI.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+
+ if (SI.hasMetadata(LLVMContext::MD_nontemporal))
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ // FIXME: Not preserving dereferenceable
+ Flags |= getTargetMMOFlags(SI);
+ return Flags;
+}
+
+MachineMemOperand::Flags
+TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI,
+ const DataLayout &DL) const {
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&AI)) {
+ if (RMW->isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&AI)) {
+ if (CmpX->isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ } else
+ llvm_unreachable("not an atomic instruction");
+
+ // FIXME: Not preserving dereferenceable
+ Flags |= getTargetMMOFlags(AI);
+ return Flags;
+}
+
+Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
+ return Builder.CreateFence(Ord);
+ else
+ return nullptr;
+}
+
+Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ if (isAcquireOrStronger(Ord))
+ return Builder.CreateFence(Ord);
+ else
+ return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalISel Hooks
+//===----------------------------------------------------------------------===//
+
+bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
+ const TargetTransformInfo *TTI) const {
+ auto &MF = *MI.getMF();
+ auto &MRI = MF.getRegInfo();
+ // Assuming a spill and reload of a value has a cost of 1 instruction each,
+ // this helper function computes the maximum number of uses we should consider
+ // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+ // break even in terms of code size when the original MI has 2 users vs
+ // choosing to potentially spill. With more than 2 users we have a net code
+ // size increase. This doesn't take into account register pressure though.
+ auto maxUses = [](unsigned RematCost) {
+ // A cost of 1 means remats are basically free.
+ if (RematCost == 1)
+ return std::numeric_limits<unsigned>::max();
+ if (RematCost == 2)
+ return 2U;
+
+ // Remat is too expensive, only sink if there's one user.
+ if (RematCost > 2)
+ return 1U;
+ llvm_unreachable("Unexpected remat cost");
+ };
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Constant-like instructions should be close to their users.
+ // We don't want long live-ranges for them.
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_INTTOPTR:
+ return true;
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ unsigned RematCost = TTI->getGISelRematGlobalCost();
+ Register Reg = MI.getOperand(0).getReg();
+ unsigned MaxUses = maxUses(RematCost);
+ if (MaxUses == UINT_MAX)
+ return true; // Remats are "free" so always localize.
+ return MRI.hasAtMostUserInstrs(Reg, MaxUses);
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 000000000000..55fb522554fa
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,2680 @@
+//===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionGOFF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Base64.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cassert>
+#include <string>
+
+using namespace llvm;
+using namespace dwarf;
+
+static cl::opt<bool> JumpTableInFunctionSection(
+ "jumptable-in-function-section", cl::Hidden, cl::init(false),
+ cl::desc("Putting Jump Table in function section"));
+
+static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
+ StringRef &Section) {
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+
+ for (const auto &MFE: ModuleFlags) {
+ // Ignore flags with 'Require' behaviour.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ if (Key == "Objective-C Image Info Version") {
+ Version = mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated" ||
+ Key == "Objective-C Class Properties" ||
+ Key == "Objective-C Image Swift Version") {
+ Flags |= mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+ } else if (Key == "Objective-C Image Info Section") {
+ Section = cast<MDString>(MFE.Val)->getString();
+ }
+ // The backend generates L_OBJC_IMAGE_INFO from the Swift ABI version plus
+ // the Swift major/minor versions and "Objective-C Garbage Collection".
+ else if (Key == "Swift ABI Version") {
+ Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 8;
+ } else if (Key == "Swift Major Version") {
+ Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 24;
+ } else if (Key == "Swift Minor Version") {
+ Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 16;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() {
+ SupportDSOLocalEquivalentLowering = true;
+}
+
+void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TgtM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TgtM);
+
+ CodeModel::Model CM = TgtM.getCodeModel();
+ InitializeELF(TgtM.Options.UseInitArray);
+
+ switch (TgtM.getTargetTriple().getArch()) {
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
+ break;
+ // Fallthrough if not using EHABI
+ [[fallthrough]];
+ case Triple::ppc:
+ case Triple::ppcle:
+ case Triple::x86:
+ PersonalityEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect |
+ dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::x86_64:
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (CM == CodeModel::Small
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ } else {
+ PersonalityEncoding =
+ (CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::hexagon:
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ if (isPositionIndependent()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ LSDAEncoding |= dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ }
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ case Triple::aarch64_32:
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ //
+ // Use DW_EH_PE_indirect even for -fno-pic to avoid copy relocations.
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32
+ ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8);
+ PersonalityEncoding = LSDAEncoding | dwarf::DW_EH_PE_indirect;
+ TTypeEncoding = LSDAEncoding | dwarf::DW_EH_PE_indirect;
+ break;
+ case Triple::lanai:
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ // MIPS uses an indirect pointer to refer to personality functions and types,
+ // so that the eh_frame section can be read-only. DW.ref.personality will be
+ // generated for relocation.
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+ // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
+ // identify N64 from just a triple.
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ // We don't support PC-relative LSDA references in GAS so we use the default
+ // DW_EH_PE_absptr for those.
+
+ // FreeBSD must be explicit about the data size and use pcrel since its
+ // assembler/linker won't do the automatic conversion that the Linux tools
+ // do.
+ if (TgtM.getTargetTriple().isOSFreeBSD()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ }
+ break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ break;
+ case Triple::sparcel:
+ case Triple::sparc:
+ if (isPositionIndependent()) {
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+ break;
+ case Triple::riscv32:
+ case Triple::riscv64:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+ break;
+ case Triple::sparcv9:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::systemz:
+ // All currently-defined code models guarantee that 4-byte PC-relative
+ // values will be in range.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::loongarch32:
+ case Triple::loongarch64:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ break;
+ default:
+ break;
+ }
+}
+
+void TargetLoweringObjectFileELF::getModuleMetadata(Module &M) {
+ SmallVector<GlobalValue *, 4> Vec;
+ collectUsedGlobalVariables(M, Vec, false);
+ for (GlobalValue *GV : Vec)
+ if (auto *GO = dyn_cast<GlobalObject>(GV))
+ Used.insert(GO);
+}
+
+void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
+ Module &M) const {
+ auto &C = getContext();
+
+ if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
+ auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS,
+ ELF::SHF_EXCLUDE);
+
+ Streamer.switchSection(S);
+
+ for (const auto *Operand : LinkerOptions->operands()) {
+ if (cast<MDNode>(Operand)->getNumOperands() != 2)
+ report_fatal_error("invalid llvm.linker.options");
+ for (const auto &Option : cast<MDNode>(Operand)->operands()) {
+ Streamer.emitBytes(cast<MDString>(Option)->getString());
+ Streamer.emitInt8(0);
+ }
+ }
+ }
+
+ if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) {
+ auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
+
+ Streamer.switchSection(S);
+
+ for (const auto *Operand : DependentLibraries->operands()) {
+ Streamer.emitBytes(
+ cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
+ Streamer.emitInt8(0);
+ }
+ }
+
+ if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ // Emit a descriptor for every function, including functions that have an
+ // available external linkage. We may not want this for imported functions
+ // that have code in another thinLTO module, but we don't have a good way to
+ // tell them apart from inline functions defined in header files. Therefore
+ // we put each descriptor in a separate comdat section and rely on the
+ // linker to deduplicate.
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto *GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
+ auto *Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
+ auto *Name = cast<MDString>(MD->getOperand(2));
+ auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection(
+ TM->getFunctionSections() ? Name->getString() : StringRef());
+
+ Streamer.switchSection(S);
+ Streamer.emitInt64(GUID->getZExtValue());
+ Streamer.emitInt64(Hash->getZExtValue());
+ Streamer.emitULEB128IntValue(Name->getString().size());
+ Streamer.emitBytes(Name->getString());
+ }
+ }
+
+ if (NamedMDNode *LLVMStats = M.getNamedMetadata("llvm.stats")) {
+ // Emit the metadata for LLVM statistics into the .llvm_stats section, which
+ // is formatted as a list of key/value pairs; each value is Base64 encoded.
+ auto *S = C.getObjectFileInfo()->getLLVMStatsSection();
+ Streamer.switchSection(S);
+ for (const auto *Operand : LLVMStats->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ assert(MD->getNumOperands() % 2 == 0 &&
+ ("Operand num should be even for a list of key/value pair"));
+ for (size_t I = 0; I < MD->getNumOperands(); I += 2) {
+ // Encode the key string size.
+ auto *Key = cast<MDString>(MD->getOperand(I));
+ Streamer.emitULEB128IntValue(Key->getString().size());
+ Streamer.emitBytes(Key->getString());
+ // Encode the value into a Base64 string.
+ std::string Value = encodeBase64(
+ Twine(mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1))
+ ->getZExtValue())
+ .str());
+ Streamer.emitULEB128IntValue(Value.size());
+ Streamer.emitBytes(Value);
+ }
+ }
+ }
+
+ unsigned Version = 0;
+ unsigned Flags = 0;
+ StringRef Section;
+
+ GetObjCImageInfo(M, Version, Flags, Section);
+ if (!Section.empty()) {
+ auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Streamer.switchSection(S);
+ Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(Version);
+ Streamer.emitInt32(Flags);
+ Streamer.addBlankLine();
+ }
+
+ emitCGProfileMetadata(Streamer, M);
+}
+
+MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
+ const GlobalValue *GV, const TargetMachine &TM,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ if ((Encoding & 0x80) == DW_EH_PE_indirect)
+ return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
+ TM.getSymbol(GV)->getName());
+ if ((Encoding & 0x70) == DW_EH_PE_absptr)
+ return TM.getSymbol(GV);
+ report_fatal_error("We do not support this DWARF encoding yet!");
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(
+ MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbolELF *Label =
+ cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
+ Streamer.emitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.emitSymbolAttribute(Label, MCSA_Weak);
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
+ ELF::SHT_PROGBITS, Flags, 0);
+ unsigned Size = DL.getPointerSize();
+ Streamer.switchSection(Sec);
+ Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0));
+ Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::create(Size, getContext());
+ Streamer.emitELFSize(Label, E);
+ Streamer.emitLabel(Label);
+
+ Streamer.emitSymbolValue(Sym, Size);
+}
+
+const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
+ Encoding & ~DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
+}
+
+static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
+ // N.B.: The defaults used here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
+
+ if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
+ /*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF,
+ /*AddSegmentInfo=*/false) ||
+ Name == ".llvmbc" || Name == ".llvmcmd")
+ return SectionKind::getMetadata();
+
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
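+// For example, hasPrefix(".init_array.ctors", ".init_array") is true, while
+// hasPrefix(".init_array2", ".init_array") is false.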
+static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
+ return SectionName.consume_front(Prefix) &&
+ (SectionName.empty() || SectionName[0] == '.');
+}
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+ // Use SHT_NOTE for sections whose names start with ".note" to allow
+ // emitting ELF notes from a C variable declaration.
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609
+ if (Name.startswith(".note"))
+ return ELF::SHT_NOTE;
+
+ if (hasPrefix(Name, ".init_array"))
+ return ELF::SHT_INIT_ARRAY;
+
+ if (hasPrefix(Name, ".fini_array"))
+ return ELF::SHT_FINI_ARRAY;
+
+ if (hasPrefix(Name, ".preinit_array"))
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (hasPrefix(Name, ".llvm.offloading"))
+ return ELF::SHT_LLVM_OFFLOADING;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
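+// For illustration, a read-only mergeable 1-byte C string section receives
+// SHF_ALLOC | SHF_MERGE | SHF_STRINGS from the mapping below.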
+static unsigned getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata() && !K.isExclude())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isExclude())
+ Flags |= ELF::SHF_EXCLUDE;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isExecuteOnly())
+ Flags |= ELF::SHF_ARM_PURECODE;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ if (K.isMergeableCString() || K.isMergeableConst())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
+
+static const Comdat *getELFComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any &&
+ C->getSelectionKind() != Comdat::NoDeduplicate)
+ report_fatal_error("ELF COMDATs only support SelectionKind::Any and "
+ "SelectionKind::NoDeduplicate, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
+static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO,
+ const TargetMachine &TM) {
+ MDNode *MD = GO->getMetadata(LLVMContext::MD_associated);
+ if (!MD)
+ return nullptr;
+
+ auto *VM = cast<ValueAsMetadata>(MD->getOperand(0).get());
+ auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
+ return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
+}
+
+static unsigned getEntrySizeForKind(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString())
+ return 1;
+ else if (Kind.isMergeable2ByteCString())
+ return 2;
+ else if (Kind.isMergeable4ByteCString())
+ return 4;
+ else if (Kind.isMergeableConst4())
+ return 4;
+ else if (Kind.isMergeableConst8())
+ return 8;
+ else if (Kind.isMergeableConst16())
+ return 16;
+ else if (Kind.isMergeableConst32())
+ return 32;
+ else {
+ // We shouldn't have mergeable C strings or mergeable constants that we
+ // didn't handle above.
+ assert(!Kind.isMergeableCString() && "unknown string width");
+ assert(!Kind.isMergeableConst() && "unknown data width");
+ return 0;
+ }
+}
+
+/// Return the section prefix name used by options FunctionsSections and
+/// DataSections.
+static StringRef getSectionPrefixForGlobal(SectionKind Kind, bool IsLarge) {
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isReadOnly())
+ return IsLarge ? ".lrodata" : ".rodata";
+ if (Kind.isBSS())
+ return IsLarge ? ".lbss" : ".bss";
+ if (Kind.isThreadData())
+ return ".tdata";
+ if (Kind.isThreadBSS())
+ return ".tbss";
+ if (Kind.isData())
+ return IsLarge ? ".ldata" : ".data";
+ if (Kind.isReadOnlyWithRel())
+ return IsLarge ? ".ldata.rel.ro" : ".data.rel.ro";
+ llvm_unreachable("Unknown section kind");
+}
+
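+// For example, a 1-byte mergeable string global with preferred alignment 1
+// is placed in ".rodata.str1.1", an 8-byte mergeable constant in
+// ".rodata.cst8", and with -ffunction-sections a function such as foo() gets
+// ".text.foo" (or ".text.hot.foo" if it carries the "hot" section prefix).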
+static SmallString<128>
+getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
+ Mangler &Mang, const TargetMachine &TM,
+ unsigned EntrySize, bool UniqueSectionName) {
+ SmallString<128> Name;
+ if (Kind.isMergeableCString()) {
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign(
+ cast<GlobalVariable>(GO));
+
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ Name = SizeSpec + utostr(Alignment.value());
+ } else if (Kind.isMergeableConst()) {
+ Name = ".rodata.cst";
+ Name += utostr(EntrySize);
+ } else {
+ bool IsLarge = false;
+ if (isa<GlobalVariable>(GO))
+ IsLarge = TM.isLargeData();
+ Name = getSectionPrefixForGlobal(Kind, IsLarge);
+ }
+
+ bool HasPrefix = false;
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ if (std::optional<StringRef> Prefix = F->getSectionPrefix()) {
+ raw_svector_ostream(Name) << '.' << *Prefix;
+ HasPrefix = true;
+ }
+ }
+
+ if (UniqueSectionName) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, /*MayAlwaysUsePrivate*/true);
+ } else if (HasPrefix)
+ // For distinguishing between .text.${text-section-prefix}. (with trailing
+ // dot) and .text.${function-name}
+ Name.push_back('.');
+ return Name;
+}
+
+namespace {
+class LoweringDiagnosticInfo : public DiagnosticInfo {
+ const Twine &Msg;
+
+public:
+ LoweringDiagnosticInfo(const Twine &DiagMsg,
+ DiagnosticSeverity Severity = DS_Error)
+ : DiagnosticInfo(DK_Lowering, Severity), Msg(DiagMsg) {}
+ void print(DiagnosticPrinter &DP) const override { DP << Msg; }
+};
+}
+
+/// Calculate an appropriate unique ID for a section, and update Flags,
+/// EntrySize and NextUniqueID where appropriate.
+static unsigned
+calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
+ SectionKind Kind, const TargetMachine &TM,
+ MCContext &Ctx, Mangler &Mang, unsigned &Flags,
+ unsigned &EntrySize, unsigned &NextUniqueID,
+ const bool Retain, const bool ForceUnique) {
+ // Increment the unique ID if we are forced to emit a unique section.
+ // This works fine with the section attribute or #pragma section, as
+ // sections with the same name are grouped together by the assembler.
+ if (ForceUnique)
+ return NextUniqueID++;
+
+ // A section can have at most one associated section. Put each global with
+ // MD_associated in a unique section.
+ const bool Associated = GO->getMetadata(LLVMContext::MD_associated);
+ if (Associated) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ return NextUniqueID++;
+ }
+
+ if (Retain) {
+ if (TM.getTargetTriple().isOSSolaris())
+ Flags |= ELF::SHF_SUNW_NODISCARD;
+ else if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
+ Flags |= ELF::SHF_GNU_RETAIN;
+ return NextUniqueID++;
+ }
+
+ // If two symbols with differing sizes end up in the same mergeable section,
+ // that section can be assigned an incorrect entry size. To avoid this we
+ // usually put symbols of the same size into distinct mergeable sections with
+ // the same name. Doing so relies on the ",unique," assembly feature, which
+ // is not available until binutils version 2.35
+ // (https://sourceware.org/bugzilla/show_bug.cgi?id=25380).
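+ // With that feature the integrated assembler emits something like
+ // ".section .rodata.str1.1,"aMS",@progbits,1,unique,1" so that each
+ // incompatible symbol gets its own section despite the shared name.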
+ const bool SupportsUnique = Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 35);
+ if (!SupportsUnique) {
+ Flags &= ~ELF::SHF_MERGE;
+ EntrySize = 0;
+ return MCContext::GenericSectionID;
+ }
+
+ const bool SymbolMergeable = Flags & ELF::SHF_MERGE;
+ const bool SeenSectionNameBefore =
+ Ctx.isELFGenericMergeableSection(SectionName);
+ // If this is the first occurrence of this section name, treat it as the
+ // generic section.
+ if (!SymbolMergeable && !SeenSectionNameBefore)
+ return MCContext::GenericSectionID;
+
+ // Symbols must be placed into sections with compatible entry sizes. Generate
+ // unique sections for symbols that have not been assigned to compatible
+ // sections.
+ const auto PreviousID =
+ Ctx.getELFUniqueIDForEntsize(SectionName, Flags, EntrySize);
+ if (PreviousID)
+ return *PreviousID;
+
+ // If the user has specified the same section name as would be created
+ // implicitly for this symbol e.g. .rodata.str1.1, then we don't need
+ // to unique the section as the entry size for this symbol will be
+ // compatible with implicitly created sections.
+ SmallString<128> ImplicitSectionNameStem =
+ getELFSectionNameForGlobal(GO, Kind, Mang, TM, EntrySize, false);
+ if (SymbolMergeable &&
+ Ctx.isELFImplicitMergeableSectionNamePrefix(SectionName) &&
+ SectionName.startswith(ImplicitSectionNameStem))
+ return MCContext::GenericSectionID;
+
+ // We have seen this section name before, but with different flags or entry
+ // size. Create a new unique ID.
+ return NextUniqueID++;
+}
+
+static MCSection *selectExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM,
+ MCContext &Ctx, Mangler &Mang, unsigned &NextUniqueID,
+ bool Retain, bool ForceUnique) {
+ StringRef SectionName = GO->getSection();
+
+ // Check if a '#pragma clang section' name is applicable.
+ // Note that the pragma directive overrides -ffunction-sections and
+ // -fdata-sections, so the section name is exactly as the user specified and
+ // is not uniqued.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
+ if (GV && GV->hasImplicitSection()) {
+ auto Attrs = GV->getAttributes();
+ if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
+ SectionName = Attrs.getAttribute("bss-section").getValueAsString();
+ } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
+ SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
+ SectionName = Attrs.getAttribute("relro-section").getValueAsString();
+ } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
+ SectionName = Attrs.getAttribute("data-section").getValueAsString();
+ }
+ }
+ const Function *F = dyn_cast<Function>(GO);
+ if (F && F->hasFnAttribute("implicit-section-name")) {
+ SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
+ }
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ StringRef Group = "";
+ bool IsComdat = false;
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (const Comdat *C = getELFComdat(GO)) {
+ Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+ const unsigned UniqueID = calcUniqueIDUpdateFlagsAndSize(
+ GO, SectionName, Kind, TM, Ctx, Mang, Flags, EntrySize, NextUniqueID,
+ Retain, ForceUnique);
+
+ const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
+ MCSectionELF *Section = Ctx.getELFSection(
+ SectionName, getELFSectionType(SectionName, Kind), Flags, EntrySize,
+ Group, IsComdat, UniqueID, LinkedToSym);
+ // Make sure that we did not get some other section with an incompatible
+ // sh_link. This should not be possible due to the UniqueID code above.
+ assert(Section->getLinkedToSymbol() == LinkedToSym &&
+ "Associated symbol mismatch between sections");
+
+ if (!(Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 35))) {
+ // If we are using GNU as before 2.35, then this symbol might have
+ // been placed in an incompatible mergeable section. Emit an error if this
+ // is the case to avoid creating broken output.
+ if ((Section->getFlags() & ELF::SHF_MERGE) &&
+ (Section->getEntrySize() != getEntrySizeForKind(Kind)))
+ GO->getContext().diagnose(LoweringDiagnosticInfo(
+ "Symbol '" + GO->getName() + "' from module '" +
+ (GO->getParent() ? GO->getParent()->getSourceFileName() : "unknown") +
+ "' required a section with entry-size=" +
+ Twine(getEntrySizeForKind(Kind)) + " but was placed in section '" +
+ SectionName + "' with entry-size=" + Twine(Section->getEntrySize()) +
+ ": Explicit assignment by pragma or attribute of an incompatible "
+ "symbol to this section?"));
+ }
+
+ return Section;
+}
+
+MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ return selectExplicitSectionGlobal(GO, Kind, TM, getContext(), getMangler(),
+ NextUniqueID, Used.count(GO),
+ /* ForceUnique = */false);
+}
+
+static MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
+ unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
+
+ StringRef Group = "";
+ bool IsComdat = false;
+ if (const Comdat *C = getELFComdat(GO)) {
+ Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
+ }
+ if (isa<GlobalVariable>(GO)) {
+ if (TM.isLargeData()) {
+ assert(TM.getTargetTriple().getArch() == Triple::x86_64);
+ Flags |= ELF::SHF_X86_64_LARGE;
+ }
+ }
+
+ // Get the section entry size based on the kind.
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+
+ bool UniqueSectionName = false;
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniqueSection) {
+ if (TM.getUniqueSectionNames()) {
+ UniqueSectionName = true;
+ } else {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
+ }
+ }
+ SmallString<128> Name = getELFSectionNameForGlobal(
+ GO, Kind, Mang, TM, EntrySize, UniqueSectionName);
+
+ // Use 0 as the unique ID for execute-only text.
+ if (Kind.isExecuteOnly())
+ UniqueID = 0;
+ return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group, IsComdat, UniqueID,
+ AssociatedSymbol);
+}
+
+static MCSection *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool Retain, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
+ if (LinkedToSym) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+ if (Retain) {
+ if (TM.getTargetTriple().isOSSolaris()) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_SUNW_NODISCARD;
+ } else if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_GNU_RETAIN;
+ }
+ }
+
+ MCSectionELF *Section = selectELFSectionForGlobal(
+ Ctx, GO, Kind, Mang, TM, EmitUniqueSection, Flags,
+ NextUniqueID, LinkedToSym);
+ assert(Section->getLinkedToSymbol() == LinkedToSym);
+ return Section;
+}
+
+MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
+
+ // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GO->hasComdat();
+ return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
+ Used.count(GO), EmitUniqueSection, Flags,
+ &NextUniqueID);
+}
+
+MCSection *TargetLoweringObjectFileELF::getUniqueSectionForFunction(
+ const Function &F, const TargetMachine &TM) const {
+ SectionKind Kind = SectionKind::getText();
+ unsigned Flags = getELFSectionFlags(Kind);
+ // If the function's section name is pre-determined via a pragma or a
+ // section attribute, call selectExplicitSectionGlobal.
+ if (F.hasSection() || F.hasFnAttribute("implicit-section-name"))
+ return selectExplicitSectionGlobal(
+ &F, Kind, TM, getContext(), getMangler(), NextUniqueID,
+ Used.count(&F), /* ForceUnique = */true);
+ else
+ return selectELFSectionForGlobal(
+ getContext(), &F, Kind, getMangler(), TM, Used.count(&F),
+ /*EmitUniqueSection=*/true, Flags, &NextUniqueID);
+}
+
+MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
+ const Function &F, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+ getMangler(), TM, EmitUniqueSection,
+ ELF::SHF_ALLOC, &NextUniqueID,
+ /* AssociatedSymbol */ nullptr);
+}
+
+MCSection *TargetLoweringObjectFileELF::getSectionForLSDA(
+ const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
+ // If neither COMDAT nor function sections, use the monolithic LSDA section.
+ // Re-use this path if LSDASection is null as in the Arm EHABI.
+ if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections()))
+ return LSDASection;
+
+ const auto *LSDA = cast<MCSectionELF>(LSDASection);
+ unsigned Flags = LSDA->getFlags();
+ const MCSymbolELF *LinkedToSym = nullptr;
+ StringRef Group;
+ bool IsComdat = false;
+ if (const Comdat *C = getELFComdat(&F)) {
+ Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
+ }
+ // Use SHF_LINK_ORDER to facilitate --gc-sections if we can use GNU ld>=2.36
+ // or LLD, which support mixed SHF_LINK_ORDER & non-SHF_LINK_ORDER.
+ if (TM.getFunctionSections() &&
+ (getContext().getAsmInfo()->useIntegratedAssembler() &&
+ getContext().getAsmInfo()->binutilsIsAtLeast(2, 36))) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ LinkedToSym = cast<MCSymbolELF>(&FnSym);
+ }
+
+ // Append the function name as the suffix like GCC, assuming
+ // -funique-section-names applies to .gcc_except_table sections.
+ return getContext().getELFSection(
+ (TM.getUniqueSectionNames() ? LSDA->getName() + "." + F.getName()
+ : LSDA->getName()),
+ LSDA->getType(), Flags, 0, Group, IsComdat, MCSection::NonUniqueID,
+ LinkedToSym);
+}
+
+bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+}
+
+/// Given a mergeable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ Align &Alignment) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isMergeableConst32() && MergeableConst32Section)
+ return MergeableConst32Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// Returns a unique section for the given machine basic block.
+MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
+ const Function &F, const MachineBasicBlock &MBB,
+ const TargetMachine &TM) const {
+ assert(MBB.isBeginSection() && "Basic block does not start a section!");
+ unsigned UniqueID = MCContext::GenericSectionID;
+
+ // For cold sections use the .text.split. prefix along with the parent
+ // function name. All cold blocks for the same function go to the same
+ // section. Similarly, all exception blocks are grouped by symbol name
+ // under the .text.eh. prefix. For regular sections, we either use a unique
+ // name, or a unique ID for the section.
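+ // For example, with the default cold-text prefix, a cold block of a function
+ // foo ends up in ".text.split.foo" and an exception block in ".text.eh.foo".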
+ SmallString<128> Name;
+ if (MBB.getSectionID() == MBBSectionID::ColdSectionID) {
+ Name += BBSectionsColdTextPrefix;
+ Name += MBB.getParent()->getName();
+ } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) {
+ Name += ".text.eh.";
+ Name += MBB.getParent()->getName();
+ } else {
+ Name += MBB.getParent()->getSection()->getName();
+ if (TM.getUniqueBasicBlockSectionNames()) {
+ if (!Name.endswith("."))
+ Name += ".";
+ Name += MBB.getSymbol()->getName();
+ } else {
+ UniqueID = NextUniqueID++;
+ }
+ }
+
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
+ std::string GroupName;
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName().str();
+ }
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS, Flags,
+ 0 /* Entry Size */, GroupName,
+ F.hasComdat(), UniqueID, nullptr);
+}
+
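+// For example, with .init_array a constructor at priority 101 is placed in
+// ".init_array.101" (the default priority 65535 uses plain ".init_array"),
+// while the legacy scheme places it in ".ctors.65434" because the numbering
+// is inverted.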
+static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
+ bool IsCtor, unsigned Priority,
+ const MCSymbol *KeySym) {
+ std::string Name;
+ unsigned Type;
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+ StringRef Comdat = KeySym ? KeySym->getName() : "";
+
+ if (KeySym)
+ Flags |= ELF::SHF_GROUP;
+
+ if (UseInitArray) {
+ if (IsCtor) {
+ Type = ELF::SHT_INIT_ARRAY;
+ Name = ".init_array";
+ } else {
+ Type = ELF::SHT_FINI_ARRAY;
+ Name = ".fini_array";
+ }
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(Priority);
+ }
+ } else {
+ // The default scheme is .ctors / .dtors, so we have to invert the priority
+ // numbering.
+ if (IsCtor)
+ Name = ".ctors";
+ else
+ Name = ".dtors";
+ if (Priority != 65535)
+ raw_string_ostream(Name) << format(".%05u", 65535 - Priority);
+ Type = ELF::SHT_PROGBITS;
+ }
+
+ return Ctx.getELFSection(Name, Type, Flags, 0, Comdat, /*IsComdat=*/true);
+}
+
+MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getStaticStructorSection(getContext(), UseInitArray, true, Priority,
+ KeySym);
+}
+
+MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getStaticStructorSection(getContext(), UseInitArray, false, Priority,
+ KeySym);
+}
+
+const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic correctness checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), PLTRelativeVariantKind,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
+}
+
+const MCExpr *TargetLoweringObjectFileELF::lowerDSOLocalEquivalent(
+ const DSOLocalEquivalent *Equiv, const TargetMachine &TM) const {
+ assert(supportDSOLocalEquivalentLowering());
+
+ const auto *GV = Equiv->getGlobalValue();
+
+ // A PLT entry is not needed for dso_local globals.
+ if (GV->isDSOLocal() || GV->isImplicitDSOLocal())
+ return MCSymbolRefExpr::create(TM.getSymbol(GV), getContext());
+
+ return MCSymbolRefExpr::create(TM.getSymbol(GV), PLTRelativeVariantKind,
+ getContext());
+}
+
+MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
+ // Use ".GCC.command.line" since this feature is to support clang's
+ // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
+ // same name.
+ return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
+}
+
+void
+TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
+ UseInitArray = UseInitArray_;
+ MCContext &Ctx = getContext();
+ if (!UseInitArray) {
+ StaticCtorSection = Ctx.getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+
+ StaticDtorSection = Ctx.getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ return;
+ }
+
+ StaticCtorSection = Ctx.getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticDtorSection = Ctx.getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() {
+ SupportIndirectSymViaGOTPCRel = true;
+}
+
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection = Ctx.getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getData());
+ } else {
+ StaticCtorSection = Ctx.getMachOSection("__DATA", "__mod_init_func",
+ MachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getMachOSection("__DATA", "__mod_term_func",
+ MachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getData());
+ }
+
+ PersonalityEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+}
+
+MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ // In userspace, we lower global destructors via atexit(), but kernel/kext
+ // environments do not provide this function, so we still need to support the
+ // legacy way here.
+ // See the -disable-atexit-based-global-dtor-lowering CodeGen flag for more
+ // context.
+ return StaticDtorSection;
+}
+
+void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
+ Module &M) const {
+ // Emit the linker options if present.
+ if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
+ for (const auto *Option : LinkerOptions->operands()) {
+ SmallVector<std::string, 4> StrOptions;
+ for (const auto &Piece : cast<MDNode>(Option)->operands())
+ StrOptions.push_back(std::string(cast<MDString>(Piece)->getString()));
+ Streamer.emitLinkerOptions(StrOptions);
+ }
+ }
+
+ unsigned VersionVal = 0;
+ unsigned ImageInfoFlags = 0;
+ StringRef SectionVal;
+
+ GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal);
+ emitCGProfileMetadata(Streamer, M);
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty())
+ return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ if (Error E = MCSectionMachO::ParseSectionSpecifier(
+ SectionVal, Segment, Section, TAA, TAAParsed, StubSize)) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section +
+ "': " + toString(std::move(E)) + ".");
+ }
+
+ // Get the section.
+ MCSectionMachO *S = getContext().getMachOSection(
+ Segment, Section, TAA, StubSize, SectionKind::getData());
+ Streamer.switchSection(S);
+ Streamer.emitLabel(getContext().
+ getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(VersionVal);
+ Streamer.emitInt32(ImageInfoFlags);
+ Streamer.addBlankLine();
+}
+
+static void checkMachOComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return;
+
+ report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() +
+ "' cannot be lowered.");
+}
+
+MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+
+ StringRef SectionName = GO->getSection();
+
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
+ if (GV && GV->hasImplicitSection()) {
+ auto Attrs = GV->getAttributes();
+ if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
+ SectionName = Attrs.getAttribute("bss-section").getValueAsString();
+ } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
+ SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
+ SectionName = Attrs.getAttribute("relro-section").getValueAsString();
+ } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
+ SectionName = Attrs.getAttribute("data-section").getValueAsString();
+ }
+ }
+
+ const Function *F = dyn_cast<Function>(GO);
+ if (F && F->hasFnAttribute("implicit-section-name")) {
+ SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
+ }
+
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+
+ checkMachOComdat(GO);
+
+ if (Error E = MCSectionMachO::ParseSectionSpecifier(
+ SectionName, Segment, Section, TAA, TAAParsed, StubSize)) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GO->getName() +
+ "' has an invalid section specifier '" +
+ GO->getSection() + "': " + toString(std::move(E)) + ".");
+ }
+
+ // Get the section.
+ MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GO->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ checkMachOComdat(GO);
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GO->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GO->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ GO->getParent()->getDataLayout().getPreferredAlign(
+ cast<GlobalVariable>(GO)) < Align(32))
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+ // externally visible label; this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() &&
+ GO->getParent()->getDataLayout().getPreferredAlign(
+ cast<GlobalVariable>(GO)) < Align(32))
+ return UStringSection;
+
+ // With MachO, only variables whose corresponding symbols start with 'l' or
+ // 'L' can be merged, so we only try merging GVs with private linkage.
+ if (GO->hasPrivateLinkage() && Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16())
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+ // DATA, __common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+ // Put zero initialized globals with local linkage in __DATA,__bss directive
+ // with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ Align &Alignment) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isData() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16())
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+MCSection *TargetLoweringObjectFileMachO::getSectionForCommandLines() const {
+ return getContext().getMachOSection("__TEXT", "__command_line", 0,
+ SectionKind::getReadOnly());
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM);
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
+ Encoding & ~DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
+ const GlobalValue *GV, const TargetMachine &TM,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM);
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+ const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV,
+ int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+ // as 64-bit ones do, we replace the GOT equivalent by accessing the final symbol
+ // through a non_lazy_ptr stub instead. One advantage is that it allows the
+ // computation of deltas to final external symbols. Example:
+ //
+ // _extgotequiv:
+ // .long _extfoo
+ //
+ // _delta:
+ // .long _extgotequiv-_delta
+ //
+ // is transformed to:
+ //
+ // _delta:
+ // .long L_extfoo$non_lazy_ptr-(_delta+0)
+ //
+ // .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ // L_extfoo$non_lazy_ptr:
+ // .indirect_symbol _extfoo
+ // .long 0
+ //
+ // The indirect symbol table (and sections of non_lazy_symbol_pointers type)
+ // may point to both local (same translation unit) and global (other
+ // translation units) symbols. Example:
+ //
+ // .section __DATA,__pointers,non_lazy_symbol_pointers
+ // L1:
+ // .indirect_symbol _myGlobal
+ // .long 0
+ // L2:
+ // .indirect_symbol _myLocal
+ // .long _myLocal
+ //
+ // If the symbol is local, instead of the symbol's index, the assembler
+ // places the constant INDIRECT_SYMBOL_LOCAL into the indirect symbol table.
+ // Then the linker will notice the constant in the table and will look at the
+ // content of the symbol.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MCContext &Ctx = getContext();
+
+ // The offset must consider the original displacement from the base symbol
+ // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+ Offset = -MV.getConstant();
+ const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+ // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+ // non_lazy_ptr stubs.
+ SmallString<128> Name;
+ StringRef Suffix = "$non_lazy_ptr";
+ Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix();
+ Name += Sym->getName();
+ Name += Suffix;
+ MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym),
+ !GV->hasLocalLinkage());
+
+ const MCExpr *BSymExpr =
+ MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ const MCExpr *LHS =
+ MCSymbolRefExpr::create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!Offset)
+ return MCBinaryExpr::createSub(LHS, BSymExpr, Ctx);
+
+ const MCExpr *RHS =
+ MCBinaryExpr::createAdd(BSymExpr, MCConstantExpr::create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+}
+
+static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
+ const MCSection &Section) {
+ if (!AsmInfo.isSectionAtomizableBySymbols(Section))
+ return true;
+
+ // FIXME: we should be able to use private labels for sections that can't be
+ // dead-stripped (there's no issue with blocking atomization there), but `ld
+ // -r` sometimes drops the no_dead_strip attribute from sections so for safety
+ // we don't allow it.
+ return false;
+}
+
+void TargetLoweringObjectFileMachO::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ bool CannotUsePrivateLabel = true;
+ if (auto *GO = GV->getAliaseeObject()) {
+ SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM);
+ const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);
+ CannotUsePrivateLabel =
+ !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
+ }
+ getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+static unsigned
+getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) {
+ unsigned Flags = 0;
+ bool isThumb = TM.getTargetTriple().getArch() == Triple::thumb;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isExclude())
+ Flags |=
+ COFF::IMAGE_SCN_LNK_REMOVE | COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE |
+ (isThumb ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0);
+ else if (K.isBSS())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly() || K.isReadOnlyWithRel())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
+
+static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ assert(C && "expected GV to have a Comdat!");
+
+ StringRef ComdatGVName = C->getName();
+ const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName);
+ if (!ComdatGV)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' does not exist.");
+
+ if (ComdatGV->getComdat() != C)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' is not a key for its COMDAT.");
+
+ return ComdatGV;
+}
+
+static int getSelectionForCOFF(const GlobalValue *GV) {
+ if (const Comdat *C = GV->getComdat()) {
+ const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
+ if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
+ ComdatKey = GA->getAliaseeObject();
+ if (ComdatKey == GV) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ case Comdat::ExactMatch:
+ return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
+ case Comdat::Largest:
+ return COFF::IMAGE_COMDAT_SELECT_LARGEST;
+ case Comdat::NoDeduplicate:
+ return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ case Comdat::SameSize:
+ return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
+ }
+ } else {
+ return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+ }
+ }
+ return 0;
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
+ StringRef Name = GO->getSection();
+ StringRef COMDATSymName = "";
+ if (GO->hasComdat()) {
+ Selection = getSelectionForCOFF(GO);
+ const GlobalValue *ComdatGV;
+ if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ ComdatGV = getComdatGVForCOFF(GO);
+ else
+ ComdatGV = GO;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV);
+ COMDATSymName = Sym->getName();
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ } else {
+ Selection = 0;
+ }
+ }
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ Selection);
+}
+
+static StringRef getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadLocal())
+ return ".tls$";
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
+ return ".rdata";
+ return ".data";
+}
+
+MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ // If we have -ffunction-sections then we should emit the global value to a
+ // uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) {
+ SmallString<256> Name = getCOFFSectionNameForUniqueGlobal(Kind);
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ int Selection = getSelectionForCOFF(GO);
+ if (!Selection)
+ Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ const GlobalValue *ComdatGV;
+ if (GO->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GO);
+ else
+ ComdatGV = GO;
+
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniquedSection)
+ UniqueID = NextUniqueID++;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV);
+ StringRef COMDATSymName = Sym->getName();
+
+ if (const auto *F = dyn_cast<Function>(GO))
+ if (std::optional<StringRef> Prefix = F->getSectionPrefix())
+ raw_svector_ostream(Name) << '$' << *Prefix;
+
+ // Append "$symbol" to the section name *before* IR-level mangling is
+ // applied when targeting mingw. This is what GCC does, and the ld.bfd
+ // COFF linker will not properly handle comdats otherwise.
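+ // For example, a function named foo then ends up in ".text$foo" rather
+ // than in a plain ".text" comdat.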
+ if (getContext().getTargetTriple().isWindowsGNUEnvironment())
+ raw_svector_ostream(Name) << '$' << ComdatGV->getName();
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind,
+ COMDATSymName, Selection, UniqueID);
+ } else {
+ SmallString<256> TmpData;
+ getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true);
+ return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ Selection, UniqueID);
+ }
+ }
+
+ if (Kind.isText())
+ return TextSection;
+
+ if (Kind.isThreadLocal())
+ return TLSDataSection;
+
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
+ return ReadOnlySection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon())
+ return BSSSection;
+
+ return DataSection;
+}
+
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ bool CannotUsePrivateLabel = false;
+ if (GV->hasPrivateLinkage() &&
+ ((isa<Function>(GV) && TM.getFunctionSections()) ||
+ (isa<GlobalVariable>(GV) && TM.getDataSections())))
+ CannotUsePrivateLabel = true;
+
+ getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+ const Function &F, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ // FIXME: we should produce a symbol for F instead.
+ if (F.hasPrivateLinkage())
+ return ReadOnlySection;
+
+ MCSymbol *Sym = TM.getSymbol(&F);
+ StringRef COMDATSymName = Sym->getName();
+
+ SectionKind Kind = SectionKind::getReadOnly();
+ StringRef SecName = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ unsigned UniqueID = NextUniqueID++;
+
+ return getContext().getCOFFSection(
+ SecName, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
+}
+
+bool TargetLoweringObjectFileCOFF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ if (TM->getTargetTriple().getArch() == Triple::x86_64) {
+ if (!JumpTableInFunctionSection) {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+ }
+ }
+ return TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
+ UsesLabelDifference, F);
+}
+
+void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
+ Module &M) const {
+ emitLinkerDirectives(Streamer, M);
+
+ unsigned Version = 0;
+ unsigned Flags = 0;
+ StringRef Section;
+
+ GetObjCImageInfo(M, Version, Flags, Section);
+ if (!Section.empty()) {
+ auto &C = getContext();
+ auto *S = C.getCOFFSection(Section,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ Streamer.switchSection(S);
+ Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(Version);
+ Streamer.emitInt32(Flags);
+ Streamer.addBlankLine();
+ }
+
+ emitCGProfileMetadata(Streamer, M);
+}
+
+void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
+ MCStreamer &Streamer, Module &M) const {
+ if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
+ // Emit the linker options to the linker .drectve section. According to the
+ // spec, this section is a space-separated string containing flags for the
+ // linker.
+ MCSection *Sec = getDrectveSection();
+ Streamer.switchSection(Sec);
+ for (const auto *Option : LinkerOptions->operands()) {
+ for (const auto &Piece : cast<MDNode>(Option)->operands()) {
+ // Lead with a space for consistency with our dllexport implementation.
+ std::string Directive(" ");
+ Directive.append(std::string(cast<MDString>(Piece)->getString()));
+ Streamer.emitBytes(Directive);
+ }
+ }
+ }
+
+ // Emit /EXPORT: flags for each exported global as necessary.
+ std::string Flags;
+ for (const GlobalValue &GV : M.global_values()) {
+ raw_string_ostream OS(Flags);
+ emitLinkerFlagsForGlobalCOFF(OS, &GV, getContext().getTargetTriple(),
+ getMangler());
+ OS.flush();
+ if (!Flags.empty()) {
+ Streamer.switchSection(getDrectveSection());
+ Streamer.emitBytes(Flags);
+ }
+ Flags.clear();
+ }
+
+ // Emit /INCLUDE: flags for each used global as necessary.
+ if (const auto *LU = M.getNamedGlobal("llvm.used")) {
+ assert(LU->hasInitializer() && "expected llvm.used to have an initializer");
+ assert(isa<ArrayType>(LU->getValueType()) &&
+ "expected llvm.used to be an array type");
+ if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
+ for (const Value *Op : A->operands()) {
+ const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
+ // Global symbols with internal or private linkage are not visible to
+ // the linker, and thus would cause an error when the linker tried to
+ // preserve the symbol due to the `/include:` directive.
+ if (GV->hasLocalLinkage())
+ continue;
+
+ raw_string_ostream OS(Flags);
+ emitLinkerFlagsForUsedCOFF(OS, GV, getContext().getTargetTriple(),
+ getMangler());
+ OS.flush();
+
+ if (!Flags.empty()) {
+ Streamer.switchSection(getDrectveSection());
+ Streamer.emitBytes(Flags);
+ }
+ Flags.clear();
+ }
+ }
+ }
+}
+
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ this->TM = &TM;
+ const Triple &T = TM.getTargetTriple();
+ if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ StaticCtorSection =
+ Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ StaticDtorSection =
+ Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticCtorSection = Ctx.getCOFFSection(
+ ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getCOFFSection(
+ ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+ }
+}
+
+static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
+ const Triple &T, bool IsCtor,
+ unsigned Priority,
+ const MCSymbol *KeySym,
+ MCSectionCOFF *Default) {
+ if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ // If the priority is the default, use .CRT$XCU, possibly associative.
+ if (Priority == 65535)
+ return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+ // Otherwise, we need to compute a new section name. Low priorities should
+ // run earlier. The linker will sort sections ASCII-betically, and we need a
+ // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we
+ // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really
+ // low priorities need to sort before 'L', since the CRT uses that
+ // internally, so we use ".CRT$XCA00001" for them. We have a contract with
+ // the frontend that "init_seg(compiler)" corresponds to priority 200 and
+ // "init_seg(lib)" corresponds to priority 400, and those respectively use
+ // 'C' and 'L' without the priority suffix. Priorities between 200 and 400
+ // use 'C' with the priority as a suffix.
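+ // For example, a constructor at priority 300 goes to ".CRT$XCC00300", one
+ // at priority 200 to ".CRT$XCC", one at priority 400 to ".CRT$XCL", and one
+ // at priority 150 to ".CRT$XCA00150".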
+ SmallString<24> Name;
+ char LastLetter = 'T';
+ bool AddPrioritySuffix = Priority != 200 && Priority != 400;
+ if (Priority < 200)
+ LastLetter = 'A';
+ else if (Priority < 400)
+ LastLetter = 'C';
+ else if (Priority == 400)
+ LastLetter = 'L';
+ raw_svector_ostream OS(Name);
+ OS << ".CRT$X" << (IsCtor ? "C" : "T") << LastLetter;
+ if (AddPrioritySuffix)
+ OS << format("%05u", Priority);
+ MCSectionCOFF *Sec = Ctx.getCOFFSection(
+ Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0);
+ }
+
+ std::string Name = IsCtor ? ".ctors" : ".dtors";
+ if (Priority != 65535)
+ raw_string_ostream(Name) << format(".%05u", 65535 - Priority);
+
+ return Ctx.getAssociativeCOFFSection(
+ Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData()),
+ KeySym, 0);
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getCOFFStaticStructorSection(
+ getContext(), getContext().getTargetTriple(), true, Priority, KeySym,
+ cast<MCSectionCOFF>(StaticCtorSection));
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getCOFFStaticStructorSection(
+ getContext(), getContext().getTargetTriple(), false, Priority, KeySym,
+ cast<MCSectionCOFF>(StaticDtorSection));
+}
+
+const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ const Triple &T = TM.getTargetTriple();
+ if (T.isOSCygMing())
+ return nullptr;
+
+ // Our symbols should exist in address space zero; cowardly no-op
+ // otherwise.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0)
+ return nullptr;
+
+ // Both ptrtoint instructions must wrap global objects:
+ // - Only global variables are eligible for image relative relocations.
+ // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable.
+ // We expect __ImageBase to be a global variable without a section, externally
+ // defined.
+ //
+ // It should look something like this: @__ImageBase = external constant i8
+ if (!isa<GlobalObject>(LHS) || !isa<GlobalVariable>(RHS) ||
+ LHS->isThreadLocal() || RHS->isThreadLocal() ||
+ RHS->getName() != "__ImageBase" || !RHS->hasExternalLinkage() ||
+ cast<GlobalVariable>(RHS)->hasInitializer() || RHS->hasSection())
+ return nullptr;
+
+ return MCSymbolRefExpr::create(TM.getSymbol(LHS),
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ getContext());
+}
+
+static std::string APIntToHexString(const APInt &AI) {
+ unsigned Width = (AI.getBitWidth() / 8) * 2;
+ std::string HexString = toString(AI, 16, /*Signed=*/false);
+ llvm::transform(HexString, HexString.begin(), tolower);
+ unsigned Size = HexString.size();
+ assert(Width >= Size && "hex string is too large!");
+ HexString.insert(HexString.begin(), Width - Size, '0');
+
+ return HexString;
+}
+
+static std::string scalarConstantToHexString(const Constant *C) {
+ Type *Ty = C->getType();
+ if (isa<UndefValue>(C)) {
+ return APIntToHexString(APInt::getZero(Ty->getPrimitiveSizeInBits()));
+ } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
+ return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
+ return APIntToHexString(CI->getValue());
+ } else {
+ unsigned NumElements;
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
+ NumElements = cast<FixedVectorType>(VTy)->getNumElements();
+ else
+ NumElements = Ty->getArrayNumElements();
+ std::string HexString;
+ for (int I = NumElements - 1, E = -1; I != E; --I)
+ HexString += scalarConstantToHexString(C->getAggregateElement(I));
+ return HexString;
+ }
+}
+
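+// For example, the float constant 1.0f hashes to "3f800000", so the comdat
+// below is named "__real@3f800000"; a <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// vector becomes "__xmm@00000004000000030000000200000001".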
+MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ Align &Alignment) const {
+ if (Kind.isMergeableConst() && C &&
+ getContext().getAsmInfo()->hasCOFFComdatConstants()) {
+ // This creates comdat sections with the given symbol name, but unless
+ // AsmPrinter::GetCPISymbol actually makes the symbol global, the symbol
+ // will be created with a null storage class, which makes GNU binutils
+ // error out.
+ const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT;
+ std::string COMDATSymName;
+ if (Kind.isMergeableConst4()) {
+ if (Alignment <= 4) {
+ COMDATSymName = "__real@" + scalarConstantToHexString(C);
+ Alignment = Align(4);
+ }
+ } else if (Kind.isMergeableConst8()) {
+ if (Alignment <= 8) {
+ COMDATSymName = "__real@" + scalarConstantToHexString(C);
+ Alignment = Align(8);
+ }
+ } else if (Kind.isMergeableConst16()) {
+ // FIXME: These may not be appropriate for non-x86 architectures.
+ if (Alignment <= 16) {
+ COMDATSymName = "__xmm@" + scalarConstantToHexString(C);
+ Alignment = Align(16);
+ }
+ } else if (Kind.isMergeableConst32()) {
+ if (Alignment <= 32) {
+ COMDATSymName = "__ymm@" + scalarConstantToHexString(C);
+ Alignment = Align(32);
+ }
+ }
+
+ if (!COMDATSymName.empty())
+ return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+ COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ANY);
+ }
+
+ return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C,
+ Alignment);
+}
+
+//===----------------------------------------------------------------------===//
+// Wasm
+//===----------------------------------------------------------------------===//
+
+static const Comdat *getWasmComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("WebAssembly COMDATs only support "
+ "SelectionKind::Any, '" + C->getName() + "' cannot be "
+ "lowered.");
+
+ return C;
+}
+
+static unsigned getWasmSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isThreadLocal())
+ Flags |= wasm::WASM_SEG_FLAG_TLS;
+
+ if (K.isMergeableCString())
+ Flags |= wasm::WASM_SEG_FLAG_STRINGS;
+
+ // TODO(sbc): Add support for K.isMergeableConst()
+
+ return Flags;
+}
+
+MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ // We don't support explicit section names for functions in the wasm object
+ // format. Each function has to be in its own unique section.
+ if (isa<Function>(GO)) {
+ return SelectSectionForGlobal(GO, Kind, TM);
+ }
+
+ StringRef Name = GO->getSection();
+
+ // Certain data sections we treat as named custom sections rather than
+ // segments within the data section.
+ // This could be avoided if all data segments (in the wasm sense) were
+ // represented as their own sections (in the llvm sense).
+ // TODO(sbc): https://github.com/WebAssembly/tool-conventions/issues/138
+ if (Name == ".llvmcmd" || Name == ".llvmbc")
+ Kind = SectionKind::getMetadata();
+
+ StringRef Group = "";
+ if (const Comdat *C = getWasmComdat(GO)) {
+ Group = C->getName();
+ }
+
+ unsigned Flags = getWasmSectionFlags(Kind);
+ MCSectionWasm *Section = getContext().getWasmSection(
+ Name, Kind, Flags, Group, MCContext::GenericSectionID);
+
+ return Section;
+}
+
+static MCSectionWasm *selectWasmSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
+ StringRef Group = "";
+ if (const Comdat *C = getWasmComdat(GO)) {
+ Group = C->getName();
+ }
+
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name = getSectionPrefixForGlobal(Kind, /*IsLarge=*/false);
+
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ const auto &OptionalPrefix = F->getSectionPrefix();
+ if (OptionalPrefix)
+ raw_svector_ostream(Name) << '.' << *OptionalPrefix;
+ }
+
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, true);
+ }
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
+ }
+
+ unsigned Flags = getWasmSectionFlags(Kind);
+ return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
+}
+
+MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+
+ if (Kind.isCommon())
+ report_fatal_error("mergeable sections not supported yet on wasm");
+
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ EmitUniqueSection |= GO->hasComdat();
+
+ return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
+ EmitUniqueSection, &NextUniqueID);
+}
+
+bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+}
+
+const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic correctness checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), MCSymbolRefExpr::VK_None,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
+}
+
+void TargetLoweringObjectFileWasm::InitializeWasm() {
+ StaticCtorSection =
+ getContext().getWasmSection(".init_array", SectionKind::getData());
+
+ // We don't use PersonalityEncoding and LSDAEncoding because we don't emit
+ // .cfi directives. We use TTypeEncoding to encode typeinfo global variables.
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+}
+
+MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return Priority == UINT16_MAX ?
+ StaticCtorSection :
+ getContext().getWasmSection(".init_array." + utostr(Priority),
+ SectionKind::getData());
+}
+
+MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("@llvm.global_dtors should have been lowered already");
+}
+
+//===----------------------------------------------------------------------===//
+// XCOFF
+//===----------------------------------------------------------------------===//
+bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(
+ const MachineFunction *MF) {
+ if (!MF->getLandingPads().empty())
+ return true;
+
+ const Function &F = MF->getFunction();
+ if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry())
+ return false;
+
+ const GlobalValue *Per =
+ dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+ assert(Per && "Personality routine is not a GlobalValue type.");
+ if (isNoOpWithoutInvoke(classifyEHPersonality(Per)))
+ return false;
+
+ return true;
+}
+
+bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB(
+ const MachineFunction *MF) {
+ const Function &F = MF->getFunction();
+ if (!F.hasStackProtectorFnAttr())
+ return false;
+ // FIXME: check presence of canary word
+ // There are cases that the stack protectors are not really inserted even if
+ // the attributes are on.
+ return true;
+}
+
+MCSymbol *
+TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
+ return MF->getMMI().getContext().getOrCreateSymbol(
+ "__ehinfo." + Twine(MF->getFunctionNumber()));
+}
+
+MCSymbol *
+TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // We always use a qualname symbol for a GV that represents
+ // a declaration, a function descriptor, or a common symbol.
+ // If a GV represents a GlobalVariable and -fdata-sections is enabled, we
+ // also return a qualname so that a label symbol could be avoided.
+ // It is inherently ambiguous when the GO represents the address of a
+ // function, as the GO could either represent a function descriptor or a
+ // function entry point. We choose to always return a function descriptor
+ // here.
+ if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) {
+ if (GO->isDeclarationForLinker())
+ return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
+ ->getQualNameSymbol();
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasAttribute("toc-data"))
+ return cast<MCSectionXCOFF>(
+ SectionForGlobal(GVar, SectionKind::getData(), TM))
+ ->getQualNameSymbol();
+
+ SectionKind GOKind = getKindForGlobal(GO, TM);
+ if (GOKind.isText())
+ return cast<MCSectionXCOFF>(
+ getSectionForFunctionDescriptor(cast<Function>(GO), TM))
+ ->getQualNameSymbol();
+ if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() ||
+ GOKind.isBSSLocal() || GOKind.isThreadBSSLocal())
+ return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
+ ->getQualNameSymbol();
+ }
+
+ // For all other cases, fall back to getSymbol to return the unqualified name.
+ return nullptr;
+}
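+
+// Hedged illustration (not upstream text): with -fdata-sections enabled, a
+// defined global variable "bar" without an explicit section resolves above to
+// the qualname symbol of its csect (written as "bar[RW]" in XCOFF listing
+// notation, an assumed example), while cases not covered above fall back to
+// the unqualified name via getSymbol().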
+
+MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ if (!GO->hasSection())
+ report_fatal_error("#pragma clang section is not yet supported");
+
+ StringRef SectionName = GO->getSection();
+
+ // Handle the XCOFF::TD case first, then deal with the rest.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
+ if (GVar->hasAttribute("toc-data"))
+ return getContext().getXCOFFSection(
+ SectionName, Kind,
+ XCOFF::CsectProperties(/*MappingClass*/ XCOFF::XMC_TD, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+
+ XCOFF::StorageMappingClass MappingClass;
+ if (Kind.isText())
+ MappingClass = XCOFF::XMC_PR;
+ else if (Kind.isData() || Kind.isBSS())
+ MappingClass = XCOFF::XMC_RW;
+ else if (Kind.isReadOnlyWithRel())
+ MappingClass =
+ TM.Options.XCOFFReadOnlyPointers ? XCOFF::XMC_RO : XCOFF::XMC_RW;
+ else if (Kind.isReadOnly())
+ MappingClass = XCOFF::XMC_RO;
+ else
+ report_fatal_error("XCOFF other section types not yet implemented.");
+
+ return getContext().getXCOFFSection(
+ SectionName, Kind, XCOFF::CsectProperties(MappingClass, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
+ const GlobalObject *GO, const TargetMachine &TM) const {
+ assert(GO->isDeclarationForLinker() &&
+ "Tried to get ER section for a defined global.");
+
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+
+ XCOFF::StorageMappingClass SMC =
+ isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA;
+ if (GO->isThreadLocal())
+ SMC = XCOFF::XMC_UL;
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
+ if (GVar->hasAttribute("toc-data"))
+ SMC = XCOFF::XMC_TD;
+
+ // Externals go into a csect of type ER.
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getMetadata(),
+ XCOFF::CsectProperties(SMC, XCOFF::XTY_ER));
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ // Handle the XCOFF::TD case first, then deal with the rest.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
+ if (GVar->hasAttribute("toc-data")) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+ }
+
+ // Common symbols go into a csect with a matching name, which will get mapped
+ // into the .bss section.
+ // Zero-initialized local TLS symbols go into a csect with a matching name,
+ // which will get mapped into the .tbss section.
+ if (Kind.isBSSLocal() || GO->hasCommonLinkage() || Kind.isThreadBSSLocal()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ XCOFF::StorageMappingClass SMC = Kind.isBSSLocal() ? XCOFF::XMC_BS
+ : Kind.isCommon() ? XCOFF::XMC_RW
+ : XCOFF::XMC_UL;
+ return getContext().getXCOFFSection(
+ Name, Kind, XCOFF::CsectProperties(SMC, XCOFF::XTY_CM));
+ }
+
+ if (Kind.isText()) {
+ if (TM.getFunctionSections()) {
+ return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM))
+ ->getRepresentedCsect();
+ }
+ return TextSection;
+ }
+
+ if (TM.Options.XCOFFReadOnlyPointers && Kind.isReadOnlyWithRel()) {
+ if (!TM.getDataSections())
+ report_fatal_error(
+ "ReadOnlyPointers is supported only if data sections is turned on");
+
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
+ }
+
+ // For BSS kind, zero-initialized data must be emitted to the .data section
+ // because external linkage control sections that get mapped to the .bss
+ // section will be linked as tentative definitions, which is only appropriate
+ // for SectionKind::Common.
+ if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getData(),
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD));
+ }
+ return DataSection;
+ }
+
+ if (Kind.isReadOnly()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
+ }
+ return ReadOnlySection;
+ }
+
+ // External/weak TLS data and initialized local TLS data are not eligible
+ // to be put into common csect. If data sections are enabled, thread
+ // data are emitted into separate sections. Otherwise, thread data
+ // are emitted into the .tdata section.
+ if (Kind.isThreadLocal()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TL, XCOFF::XTY_SD));
+ }
+ return TLSDataSection;
+ }
+
+ report_fatal_error("XCOFF other section types not yet implemented.");
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable(
+ const Function &F, const TargetMachine &TM) const {
+ assert(!F.getComdat() && "Comdat not supported on XCOFF.");
+
+ if (!TM.getFunctionSections())
+ return ReadOnlySection;
+
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ SmallString<128> NameStr(".rodata.jmp..");
+ getNameWithPrefix(NameStr, &F, TM);
+ return getContext().getXCOFFSection(
+ NameStr, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
+}
+
+bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ return false;
+}
+
+/// Given a mergeable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ Align &Alignment) const {
+ // TODO: Enable emitting constant pool entries to unique sections when we
+ // support it.
+ if (Alignment > Align(16))
+ report_fatal_error("Alignments greater than 16 not yet supported.");
+
+ if (Alignment == Align(8)) {
+ assert(ReadOnly8Section && "Section should always be initialized.");
+ return ReadOnly8Section;
+ }
+
+ if (Alignment == Align(16)) {
+ assert(ReadOnly16Section && "Section should always be initialized.");
+ return ReadOnly16Section;
+ }
+
+ return ReadOnlySection;
+}
+
+void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TgtM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TgtM);
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ (TgtM.getTargetTriple().isArch32Bit() ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8);
+ PersonalityEncoding = 0;
+ LSDAEncoding = 0;
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+
+ // Debug support for thread-local variable locations is not ready on AIX. In
+ // integrated-as mode, the relocatable address for a thread-local variable
+ // also causes a linker error. So disable location attribute generation for
+ // thread-local variables for now.
+ // FIXME: when TLS debug on AIX is ready, remove this setting.
+ SupportDebugThreadLocalLocation = false;
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("no static constructor section on AIX");
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("no static destructor section on AIX");
+}
+
+const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ /* Not implemented yet, but don't crash, return nullptr. */
+ return nullptr;
+}
+
+XCOFF::StorageClass
+TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {
+ assert(!isa<GlobalIFunc>(GV) && "GlobalIFunc is not supported on AIX.");
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ return XCOFF::C_HIDEXT;
+ case GlobalValue::ExternalLinkage:
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ return XCOFF::C_EXT;
+ case GlobalValue::ExternalWeakLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ return XCOFF::C_WEAKEXT;
+ case GlobalValue::AppendingLinkage:
+ report_fatal_error(
+ "There is no mapping that implements AppendingLinkage for XCOFF.");
+ }
+ llvm_unreachable("Unknown linkage type!");
+}
+
+MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
+ const GlobalValue *Func, const TargetMachine &TM) const {
+ assert((isa<Function>(Func) ||
+ (isa<GlobalAlias>(Func) &&
+ isa_and_nonnull<Function>(
+ cast<GlobalAlias>(Func)->getAliaseeObject()))) &&
+ "Func must be a function or an alias which has a function as base "
+ "object.");
+
+ SmallString<128> NameStr;
+ NameStr.push_back('.');
+ getNameWithPrefix(NameStr, Func, TM);
+
+ // When -function-sections is enabled and no explicit section is specified,
+ // it is no longer necessary to emit a function entry point label; the
+ // function entry point csect is used instead. For function declarations, the
+ // undefined symbol is treated as a csect with the XTY_ER property.
+ if (((TM.getFunctionSections() && !Func->hasSection()) ||
+ Func->isDeclarationForLinker()) &&
+ isa<Function>(Func)) {
+ return getContext()
+ .getXCOFFSection(
+ NameStr, SectionKind::getText(),
+ XCOFF::CsectProperties(XCOFF::XMC_PR, Func->isDeclarationForLinker()
+ ? XCOFF::XTY_ER
+ : XCOFF::XTY_SD))
+ ->getQualNameSymbol();
+ }
+
+ return getContext().getOrCreateSymbol(NameStr);
+}
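+
+// Editorial example (a sketch following the AIX naming convention assumed
+// here): for a function named foo, the entry-point symbol produced above is
+// ".foo", while the descriptor csect created by getSectionForFunctionDescriptor
+// below keeps the plain name "foo".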
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
+ const Function *F, const TargetMachine &TM) const {
+ SmallString<128> NameStr;
+ getNameWithPrefix(NameStr, F, TM);
+ return getContext().getXCOFFSection(
+ NameStr, SectionKind::getData(),
+ XCOFF::CsectProperties(XCOFF::XMC_DS, XCOFF::XTY_SD));
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
+ const MCSymbol *Sym, const TargetMachine &TM) const {
+ // Use TE storage-mapping class when large code model is enabled so that
+ // the chance of needing -bbigtoc is decreased.
+ return getContext().getXCOFFSection(
+ cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
+ XCOFF::CsectProperties(
+ TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
+ XCOFF::XTY_SD));
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
+ const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
+ auto *LSDA = cast<MCSectionXCOFF>(LSDASection);
+ if (TM.getFunctionSections()) {
+ // If option -ffunction-sections is on, append the function name to the
+ // name of the LSDA csect so that each function has its own LSDA csect.
+ // This helps the linker to garbage-collect EH info of unused functions.
+ SmallString<128> NameStr = LSDA->getName();
+ raw_svector_ostream(NameStr) << '.' << F.getName();
+ LSDA = getContext().getXCOFFSection(NameStr, LSDA->getKind(),
+ LSDA->getCsectProp());
+ }
+ return LSDA;
+}
+//===----------------------------------------------------------------------===//
+// GOFF
+//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() = default;
+
+MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ return SelectSectionForGlobal(GO, Kind, TM);
+}
+
+MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ auto *Symbol = TM.getSymbol(GO);
+ if (Kind.isBSS())
+ return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(),
+ nullptr, nullptr);
+
+ return getContext().getObjectFileInfo()->getTextSection();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 000000000000..af5d10103f78
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,56 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if the target wants to forcibly keep the frame pointer.
+ if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
+ return true;
+
+ const Function &F = MF.getFunction();
+
+ if (!F.hasFnAttribute("frame-pointer"))
+ return false;
+ StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
+ if (FP == "all")
+ return true;
+ if (FP == "non-leaf")
+ return MF.getFrameInfo().hasCalls();
+ if (FP == "none")
+ return false;
+ llvm_unreachable("unknown frame pointer flag");
+}
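+
+// Illustrative IR (editorial sketch, not part of this file): a function whose
+// attribute group contains
+//   attributes #0 = { "frame-pointer"="non-leaf" }
+// keeps its frame pointer only when MF.getFrameInfo().hasCalls() is true, per
+// the logic above; "all" always keeps it and "none" never does.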
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// NOTE: Some targets still do not support producing debug entry values; that
+/// is controlled by SupportsDebugEntryValues. In addition, the SCE debugger
+/// does not have the feature implemented, so prefer not to emit debug entry
+/// values in that case. EnableDebugEntryValues can be used for testing
+/// purposes.
+bool TargetOptions::ShouldEmitDebugEntryValues() const {
+ return (SupportsDebugEntryValues && DebuggerTuning != DebuggerKind::SCE) ||
+ EnableDebugEntryValues;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
new file mode 100644
index 000000000000..98ea2f21b3c8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -0,0 +1,1569 @@
+//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Discriminator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Target/CGPassBuilderOption.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include <cassert>
+#include <optional>
+#include <string>
+
+using namespace llvm;
+
+static cl::opt<bool>
+ EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
+ cl::desc("Enable interprocedural register allocation "
+ "to reduce load/store at procedure calls."));
+static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc Scheduler"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+ cl::desc("Disable Early If-conversion"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault> OptimizeRegAlloc(
+ "optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisablePostRAMachineSink("disable-postra-machine-sink",
+ cl::Hidden,
+ cl::desc("Disable PostRA Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting",
+ cl::Hidden, cl::desc("Disable ConstantHoisting"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
+ cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
+static cl::opt<bool> DisableAtExitBasedGlobalDtorLowering(
+ "disable-atexit-based-global-dtor-lowering", cl::Hidden,
+ cl::desc("For MachO, disable atexit()-based global destructor lowering"));
+static cl::opt<bool> EnableImplicitNullChecks(
+ "enable-implicit-null-checks",
+ cl::desc("Fold null checks into faulting memory operations"),
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
+ cl::desc("Disable MergeICmps Pass"),
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<cl::boolOrDefault>
+ VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"));
+static cl::opt<cl::boolOrDefault>
+ DebugifyAndStripAll("debugify-and-strip-all-safe", cl::Hidden,
+ cl::desc("Debugify MIR before and Strip debug after "
+ "each pass except those known to be unsafe "
+ "when debug info is present"));
+static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
+ "debugify-check-and-strip-all-safe", cl::Hidden,
+ cl::desc(
+ "Debugify MIR before, by checking and stripping the debug info after, "
+ "each pass except those known to be unsafe when debug info is "
+ "present"));
+// Enable or disable the MachineOutliner.
+static cl::opt<RunOutliner> EnableMachineOutliner(
+ "enable-machine-outliner", cl::desc("Enable the machine outliner"),
+ cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
+ cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
+ "Run on all functions guaranteed to be beneficial"),
+ clEnumValN(RunOutliner::NeverOutline, "never",
+ "Disable all outlining"),
+ // Sentinel value for unspecified option.
+ clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+// Disable the pass that fixes up unwind information. Whether the pass is
+// included in the pipeline is controlled via the target options; this option
+// serves as a manual override.
+static cl::opt<bool> DisableCFIFixup("disable-cfi-fixup", cl::Hidden,
+ cl::desc("Disable the CFI fixup pass"));
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<cl::boolOrDefault> EnableGlobalISelOption(
+ "global-isel", cl::Hidden,
+ cl::desc("Enable the \"global\" instruction selector"));
+
+// FIXME: remove this after switching to NPM or GlobalISel, whichever gets there
+// first...
+static cl::opt<bool>
+ PrintAfterISel("print-after-isel", cl::init(false), cl::Hidden,
+ cl::desc("Print machine instrs after ISel"));
+
+static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
+ "global-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"global\" instruction selection "
+ "fails to lower/select an instruction"),
+ cl::values(
+ clEnumValN(GlobalISelAbortMode::Disable, "0", "Disable the abort"),
+ clEnumValN(GlobalISelAbortMode::Enable, "1", "Enable the abort"),
+ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
+ "Disable the abort but emit a diagnostic on failure")));
+
+// Disable MIRProfileLoader before RegAlloc. This is for debugging and tuning
+// purposes.
+static cl::opt<bool> DisableRAFSProfileLoader(
+ "disable-ra-fsprofile-loader", cl::init(false), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before RegAlloc"));
+// Disable MIRProfileLoader before BlockPlacement. This is for debugging and
+// tuning purposes.
+static cl::opt<bool> DisableLayoutFSProfileLoader(
+ "disable-layout-fsprofile-loader", cl::init(false), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before BlockPlacement"));
+// Specify FSProfile file name.
+static cl::opt<std::string>
+ FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile file name."), cl::Hidden);
+// Specify Remapping file for FSProfile.
+static cl::opt<std::string> FSRemappingFile(
+ "fs-remapping-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
+
+// Temporary option to allow experimenting with MachineScheduler as a post-RA
+// scheduler. Targets can "properly" enable this with
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
+// Targets can return true in targetSchedulesPostRAScheduling() and insert a
+// PostRA scheduling pass wherever they want.
+static cl::opt<bool> MISchedPostRA(
+ "misched-postra", cl::Hidden,
+ cl::desc(
+ "Run MachineScheduler post regalloc (independent of preRA sched)"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
+/// Option names for limiting the codegen pipeline.
+/// These are used in error reporting, and we did not want to duplicate their
+/// names all over the place.
+static const char StartAfterOptName[] = "start-after";
+static const char StartBeforeOptName[] = "start-before";
+static const char StopAfterOptName[] = "stop-after";
+static const char StopBeforeOptName[] = "stop-before";
+
+static cl::opt<std::string>
+ StartAfterOpt(StringRef(StartAfterOptName),
+ cl::desc("Resume compilation after a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+ StartBeforeOpt(StringRef(StartBeforeOptName),
+ cl::desc("Resume compilation before a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+ StopAfterOpt(StringRef(StopAfterOptName),
+ cl::desc("Stop compilation after a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+ StopBeforeOpt(StringRef(StopBeforeOptName),
+ cl::desc("Stop compilation before a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+/// Enable the machine function splitter pass.
+static cl::opt<bool> EnableMachineFunctionSplitter(
+ "enable-split-machine-functions", cl::Hidden,
+ cl::desc("Split out cold blocks from machine functions based on profile "
+ "information."));
+
+/// Disable the expand reductions pass for testing.
+static cl::opt<bool> DisableExpandReductions(
+ "disable-expand-reductions", cl::init(false), cl::Hidden,
+ cl::desc("Disable the expand reduction intrinsics pass from running"));
+
+/// Disable the select optimization pass.
+static cl::opt<bool> DisableSelectOptimize(
+ "disable-select-optimize", cl::init(true), cl::Hidden,
+ cl::desc("Disable the select-optimization pass from running"));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing.
+/// i.e. -disable-mypass=false has no effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
+static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
+ bool Override) {
+ if (Override)
+ return IdentifyingPassPtr();
+ return PassID;
+}
+
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID if the target directly adds its
+/// own pass, in which case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static IdentifyingPassPtr overridePass(AnalysisID StandardID,
+ IdentifyingPassPtr TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRASched);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableBlockPlacement);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, DisableEarlyIfConversion);
+
+ if (StandardID == &EarlyMachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &PostRAMachineSinkingID)
+ return applyDisable(TargetID, DisablePostRAMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
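+
+// Editorial example (a sketch, not upstream text): passing -disable-machine-dce
+// makes overridePass() return an invalid IdentifyingPassPtr for
+// DeadMachineInstructionElimID, so the addPass(AnalysisID) overload later in
+// this file simply skips scheduling that pass.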
+
+// Find the FSProfile file name. The internal option takes precedence over the
+// value obtained from the TargetMachine.
+static std::string getFSProfileFile(const TargetMachine *TM) {
+ if (!FSProfileFile.empty())
+ return FSProfileFile.getValue();
+ const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileFile;
+}
+
+// Find the profile remapping file name. The internal option takes precedence
+// over the value obtained from the TargetMachine.
+static std::string getFSRemappingFile(const TargetMachine *TM) {
+ if (!FSRemappingFile.empty())
+ return FSRemappingFile.getValue();
+ const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileRemappingFile;
+}
+
+//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+namespace {
+
+struct InsertedPass {
+ AnalysisID TargetPassID;
+ IdentifyingPassPtr InsertedPassID;
+
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID) {}
+
+ Pass *getInsertedPass() const {
+ assert(InsertedPassID.isValid() && "Illegal Pass ID!");
+ if (InsertedPassID.isInstance())
+ return InsertedPassID.getInstance();
+ Pass *NP = Pass::createPass(InsertedPassID.getID());
+ assert(NP && "Pass ID not registered");
+ return NP;
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+extern cl::opt<bool> EnableFSDiscriminator;
+
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overriding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
+ DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses;
+
+ /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+ /// is inserted after each instance of the first one.
+ SmallVector<InsertedPass, 4> InsertedPasses;
+};
+
+} // end namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+static const PassInfo *getPassInfo(StringRef PassName) {
+ if (PassName.empty())
+ return nullptr;
+
+ const PassRegistry &PR = *PassRegistry::getPassRegistry();
+ const PassInfo *PI = PR.getPassInfo(PassName);
+ if (!PI)
+ report_fatal_error(Twine('\"') + Twine(PassName) +
+ Twine("\" pass is not registered."));
+ return PI;
+}
+
+static AnalysisID getPassIDFromName(StringRef PassName) {
+ const PassInfo *PI = getPassInfo(PassName);
+ return PI ? PI->getTypeInfo() : nullptr;
+}
+
+static std::pair<StringRef, unsigned>
+getPassNameAndInstanceNum(StringRef PassName) {
+ StringRef Name, InstanceNumStr;
+ std::tie(Name, InstanceNumStr) = PassName.split(',');
+
+ unsigned InstanceNum = 0;
+ if (!InstanceNumStr.empty() && InstanceNumStr.getAsInteger(10, InstanceNum))
+ report_fatal_error("invalid pass instance specifier " + PassName);
+
+ return std::make_pair(Name, InstanceNum);
+}
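+
+// Editorial note (a sketch based on the parsing above; the pass name is only an
+// assumed example): the start/stop options accept an optional instance number
+// after a comma, e.g. -stop-after=dead-mi-elimination,1 stops after the second
+// run of that pass, while a bare pass name defaults to instance 0, the first
+// occurrence.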
+
+void TargetPassConfig::setStartStopPasses() {
+ StringRef StartBeforeName;
+ std::tie(StartBeforeName, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+
+ StringRef StartAfterName;
+ std::tie(StartAfterName, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+
+ StringRef StopBeforeName;
+ std::tie(StopBeforeName, StopBeforeInstanceNum)
+ = getPassNameAndInstanceNum(StopBeforeOpt);
+
+ StringRef StopAfterName;
+ std::tie(StopAfterName, StopAfterInstanceNum)
+ = getPassNameAndInstanceNum(StopAfterOpt);
+
+ StartBefore = getPassIDFromName(StartBeforeName);
+ StartAfter = getPassIDFromName(StartAfterName);
+ StopBefore = getPassIDFromName(StopBeforeName);
+ StopAfter = getPassIDFromName(StopAfterName);
+ if (StartBefore && StartAfter)
+ report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+ Twine(StartAfterOptName) + Twine(" specified!"));
+ if (StopBefore && StopAfter)
+ report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+ Twine(StopAfterOptName) + Twine(" specified!"));
+ Started = (StartAfter == nullptr) && (StartBefore == nullptr);
+}
+
+CGPassBuilderOption llvm::getCGPassBuilderOption() {
+ CGPassBuilderOption Opt;
+
+#define SET_OPTION(Option) \
+ if (Option.getNumOccurrences()) \
+ Opt.Option = Option;
+
+ SET_OPTION(EnableFastISelOption)
+ SET_OPTION(EnableGlobalISelAbort)
+ SET_OPTION(EnableGlobalISelOption)
+ SET_OPTION(EnableIPRA)
+ SET_OPTION(OptimizeRegAlloc)
+ SET_OPTION(VerifyMachineCode)
+
+#define SET_BOOLEAN_OPTION(Option) Opt.Option = Option;
+
+ SET_BOOLEAN_OPTION(EarlyLiveIntervals)
+ SET_BOOLEAN_OPTION(EnableBlockPlacementStats)
+ SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
+ SET_BOOLEAN_OPTION(EnableMachineOutliner)
+ SET_BOOLEAN_OPTION(MISchedPostRA)
+ SET_BOOLEAN_OPTION(DisableMergeICmps)
+ SET_BOOLEAN_OPTION(DisableLSR)
+ SET_BOOLEAN_OPTION(DisableConstantHoisting)
+ SET_BOOLEAN_OPTION(DisableCGP)
+ SET_BOOLEAN_OPTION(DisablePartialLibcallInlining)
+ SET_BOOLEAN_OPTION(DisableSelectOptimize)
+ SET_BOOLEAN_OPTION(PrintLSR)
+ SET_BOOLEAN_OPTION(PrintISelInput)
+ SET_BOOLEAN_OPTION(PrintGCInfo)
+
+ return Opt;
+}
+
+static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &LLVMTM) {
+ StringRef StartBefore;
+ StringRef StartAfter;
+ StringRef StopBefore;
+ StringRef StopAfter;
+
+ unsigned StartBeforeInstanceNum = 0;
+ unsigned StartAfterInstanceNum = 0;
+ unsigned StopBeforeInstanceNum = 0;
+ unsigned StopAfterInstanceNum = 0;
+
+ std::tie(StartBefore, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+ std::tie(StartAfter, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+ std::tie(StopBefore, StopBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StopBeforeOpt);
+ std::tie(StopAfter, StopAfterInstanceNum) =
+ getPassNameAndInstanceNum(StopAfterOpt);
+
+ if (StartBefore.empty() && StartAfter.empty() && StopBefore.empty() &&
+ StopAfter.empty())
+ return;
+
+ std::tie(StartBefore, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StartBefore);
+ std::tie(StartAfter, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StartAfter);
+ std::tie(StopBefore, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StopBefore);
+ std::tie(StopAfter, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StopAfter);
+ if (!StartBefore.empty() && !StartAfter.empty())
+ report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+ Twine(StartAfterOptName) + Twine(" specified!"));
+ if (!StopBefore.empty() && !StopAfter.empty())
+ report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+ Twine(StopAfterOptName) + Twine(" specified!"));
+
+ PIC.registerShouldRunOptionalPassCallback(
+ [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(),
+ EnableNext = std::optional<bool>(), StartBeforeCount = 0u,
+ StartAfterCount = 0u, StopBeforeCount = 0u,
+ StopAfterCount = 0u](StringRef P, Any) mutable {
+ bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore);
+ bool StartAfterPass = !StartAfter.empty() && P.contains(StartAfter);
+ bool StopBeforePass = !StopBefore.empty() && P.contains(StopBefore);
+ bool StopAfterPass = !StopAfter.empty() && P.contains(StopAfter);
+
+ // Implement -start-after/-stop-after
+ if (EnableNext) {
+ EnableCurrent = *EnableNext;
+ EnableNext.reset();
+ }
+
+ // Using PIC.registerAfterPassCallback won't work because if this
+ // callback returns false, AfterPassCallback is also skipped.
+ if (StartAfterPass && StartAfterCount++ == StartAfterInstanceNum) {
+ assert(!EnableNext && "Error: assign to EnableNext more than once");
+ EnableNext = true;
+ }
+ if (StopAfterPass && StopAfterCount++ == StopAfterInstanceNum) {
+ assert(!EnableNext && "Error: assign to EnableNext more than once");
+ EnableNext = false;
+ }
+
+ if (StartBeforePass && StartBeforeCount++ == StartBeforeInstanceNum)
+ EnableCurrent = true;
+ if (StopBeforePass && StopBeforeCount++ == StopBeforeInstanceNum)
+ EnableCurrent = false;
+ return EnableCurrent;
+ });
+}
+
+void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &LLVMTM) {
+
+ // Register a callback for disabling passes.
+ PIC.registerShouldRunOptionalPassCallback([](StringRef P, Any) {
+
+#define DISABLE_PASS(Option, Name) \
+ if (Option && P.contains(#Name)) \
+ return false;
+ DISABLE_PASS(DisableBlockPlacement, MachineBlockPlacementPass)
+ DISABLE_PASS(DisableBranchFold, BranchFolderPass)
+ DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass)
+ DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass)
+ DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass)
+ DISABLE_PASS(DisableMachineCSE, MachineCSEPass)
+ DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass)
+ DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass)
+ DISABLE_PASS(DisableMachineSink, MachineSinkingPass)
+ DISABLE_PASS(DisablePostRAMachineLICM, MachineLICMPass)
+ DISABLE_PASS(DisablePostRAMachineSink, PostRAMachineSinkingPass)
+ DISABLE_PASS(DisablePostRASched, PostRASchedulerPass)
+ DISABLE_PASS(DisableSSC, StackSlotColoringPass)
+ DISABLE_PASS(DisableTailDuplicate, TailDuplicatePass)
+
+ return true;
+ });
+
+ registerPartialPipelineCallback(PIC, LLVMTM);
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
+ : ImmutablePass(ID), PM(&pm), TM(&TM) {
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Also register alias analysis passes required by codegen passes.
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+
+ if (EnableIPRA.getNumOccurrences())
+ TM.Options.EnableIPRA = EnableIPRA;
+ else {
+ // If not explicitly specified, use target default.
+ TM.Options.EnableIPRA |= TM.useIPRA();
+ }
+
+ if (TM.Options.EnableIPRA)
+ setRequiresCodeGenSCCOrder();
+
+ if (EnableGlobalISelAbort.getNumOccurrences())
+ TM.Options.GlobalISelAbort = EnableGlobalISelAbort;
+
+ setStartStopPasses();
+}
+
+CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
+ return TM->getOptLevel();
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+ IdentifyingPassPtr InsertedPassID) {
+ assert(((!InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getID()) ||
+ (InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
+ "Insert a pass after itself!");
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(*this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID) {
+ report_fatal_error("Trying to construct TargetPassConfig without a target "
+ "machine. Scheduling a CodeGen pass without a target "
+ "triple set?");
+}
+
+bool TargetPassConfig::willCompleteCodeGenPipeline() {
+ return StopBeforeOpt.empty() && StopAfterOpt.empty();
+}
+
+bool TargetPassConfig::hasLimitedCodeGenPipeline() {
+ return !StartBeforeOpt.empty() || !StartAfterOpt.empty() ||
+ !willCompleteCodeGenPipeline();
+}
+
+std::string
+TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) {
+ if (!hasLimitedCodeGenPipeline())
+ return std::string();
+ std::string Res;
+ static cl::opt<std::string> *PassNames[] = {&StartAfterOpt, &StartBeforeOpt,
+ &StopAfterOpt, &StopBeforeOpt};
+ static const char *OptNames[] = {StartAfterOptName, StartBeforeOptName,
+ StopAfterOptName, StopBeforeOptName};
+ bool IsFirst = true;
+ for (int Idx = 0; Idx < 4; ++Idx)
+ if (!PassNames[Idx]->empty()) {
+ if (!IsFirst)
+ Res += Separator;
+ IsFirst = false;
+ Res += OptNames[Idx];
+ }
+ return Res;
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+ IdentifyingPassPtr TargetID) {
+ Impl->TargetPasses[StandardID] = TargetID;
+}
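+
+// Editorial example (hedged, not upstream text): a target's TargetPassConfig
+// subclass may replace the default post-RA scheduler with the machine
+// scheduler via
+//   substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+// which is the pattern the MISchedPostRA option comment above refers to.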
+
+IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
+
+bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
+ IdentifyingPassPtr TargetID = getPassSubstitution(ID);
+ IdentifyingPassPtr FinalPtr = overridePass(ID, TargetID);
+ return !FinalPtr.isValid() || FinalPtr.isInstance() ||
+ FinalPtr.getID() != ID;
+}
+
+/// Add a pass to the PassManager if that pass is supposed to be run. If the
+/// Started/Stopped flags indicate either that the compilation should start at
+/// a later pass or that it should stop after an earlier pass, then do not add
+/// the pass. Finally, compare the current pass against the StartAfter
+/// and StopAfter options and change the Started/Stopped flags accordingly.
+void TargetPassConfig::addPass(Pass *P) {
+ assert(!Initialized && "PassConfig is immutable");
+
+ // Cache the Pass ID here in case the pass manager finds this pass is
+ // redundant with ones already scheduled / available, and deletes it.
+ // Fundamentally, once we add the pass to the manager, we no longer own it
+ // and shouldn't reference it.
+ AnalysisID PassID = P->getPassID();
+
+ if (StartBefore == PassID && StartBeforeCount++ == StartBeforeInstanceNum)
+ Started = true;
+ if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
+ Stopped = true;
+ if (Started && !Stopped) {
+ if (AddingMachinePasses) {
+ // Construct banner message before PM->add() as that may delete the pass.
+ std::string Banner =
+ std::string("After ") + std::string(P->getPassName());
+ addMachinePrePasses();
+ PM->add(P);
+ addMachinePostPasses(Banner);
+ } else {
+ PM->add(P);
+ }
+
+ // Add the passes after the pass P if there is any.
+ for (const auto &IP : Impl->InsertedPasses)
+ if (IP.TargetPassID == PassID)
+ addPass(IP.getInsertedPass());
+ } else {
+ delete P;
+ }
+
+ if (StopAfter == PassID && StopAfterCount++ == StopAfterInstanceNum)
+ Stopped = true;
+
+ if (StartAfter == PassID && StartAfterCount++ == StartAfterInstanceNum)
+ Started = true;
+ if (Stopped && !Started)
+ report_fatal_error("Cannot stop compilation after pass that is not run");
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+///
+/// addPass cannot return a pointer to the pass instance because the pass is
+/// internal to the PassManager and the instance we create here may already be
+/// freed.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+ IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
+ IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
+ if (!FinalPtr.isValid())
+ return nullptr;
+
+ Pass *P;
+ if (FinalPtr.isInstance())
+ P = FinalPtr.getInstance();
+ else {
+ P = Pass::createPass(FinalPtr.getID());
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ }
+ AnalysisID FinalID = P->getPassID();
+ addPass(P); // Ends the lifetime of P.
+
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const std::string &Banner) {
+ addPrintPass(Banner);
+ addVerifyPass(Banner);
+}
+
+void TargetPassConfig::addPrintPass(const std::string &Banner) {
+ if (PrintAfterISel)
+ PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
+void TargetPassConfig::addVerifyPass(const std::string &Banner) {
+ bool Verify = VerifyMachineCode == cl::BOU_TRUE;
+#ifdef EXPENSIVE_CHECKS
+ if (VerifyMachineCode == cl::BOU_UNSET)
+ Verify = TM->isMachineVerifierClean();
+#endif
+ if (Verify)
+ PM->add(createMachineVerifierPass(Banner));
+}
+
+void TargetPassConfig::addDebugifyPass() {
+ PM->add(createDebugifyMachineModulePass());
+}
+
+void TargetPassConfig::addStripDebugPass() {
+ PM->add(createStripDebugMachineModulePass(/*OnlyDebugified=*/true));
+}
+
+void TargetPassConfig::addCheckDebugPass() {
+ PM->add(createCheckDebugMachineModulePass());
+}
+
+void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
+ if (AllowDebugify && DebugifyIsSafe &&
+ (DebugifyAndStripAll == cl::BOU_TRUE ||
+ DebugifyCheckAndStripAll == cl::BOU_TRUE))
+ addDebugifyPass();
+}
+
+void TargetPassConfig::addMachinePostPasses(const std::string &Banner) {
+ if (DebugifyIsSafe) {
+ if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {
+ addCheckDebugPass();
+ addStripDebugPass();
+ } else if (DebugifyAndStripAll == cl::BOU_TRUE)
+ addStripDebugPass();
+ }
+ addVerifyPass(Banner);
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ addPass(createTypeBasedAAWrapperPass());
+ addPass(createScopedNoAliasAAWrapperPass());
+ addPass(createBasicAAWrapperPass());
+
+ // Run loop strength reduction before anything else.
+ if (!DisableLSR) {
+ addPass(createCanonicalizeFreezeInLoopsPass());
+ addPass(createLoopStrengthReducePass());
+ if (PrintLSR)
+ addPass(createPrintFunctionPass(dbgs(),
+ "\n\n*** Code after LSR ***\n"));
+ }
+
+ // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
+ // loads and compares. ExpandMemCmpPass then tries to expand those calls
+ // into optimally-sized loads and compares. The transforms are enabled by a
+ // target lowering hook.
+ if (!DisableMergeICmps)
+ addPass(createMergeICmpsLegacyPass());
+ addPass(createExpandMemCmpPass());
+ }
+
+ // Run GC lowering passes for builtin collectors
+ // TODO: add a pass insertion point here
+ addPass(&GCLoweringID);
+ addPass(&ShadowStackGCLoweringID);
+ addPass(createLowerConstantIntrinsicsPass());
+
+ // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with
+ // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
+ if (TM->getTargetTriple().isOSBinFormatMachO() &&
+ !DisableAtExitBasedGlobalDtorLowering)
+ addPass(createLowerGlobalDtorsLegacyPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ addPass(createUnreachableBlockEliminationPass());
+
+ // Prepare expensive constants for SelectionDAG.
+ if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
+ addPass(createConstantHoistingPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createReplaceWithVeclibLegacyPass());
+
+ if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
+ addPass(createPartiallyInlineLibCallsPass());
+
+ // Expand vector predication intrinsics into standard IR instructions.
+ // This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
+ // passes since it emits those kinds of intrinsics.
+ addPass(createExpandVectorPredicationPass());
+
+ // Add scalarization of the target's unsupported masked memory intrinsics.
+ // Each unsupported intrinsic is replaced with a chain of basic blocks that
+ // stores/loads elements one by one if the appropriate mask bit is set.
+ addPass(createScalarizeMaskedMemIntrinLegacyPass());
+
+ // Expand reduction intrinsics into shuffle sequences if the target wants to.
+ // Allow disabling it for testing purposes.
+ if (!DisableExpandReductions)
+ addPass(createExpandReductionsPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createTLSVariableHoistPass());
+
+ // Convert conditional moves to conditional jumps when profitable.
+ if (getOptLevel() != CodeGenOpt::None && !DisableSelectOptimize)
+ addPass(createSelectOptimizePass());
+}
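The common IR schedule above is typically extended, not replaced, by a target: the override runs its own IR-level lowering and then delegates to the base implementation. A minimal sketch, assuming a hypothetical MyTargetPassConfig subclass and an invented createMyTargetIntrinsicLoweringPass() (the TargetPassConfig constructor signature is abbreviated to the common form):

    class MyTargetPassConfig : public TargetPassConfig {
    public:
      MyTargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
          : TargetPassConfig(TM, PM) {}

      void addIRPasses() override {
        // Hypothetical target-specific IR lowering, run before the common passes.
        addPass(createMyTargetIntrinsicLoweringPass());
        // Then the shared schedule defined above.
        TargetPassConfig::addIRPasses();
      }
    };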
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+ const MCAsmInfo *MCAI = TM->getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ switch (MCAI->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit, so the cleanups done here apply to
+ // both. Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(createSjLjEHPreparePass(TM));
+ [[fallthrough]];
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::AIX:
+ addPass(createDwarfEHPass(getOptLevel()));
+ break;
+ case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
+ addPass(createWinEHPass());
+ addPass(createDwarfEHPass(getOptLevel()));
+ break;
+ case ExceptionHandling::Wasm:
+ // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs
+ // on catchpads and cleanuppads because it does not outline them into
+ // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
+ // should remove PHIs there.
+ addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false));
+ addPass(createWasmEHPass());
+ break;
+ case ExceptionHandling::None:
+ addPass(createLowerInvokePass());
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ addPass(createCodeGenPreparePass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ addPreISel();
+
+ // Force codegen to run according to the callgraph.
+ if (requiresCodeGenSCCOrder())
+ addPass(new DummyCGSCCPass);
+
+ addPass(createCallBrPass());
+
+ // Add both the safe stack and the stack protection passes: each of them will
+ // only protect functions that have corresponding attributes.
+ addPass(createSafeStackPass());
+ addPass(createStackProtectorPass());
+
+ if (PrintISelInput)
+ addPass(createPrintFunctionPass(
+ dbgs(), "\n\n*** Final LLVM Code input to ISel ***\n"));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+}
+
+bool TargetPassConfig::addCoreISelPasses() {
+ // Enable FastISel with -fast-isel, but allow that to be overridden.
+ TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
+
+ // Determine an instruction selector.
+ enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+ SelectorType Selector;
+
+ if (EnableFastISelOption == cl::BOU_TRUE)
+ Selector = SelectorType::FastISel;
+ else if (EnableGlobalISelOption == cl::BOU_TRUE ||
+ (TM->Options.EnableGlobalISel &&
+ EnableGlobalISelOption != cl::BOU_FALSE))
+ Selector = SelectorType::GlobalISel;
+ else if (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())
+ Selector = SelectorType::FastISel;
+ else
+ Selector = SelectorType::SelectionDAG;
+
+ // Set TM->Options.EnableFastISel and EnableGlobalISel consistently.
+ if (Selector == SelectorType::FastISel) {
+ TM->setFastISel(true);
+ TM->setGlobalISel(false);
+ } else if (Selector == SelectorType::GlobalISel) {
+ TM->setFastISel(false);
+ TM->setGlobalISel(true);
+ }
+
+ // FIXME: Injecting into the DAGISel pipeline seems to cause issues with
+ // analyses needing to be re-run. This can result in being unable to
+ // schedule passes (particularly with 'Function Alias Analysis
+ // Results'). It's not entirely clear why but AFAICT this seems to be
+ // due to one FunctionPassManager not being able to use analyses from a
+ // previous one. As we're injecting a ModulePass we break the usual
+ // pass manager into two. GlobalISel with the fallback path disabled
+ // and -run-pass seem to be unaffected. The majority of GlobalISel
+ // testing uses -run-pass so this probably isn't too bad.
+ SaveAndRestore SavedDebugifyIsSafe(DebugifyIsSafe);
+ if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled())
+ DebugifyIsSafe = false;
+
+ // Add instruction selector passes.
+ if (Selector == SelectorType::GlobalISel) {
+ SaveAndRestore SavedAddingMachinePasses(AddingMachinePasses, true);
+ if (addIRTranslator())
+ return true;
+
+ addPreLegalizeMachineIR();
+
+ if (addLegalizeMachineIR())
+ return true;
+
+ // Before running the register bank selector, ask the target if it
+ // wants to run some passes.
+ addPreRegBankSelect();
+
+ if (addRegBankSelect())
+ return true;
+
+ addPreGlobalInstructionSelect();
+
+ if (addGlobalInstructionSelect())
+ return true;
+
+ // Pass to reset the MachineFunction if the ISel failed.
+ addPass(createResetMachineFunctionPass(
+ reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled()));
+
+ // Provide a fallback path when we do not want to abort on
+ // not-yet-supported input.
+ if (!isGlobalISelAbortEnabled() && addInstSelector())
+ return true;
+
+ } else if (addInstSelector())
+ return true;
+
+ // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+ // FinalizeISel.
+ addPass(&FinalizeISelID);
+
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ return false;
+}
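The addIRTranslator/addLegalizeMachineIR/addRegBankSelect/addGlobalInstructionSelect calls above are target hooks; a target that supports GlobalISel overrides them to schedule the four generic passes. A hedged sketch patterned on what in-tree targets typically do (MyTargetPassConfig is hypothetical; returning false from a hook means it succeeded):

    bool MyTargetPassConfig::addIRTranslator() {
      addPass(new IRTranslator(getOptLevel()));
      return false;
    }
    bool MyTargetPassConfig::addLegalizeMachineIR() {
      addPass(new Legalizer());
      return false;
    }
    bool MyTargetPassConfig::addRegBankSelect() {
      addPass(new RegBankSelect());
      return false;
    }
    bool MyTargetPassConfig::addGlobalInstructionSelect() {
      addPass(new InstructionSelect(getOptLevel()));
      return false;
    }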
+
+bool TargetPassConfig::addISelPasses() {
+ if (TM->useEmulatedTLS())
+ addPass(createLowerEmuTLSPass());
+
+ PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ addPass(createPreISelIntrinsicLoweringPass());
+ addPass(createExpandLargeDivRemPass());
+ addPass(createExpandLargeFpConvertPass());
+ addIRPasses();
+ addCodeGenPrepare();
+ addPassesToHandleExceptions();
+ addISelPrepare();
+
+ return addCoreISelPasses();
+}
+
+/// -regalloc=... command line option.
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc>>
+ RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
+void TargetPassConfig::addMachinePasses() {
+ AddingMachinePasses = true;
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+ }
+
+ if (TM->Options.EnableIPRA)
+ addPass(createRegUsageInfoPropPass());
+
+ // Run pre-ra passes.
+ addPreRegAlloc();
+
+ // Debugifying the register allocator passes seems to provoke some
+ // non-determinism that affects CodeGen, and there doesn't seem to be a point
+ // where it becomes safe again, so stop debugifying here.
+ DebugifyIsSafe = false;
+
+ // Add an FSDiscriminator pass right before RA so that we can get a
+ // more precise SampleFDO profile for RA.
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass1));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
+ addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass1,
+ nullptr));
+ }
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc();
+ else
+ addFastRegAlloc();
+
+ // Run post-ra passes.
+ addPostRegAlloc();
+
+ addPass(&RemoveRedundantDebugValuesID);
+
+ addPass(&FixupStatepointCallerSavedID);
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&PostRAMachineSinkingID);
+ addPass(&ShrinkWrapID);
+ }
+
+ // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
+ // do so if it hasn't been disabled, substituted, or overridden.
+ if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID))
+ addPass(createPrologEpilogInserterPass());
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(&ExpandPostRAPseudosID);
+
+ // Run pre-sched2 passes.
+ addPreSched2();
+
+ if (EnableImplicitNullChecks)
+ addPass(&ImplicitNullChecksID);
+
+ // Second pass scheduler.
+ // Let the target optionally insert this pass by itself at some other
+ // point.
+ if (getOptLevel() != CodeGenOpt::None &&
+ !TM->targetSchedulesPostRAScheduling()) {
+ if (MISchedPostRA)
+ addPass(&PostMachineSchedulerID);
+ else
+ addPass(&PostRASchedulerID);
+ }
+
+ // GC
+ if (addGCPasses()) {
+ if (PrintGCInfo)
+ addPass(createGCInfoPrinter(dbgs()));
+ }
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ // Insert before XRay Instrumentation.
+ addPass(&FEntryInserterID);
+
+ addPass(&XRayInstrumentationID);
+ addPass(&PatchableFunctionID);
+
+ addPreEmitPass();
+
+ if (TM->Options.EnableIPRA)
+ // Collect register usage information and produce a register mask of
+ // clobbered registers, to be used to optimize call sites.
+ addPass(createRegUsageInfoCollector());
+
+ // FIXME: Some backends are incompatible with running the verifier after
+ // addPreEmitPass. Maybe only pass "false" here for those targets?
+ addPass(&FuncletLayoutID);
+
+ addPass(&StackMapLivenessID);
+ addPass(&LiveDebugValuesID);
+ addPass(&MachineSanitizerBinaryMetadataID);
+
+ if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+ EnableMachineOutliner != RunOutliner::NeverOutline) {
+ bool RunOnAllFunctions =
+ (EnableMachineOutliner == RunOutliner::AlwaysOutline);
+ bool AddOutliner =
+ RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
+ if (AddOutliner)
+ addPass(createMachineOutlinerPass(RunOnAllFunctions));
+ }
+
+ if (EnableFSDiscriminator)
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::PassLast));
+
+ // The machine function splitter uses the basic block sections feature, so
+ // both cannot be enabled at the same time; basic block sections takes
+ // precedence.
+ // FIXME: In principle, BasicBlockSection::Labels and splitting can be used
+ // together. Update this check once we have addressed any issues.
+ if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
+ if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
+ addPass(llvm::createBasicBlockSectionsProfileReaderPass(
+ TM->getBBSectionsFuncListBuf()));
+ }
+ addPass(llvm::createBasicBlockSectionsPass());
+ } else if (TM->Options.EnableMachineFunctionSplitter ||
+ EnableMachineFunctionSplitter) {
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty()) {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRProfileLoaderPass(
+ ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::PassLast, nullptr));
+ } else {
+ // A sample profile is given, but FSDiscriminator is not
+ // enabled; this may result in a performance regression.
+ WithColor::warning()
+ << "Using AutoFDO without FSDiscriminator for MFS may regress "
+ "performance.";
+ }
+ }
+ addPass(createMachineFunctionSplitterPass());
+ }
+
+ addPostBBSections();
+
+ if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
+ addPass(createCFIFixup());
+
+ PM->add(createStackFrameLayoutAnalysisPass());
+
+ // Add passes that directly emit MI after all other MI passes.
+ addPreEmitPass2();
+
+ AddingMachinePasses = false;
+}
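As the comment block above addMachinePasses notes, the usual way for a target to inject a fixup is to override one of the empty addPre*/addPost* hooks rather than a whole stage. A minimal sketch under invented names (MyTargetPassConfig and createMyTargetBranchRelaxationPass are hypothetical):

    void MyTargetPassConfig::addPreEmitPass() {
      // Late machine-level fixup, scheduled just before the final emission passes.
      if (getOptLevel() != CodeGenOpt::None)
        addPass(createMyTargetBranchRelaxationPass()); // hypothetical target pass
    }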
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ addPass(&EarlyTailDuplicateID);
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(&DeadMachineInstructionElimID);
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ addILPOpts();
+
+ addPass(&EarlyMachineLICMID);
+ addPass(&MachineCSEID);
+
+ addPass(&MachineSinkingID);
+
+ addPass(&PeepholeOptimizerID);
+ // Clean-up the dead code that may have been generated by peephole
+ // rewriting.
+ addPass(&DeadMachineInstructionElimID);
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
+
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ useDefaultRegisterAllocator);
+
+static void initializeDefaultRegisterAllocatorOnce() {
+ if (!RegisterRegAlloc::getDefault())
+ RegisterRegAlloc::setDefault(RegAlloc);
+}
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analyses.
+///
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry registers pass IDs instead of function ptrs,
+/// this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
+ // Initialize the global default.
+ llvm::call_once(InitializeDefaultRegisterAllocatorFlag,
+ initializeDefaultRegisterAllocatorOnce);
+
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
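The -regalloc= mechanism above is driven by the RegisterRegAlloc registry: the defaultRegAlloc entry registers the "default" name, and any additional allocator makes itself selectable the same way. A hedged sketch of such a registration; the "toy" allocator is invented and simply reuses the fast allocator as a stand-in:

    // Illustrative out-of-tree registration; not part of this file.
    static FunctionPass *createToyRegisterAllocator() {
      // Stand-in implementation for illustration only.
      return createFastRegisterAllocator();
    }

    static RegisterRegAlloc
        ToyRegAlloc("toy", "illustrative toy register allocator",
                    createToyRegisterAllocator);

With that in place, createRegAllocPass() would return the registered constructor whenever -regalloc=toy is given, instead of asking the target for its default.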
+
+bool TargetPassConfig::isCustomizedRegAlloc() {
+ return RegAlloc !=
+ (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator;
+}
+
+bool TargetPassConfig::addRegAssignAndRewriteFast() {
+ if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator &&
+ RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator)
+ report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
+
+ addPass(createRegAllocPass(false));
+
+ // Allow targets to change the register assignments after
+ // fast register allocation.
+ addPostFastRegAllocRewrite();
+ return true;
+}
+
+bool TargetPassConfig::addRegAssignAndRewriteOptimized() {
+ // Add the selected register allocation pass.
+ addPass(createRegAllocPass(true));
+
+ // Allow targets to change the register assignments before rewriting.
+ addPreRewrite();
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+
+ // Regalloc scoring for ML-driven eviction - noop except when learning a new
+ // eviction policy.
+ addPass(createRegAllocScoringPass());
+ return true;
+}
+
+/// Return true if the default global register allocator is in use and
+/// has not been overridden on the command line with '-regalloc=...'.
+bool TargetPassConfig::usingDefaultRegAlloc() const {
+ return RegAlloc.getNumOccurrences() == 0;
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc() {
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
+
+ addRegAssignAndRewriteFast();
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc() {
+ addPass(&DetectDeadLanesID);
+
+ addPass(&ProcessImplicitDefsID);
+
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ // FIXME: UnreachableMachineBlockElim is a dependent pass of LiveVariables.
+ // When LiveVariables is removed, this has to be removed/moved as well.
+ // Explicit addition of UnreachableMachineBlockElim allows stopping before or
+ // after it with -stop-before/-stop-after.
+ addPass(&UnreachableMachineBlockElimID);
+ addPass(&LiveVariablesID);
+
+ // Edge splitting is smarter with machine loop info.
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
+ addPass(&TwoAddressInstructionPassID);
+ addPass(&RegisterCoalescerID);
+
+ // The machine scheduler may accidentally create disconnected components
+ // when moving subregister definitions around; avoid this by splitting them into
+ // separate vregs beforehand. Splitting can also improve register allocation quality.
+ addPass(&RenameIndependentSubregsID);
+
+ // PreRA instruction scheduling.
+ addPass(&MachineSchedulerID);
+
+ if (addRegAssignAndRewriteOptimized()) {
+ // Perform stack slot coloring and post-ra machine LICM.
+ addPass(&StackSlotColoringID);
+
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ addPostRewrite();
+
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(&MachineCopyPropagationID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(&MachineLICMID);
+ }
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Cleanup of redundant immediate/address loads.
+ addPass(&MachineLateInstrsCleanupID);
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ addPass(&BranchFolderPassID);
+
+ // Tail duplication.
+ // Note that tail duplication just increases code size and degrades
+ // performance for targets that require structured control flow.
+ // In addition, it can make the CFG irreducible. Thus we disable it.
+ if (!TM->requiresStructuredCFG())
+ addPass(&TailDuplicateID);
+
+ // Copy propagation.
+ addPass(&MachineCopyPropagationID);
+}
+
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+ addPass(&GCMachineCodeAnalysisID);
+ return true;
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass2));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
+ addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass2,
+ nullptr));
+ }
+ if (addPass(&MachineBlockPlacementID)) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(&MachineBlockPlacementStatsID);
+ }
+}
+
+//===---------------------------------------------------------------------===//
+/// GlobalISel Configuration
+//===---------------------------------------------------------------------===//
+bool TargetPassConfig::isGlobalISelAbortEnabled() const {
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
+}
+
+bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
+}
+
+bool TargetPassConfig::isGISelCSEEnabled() const {
+ return true;
+}
+
+std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const {
+ return std::make_unique<CSEConfigBase>();
+}
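Targets that want the optimization-level-aware CSE configuration for GlobalISel typically override getCSEConfig instead of using the base CSEConfigBase returned above. A short sketch of the common pattern (MyTargetPassConfig is hypothetical; getStandardCSEConfigForOpt is the helper in-tree targets generally use for this):

    std::unique_ptr<CSEConfigBase> MyTargetPassConfig::getCSEConfig() const {
      // Enable the full CSE configuration when optimizing, a minimal one at -O0.
      return getStandardCSEConfigForOpt(TM->getOptLevel());
    }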
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..77d2dfcf2323
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -0,0 +1,678 @@
+//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
+#define DEBUG_TYPE "target-reg-info"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ HugeSizeForSplit("huge-size-for-split", cl::Hidden,
+ cl::desc("A threshold of live range size which may cause "
+ "high compile time cost in global splitting."),
+ cl::init(5000));
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const LaneBitmask *SRILaneMasks,
+ LaneBitmask SRICoveringLanes,
+ const RegClassInfo *const RCIs,
+ unsigned Mode)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE),
+ CoveringLanes(SRICoveringLanes),
+ RCInfos(RCIs), HwMode(Mode) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() = default;
+
+bool TargetRegisterInfo::shouldRegionSplitForVirtReg(
+ const MachineFunction &MF, const LiveInterval &VirtReg) const {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg());
+ if (MI && TII->isTriviallyReMaterializable(*MI) &&
+ VirtReg.size() > HugeSizeForSplit)
+ return false;
+ return true;
+}
+
+void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet,
+ MCRegister Reg) const {
+ for (MCPhysReg SR : superregs_inclusive(Reg))
+ RegisterSet.set(SR);
+}
+
+bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
+ ArrayRef<MCPhysReg> Exceptions) const {
+ // Check that all super registers of reserved regs are reserved as well.
+ BitVector Checked(getNumRegs());
+ for (unsigned Reg : RegisterSet.set_bits()) {
+ if (Checked[Reg])
+ continue;
+ for (MCPhysReg SR : superregs(Reg)) {
+ if (!RegisterSet[SR] && !is_contained(Exceptions, Reg)) {
+ dbgs() << "Error: Super register " << printReg(SR, this)
+ << " of reserved register " << printReg(Reg, this)
+ << " is not reserved.\n";
+ return false;
+ }
+
+ // We check superregs transitively, so we can remember this for later
+ // to avoid a compile-time explosion in deep register hierarchies.
+ Checked.set(SR);
+ }
+ }
+ return true;
+}
+
+namespace llvm {
+
+Printable printReg(Register Reg, const TargetRegisterInfo *TRI,
+ unsigned SubIdx, const MachineRegisterInfo *MRI) {
+ return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) {
+ if (!Reg)
+ OS << "$noreg";
+ else if (Register::isStackSlot(Reg))
+ OS << "SS#" << Register::stackSlot2Index(Reg);
+ else if (Reg.isVirtual()) {
+ StringRef Name = MRI ? MRI->getVRegName(Reg) : "";
+ if (Name != "") {
+ OS << '%' << Name;
+ } else {
+ OS << '%' << Register::virtReg2Index(Reg);
+ }
+ } else if (!TRI)
+ OS << '$' << "physreg" << Reg;
+ else if (Reg < TRI->getNumRegs()) {
+ OS << '$';
+ printLowerCase(TRI->getName(Reg), OS);
+ } else
+ llvm_unreachable("Register kind is unsupported.");
+
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+ });
+}
+
+Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+ });
+}
+
+Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ if (Register::isVirtualRegister(Unit)) {
+ OS << '%' << Register::virtReg2Index(Unit);
+ } else {
+ OS << printRegUnit(Unit, TRI);
+ }
+ });
+}
+
+Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
+ return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) {
+ if (RegInfo.getRegClassOrNull(Reg))
+ OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ else if (RegInfo.getRegBankOrNull(Reg))
+ OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
+ else {
+ OS << "_";
+ assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
+ "Generic registers must have a valid type");
+ }
+ });
+}
+
+} // end namespace llvm
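The printers above return Printable objects meant to be streamed directly into debug output. A typical illustrative use inside a machine pass that defines DEBUG_TYPE, assuming SrcReg, DstReg, TRI, and MRI are already in scope:

    LLVM_DEBUG(dbgs() << "rewriting " << printReg(SrcReg, TRI) << " -> "
                      << printReg(DstReg, TRI, /*SubIdx=*/0, &MRI)
                      << " in class " << printRegClassOrBank(DstReg, MRI, TRI)
                      << '\n');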
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ for (BitMaskClassIterator It(RC->getSubClassMask(), *this); It.isValid();
+ ++It) {
+ const TargetRegisterClass *SubRC = getRegClass(It.getID());
+ if (SubRC->isAllocatable())
+ return SubRC;
+ }
+ return nullptr;
+}
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the smallest (most specific) sub
+/// register class of the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(MCRegister reg, MVT VT) const {
+ assert(Register::isPhysicalRegister(reg) &&
+ "reg must be a physical register");
+
+ // Pick the smallest sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = nullptr;
+ for (const TargetRegisterClass* RC : regclasses()) {
+ if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) &&
+ RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClassLLT(MCRegister reg, LLT Ty) const {
+ assert(Register::isPhysicalRegister(reg) &&
+ "reg must be a physical register");
+
+ // Pick the smallest sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass *BestRC = nullptr;
+ for (const TargetRegisterClass *RC : regclasses()) {
+ if ((!Ty.isValid() || isTypeLegalForClass(*RC, Ty)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF);
+ for (MCPhysReg PR : Order)
+ R.set(PR);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (const TargetRegisterClass *C : regclasses())
+ if (C->isAllocatable())
+ getAllocatableSetForRC(MF, C, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const BitVector &Reserved = MRI.getReservedRegs();
+ Allocatable.reset(Reserved);
+
+ return Allocatable;
+}
+
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + llvm::countr_zero(Common));
+ return nullptr;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return nullptr;
+
+ // Register classes are ordered topologically, so the largest common
+ // sub-class is the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
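The scan in firstCommonClass walks the sub-class bit masks one 32-bit word at a time; because classes are topologically ordered, the first common set bit is the largest common sub-class. A self-contained sketch of the same scan on raw masks, with the mask layout (32 classes per word, lowest ID first) assumed to mirror the code above:

    #include <bit>
    #include <cstdint>

    // Returns the lowest class ID present in both masks, or NumClasses if none.
    unsigned firstCommonID(const uint32_t *A, const uint32_t *B,
                           unsigned NumClasses) {
      for (unsigned I = 0; I < NumClasses; I += 32)
        if (uint32_t Common = *A++ & *B++)
          return I + std::countr_zero(Common);
      return NumClasses;
    }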
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return nullptr;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = nullptr;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search once we have found a register class as small as
+ // RCA.
+ unsigned MinSize = getRegSizeInBits(*RCA);
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || getRegSizeInBits(*RC) < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC))
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (getRegSizeInBits(*BestRC) == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+/// Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) {
+ // Same register class.
+ if (DefRC == SrcRC)
+ return true;
+
+ // Both operands are sub registers. Check if they share a register class.
+ unsigned SrcIdx, DefIdx;
+ if (SrcSubReg && DefSubReg) {
+ return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+ SrcIdx, DefIdx) != nullptr;
+ }
+
+ // At most one of the registers is a sub register; make it Src to avoid
+ // duplicating the test.
+ if (!SrcSubReg) {
+ std::swap(DefSubReg, SrcSubReg);
+ std::swap(DefRC, SrcRC);
+ }
+
+ // One of the registers is a sub register; check if we can get a superclass.
+ if (SrcSubReg)
+ return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
+
+ // Plain copy.
+ return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
+bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // If this source does not incur a cross register bank copy, use it.
+ return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg);
+}
+
+// Compute target-independent register allocator hints to help eliminate copies.
+bool TargetRegisterInfo::getRegAllocationHints(
+ Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
+ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const std::pair<unsigned, SmallVector<Register, 4>> &Hints_MRI =
+ MRI.getRegAllocationHints(VirtReg);
+
+ SmallSet<Register, 32> HintedRegs;
+ // First hint may be a target hint.
+ bool Skip = (Hints_MRI.first != 0);
+ for (auto Reg : Hints_MRI.second) {
+ if (Skip) {
+ Skip = false;
+ continue;
+ }
+
+ // Target-independent hints are either a physical or a virtual register.
+ Register Phys = Reg;
+ if (VRM && Phys.isVirtual())
+ Phys = VRM->getPhys(Phys);
+
+ // Don't add the same reg twice (Hints_MRI may contain multiple virtual
+ // registers allocated to the same physreg).
+ if (!HintedRegs.insert(Phys).second)
+ continue;
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!Phys.isPhysical())
+ continue;
+ if (MRI.isReserved(Phys))
+ continue;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (!is_contained(Order, Phys))
+ continue;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+ }
+ return false;
+}
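Targets commonly layer their own hinting policy on top of this generic implementation by overriding the hook and then delegating to it. A hedged sketch; MyTargetRegisterInfo and its "prefer the first register in the allocation order" policy are invented for illustration:

    bool MyTargetRegisterInfo::getRegAllocationHints(
        Register VirtReg, ArrayRef<MCPhysReg> Order,
        SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
        const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
      // Translate the generic copy hints from MRI first.
      TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM,
                                                Matrix);
      // Then append an invented target preference.
      if (!Order.empty() && !is_contained(Hints, Order[0]))
        Hints.push_back(Order[0]);
      return false; // Hints are soft preferences, not a mandatory order.
    }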
+
+bool TargetRegisterInfo::isCalleeSavedPhysReg(
+ MCRegister PhysReg, const MachineFunction &MF) const {
+ if (PhysReg == 0)
+ return false;
+ const uint32_t *callerPreservedRegs =
+ getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ if (callerPreservedRegs) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "Expected physical register");
+ return (callerPreservedRegs[PhysReg / 32] >> PhysReg % 32) & 1;
+ }
+ return false;
+}
+
+bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ return !MF.getFunction().hasFnAttribute("no-realign-stack");
+}
+
+bool TargetRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const Function &F = MF.getFunction();
+ return F.hasFnAttribute("stackrealign") ||
+ (MFI.getMaxAlign() > TFI->getStackAlign()) ||
+ F.hasFnAttribute(Attribute::StackAlignment);
+}
+
+bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
+ const uint32_t *mask1) const {
+ unsigned N = (getNumRegs()+31) / 32;
+ for (unsigned I = 0; I < N; ++I)
+ if ((mask0[I] & mask1[I]) != mask0[I])
+ return false;
+ return true;
+}
+
+unsigned
+TargetRegisterInfo::getRegSizeInBits(Register Reg,
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterClass *RC{};
+ if (Reg.isPhysical()) {
+ // The size is not directly available for physical registers.
+ // Instead, we need to access a register class that contains Reg and
+ // get the size of that register class.
+ RC = getMinimalPhysRegClass(Reg);
+ } else {
+ LLT Ty = MRI.getType(Reg);
+ unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
+ // If Reg is not a generic register, query the register class to
+ // get its size.
+ if (RegSize)
+ return RegSize;
+ // Since Reg is not a generic register, it must have a register class.
+ RC = MRI.getRegClass(Reg);
+ }
+ assert(RC && "Unable to deduce the register class");
+ return getRegSizeInBits(*RC);
+}
+
+bool TargetRegisterInfo::getCoveringSubRegIndexes(
+ const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
+ LaneBitmask LaneMask, SmallVectorImpl<unsigned> &NeededIndexes) const {
+ SmallVector<unsigned, 8> PossibleIndexes;
+ unsigned BestIdx = 0;
+ unsigned BestCover = 0;
+
+ for (unsigned Idx = 1, E = getNumSubRegIndices(); Idx < E; ++Idx) {
+ // Is this index even compatible with the given class?
+ if (getSubClassWithSubReg(RC, Idx) != RC)
+ continue;
+ LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LaneMask) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // The index must not cover any lanes outside \p LaneMask.
+ if ((SubRegMask & ~LaneMask).any())
+ continue;
+
+ unsigned PopCount = SubRegMask.getNumLanes();
+ PossibleIndexes.push_back(Idx);
+ if (PopCount > BestCover) {
+ BestCover = PopCount;
+ BestIdx = Idx;
+ }
+ }
+
+ // Abort if we cannot possibly implement the COPY with the given indexes.
+ if (BestIdx == 0)
+ return false;
+
+ NeededIndexes.push_back(BestIdx);
+
+ // Greedy heuristic: keep iterating, taking the best covering subreg index
+ // each time.
+ LaneBitmask LanesLeft = LaneMask & ~getSubRegIndexLaneMask(BestIdx);
+ while (LanesLeft.any()) {
+ unsigned BestIdx = 0;
+ int BestCover = std::numeric_limits<int>::min();
+ for (unsigned Idx : PossibleIndexes) {
+ LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LanesLeft) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // Do not cover already-covered lanes to avoid creating cycles
+ // in copy bundles (= bundle contains copies that write to the
+ // registers).
+ if ((SubRegMask & ~LanesLeft).any())
+ continue;
+
+ // Try to cover as many of the remaining lanes as possible.
+ const int Cover = (SubRegMask & LanesLeft).getNumLanes();
+ if (Cover > BestCover) {
+ BestCover = Cover;
+ BestIdx = Idx;
+ }
+ }
+
+ if (BestIdx == 0)
+ return false; // Impossible to handle
+
+ NeededIndexes.push_back(BestIdx);
+
+ LanesLeft &= ~getSubRegIndexLaneMask(BestIdx);
+ }
+
+ return BestIdx;
+}
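The loop above is a plain greedy set cover over lane masks: pick the index that covers the most remaining lanes without touching lanes that are already covered, then repeat until nothing is left. A self-contained sketch of the same idea on raw 64-bit masks; the types and names are illustrative, not an LLVM API:

    #include <bit>
    #include <cstdint>
    #include <vector>

    // Pick candidate masks until Target is covered; record the chosen indices.
    bool greedyCover(uint64_t Target, const std::vector<uint64_t> &Candidates,
                     std::vector<unsigned> &Picked) {
      uint64_t Left = Target;
      while (Left) {
        int Best = -1, BestCover = 0;
        for (unsigned I = 0, E = Candidates.size(); I != E; ++I) {
          uint64_t M = Candidates[I];
          if (M & ~Left)                        // would re-cover or overshoot
            continue;
          int Cover = std::popcount(M & Left);  // lanes this pick adds
          if (Cover > BestCover) {
            BestCover = Cover;
            Best = (int)I;
          }
        }
        if (Best < 0)
          return false;                         // the mask cannot be covered
        Picked.push_back((unsigned)Best);
        Left &= ~Candidates[Best];
      }
      return true;
    }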
+
+Register
+TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
+ const MachineRegisterInfo *MRI) const {
+ while (true) {
+ const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ Register CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!CopySrcReg.isVirtual())
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+Register TargetRegisterInfo::lookThruSingleUseCopyChain(
+ Register SrcReg, const MachineRegisterInfo *MRI) const {
+ while (true) {
+ const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ // Found the real definition, return it if it has a single use.
+ if (!MI->isCopyLike())
+ return MRI->hasOneNonDBGUse(SrcReg) ? SrcReg : Register();
+
+ Register CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ // Continue only if the next definition in the chain is for a virtual
+ // register that has a single use.
+ if (!CopySrcReg.isVirtual() || !MRI->hasOneNonDBGUse(CopySrcReg))
+ return Register();
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+void TargetRegisterInfo::getOffsetOpcodes(
+ const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
+ assert(!Offset.getScalable() && "Scalable offsets are not handled");
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+}
+
+DIExpression *
+TargetRegisterInfo::prependOffsetExpression(const DIExpression *Expr,
+ unsigned PrependFlags,
+ const StackOffset &Offset) const {
+ assert((PrependFlags &
+ ~(DIExpression::DerefBefore | DIExpression::DerefAfter |
+ DIExpression::StackValue | DIExpression::EntryValue)) == 0 &&
+ "Unsupported prepend flag");
+ SmallVector<uint64_t, 16> OffsetExpr;
+ if (PrependFlags & DIExpression::DerefBefore)
+ OffsetExpr.push_back(dwarf::DW_OP_deref);
+ getOffsetOpcodes(Offset, OffsetExpr);
+ if (PrependFlags & DIExpression::DerefAfter)
+ OffsetExpr.push_back(dwarf::DW_OP_deref);
+ return DIExpression::prependOpcodes(Expr, OffsetExpr,
+ PrependFlags & DIExpression::StackValue,
+ PrependFlags & DIExpression::EntryValue);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void TargetRegisterInfo::dumpReg(Register Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
+ dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n";
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 000000000000..dba84950f49d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,343 @@
+//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <numeric>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+ return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+ return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
+ STI = TSInfo;
+ SchedModel = TSInfo->getSchedModel();
+ TII = TSInfo->getInstrInfo();
+ STI->initInstrItins(InstrItins);
+
+ unsigned NumRes = SchedModel.getNumProcResourceKinds();
+ ResourceFactors.resize(NumRes);
+ ResourceLCM = SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ if (NumUnits > 0)
+ ResourceLCM = std::lcm(ResourceLCM, NumUnits);
+ }
+ MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+ }
+}
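To make the factor computation above concrete, a small invented configuration: with IssueWidth = 4 and two resources having 2 and 3 units respectively, ResourceLCM = lcm(4, 2, 3) = 12, MicroOpFactor = 12 / 4 = 3, and the per-resource factors are 12 / 2 = 6 and 12 / 3 = 4. Scaling everything by the least common multiple of the unit counts lets later cycle accounting compare micro-op and resource usage on one common integer scale without fractions.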
+
+/// Returns true only if the instruction is specified as single issue.
+bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->BeginGroup;
+ }
+ return false;
+}
+
+bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->EndGroup;
+ }
+ return false;
+}
+
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrItineraries()) {
+ int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
+ }
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned capLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+ // Get the definition's scheduling class descriptor from this machine model.
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ return SCDesc;
+
+#ifndef NDEBUG
+ unsigned NIter = 0;
+#endif
+ while (SCDesc->isVariant()) {
+ assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+ SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+ SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ }
+ return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+ unsigned DefIdx = 0;
+ for (unsigned i = 0; i != DefOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ ++DefIdx;
+ }
+ return DefIdx;
+}
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+ unsigned UseIdx = 0;
+ for (unsigned i = 0; i != UseOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.readsReg() && !MO.isDef())
+ ++UseIdx;
+ }
+ return UseIdx;
+}
+
+// Top-level API for clients that know the operand indices.
+unsigned TargetSchedModel::computeOperandLatency(
+ const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *UseMI, unsigned UseOperIdx) const {
+
+ if (!hasInstrSchedModel() && !hasInstrItineraries())
+ return TII->defaultDefLatency(SchedModel, *DefMI);
+
+ if (hasInstrItineraries()) {
+ int OperLatency = 0;
+ if (UseMI) {
+ OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
+ *UseMI, UseOperIdx);
+ }
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ // Rather than directly querying InstrItins stage latency, we call a TII
+ // hook to allow subtargets to specialize latency. This hook is only
+ // applicable to the InstrItins model. InstrSchedModel should model all
+ // special cases without TII hooks.
+ InstrLatency =
+ std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
+ return InstrLatency;
+ }
+ // hasInstrSchedModel()
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+ if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ unsigned WriteID = WLEntry->WriteResourceID;
+ unsigned Latency = capLatency(WLEntry->Cycles);
+ if (!UseMI)
+ return Latency;
+
+ // Lookup the use's latency adjustment in SubtargetInfo.
+ const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+ if (UseDesc->NumReadAdvanceEntries == 0)
+ return Latency;
+ unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+ int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+ if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
+ return 0;
+ return Latency - Advance;
+ }
+ // If DefIdx does not exist in the model (e.g. implicit defs), then return
+ // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
+ !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
+ SchedModel.isComplete()) {
+ errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
+ llvm_unreachable("incomplete machine model");
+ }
+#endif
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
+}
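Condensed, the scheduling-model branch above computes the def's write latency minus the use's ReadAdvance, with invalid write latencies capped first and the result clamped at zero. A toy restatement under those assumptions; the names and the free function are illustrative, not an LLVM API:

    // WriteCycles comes from the def's WriteLatencyEntry, ReadAdvance from the
    // use's ReadAdvance table; negative WriteCycles means "invalid/infinite".
    unsigned operandLatency(int WriteCycles, int ReadAdvance) {
      unsigned Latency = WriteCycles >= 0 ? (unsigned)WriteCycles : 1000u; // capLatency()
      if (ReadAdvance > 0 && (unsigned)ReadAdvance > Latency)
        return 0;                    // clamp at zero instead of wrapping
      return Latency - ReadAdvance;  // a negative ReadAdvance increases latency
    }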
+
+unsigned
+TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
+ return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));
+}
+
+unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
+ assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
+ unsigned SCIdx = TII->get(Opcode).getSchedClass();
+ return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx));
+}
+
+unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const {
+ if (hasInstrSchedModel())
+ return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst));
+ return computeInstrLatency(Inst.getOpcode());
+}
+
+unsigned
+TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
+ bool UseDefaultDefLatency) const {
+ // For the itinerary model, fall back to the old subtarget hook.
+ // Allow subtargets to compute Bundle latencies outside the machine model.
+ if (hasInstrItineraries() || MI->isBundle() ||
+ (!hasInstrSchedModel() && !UseDefaultDefLatency))
+ return TII->getInstrLatency(&InstrItins, *MI);
+
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid())
+ return computeInstrLatency(*SCDesc);
+ }
+ return TII->defaultDefLatency(SchedModel, *MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *DepMI) const {
+ if (!SchedModel.isOutOfOrder())
+ return 1;
+
+ // Out-of-order processor can dispatch WAW dependencies in the same cycle.
+
+ // Treat predication as a data dependency for out-of-order cpus. In-order
+ // cpus do not need to treat predicated writes specially.
+ //
+ // TODO: The following hack exists because predication passes do not
+ // correctly append imp-use operands, and readsReg() strangely returns false
+ // for predicated defs.
+ Register Reg = DefMI->getOperand(DefOperIdx).getReg();
+ const MachineFunction &MF = *DefMI->getMF();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
+ return computeInstrLatency(DefMI);
+
+  // If we have a per-operand scheduling model, check if this def is writing
+  // an unbuffered resource. If so, it is treated like an in-order CPU.
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
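+// Reciprocal throughput is the average number of cycles between issues of
+// independent copies of MI; e.g. a value of 0.5 means two such instructions
+// can start per cycle, while 0.0 means the model has no estimate.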
+double
+TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const {
+ if (hasInstrItineraries()) {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ return MCSchedModel::getReciprocalThroughput(SchedClass,
+ *getInstrItineraries());
+ }
+
+ if (hasInstrSchedModel())
+ return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI));
+
+ return 0.0;
+}
+
+double
+TargetSchedModel::computeReciprocalThroughput(unsigned Opcode) const {
+ unsigned SchedClass = TII->get(Opcode).getSchedClass();
+ if (hasInstrItineraries())
+ return MCSchedModel::getReciprocalThroughput(SchedClass,
+ *getInstrItineraries());
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass);
+ if (SCDesc.isValid() && !SCDesc.isVariant())
+ return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);
+ }
+
+ return 0.0;
+}
+
+double
+TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const {
+ if (hasInstrSchedModel())
+ return SchedModel.getReciprocalThroughput(*STI, *TII, MI);
+ return computeReciprocalThroughput(MI.getOpcode());
+}
+
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
new file mode 100644
index 000000000000..ba2c8dda7de5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -0,0 +1,60 @@
+//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+TargetSubtargetInfo::TargetSubtargetInfo(
+ const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
+ const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
+ const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC,
+ const unsigned *FP)
+ : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {}
+
+TargetSubtargetInfo::~TargetSubtargetInfo() = default;
+
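+// The hooks below return conservative defaults; targets override them in
+// their own TargetSubtargetInfo subclasses.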
+bool TargetSubtargetInfo::enableAtomicExpand() const {
+ return true;
+}
+
+bool TargetSubtargetInfo::enableIndirectBrExpand() const {
+ return false;
+}
+
+bool TargetSubtargetInfo::enableMachineScheduler() const {
+ return false;
+}
+
+bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
+ return enableMachineScheduler();
+}
+
+bool TargetSubtargetInfo::enableRALocalReassignment(
+ CodeGenOpt::Level OptLevel) const {
+ return true;
+}
+
+bool TargetSubtargetInfo::enablePostRAScheduler() const {
+ return getSchedModel().PostRAScheduler;
+}
+
+bool TargetSubtargetInfo::enablePostRAMachineScheduler() const {
+ return enableMachineScheduler() && enablePostRAScheduler();
+}
+
+bool TargetSubtargetInfo::useAA() const {
+ return false;
+}
+
+void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 000000000000..c3ea76bf8cea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1967 @@
+//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two-address
+// instruction is removed.
+//
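+// Schematically, on a target such as X86 this turns
+//
+//   %2 = ADD32rr %0, %1      (destination tied to the first source)
+//
+// into
+//
+//   %2 = COPY %0
+//   %2 = ADD32rr %2, %1
+//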
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "twoaddressinstruction"
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
+
+// Temporary flag to disable rescheduling.
+static cl::opt<bool>
+EnableRescheduling("twoaddr-reschedule",
+ cl::desc("Coalesce copies by rescheduling (default=true)"),
+ cl::init(true), cl::Hidden);
+
+// Limit the number of dataflow edges to traverse when evaluating the benefit
+// of commuting operands.
+static cl::opt<unsigned> MaxDataFlowEdge(
+ "dataflow-edge-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum number of dataflow edges to traverse when evaluating "
+ "the benefit of commuting operands"));
+
+namespace {
+
+class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const InstrItineraryData *InstrItins = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
+ AliasAnalysis *AA = nullptr;
+ CodeGenOpt::Level OptLevel = CodeGenOpt::None;
+
+ // The current basic block being processed.
+ MachineBasicBlock *MBB = nullptr;
+
+  // Keep track of the distance of an MI from the start of the current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // Set of already processed instructions in the current block.
+ SmallPtrSet<MachineInstr*, 8> Processed;
+
+  // A map from virtual registers to the physical registers they are likely to
+  // be coalesced with, due to copies from physical registers to virtual
+  // registers. e.g. v1024 = move r0.
+ DenseMap<Register, Register> SrcRegMap;
+
+  // A map from virtual registers to the physical registers they are likely to
+  // be coalesced with, due to copies to physical registers from virtual
+  // registers. e.g. r1 = move v1024.
+ DenseMap<Register, Register> DstRegMap;
+
+ void removeClobberedSrcRegMap(MachineInstr *MI);
+
+ bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
+
+ bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
+
+ bool isProfitableToCommute(Register RegA, Register RegB, Register RegC,
+ MachineInstr *MI, unsigned Dist);
+
+ bool commuteInstruction(MachineInstr *MI, unsigned DstIdx,
+ unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
+
+ bool isProfitableToConv3Addr(Register RegA, Register RegB);
+
+ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi, Register RegA,
+ Register RegB, unsigned &Dist);
+
+ bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
+
+ bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi, Register Reg);
+ bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi, Register Reg);
+
+ bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned &Dist, bool shouldOnlyCommute);
+
+ bool tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist);
+ void scanUses(Register DstReg);
+
+ void processCopy(MachineInstr *MI);
+
+ using TiedPairList = SmallVector<std::pair<unsigned, unsigned>, 4>;
+ using TiedOperandMap = SmallDenseMap<unsigned, TiedPairList>;
+
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ void eliminateRegSequence(MachineBasicBlock::iterator&);
+ bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addUsedIfAvailable<AAResultsWrapperPass>();
+ AU.addUsedIfAvailable<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// Pass entry point.
+ bool runOnMachineFunction(MachineFunction&) override;
+};
+
+} // end anonymous namespace
+
+char TwoAddressInstructionPass::ID = 0;
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE,
+ "Two-Address instruction pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
+ "Two-Address instruction pass", false, false)
+
+/// Return the MachineInstr* if it is the single def of the Reg in the current BB.
+static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// Maxlen specifies the maximum length of the copy chain the function
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg,
+ int Maxlen) {
+ Register TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
+
+/// Return true if there are no intervening uses between the last instruction
+/// in the MBB that defines the specified register and the two-address
+/// instruction which is being processed. It also returns the last def location
+/// by reference.
+bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
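+  // The only disqualifying case is a use strictly between the last def and
+  // this instruction (LastDef < LastUse < Dist).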
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+/// Return true if the specified MI is a copy instruction or an extract_subreg
+/// instruction. It also returns the source and destination registers and
+/// whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ Register &SrcReg, Register &DstReg, bool &IsSrcPhys,
+ bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else {
+ return false;
+ }
+
+ IsSrcPhys = SrcReg.isPhysical();
+ IsDstPhys = DstReg.isPhysical();
+ return true;
+}
+
+/// Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(const MachineInstr *MI, Register Reg,
+ LiveIntervals *LIS) {
+ if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(*MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
+
+/// Test if the register used by the given operand is killed by the operand's
+/// instruction.
+static bool isPlainlyKilled(const MachineOperand &MO, LiveIntervals *LIS) {
+ return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg(), LIS);
+}
+
+/// Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy killed %reg1025
+/// %reg1036 = add killed %reg1034, killed %reg1035
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+/// If allowFalsePositives is true then likely kills are treated as kills even
+/// if it can't be proven that they are kills.
+static bool isKilled(MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ LiveIntervals *LIS, bool allowFalsePositives) {
+ MachineInstr *DefMI = &MI;
+ while (true) {
+ // All uses of physical registers are likely to be kills.
+ if (Reg.isPhysical() && (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
+ return false;
+ if (Reg.isPhysical())
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (std::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = Begin->getParent();
+ bool IsSrcPhys, IsDstPhys;
+ Register SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// Return true if the specified MI uses the specified register as a two-address
+/// use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
+ for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Given a register, if all its uses are in the same basic block, return the
+/// last use instruction if it's a copy or a two-address use.
+static MachineInstr *
+findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ bool &IsCopy, Register &DstReg, bool &IsDstPhys,
+ LiveIntervals *LIS) {
+ MachineOperand *UseOp = nullptr;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ return nullptr;
+ if (isPlainlyKilled(MI, Reg, LIS))
+ UseOp = &MO;
+ }
+ if (!UseOp)
+ return nullptr;
+ MachineInstr &UseMI = *UseOp->getParent();
+
+ Register SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = DstReg.isPhysical();
+ return &UseMI;
+ }
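+  // Even when Reg is not the tied use itself, commuting UseMI may turn it
+  // into one; if so, report the two-address destination of the commuted form.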
+ if (UseMI.isCommutable()) {
+ unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ unsigned Src2 = UseOp->getOperandNo();
+ if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
+ MachineOperand &MO = UseMI.getOperand(Src1);
+ if (MO.isReg() && MO.isUse() &&
+ isTwoAddrUse(UseMI, MO.getReg(), DstReg)) {
+ IsDstPhys = DstReg.isPhysical();
+ return &UseMI;
+ }
+ }
+ }
+ return nullptr;
+}
+
+/// Return the physical register the specified virtual register might be mapped
+/// to.
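+/// Follows chains of virtual-to-virtual entries in RegMap until either a
+/// physical register is reached or the chain ends, in which case 0 is
+/// returned.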
+static MCRegister getMappedReg(Register Reg,
+ DenseMap<Register, Register> &RegMap) {
+ while (Reg.isVirtual()) {
+ DenseMap<Register, Register>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (Reg.isPhysical())
+ return Reg;
+ return 0;
+}
+
+/// Return true if the two registers are equal or aliased.
+static bool regsAreCompatible(Register RegA, Register RegB,
+ const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+/// From RegMap remove entries mapped to a physical register which overlaps MO.
+static void removeMapRegEntry(const MachineOperand &MO,
+ DenseMap<Register, Register> &RegMap,
+ const TargetRegisterInfo *TRI) {
+ assert(
+ (MO.isReg() || MO.isRegMask()) &&
+ "removeMapRegEntry must be called with a register or regmask operand.");
+
+ SmallVector<Register, 2> Srcs;
+ for (auto SI : RegMap) {
+ Register ToReg = SI.second;
+ if (ToReg.isVirtual())
+ continue;
+
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (TRI->regsOverlap(ToReg, Reg))
+ Srcs.push_back(SI.first);
+ } else if (MO.clobbersPhysReg(ToReg))
+ Srcs.push_back(SI.first);
+ }
+
+ for (auto SrcReg : Srcs)
+ RegMap.erase(SrcReg);
+}
+
+/// If a physical register is clobbered, old entries mapped to it should be
+/// deleted. For example
+///
+/// %2:gr64 = COPY killed $rdx
+/// MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx
+///
+/// After the MUL instruction, $rdx contains different value than in the COPY
+/// instruction. So %2 should not map to $rdx after MUL.
+void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ // If a virtual register is copied to its mapped physical register, it
+ // doesn't change the potential coalescing between them, so we don't remove
+ // entries mapped to the physical register. For example
+ //
+ // %100 = COPY $r8
+ // ...
+ // $r8 = COPY %100
+ //
+ // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't
+ // destroy the content of $r8, and should not impact SrcRegMap.
+ Register Dst = MI->getOperand(0).getReg();
+ if (!Dst || Dst.isVirtual())
+ return;
+
+ Register Src = MI->getOperand(1).getReg();
+ if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI))
+ return;
+ }
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isRegMask()) {
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg || Reg.isVirtual())
+ continue;
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ }
+}
+
+// Returns true if Reg is equal or aliased to at least one register in Set.
+static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned R : Set)
+ if (TRI->regsOverlap(R, Reg))
+ return true;
+
+ return false;
+}
+
+/// Return true if it's potentially profitable to commute the two-address
+/// instruction that's being processed.
+bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
+ Register RegB,
+ Register RegC,
+ MachineInstr *MI,
+ unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
+ // %reg1029 = COPY %reg1028
+ // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
+ // insert => %reg1030 = COPY %reg1028
+ // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
+ // In this case, it might not be possible to coalesce the second COPY
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
+ // %reg1029 = COPY %reg1028
+ // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
+ // insert => %reg1030 = COPY %reg1029
+ // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
+
+ if (!isPlainlyKilled(MI, RegC, LIS))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024 = MOV r1
+ // %reg1025 = MOV r0
+ // %reg1026 = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
+ if (ToRegA) {
+ MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
+ MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
+ bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
+
+ // Compute if any of the following are true:
+ // -RegB is not tied to a register and RegC is compatible with RegA.
+ // -RegB is tied to the wrong physical register, but RegC is.
+ // -RegB is tied to the wrong physical register, and RegC isn't tied.
+ if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC)))
+ return true;
+ // Don't compute if any of the following are true:
+ // -RegC is not tied to a register and RegB is compatible with RegA.
+ // -RegC is tied to the wrong physical register, but RegB is.
+ // -RegC is tied to the wrong physical register, and RegB isn't tied.
+ if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB)))
+ return false;
+ }
+
+ // If there is a use of RegC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!noUseAfterLastDef(RegC, Dist, LastDefC))
+ return false;
+
+ // If there is a use of RegB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!noUseAfterLastDef(RegB, Dist, LastDefB))
+ return true;
+
+ // Look for situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+  // FIXME:
+  // We can extend the logic further: if a pair of operands in an instruction
+  // has been merged, the instruction could be regarded as a virtual copy, and
+  // that virtual copy could also be used to construct a copy chain.
+  // To minimize register copies more generally, the logic of the two-address
+  // instruction pass should ideally be integrated with the register
+  // allocation pass, where the interference graph is available.
+ if (isRevCopyChain(RegC, RegA, MaxDataFlowEdge))
+ return true;
+
+ if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
+ return false;
+
+ // Look for other target specific commute preference.
+ bool Commute;
+ if (TII->hasCommutePreference(*MI, Commute))
+ return Commute;
+
+ // Since there are no intervening uses for both registers, then commute
+ // if the def of RegC is closer. Its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// Commute a two-address instruction and update the basic block, distance map,
+/// and live variables if needed. Return true if it is successful.
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+ unsigned DstIdx,
+ unsigned RegBIdx,
+ unsigned RegCIdx,
+ unsigned Dist) {
+ Register RegC = MI->getOperand(RegCIdx).getReg();
+ LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);
+
+ if (NewMI == nullptr) {
+ LLVM_DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
+
+ // Update source register map.
+ MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ Register RegA = MI->getOperand(DstIdx).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// Return true if it is profitable to convert the given 2-address instruction
+/// to a 3-address one.
+bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
+ Register RegB) {
+ // Look for situations like this:
+ // %reg1024 = MOV r1
+ // %reg1025 = MOV r0
+ // %reg1026 = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
+ MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// Convert the specified two-address instruction into a three address one.
+/// Return true if this transformation was successful.
+bool TwoAddressInstructionPass::convertInstTo3Addr(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register RegA, Register RegB, unsigned &Dist) {
+ MachineInstrSpan MIS(mi, MBB);
+ MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV, LIS);
+ if (!NewMI)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+
+ // If the old instruction is debug value tracked, an update is required.
+ if (auto OldInstrNum = mi->peekDebugInstrNum()) {
+ assert(mi->getNumExplicitDefs() == 1);
+ assert(NewMI->getNumExplicitDefs() == 1);
+
+ // Find the old and new def location.
+ unsigned OldIdx = mi->defs().begin()->getOperandNo();
+ unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
+
+ // Record that one def has been replaced by the other.
+ unsigned NewInstrNum = NewMI->getDebugInstrNum();
+ MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
+ std::make_pair(NewInstrNum, NewIdx));
+ }
+
+ MBB->erase(mi); // Nuke the old inst.
+
+ for (MachineInstr &MI : MIS)
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ Dist--;
+ mi = NewMI;
+ nmi = std::next(mi);
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
+}
+
+/// Scan forward recursively through sole uses; update the maps if a use is a
+/// copy or a two-address instruction.
+void TwoAddressInstructionPass::scanUses(Register DstReg) {
+ SmallVector<Register, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ Register NewReg;
+ Register Reg = DstReg;
+  while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII, IsCopy,
+ NewReg, IsDstPhys, LIS)) {
+ if (IsCopy && !Processed.insert(UseMI).second)
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ SrcRegMap[NewReg] = Reg;
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.pop_back_val();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+        assert(DstRegMap[FromReg] == ToReg && "Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
+/// If the specified instruction is not yet processed, process it if it's a
+/// copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ Register SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys) {
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ } else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ scanUses(DstReg);
+ }
+
+ Processed.insert(MI);
+}
+
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the instruction below the kill instruction in order to
+/// eliminate the need for the copy.
+bool TwoAddressInstructionPass::rescheduleMIBelowKill(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = nullptr;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+    // Don't move past calls, etc.
+ return false;
+
+ Register DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, *MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallVector<Register, 2> Uses;
+ SmallVector<Register, 2> Kills;
+ SmallVector<Register, 2> Defs;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(MOReg);
+ else {
+ Uses.push_back(MOReg);
+ if (MOReg != Reg && isPlainlyKilled(MO, LIS))
+ Kills.push_back(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = std::next(Begin);
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End != MBB->end()) {
+ End = skipDebugInstructionsForward(End, MBB->end());
+ if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI))
+ Defs.push_back(End->getOperand(0).getReg());
+ else
+ break;
+ ++End;
+ }
+
+ // Check if the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineInstr &OtherMI : make_range(End, KillPos)) {
+ // Debug or pseudo instructions cannot be counted against the limit.
+ if (OtherMI.isDebugOrPseudoInstr())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
+ OtherMI.isBranch() || OtherMI.isTerminator())
+      // Don't move past calls, etc.
+ return false;
+ for (const MachineOperand &MO : OtherMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (regOverlapsSet(Uses, MOReg, TRI))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI))
+ // May clobber a physical register def.
+ // FIXME: This may be too conservative. It's ok if the instruction
+        // is sunk completely below the use.
+ return false;
+ } else {
+ if (regOverlapsSet(Defs, MOReg, TRI))
+ return false;
+ bool isKill = isPlainlyKilled(MO, LIS);
+ if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
+ regOverlapsSet(Kills, MOReg, TRI)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (MOReg == Reg && !isKill)
+ // We can't schedule across a use of the register in question.
+ return false;
+        // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || &OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (Begin != MBB->begin() && std::prev(Begin)->isDebugInstr())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies (and any interleaved debug instructions)
+ // first so that the MBB is still well-formed when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ auto CopyMI = MBBI++;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ if (!CopyMI->isDebugOrPseudoInstr())
+ LIS->handleMove(*CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = std::next(MachineBasicBlock::iterator(MI));
+ }
+
+ // Copies following MI may have been moved as well.
+ MBB->splice(InsertPos, MBB, Begin, End);
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(*MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, *KillMI);
+ LV->addVirtualRegisterKilled(Reg, *MI);
+ }
+
+ LLVM_DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
+ return true;
+}
+
+/// Return true if the re-scheduling will put the given instruction too close
+/// to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist,
+ MachineInstr *MI) {
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike())
+ continue;
+ if (&DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(&DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the kill instruction above the current two-address
+/// instruction in order to eliminate the need for the copy.
+bool TwoAddressInstructionPass::rescheduleKillAboveMI(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = nullptr;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ Register DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(AA, SeenStore))
+ return false;
+
+ SmallVector<Register, 2> Uses;
+ SmallVector<Register, 2> Kills;
+ SmallVector<Register, 2> Defs;
+ SmallVector<Register, 2> LiveDefs;
+ for (const MachineOperand &MO : KillMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ Register MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI))
+ return false;
+ bool isKill = isPlainlyKilled(MO, LIS);
+ if (MOReg == Reg && !isKill)
+ return false;
+ Uses.push_back(MOReg);
+ if (isKill && MOReg != Reg)
+ Kills.push_back(MOReg);
+ } else if (MOReg.isPhysical()) {
+ Defs.push_back(MOReg);
+ if (!MO.isDead())
+ LiveDefs.push_back(MOReg);
+ }
+ }
+
+  // Check if the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ for (MachineInstr &OtherMI :
+ make_range(mi, MachineBasicBlock::iterator(KillMI))) {
+ // Debug or pseudo instructions cannot be counted against the limit.
+ if (OtherMI.isDebugOrPseudoInstr())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
+ OtherMI.isBranch() || OtherMI.isTerminator())
+      // Don't move past calls, etc.
+ return false;
+ SmallVector<Register, 2> OtherDefs;
+ for (const MachineOperand &MO : OtherMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (regOverlapsSet(Defs, MOReg, TRI))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (regOverlapsSet(Kills, MOReg, TRI))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (&OtherMI != MI && MOReg == Reg && !isPlainlyKilled(MO, LIS))
+ // We can't schedule across a use of the register in question.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ Register MOReg = OtherDefs[i];
+ if (regOverlapsSet(Uses, MOReg, TRI))
+ return false;
+ if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg, TRI))
+ return false;
+ // Physical register def is seen.
+ llvm::erase_value(Defs, MOReg);
+ }
+ }
+
+ // Move the old kill above MI, don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugInstr())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = std::next(From);
+ while (std::prev(From)->isDebugInstr())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = std::prev(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(*KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, *KillMI);
+ LV->addVirtualRegisterKilled(Reg, *MI);
+ }
+
+ LLVM_DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
+ return true;
+}
+
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the
+/// given machine instruction to improve opportunities for coalescing and
+/// elimination of a register to register copy.
+///
+/// 'DstOpIdx' specifies the index of MI def operand.
+/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx'
+/// operand is killed by the given instruction.
+/// The 'Dist' argument provides the distance of MI from the start of the
+/// current basic block and it is used to determine if it is profitable
+/// to commute operands in the instruction.
+///
+/// Returns true if the transformation happened. Otherwise, returns false.
+bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist) {
+ if (!MI->isCommutable())
+ return false;
+
+ bool MadeChange = false;
+ Register DstOpReg = MI->getOperand(DstOpIdx).getReg();
+ Register BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
+ unsigned OpsNum = MI->getDesc().getNumOperands();
+ unsigned OtherOpIdx = MI->getDesc().getNumDefs();
+ for (; OtherOpIdx < OpsNum; OtherOpIdx++) {
+ // The call of findCommutedOpIndices below only checks if BaseOpIdx
+ // and OtherOpIdx are commutable, it does not really search for
+ // other commutable operands and does not change the values of passed
+ // variables.
+ if (OtherOpIdx == BaseOpIdx || !MI->getOperand(OtherOpIdx).isReg() ||
+ !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
+ continue;
+
+ Register OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
+ bool AggressiveCommute = false;
+
+ // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
+ // operands. This makes the live ranges of DstOp and OtherOp joinable.
+ bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+ bool DoCommute = !BaseOpKilled && OtherOpKilled;
+
+ if (!DoCommute &&
+ isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) {
+ DoCommute = true;
+ AggressiveCommute = true;
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx,
+ Dist)) {
+ MadeChange = true;
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+
+ // There might be more than two commutable operands, update BaseOp and
+ // continue scanning.
+ // FIXME: This assumes that the new instruction's operands are in the
+ // same positions and were simply swapped.
+ BaseOpReg = OtherOpReg;
+ BaseOpKilled = OtherOpKilled;
+ // Resamples OpsNum in case the number of operands was reduced. This
+ // happens with X86.
+ OpsNum = MI->getDesc().getNumOperands();
+ }
+ }
+ return MadeChange;
+}
+
+/// For the case where an instruction has a single pair of tied register
+/// operands, attempt some transformations that may either eliminate the tied
+/// operands or improve the opportunities for coalescing away the register copy.
+/// Returns true if no copy needs to be inserted to untie mi's operands
+/// (either because they were untied, or because mi was rescheduled, and will
+/// be visited again later). If the shouldOnlyCommute flag is true, only
+/// instruction commutation is attempted.
+bool TwoAddressInstructionPass::
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned &Dist, bool shouldOnlyCommute) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ Register regA = MI.getOperand(DstIdx).getReg();
+ Register regB = MI.getOperand(SrcIdx).getReg();
+
+ assert(regB.isVirtual() && "cannot make instruction into two-address form");
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+
+ if (regA.isVirtual())
+ scanUses(regA);
+
+ bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
+
+  // If the instruction is convertible to 3-address form, don't return yet;
+  // instead, try the 3-address transformation aggressively and check this
+  // variable later, because the result might be better.
+ // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
+ // instead of the following code.
+ // addl %esi, %edi
+ // movl %edi, %eax
+ // ret
+ if (Commuted && !MI.isConvertibleTo3Addr())
+ return false;
+
+ if (shouldOnlyCommute)
+ return false;
+
+ // If there is one more use of regB later in the same MBB, consider
+  // re-scheduling this MI below it.
+ if (!Commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
+ // If we commuted, regB may have changed so we should re-sample it to avoid
+ // confusing the three address conversion below.
+ if (Commuted) {
+ regB = MI.getOperand(SrcIdx).getReg();
+ regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+ }
+
+ if (MI.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
+ // Try to convert it.
+ if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+
+  // Return if it was commuted but the 3-address conversion failed.
+ if (Commuted)
+ return false;
+
+ // If there is one more use of regB later in the same MBB, consider
+  // re-scheduling it before this MI if it's legal.
+ if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+ if (MI.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
+ // Unfold the load.
+ LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
+ Register Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs)) {
+ LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ MBB->insert(mi, NewMIs[0]);
+ MBB->insert(mi, NewMIs[1]);
+ DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+ DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
+
+ LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), MI)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, *NewMIs[1]);
+ }
+
+ SmallVector<Register, 4> OrigRegs;
+ if (LIS) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg())
+ OrigRegs.push_back(MO.getReg());
+ }
+
+ LIS->RemoveMachineInstrFromMaps(MI);
+ }
+
+ MI.eraseFromParent();
+ DistanceMap.erase(&MI);
+
+ // Update LiveIntervals.
+ if (LIS) {
+ MachineBasicBlock::iterator Begin(NewMIs[0]);
+ MachineBasicBlock::iterator End(NewMIs[1]);
+ LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
+ }
+
+ mi = NewMIs[1];
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ DistanceMap.erase(NewMIs[0]);
+ DistanceMap.erase(NewMIs[1]);
+ Dist--;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands were found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ bool AnyOps = false;
+ unsigned NumOps = MI->getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ Register SrcReg = SrcMO.getReg();
+ Register DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with undef uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef() && !DstMO.getSubReg()) {
+ // Constrain the DstReg register class if required.
+ if (DstReg.isVirtual()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ MRI->constrainRegClass(DstReg, RC);
+ }
+ SrcMO.setReg(DstReg);
+ SrcMO.setSubReg(0);
+ LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = llvm::any_of(TiedPairs, [MI](auto const &TP) {
+ return MI->getOperand(TP.second).isEarlyClobber();
+ });
+
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ SlotIndex LastCopyIdx;
+ Register RegB = 0;
+ unsigned SubRegB = 0;
+ for (auto &TP : TiedPairs) {
+ unsigned SrcIdx = TP.first;
+ unsigned DstIdx = TP.second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ Register RegA = DstMO.getReg();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+ SubRegB = MI->getOperand(SrcIdx).getSubReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(RegB.isVirtual() && "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA);
+ // If this operand is folding a truncation, the truncation now moves to the
+ // copy so that the register classes remain valid for the operands.
+ MIB.addReg(RegB, 0, SubRegB);
+ const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+ if (SubRegB) {
+ if (RegA.isVirtual()) {
+ assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
+ SubRegB) &&
+ "tied subregister must be a truncation");
+ // The superreg class will not be used to constrain the subreg class.
+ RC = nullptr;
+ } else {
+ assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+ && "tied subregister must be a truncation");
+ }
+ }
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(&*PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ if (LIS) {
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
+
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
+ if (RegA.isVirtual()) {
+ LiveInterval &LI = LIS->getInterval(RegA);
+ VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ for (auto &S : LI.subranges()) {
+ VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ } else {
+ for (MCRegUnit Unit : TRI->regunits(RegA)) {
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) {
+ VNInfo *VNI =
+ LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (RegA.isVirtual() && RegB.isVirtual())
+ MRI->constrainRegClass(RegA, RC);
+ MO.setReg(RegA);
+ // The getMatchingSuper asserts guarantee that the register class projected
+ // by SubRegB is compatible with RegA with no subregister. So regardless of
+ // whether the destination operand writes a subreg, the source operand
+ // should not.
+ MO.setSubReg(0);
+ }
+
+ if (AllUsesCopied) {
+ LaneBitmask RemainingUses = LaneBitmask::getNone();
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg() == RegB) {
+ if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ RemainingUses |= TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && RemainingUses.none() && LV &&
+ LV->getVarInfo(RegB).removeKill(*MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, *PrevMI);
+ }
+
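+ // Record RegB as the source of LastCopiedReg so later copy-related
+ // heuristics can see the relationship between the two registers.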
+ if (RemovedKillFlag && RemainingUses.none())
+ SrcRegMap[LastCopiedReg] = RegB;
+
+ // Update LiveIntervals.
+ if (LIS) {
+ SlotIndex UseIdx = LIS->getInstructionIndex(*MI);
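+ // Shrink trims RegB's (sub)range so it ends at the inserted COPY when this
+ // instruction no longer reads the corresponding lanes of RegB.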
+ auto Shrink = [=](LiveRange &LR, LaneBitmask LaneMask) {
+ LiveRange::Segment *S = LR.getSegmentContaining(LastCopyIdx);
+ if (!S)
+ return true;
+ if ((LaneMask & RemainingUses).any())
+ return false;
+ if (S->end.getBaseIndex() != UseIdx)
+ return false;
+ S->end = LastCopyIdx;
+ return true;
+ };
+
+ LiveInterval &LI = LIS->getInterval(RegB);
+ bool ShrinkLI = true;
+ for (auto &S : LI.subranges())
+ ShrinkLI &= Shrink(S, S.LaneMask);
+ if (ShrinkLI)
+ Shrink(LI, LaneBitmask::getAll());
+ }
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg() == RegB) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
+// For every tied operand pair, this function transforms a statepoint from
+// RegA = STATEPOINT ... RegB(tied-def N)
+// to
+// RegB = STATEPOINT ... RegB(tied-def N)
+// and replaces all uses of RegA with RegB.
+// No extra COPY instruction is necessary because the tied use is killed at
+// the STATEPOINT.
+bool TwoAddressInstructionPass::processStatepoint(
+ MachineInstr *MI, TiedOperandMap &TiedOperands) {
+
+ bool NeedCopy = false;
+ for (auto &TO : TiedOperands) {
+ Register RegB = TO.first;
+ if (TO.second.size() != 1) {
+ NeedCopy = true;
+ continue;
+ }
+
+ unsigned SrcIdx = TO.second[0].first;
+ unsigned DstIdx = TO.second[0].second;
+
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ Register RegA = DstMO.getReg();
+
+ assert(RegB == MI->getOperand(SrcIdx).getReg());
+
+ if (RegA == RegB)
+ continue;
+
+ // CodeGenPrepare can sink a pointer compare past a statepoint, which
+ // breaks the assumption that a statepoint kills its tied-use register
+ // when in SSA form (see the note in IR/SafepointIRVerifier.cpp). Fall
+ // back to generic tied register handling to avoid assertion failures.
+ // TODO: Recompute LIS/LV information for new range here.
+ if (LIS) {
+ const auto &UseLI = LIS->getInterval(RegB);
+ const auto &DefLI = LIS->getInterval(RegA);
+ if (DefLI.overlaps(UseLI)) {
+ LLVM_DEBUG(dbgs() << "LIS: " << printReg(RegB, TRI, 0)
+ << " UseLI overlaps with DefLI\n");
+ NeedCopy = true;
+ continue;
+ }
+ } else if (LV && LV->getVarInfo(RegB).findKill(MI->getParent()) != MI) {
+ // Note that MachineOperand::isKill does not work here, because it is
+ // set only on the first register use in an instruction, and for a
+ // statepoint the tied-use register will usually be found in a preceding
+ // deopt bundle.
+ LLVM_DEBUG(dbgs() << "LV: " << printReg(RegB, TRI, 0)
+ << " not killed by statepoint\n");
+ NeedCopy = true;
+ continue;
+ }
+
+ if (!MRI->constrainRegClass(RegB, MRI->getRegClass(RegA))) {
+ LLVM_DEBUG(dbgs() << "MRI: couldn't constrain" << printReg(RegB, TRI, 0)
+ << " to register class of " << printReg(RegA, TRI, 0)
+ << '\n');
+ NeedCopy = true;
+ continue;
+ }
+ MRI->replaceRegWith(RegA, RegB);
+
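+ // Fold RegA's live interval into RegB's: copy its value numbers and
+ // segments into RegB's interval, then drop the interval for RegA.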
+ if (LIS) {
+ VNInfo::Allocator &A = LIS->getVNInfoAllocator();
+ LiveInterval &LI = LIS->getInterval(RegB);
+ LiveInterval &Other = LIS->getInterval(RegA);
+ SmallVector<VNInfo *> NewVNIs;
+ for (const VNInfo *VNI : Other.valnos) {
+ assert(VNI->id == NewVNIs.size() && "assumed");
+ NewVNIs.push_back(LI.createValueCopy(VNI, A));
+ }
+ for (auto &S : Other) {
+ VNInfo *VNI = NewVNIs[S.valno->id];
+ LiveRange::Segment NewSeg(S.start, S.end, VNI);
+ LI.addSegment(NewSeg);
+ }
+ LIS->removeInterval(RegA);
+ }
+
+ if (LV) {
+ if (MI->getOperand(SrcIdx).isKill())
+ LV->removeVirtualRegisterKilled(RegB, *MI);
+ LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB);
+ LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA);
+ SrcInfo.AliveBlocks |= DstInfo.AliveBlocks;
+ DstInfo.AliveBlocks.clear();
+ for (auto *KillMI : DstInfo.Kills)
+ LV->addVirtualRegisterKilled(RegB, *KillMI, false);
+ }
+ }
+ return !NeedCopy;
+}
+
+/// Reduce two-address instructions to two operands.
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ InstrItins = MF->getSubtarget().getInstrItineraryData();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ if (auto *AAPass = getAnalysisIfAvailable<AAResultsWrapperPass>())
+ AA = &AAPass->getAAResults();
+ else
+ AA = nullptr;
+ OptLevel = TM.getOptLevel();
+ // Disable optimizations if requested. We cannot skip the whole pass as some
+ // fixups are necessary for correctness.
+ if (skipFunction(Func.getFunction()))
+ OptLevel = CodeGenOpt::None;
+
+ bool MadeChange = false;
+
+ LLVM_DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << MF->getName() << '\n');
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ // This pass will rewrite the tied-def to meet the RegConstraint.
+ MF->getProperties()
+ .set(MachineFunctionProperties::Property::TiedOpsRewritten);
+
+ TiedOperandMap TiedOperands;
+ for (MachineBasicBlock &MBBI : *MF) {
+ MBB = &MBBI;
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = std::next(mi);
+ // Skip debug instructions.
+ if (mi->isDebugInstr()) {
+ mi = nmi;
+ continue;
+ }
+
+ // Expand REG_SEQUENCE instructions. This will position mi at the first
+ // expanded instruction.
+ if (mi->isRegSequence())
+ eliminateRegSequence(mi);
+
+ DistanceMap.insert(std::make_pair(&*mi, ++Dist));
+
+ processCopy(&*mi);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ if (!collectTiedOperands(&*mi, TiedOperands)) {
+ removeClobberedSrcRegMap(&*mi);
+ mi = nmi;
+ continue;
+ }
+
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ LLVM_DEBUG(dbgs() << '\t' << *mi);
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVectorImpl<std::pair<unsigned, unsigned>> &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+ Register SrcReg = mi->getOperand(SrcIdx).getReg();
+ Register DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
+ // The tied operands have been eliminated or shifted further down
+ // the block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
+ mi = nmi;
+ continue;
+ }
+ }
+ }
+
+ if (mi->getOpcode() == TargetOpcode::STATEPOINT &&
+ processStatepoint(&*mi, TiedOperands)) {
+ TiedOperands.clear();
+ LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ mi = nmi;
+ continue;
+ }
+
+ // Now iterate over the information collected above.
+ for (auto &TO : TiedOperands) {
+ processTiedPairs(&*mi, TO.second, Dist);
+ LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->removeOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+ mi->removeOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+
+ // Update LiveIntervals.
+ if (LIS) {
+ Register Reg = mi->getOperand(0).getReg();
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ // The COPY no longer defines subregs of %reg except for
+ // %reg.subidx.
+ LaneBitmask LaneMask =
+ TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
+ SlotIndex Idx = LIS->getInstructionIndex(*mi);
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none()) {
+ LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
+ LiveRange::iterator DefSeg = std::next(UseSeg);
+ S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ }
+ }
+
+ // The COPY no longer has a use of %reg.
+ LIS->shrinkToUses(&LI);
+ } else {
+ // The live interval for Reg did not have subranges but now it needs
+ // them because we have introduced a subreg def. Recompute it.
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
+ mi = nmi;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+/// undef %dst:ssub0 = COPY %v1
+/// %dst:ssub1 = COPY %v2
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+
+ SmallVector<Register, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI.getOperand(0).getReg());
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI.getOperand(i).getReg());
+ }
+
+ bool DefEmitted = false;
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI.getOperand(i);
+ Register SrcReg = UseMO.getReg();
+ unsigned SubIdx = MI.getOperand(i+1).getImm();
+ // Nothing needs to be inserted for undef operands.
+ if (UseMO.isUndef())
+ continue;
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ bool isKill = UseMO.isKill();
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI.getOperand(j).getReg() == SrcReg) {
+ MI.getOperand(j).setIsKill();
+ UseMO.setIsKill(false);
+ isKill = false;
+ break;
+ }
+
+ // Insert the sub-register copy.
+ MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .add(UseMO);
+
+ // The first def needs an undef flag because there is no live register
+ // before it.
+ if (!DefEmitted) {
+ CopyMI->getOperand(0).setIsUndef(true);
+ // Return an iterator pointing to the first inserted instr.
+ MBBI = CopyMI;
+ }
+ DefEmitted = true;
+
+ // Update LiveVariables' kill info.
+ if (LV && isKill && !SrcReg.isPhysical())
+ LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
+
+ LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
+
+ MachineBasicBlock::iterator EndMBBI =
+ std::next(MachineBasicBlock::iterator(MI));
+
+ if (!DefEmitted) {
+ LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
+ MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI.removeOperand(j);
+ } else {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+
+ LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
+ MI.eraseFromParent();
+ }
+
+ // Update LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
new file mode 100644
index 000000000000..426292345a14
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -0,0 +1,1047 @@
+//===----- TypePromotion.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is an opcode based type promotion pass for small types that would
+/// otherwise be promoted during legalisation. This works around the limitations
+/// of selection dag for cyclic regions. The search begins from the operands
+/// of icmp instructions, from which a tree consisting of non-wrapping or
+/// safely wrapping instructions is built, checked and promoted if possible.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TypePromotion.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "type-promotion"
+#define PASS_NAME "Type Promotion"
+
+using namespace llvm;
+
+static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable type promotion pass"));
+
+// The goal of this pass is to enable more efficient code generation for
+// operations on narrow types (i.e. types narrower than 32 bits), and this is
+// a motivating IR code example:
+//
+// define hidden i32 @cmp(i8 zeroext) {
+// %2 = add i8 %0, -49
+// %3 = icmp ult i8 %2, 3
+// ..
+// }
+//
+// The issue here is that i8 is type-legalized to i32 because i8 is not a
+// legal type. Thus, the arithmetic is done in 32-bit precision, but then the
+// byte value is masked out as follows:
+//
+// t19: i32 = add t4, Constant:i32<-49>
+// t24: i32 = and t19, Constant:i32<255>
+//
+// Consequently, we generate code like this:
+//
+// subs r0, #49
+// uxtb r1, r0
+// cmp r1, #3
+//
+// This shows that masking out the byte value results in generation of
+// the UXTB instruction. This is not optimal as r0 already contains the byte
+// value we need, and so instead we can just generate:
+//
+// sub.w r1, r0, #49
+// cmp r1, #3
+//
+// We achieve this by type promoting the IR to i32 like so for this example:
+//
+// define i32 @cmp(i8 zeroext %c) {
+// %0 = zext i8 %c to i32
+// %c.off = add i32 %0, -49
+// %1 = icmp ult i32 %c.off, 3
+// ..
+// }
+//
+// For this to be valid and legal, we need to prove that the i32 add is
+// producing the same value as the i8 addition, and that e.g. no overflow
+// happens.
+//
+// A brief sketch of the algorithm and some terminology.
+// We pattern match interesting IR patterns:
+// - which have "sources": instructions producing narrow values (i8, i16), and
+// - which have "sinks": instructions consuming these narrow values.
+//
+// We collect all instructions connecting sources and sinks in a worklist, so
+// that we can mutate these instructions and perform type promotion when it is
+// legal to do so.
+
+namespace {
+class IRPromoter {
+ LLVMContext &Ctx;
+ unsigned PromotedWidth = 0;
+ SetVector<Value *> &Visited;
+ SetVector<Value *> &Sources;
+ SetVector<Instruction *> &Sinks;
+ SmallPtrSetImpl<Instruction *> &SafeWrap;
+ SmallPtrSetImpl<Instruction *> &InstsToRemove;
+ IntegerType *ExtTy = nullptr;
+ SmallPtrSet<Value *, 8> NewInsts;
+ DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
+ SmallPtrSet<Value *, 8> Promoted;
+
+ void ReplaceAllUsersOfWith(Value *From, Value *To);
+ void ExtendSources();
+ void ConvertTruncs();
+ void PromoteTree();
+ void TruncateSinks();
+ void Cleanup();
+
+public:
+ IRPromoter(LLVMContext &C, unsigned Width, SetVector<Value *> &visited,
+ SetVector<Value *> &sources, SetVector<Instruction *> &sinks,
+ SmallPtrSetImpl<Instruction *> &wrap,
+ SmallPtrSetImpl<Instruction *> &instsToRemove)
+ : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources),
+ Sinks(sinks), SafeWrap(wrap), InstsToRemove(instsToRemove) {
+ ExtTy = IntegerType::get(Ctx, PromotedWidth);
+ }
+
+ void Mutate();
+};
+
+class TypePromotionImpl {
+ unsigned TypeSize = 0;
+ LLVMContext *Ctx = nullptr;
+ unsigned RegisterBitWidth = 0;
+ SmallPtrSet<Value *, 16> AllVisited;
+ SmallPtrSet<Instruction *, 8> SafeToPromote;
+ SmallPtrSet<Instruction *, 4> SafeWrap;
+ SmallPtrSet<Instruction *, 4> InstsToRemove;
+
+ // Does V have the same size result type as TypeSize.
+ bool EqualTypeSize(Value *V);
+ // Does V have the same size, or narrower, result type as TypeSize.
+ bool LessOrEqualTypeSize(Value *V);
+ // Does V have a result type that is wider than TypeSize.
+ bool GreaterThanTypeSize(Value *V);
+ // Does V have a result type that is narrower than TypeSize.
+ bool LessThanTypeSize(Value *V);
+ // Should V be a leaf in the promote tree?
+ bool isSource(Value *V);
+ // Should V be a root in the promotion tree?
+ bool isSink(Value *V);
+ // Should we change the result type of V? It will result in the users of V
+ // being visited.
+ bool shouldPromote(Value *V);
+ // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
+ // result won't affect the computation?
+ bool isSafeWrap(Instruction *I);
+ // Can V have its integer type promoted, or can the type be ignored.
+ bool isSupportedType(Value *V);
+ // Is V an instruction with a supported opcode or another value that we can
+ // handle, such as constants and basic blocks.
+ bool isSupportedValue(Value *V);
+ // Is V an instruction whose result can be trivially promoted, or one that
+ // has safe wrapping.
+ bool isLegalToPromote(Value *V);
+ bool TryToPromote(Value *V, unsigned PromotedWidth, const LoopInfo &LI);
+
+public:
+ bool run(Function &F, const TargetMachine *TM,
+ const TargetTransformInfo &TTI, const LoopInfo &LI);
+};
+
+class TypePromotionLegacy : public FunctionPass {
+public:
+ static char ID;
+
+ TypePromotionLegacy() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ }
+
+ StringRef getPassName() const override { return PASS_NAME; }
+
+ bool runOnFunction(Function &F) override;
+};
+
+} // namespace
+
+static bool GenerateSignBits(Instruction *I) {
+ unsigned Opc = I->getOpcode();
+ return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
+ Opc == Instruction::SRem || Opc == Instruction::SExt;
+}
+
+bool TypePromotionImpl::EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == TypeSize;
+}
+
+bool TypePromotionImpl::LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= TypeSize;
+}
+
+bool TypePromotionImpl::GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > TypeSize;
+}
+
+bool TypePromotionImpl::LessThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() < TypeSize;
+}
+
+/// Return true if the given value is a source in the use-def chain, producing
+/// a narrow 'TypeSize' value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+bool TypePromotionImpl::isSource(Value *V) {
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+
+ // TODO Allow zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ else if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return EqualTypeSize(Trunc);
+ return false;
+}
+
+/// Return true if V will require any promoted values to be truncated for
+/// the IR to remain valid. We can't mutate the value type of these
+/// instructions.
+bool TypePromotionImpl::isSink(Value *V) {
+ // TODO The truncate also isn't actually necessary because we would have
+ // already proved that the data value is kept within the range of the
+ // original data type. We currently remove any truncs inserted for handling
+ // zext sinks.
+
+ // Sinks are:
+ // - points where the value in the register is being observed, such as an
+ // icmp, switch or store.
+ // - points where value types have to match, such as calls and returns.
+ // - zext are included to ease the transformation and are generally removed
+ // later on.
+ if (auto *Store = dyn_cast<StoreInst>(V))
+ return LessOrEqualTypeSize(Store->getValueOperand());
+ if (auto *Return = dyn_cast<ReturnInst>(V))
+ return LessOrEqualTypeSize(Return->getReturnValue());
+ if (auto *ZExt = dyn_cast<ZExtInst>(V))
+ return GreaterThanTypeSize(ZExt);
+ if (auto *Switch = dyn_cast<SwitchInst>(V))
+ return LessThanTypeSize(Switch->getCondition());
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
+
+ return isa<CallInst>(V);
+}
+
+/// Return whether this instruction can safely wrap.
+bool TypePromotionImpl::isSafeWrap(Instruction *I) {
+ // We can support a potentially wrapping instruction (I) if:
+ // - It is only used by an unsigned icmp.
+ // - The icmp uses a constant.
+ // - The wrapping value (I) is decreasing, i.e. would underflow, wrapping
+ // around zero to become a larger number than before.
+ // - The wrapping instruction (I) also uses a constant.
+ //
+ // We can then use the two constants to calculate whether the result would
+ // wrap with respect to itself in the original bitwidth. If it doesn't wrap,
+ // but just underflows the range, the icmp would give the same result whether
+ // the result has been truncated or not. We calculate this by:
+ // - Zero extending both constants, if needed, to RegisterBitWidth.
+ // - Take the absolute value of I's constant, adding this to the icmp const.
+ // - Check that this value is not out of range for the small type. If it is,
+ // it means that it has underflowed enough to wrap around the icmp constant.
+ //
+ // For example:
+ //
+ // %sub = sub i8 %a, 2
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -2 == FE == 254
+ // But if this is evaluated as an i32
+ // %sub = -2 == FF FF FF FE == 4294967294
+ // So the unsigned compares (i8 and i32) would not yield the same result.
+ //
+ // Another way to look at it is:
+ // %a - 2 <= 254
+ // %a + 2 <= 254 + 2
+ // %a <= 256
+ // And we can't represent 256 in the i8 format, so we don't support it.
+ //
+ // Whereas:
+ //
+ // %sub = sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -1 == FF == 255
+ // As i32:
+ // %sub = -1 == FF FF FF FF == 4294967295
+ //
+ // In this case, the unsigned compare results would be the same and this
+ // would also be true for ult, uge and ugt:
+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ //
+ // To demonstrate why we can't handle increasing values:
+ //
+ // %add = add i8 %a, 2
+ // %cmp = icmp ult i8 %add, 127
+ //
+ // If %a = 254, %add = 256 == (i8 1)
+ // As i32:
+ // %add = 256
+ //
+ // (1 < 127) != (256 < 127)
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ return false;
+
+ if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
+ !isa<ConstantInt>(I->getOperand(1)))
+ return false;
+
+ // Don't support an icmp that deals with sign bits.
+ auto *CI = cast<ICmpInst>(*I->user_begin());
+ if (CI->isSigned() || CI->isEquality())
+ return false;
+
+ ConstantInt *ICmpConstant = nullptr;
+ if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ ICmpConstant = Const;
+ else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ ICmpConstant = Const;
+ else
+ return false;
+
+ const APInt &ICmpConst = ICmpConstant->getValue();
+ APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if (Opc == Instruction::Sub)
+ OverflowConst = -OverflowConst;
+ if (!OverflowConst.isNonPositive())
+ return false;
+
+ // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
+ // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
+ // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
+ if (OverflowConst.sgt(ICmpConst)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << "\n");
+ SafeWrap.insert(I);
+ return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << " and " << *CI << "\n");
+ SafeWrap.insert(I);
+ SafeWrap.insert(CI);
+ return true;
+ }
+ return false;
+}
+
+bool TypePromotionImpl::shouldPromote(Value *V) {
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
+ return false;
+
+ if (isSource(V))
+ return true;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ if (isa<ICmpInst>(I))
+ return false;
+
+ return true;
+}
+
+/// Return whether we can safely mutate I's type to ExtTy without having to be
+/// concerned with zero extending or truncation.
+static bool isPromotedResultSafe(Instruction *I) {
+ if (GenerateSignBits(I))
+ return false;
+
+ if (!isa<OverflowingBinaryOperator>(I))
+ return true;
+
+ return I->hasNoUnsignedWrap();
+}
+
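+// Replace all uses of From with To, skipping any user that is structurally
+// identical to To. If every use was replaced, queue From for removal.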
+void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
+ SmallVector<Instruction *, 4> Users;
+ Instruction *InstTo = dyn_cast<Instruction>(To);
+ bool ReplacedAll = true;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
+ << "\n");
+
+ for (Use &U : From->uses()) {
+ auto *User = cast<Instruction>(U.getUser());
+ if (InstTo && User->isIdenticalTo(InstTo)) {
+ ReplacedAll = false;
+ continue;
+ }
+ Users.push_back(User);
+ }
+
+ for (auto *U : Users)
+ U->replaceUsesOfWith(From, To);
+
+ if (ReplacedAll)
+ if (auto *I = dyn_cast<Instruction>(From))
+ InstsToRemove.insert(I);
+}
+
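+// Insert a zext to ExtTy for each source and rewrite the source's users to
+// consume the extended value instead.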
+void IRPromoter::ExtendSources() {
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
+ assert(V->getType() != ExtTy && "zext already extends to i32");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
+ Builder.SetInsertPoint(InsertPt);
+ if (auto *I = dyn_cast<Instruction>(V))
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+
+ Value *ZExt = Builder.CreateZExt(V, ExtTy);
+ if (auto *I = dyn_cast<Instruction>(ZExt)) {
+ if (isa<Argument>(V))
+ I->moveBefore(InsertPt);
+ else
+ I->moveAfter(InsertPt);
+ NewInsts.insert(I);
+ }
+
+ ReplaceAllUsersOfWith(V, ZExt);
+ };
+
+ // Now, insert extending instructions between the sources and their users.
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
+ for (auto *V : Sources) {
+ LLVM_DEBUG(dbgs() << " - " << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V))
+ InsertZExt(I, I);
+ else if (auto *Arg = dyn_cast<Argument>(V)) {
+ BasicBlock &BB = Arg->getParent()->front();
+ InsertZExt(Arg, &*BB.getFirstInsertionPt());
+ } else {
+ llvm_unreachable("unhandled source that needs extending");
+ }
+ Promoted.insert(V);
+ }
+}
+
+void IRPromoter::PromoteTree() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
+
+ // Mutate the types of the instructions within the tree. Here we handle
+ // constant operands.
+ for (auto *V : Visited) {
+ if (Sources.count(V))
+ continue;
+
+ auto *I = cast<Instruction>(V);
+ if (Sinks.count(I))
+ continue;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ Value *Op = I->getOperand(i);
+ if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
+ continue;
+
+ if (auto *Const = dyn_cast<ConstantInt>(Op)) {
+ // For subtract, we don't need to sext the constant. We only put it in
+ // SafeWrap because SafeWrap.size() is used elsewhere.
+ // For cmp, we need to sign extend a constant appearing in either
+ // operand. For add, we should only sign extend the RHS.
+ Constant *NewConst = (SafeWrap.contains(I) &&
+ (I->getOpcode() == Instruction::ICmp || i == 1) &&
+ I->getOpcode() != Instruction::Sub)
+ ? ConstantExpr::getSExt(Const, ExtTy)
+ : ConstantExpr::getZExt(Const, ExtTy);
+ I->setOperand(i, NewConst);
+ } else if (isa<UndefValue>(Op))
+ I->setOperand(i, ConstantInt::get(ExtTy, 0));
+ }
+
+ // Mutate the result type, unless this is an icmp or switch.
+ if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
+ I->mutateType(ExtTy);
+ Promoted.insert(I);
+ }
+ }
+}
+
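+// Insert truncs so that sinks (calls, stores, returns, switches, ...) still
+// receive operands of their original, narrower types.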
+void IRPromoter::TruncateSinks() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
+
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
+ if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
+ return nullptr;
+
+ if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
+ << *V << "\n");
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
+ if (Trunc)
+ NewInsts.insert(Trunc);
+ return Trunc;
+ };
+
+ // Fix up any stores or returns that use the results of the promoted
+ // chain.
+ for (auto *I : Sinks) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
+
+ // Handle calls separately as we need to iterate over arg operands.
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->arg_size(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ Type *Ty = TruncTysMap[Call][i];
+ if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
+ Trunc->moveBefore(Call);
+ Call->setArgOperand(i, Trunc);
+ }
+ }
+ continue;
+ }
+
+ // Special case switches because we need to truncate the condition.
+ if (auto *Switch = dyn_cast<SwitchInst>(I)) {
+ Type *Ty = TruncTysMap[Switch][0];
+ if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
+ Trunc->moveBefore(Switch);
+ Switch->setCondition(Trunc);
+ }
+ continue;
+ }
+
+ // Don't insert a trunc for a zext that can still legally be promoted, nor
+ // when the input value to that trunc has the same width as the zext we are
+ // inserting it for. When this happens, the input operand for the zext will
+ // be promoted to the same width as the zext's return type, rendering that
+ // zext unnecessary. This zext gets removed before the end of the pass.
+ if (auto ZExt = dyn_cast<ZExtInst>(I))
+ if (ZExt->getType()->getScalarSizeInBits() >= PromotedWidth)
+ continue;
+
+ // Now handle the others.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Type *Ty = TruncTysMap[I][i];
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
+ Trunc->moveBefore(I);
+ I->setOperand(i, Trunc);
+ }
+ }
+ }
+}
+
+void IRPromoter::Cleanup() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
+ // Some zexts will now have become redundant, along with their trunc
+ // operands, so remove them.
+ for (auto *V : Visited) {
+ if (!isa<ZExtInst>(V))
+ continue;
+
+ auto ZExt = cast<ZExtInst>(V);
+ if (ZExt->getDestTy() != ExtTy)
+ continue;
+
+ Value *Src = ZExt->getOperand(0);
+ if (ZExt->getSrcTy() == ZExt->getDestTy()) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
+ << "\n");
+ ReplaceAllUsersOfWith(ZExt, Src);
+ continue;
+ }
+
+ // We've inserted a trunc for a zext sink, but we already know that the
+ // input is in range, negating the need for the trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
+ auto *Trunc = cast<TruncInst>(Src);
+ assert(Trunc->getOperand(0)->getType() == ExtTy &&
+ "expected inserted trunc to be operating on i32");
+ ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
+ }
+ }
+
+ for (auto *I : InstsToRemove) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
+ I->dropAllReferences();
+ }
+}
+
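+// Replace non-source truncs with an AND mask that keeps only the destination
+// type's bits, e.g. (illustrative) 'trunc i32 %x to i8' becomes
+// 'and i32 %x, 255', plus a trunc to ExtTy when the source type is wider.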
+void IRPromoter::ConvertTruncs() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
+ IRBuilder<> Builder{Ctx};
+
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
+ continue;
+
+ auto *Trunc = cast<TruncInst>(V);
+ Builder.SetInsertPoint(Trunc);
+ IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
+ IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
+
+ unsigned NumBits = DestTy->getScalarSizeInBits();
+ ConstantInt *Mask =
+ ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
+ Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+ if (SrcTy != ExtTy)
+ Masked = Builder.CreateTrunc(Masked, ExtTy);
+
+ if (auto *I = dyn_cast<Instruction>(Masked))
+ NewInsts.insert(I);
+
+ ReplaceAllUsersOfWith(Trunc, Masked);
+ }
+}
+
+void IRPromoter::Mutate() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
+ << PromotedWidth << "-bits\n");
+
+ // Cache original types of the values that will likely need truncating
+ for (auto *I : Sinks) {
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (Value *Arg : Call->args())
+ TruncTysMap[Call].push_back(Arg->getType());
+ } else if (auto *Switch = dyn_cast<SwitchInst>(I))
+ TruncTysMap[I].push_back(Switch->getCondition()->getType());
+ else {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ TruncTysMap[I].push_back(I->getOperand(i)->getType());
+ }
+ }
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
+ continue;
+ auto *Trunc = cast<TruncInst>(V);
+ TruncTysMap[Trunc].push_back(Trunc->getDestTy());
+ }
+
+ // Insert zext instructions between sources and their users.
+ ExtendSources();
+
+ // Promote visited instructions, mutating their types in place.
+ PromoteTree();
+
+ // Convert any truncs, that aren't sources, into AND masks.
+ ConvertTruncs();
+
+ // Insert trunc instructions for use by calls, stores etc...
+ TruncateSinks();
+
+ // Finally, remove unnecessary zexts and truncs, delete old instructions and
+ // clear the data structures.
+ Cleanup();
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
+}
+
+/// We disallow booleans to make life easier when dealing with icmps but allow
+/// any other integer that fits in a scalar register. Void types are accepted
+/// so we can handle switches.
+bool TypePromotionImpl::isSupportedType(Value *V) {
+ Type *Ty = V->getType();
+
+ // Allow voids and pointers, these won't be promoted.
+ if (Ty->isVoidTy() || Ty->isPointerTy())
+ return true;
+
+ if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
+ cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
+ return false;
+
+ return LessOrEqualTypeSize(V);
+}
+
+/// We accept most instructions, as well as Arguments and ConstantInts. We
+/// disallow casts other than zext and truncs and only allow calls if their
+/// return value is zeroext. We don't allow opcodes that can introduce sign
+/// bits.
+bool TypePromotionImpl::isSupportedValue(Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ switch (I->getOpcode()) {
+ default:
+ return isa<BinaryOperator>(I) && isSupportedType(I) &&
+ !GenerateSignBits(I);
+ case Instruction::GetElementPtr:
+ case Instruction::Store:
+ case Instruction::Br:
+ case Instruction::Switch:
+ return true;
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Trunc:
+ return isSupportedType(I);
+ case Instruction::BitCast:
+ return I->getOperand(0)->getType() == I->getType();
+ case Instruction::ZExt:
+ return isSupportedType(I->getOperand(0));
+ case Instruction::ICmp:
+ // Now that we allow smaller types than TypeSize, only allow icmps of
+ // TypeSize because smaller ones would require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ case Instruction::Call: {
+ // Special cases for calls as we need to check for zeroext
+ // TODO We should accept calls even if they don't have zeroext, as they
+ // can still be sinks.
+ auto *Call = cast<CallInst>(I);
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ }
+ }
+ } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
+ return isSupportedType(V);
+ } else if (isa<Argument>(V))
+ return isSupportedType(V);
+
+ return isa<BasicBlock>(V);
+}
+
+/// Check that the type of V would be promoted and that the original type is
+/// smaller than the targeted promoted type. Check that we're not trying to
+/// promote something larger than our base 'TypeSize' type.
+bool TypePromotionImpl::isLegalToPromote(Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ if (SafeToPromote.count(I))
+ return true;
+
+ if (isPromotedResultSafe(I) || isSafeWrap(I)) {
+ SafeToPromote.insert(I);
+ return true;
+ }
+ return false;
+}
+
+bool TypePromotionImpl::TryToPromote(Value *V, unsigned PromotedWidth,
+ const LoopInfo &LI) {
+ Type *OrigTy = V->getType();
+ TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedValue();
+ SafeToPromote.clear();
+ SafeWrap.clear();
+
+ if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
+ << TypeSize << " bits to " << PromotedWidth << "\n");
+
+ SetVector<Value *> WorkList;
+ SetVector<Value *> Sources;
+ SetVector<Instruction *> Sinks;
+ SetVector<Value *> CurrentVisited;
+ WorkList.insert(V);
+
+ // Return true if V was added to the worklist as a supported instruction,
+ // if it was already visited, or if we don't need to explore it (e.g.
+ // pointer values and GEPs), and false otherwise.
+ auto AddLegalInst = [&](Value *V) {
+ if (CurrentVisited.count(V))
+ return true;
+
+ // Ignore GEPs because they don't need promoting and the constant indices
+ // will prevent the transformation.
+ if (isa<GetElementPtrInst>(V))
+ return true;
+
+ if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
+ return false;
+ }
+
+ WorkList.insert(V);
+ return true;
+ };
+
+ // Iterate through, and add to, a tree of operands and users in the use-def.
+ while (!WorkList.empty()) {
+ Value *V = WorkList.pop_back_val();
+ if (CurrentVisited.count(V))
+ continue;
+
+ // Ignore non-instructions, other than arguments.
+ if (!isa<Instruction>(V) && !isSource(V))
+ continue;
+
+ // If we've already visited this value from somewhere, bail now because
+ // the tree has already been explored.
+ // TODO: This could limit the transform, i.e. if we try to promote something
+ // from an i8 and fail first, before trying an i16.
+ if (AllVisited.count(V))
+ return false;
+
+ CurrentVisited.insert(V);
+ AllVisited.insert(V);
+
+ // Calls can be both sources and sinks.
+ if (isSink(V))
+ Sinks.insert(cast<Instruction>(V));
+
+ if (isSource(V))
+ Sources.insert(V);
+
+ if (!isSink(V) && !isSource(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Visit operands of any instruction visited.
+ for (auto &U : I->operands()) {
+ if (!AddLegalInst(U))
+ return false;
+ }
+ }
+ }
+
+ // Don't visit users of a node which isn't going to be mutated unless it's
+ // a source.
+ if (isSource(V) || shouldPromote(V)) {
+ for (Use &U : V->uses()) {
+ if (!AddLegalInst(U.getUser()))
+ return false;
+ }
+ }
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "IR Promotion: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ });
+
+ unsigned ToPromote = 0;
+ unsigned NonFreeArgs = 0;
+ unsigned NonLoopSources = 0, LoopSinks = 0;
+ SmallPtrSet<BasicBlock *, 4> Blocks;
+ for (auto *CV : CurrentVisited) {
+ if (auto *I = dyn_cast<Instruction>(CV))
+ Blocks.insert(I->getParent());
+
+ if (Sources.count(CV)) {
+ if (auto *Arg = dyn_cast<Argument>(CV))
+ if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
+ ++NonFreeArgs;
+ if (!isa<Instruction>(CV) ||
+ !LI.getLoopFor(cast<Instruction>(CV)->getParent()))
+ ++NonLoopSources;
+ continue;
+ }
+
+ if (isa<PHINode>(CV))
+ continue;
+ if (LI.getLoopFor(cast<Instruction>(CV)->getParent()))
+ ++LoopSinks;
+ if (Sinks.count(cast<Instruction>(CV)))
+ continue;
+ ++ToPromote;
+ }
+
+ // DAG optimizations should be able to handle these cases better, especially
+ // for function arguments.
+ if (!isa<PHINode>(V) && !(LoopSinks && NonLoopSources) &&
+ (ToPromote < 2 || (Blocks.size() == 1 && NonFreeArgs > SafeWrap.size())))
+ return false;
+
+ IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
+ SafeWrap, InstsToRemove);
+ Promoter.Mutate();
+ return true;
+}
+
+bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
+ const TargetTransformInfo &TTI,
+ const LoopInfo &LI) {
+ if (DisablePromotion)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
+
+ AllVisited.clear();
+ SafeToPromote.clear();
+ SafeWrap.clear();
+ bool MadeChange = false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
+ const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
+ RegisterBitWidth =
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
+ Ctx = &F.getParent()->getContext();
+
+ // Return the preferred integer width of the instruction, or zero if we
+ // shouldn't try.
+ auto GetPromoteWidth = [&](Instruction *I) -> uint32_t {
+ if (!isa<IntegerType>(I->getType()))
+ return 0;
+
+ EVT SrcVT = TLI->getValueType(DL, I->getType());
+ if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
+ return 0;
+
+ if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger)
+ return 0;
+
+ EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
+ if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
+ << "for promoted type\n");
+ return 0;
+ }
+
+ // TODO: Should we prefer to use RegisterBitWidth instead?
+ return PromotedVT.getFixedSizeInBits();
+ };
+
+ auto BBIsInLoop = [&](BasicBlock *BB) -> bool {
+ for (auto *L : LI)
+ if (L->contains(BB))
+ return true;
+ return false;
+ };
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (AllVisited.count(&I))
+ continue;
+
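+ // Search roots: a zext of a loop PHI, or (below) an unsigned icmp whose
+ // integer operands may be promotable.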
+ if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) &&
+ isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: "
+ << *I.getOperand(0) << "\n");
+ EVT ZExtVT = TLI->getValueType(DL, I.getType());
+ Instruction *Phi = static_cast<Instruction *>(I.getOperand(0));
+ auto PromoteWidth = ZExtVT.getFixedSizeInBits();
+ if (RegisterBitWidth < PromoteWidth) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target "
+ << "register for ZExt type\n");
+ continue;
+ }
+ MadeChange |= TryToPromote(Phi, PromoteWidth, LI);
+ } else if (auto *ICmp = dyn_cast<ICmpInst>(&I)) {
+ // Search up from icmps to try to promote their operands.
+ // Skip signed or pointer compares
+ if (ICmp->isSigned())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
+
+ for (auto &Op : ICmp->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (auto PromotedWidth = GetPromoteWidth(OpI)) {
+ MadeChange |= TryToPromote(OpI, PromotedWidth, LI);
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (!InstsToRemove.empty()) {
+ for (auto *I : InstsToRemove)
+ I->eraseFromParent();
+ InstsToRemove.clear();
+ }
+ }
+
+ AllVisited.clear();
+ SafeToPromote.clear();
+ SafeWrap.clear();
+
+ return MadeChange;
+}
+
+INITIALIZE_PASS_BEGIN(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false)
+
+char TypePromotionLegacy::ID = 0;
+
+bool TypePromotionLegacy::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto *TM = &TPC->getTM<TargetMachine>();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ TypePromotionImpl TP;
+ return TP.run(F, TM, TTI, LI);
+}
+
+FunctionPass *llvm::createTypePromotionLegacyPass() {
+ return new TypePromotionLegacy();
+}
+
+PreservedAnalyses TypePromotionPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ TypePromotionImpl TP;
+
+ bool Changed = TP.run(F, TM, TTI, LI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 000000000000..f17450d264ba
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,196 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+namespace {
+class UnreachableBlockElimLegacyPass : public FunctionPass {
+ bool runOnFunction(Function &F) override {
+ return llvm::EliminateUnreachableBlocks(F);
+ }
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElimLegacyPass() : FunctionPass(ID) {
+ initializeUnreachableBlockElimLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+}
+char UnreachableBlockElimLegacyPass::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElimLegacyPass, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElimLegacyPass();
+}
+
+PreservedAnalyses UnreachableBlockElimPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = llvm::EliminateUnreachableBlocks(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ df_iterator_default_set<MachineBasicBlock*> Reachable;
+ bool ModifiedPHI = false;
+
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB; // Mark all reachable blocks.
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineBasicBlock &BB : F) {
+ // Test for deadness.
+ if (!Reachable.count(&BB)) {
+ DeadBlocks.push_back(&BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(&BB);
+ if (MDT && MDT->getNode(&BB)) MDT->eraseNode(&BB);
+
+ while (BB.succ_begin() != BB.succ_end()) {
+ MachineBasicBlock* succ = *BB.succ_begin();
+
+ for (MachineInstr &Phi : succ->phis()) {
+ for (unsigned i = Phi.getNumOperands() - 1; i >= 2; i -= 2) {
+ if (Phi.getOperand(i).isMBB() &&
+ Phi.getOperand(i).getMBB() == &BB) {
+ Phi.removeOperand(i);
+ Phi.removeOperand(i - 1);
+ }
+ }
+ }
+
+ BB.removeSuccessor(BB.succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (MachineBasicBlock *BB : DeadBlocks) {
+ // Remove any call site information for calls in the block.
+ for (auto &I : BB->instrs())
+ if (I.shouldUpdateCallSiteInfo())
+ BB->getParent()->eraseCallSiteInfo(&I);
+
+ BB->eraseFromParent();
+ }
+
+ // Cleanup PHI nodes.
+ for (MachineBasicBlock &BB : F) {
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB.pred_begin(),
+ BB.pred_end());
+ for (MachineInstr &Phi : make_early_inc_range(BB.phis())) {
+ for (unsigned i = Phi.getNumOperands() - 1; i >= 2; i -= 2) {
+ if (!preds.count(Phi.getOperand(i).getMBB())) {
+ Phi.removeOperand(i);
+ Phi.removeOperand(i - 1);
+ ModifiedPHI = true;
+ }
+ }
+
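+ // A PHI left with a single incoming value (operands: def, value, block)
+ // can be folded: reuse the input register directly, or fall back to a
+ // COPY when the input cannot simply replace the output.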
+ if (Phi.getNumOperands() == 3) {
+ const MachineOperand &Input = Phi.getOperand(1);
+ const MachineOperand &Output = Phi.getOperand(0);
+ Register InputReg = Input.getReg();
+ Register OutputReg = Output.getReg();
+ assert(Output.getSubReg() == 0 && "Cannot have output subregister");
+ ModifiedPHI = true;
+
+ if (InputReg != OutputReg) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ unsigned InputSub = Input.getSubReg();
+ if (InputSub == 0 &&
+ MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg)) &&
+ !Input.isUndef()) {
+ MRI.replaceRegWith(OutputReg, InputReg);
+ } else {
+ // The input register to the PHI has a subregister or it can't be
+ // constrained to the proper register class or it is undef:
+ // insert a COPY instead of simply replacing the output
+ // with the input.
+ const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
+ BuildMI(BB, BB.getFirstNonPHI(), Phi.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), OutputReg)
+ .addReg(InputReg, getRegState(Input), InputSub);
+ }
+ Phi.eraseFromParent();
+ }
+ }
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return (!DeadBlocks.empty() || ModifiedPHI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
new file mode 100644
index 000000000000..fc1cbfefb0db
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -0,0 +1,1007 @@
+//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include <limits>
+#include <memory>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
+ cl::init(true));
+
+static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
+ cl::Hidden, cl::init(1));
+
+// Check if the scheduler should penalize instructions that are available too
+// early due to a zero-latency dependence.
+static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
+ cl::init(true));
+
+// This value is used to determine if a register class is a high pressure set.
+// We compute the maximum number of registers needed and divide it by the total
+// available. Then, we compare the result to this value.
+static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
+ cl::init(0.75f),
+                                  cl::desc("High register pressure threshold."));
+
+VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
+ const TargetSchedModel *SM)
+ : TII(STI.getInstrInfo()), SchedModel(SM) {
+ ResourcesModel = createPacketizer(STI);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ Packet.reserve(SchedModel->getIssueWidth());
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+void VLIWResourceModel::reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
+
+/// Return true if there is a dependence between SUd and SUu.
+bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
+ if (SUd->Succs.size() == 0)
+ return false;
+
+ for (const auto &S : SUd->Succs) {
+    // Since we do not add pseudos to packets, we might as well
+    // ignore order dependencies.
+ if (S.isCtrl())
+ continue;
+
+ if (S.getSUnit() == SUu && S.getLatency() > 0)
+ return true;
+ }
+ return false;
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (stateful); it is more like
+/// another heuristic. Many corner cases are handled
+/// empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(*SU->getInstr()))
+ return false;
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+
+  // Now check that there are no dependencies on instructions already
+  // in the packet.
+ if (IsTop) {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(Packet[i], SU))
+ return false;
+ } else {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(SU, Packet[i]))
+ return false;
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
+ bool startNewCycle = false;
+ // Artificially reset state.
+ if (!SU) {
+ reset();
+ TotalPackets++;
+ return false;
+ }
+  // If this SU does not fit in the packet or the packet is now full,
+  // start a new one.
+ if (!isResourceAvailable(SU, IsTop) ||
+ Packet.size() >= SchedModel->getIssueWidth()) {
+ reset();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(*SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
+ LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ LLVM_DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ return startNewCycle;
+}
+
+DFAPacketizer *
+VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
+ return STI.getInstrInfo()->CreateTargetScheduleState(STI);
+}
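For reference, the packetizer used above is supplied by the target through TargetInstrInfo::CreateTargetScheduleState. The sketch below shows the usual shape of that hook on a VLIW target; the MyTarget names and the TableGen-generated createDFAPacketizer helper are hypothetical placeholders, not part of this change.

// Sketch only: MyTargetInstrInfo/MyTargetSubtarget and the TableGen-generated
// createDFAPacketizer helper stand in for a real VLIW target's classes.
DFAPacketizer *MyTargetInstrInfo::CreateTargetScheduleState(
    const TargetSubtargetInfo &STI) const {
  const InstrItineraryData *II = STI.getInstrItineraryData();
  return static_cast<const MyTargetSubtarget &>(STI).createDFAPacketizer(II);
}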
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
+ << printMBBReference(*BB) << " " << BB->getName()
+ << " in_func " << BB->getParent()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
+
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ // Postprocess the DAG to add platform-specific artificial dependencies.
+ postProcessDAG();
+
+ SmallVector<SUnit *, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
+ LLVM_DEBUG({
+ unsigned maxH = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getHeight() > maxH)
+ maxH = SU.getHeight();
+ dbgs() << "Max Height " << maxH << "\n";
+ });
+ LLVM_DEBUG({
+ unsigned maxD = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getDepth() > maxD)
+ maxD = SU.getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";
+ });
+ LLVM_DEBUG(dump());
+ if (ViewMISchedDAGs)
+ viewGraph();
+
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ LLVM_DEBUG(
+ dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU)
+ break;
+
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
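A target is expected to instantiate this scheduler from its createMachineScheduler hook and, optionally, register it by name. The sketch below is one plausible wiring, not part of this patch: the function and registry names are hypothetical, and the copy-constrain mutation is only an example of what postProcessDAG() would then run.

// Sketch only, assuming the usual MachineScheduler registry pattern.
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/VLIWMachineScheduler.h"
#include <memory>
using namespace llvm;

static ScheduleDAGInstrs *createMyVLIWMachineSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = new VLIWMachineScheduler(
      C, std::make_unique<ConvergingVLIWScheduler>());
  // Optional: extra DAG mutations are picked up by postProcessDAG().
  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static MachineSchedRegistry
    MyVLIWSchedRegistry("my-vliw", "Run the VLIW converging scheduler.",
                        createMyVLIWMachineSched);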
+
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = static_cast<VLIWMachineScheduler *>(dag);
+ SchedModel = DAG->getSchedModel();
+
+ Top.init(DAG, SchedModel);
+ Bot.init(DAG, SchedModel);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ delete Top.ResourceModel;
+ delete Bot.ResourceModel;
+ Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+
+ const std::vector<unsigned> &MaxPressure =
+ DAG->getRegPressure().MaxSetPressure;
+ HighPressureSets.assign(MaxPressure.size(), false);
+ for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
+ unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
+ HighPressureSets[i] =
+ ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
+ }
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
+ const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
+ return new VLIWResourceModel(STI, SchedModel);
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ for (const SDep &PI : SU->Preds) {
+ unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
+ unsigned MinLatency = PI.getLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
+ ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+}
+
+/// Does this SU have a hazard within the current instruction group?
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if (IssueCount + uops > SchedModel->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
+ SUnit *SU, unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+  if (ReadyCycle > CurrCycle || checkHazard(SU))
+    Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU, isTop());
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ } else
+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
+ << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin() + i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin() + i);
+ --i;
+ --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return nullptr.
+SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ auto AdvanceCycle = [this]() {
+ if (Available.empty())
+ return true;
+ if (Available.size() == 1 && Pending.size() > 0)
+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
+ getWeakLeft(*Available.begin(), isTop()) != 0;
+ return false;
+ };
+ for (unsigned i = 0; AdvanceCycle(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard");
+ (void)i;
+ ResourceModel->reserveResources(nullptr, isTop());
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q, SUnit *SU,
+ int Cost, PressureChange P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ dbgs() << "cost(" << Cost << ")\t";
+ DAG->dumpNode(*SU);
+}
+
+// Very detailed queue dump, to be used with higher verbosity levels.
+void ConvergingVLIWScheduler::readyQueueVerboseDump(
+ const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
+ ReadyQueue &Q) {
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ dbgs() << ">>> " << Q.getName() << "\n";
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ std::stringstream dbgstr;
+ dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
+ dbgs() << dbgstr.str();
+ SchedulingCost(Q, *I, Candidate, RPDelta, true);
+ dbgs() << "\t";
+ (*I)->getInstr()->dump();
+ }
+ dbgs() << "\n";
+}
+#endif
+
+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+ if (SU->NumPredsLeft == 0)
+ return false;
+
+ for (auto &Pred : SU->Preds) {
+ // We found an available, but not scheduled, predecessor.
+ if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+ if (SU->NumSuccsLeft == 0)
+ return false;
+
+ for (auto &Succ : SU->Succs) {
+ // We found an available, but not scheduled, successor.
+ if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+ return false;
+ }
+ return true;
+}
+
+/// Check if the instruction changes the register pressure of a register in a
+/// high pressure set. The function returns a negative value if the pressure
+/// decreases and a positive value if the pressure increases. If the instruction
+/// doesn't use a high pressure register or doesn't change the register
+/// pressure, then return 0.
+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
+ PressureDiff &PD = DAG->getPressureDiff(SU);
+ for (const auto &P : PD) {
+ if (!P.isValid())
+ continue;
+ // The pressure differences are computed bottom-up, so the comparison for
+ // an increase is positive in the bottom direction, but negative in the
+ // top-down direction.
+ if (HighPressureSets[P.getPSet()])
+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
+ }
+ return 0;
+}
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ LLVM_DEBUG(if (verbose) dbgs()
+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
+ // Forced priority is high.
+ if (SU->isScheduleHigh) {
+ ResCount += PriorityOne;
+ LLVM_DEBUG(dbgs() << "H|");
+ }
+
+ unsigned IsAvailableAmt = 0;
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ if (Top.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getHeight() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+    // If resources are available for it, increase the
+    // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU, true)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ } else {
+ if (Bot.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getDepth() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+    // If resources are available for it, increase the
+    // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+    // How many SUs does it block from scheduling?
+    // Look at all of the successors of this node and
+    // count the ones for which this node is
+    // their sole unscheduled predecessor.
+ if (Top.isLatencyBound(SU))
+ for (const SDep &SI : SU->Succs)
+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ } else {
+    // Count the predecessors for which this node is
+    // their sole unscheduled successor.
+ if (Bot.isLatencyBound(SU))
+ for (const SDep &PI : SU->Preds)
+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
+ dbgs() << dbgstr.str();
+ });
+
+ // Factor in reg pressure as a heuristic.
+ if (!IgnoreBBRegPressure) {
+ // Decrease priority by the amount that register pressure exceeds the limit.
+ ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
+ // Decrease priority if register pressure exceeds the limit.
+ ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
+ // Decrease priority slightly if register pressure would increase over the
+ // current maximum.
+ ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
+ // If there are register pressure issues, then we remove the value added for
+ // the instruction being available. The rationale is that we really don't
+ // want to schedule an instruction that causes a spill.
+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
+ (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
+ Delta.CurrentMax.getUnitInc()))
+ ResCount -= IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) {
+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
+ << Delta.CriticalMax.getUnitInc() << "/"
+ << Delta.CurrentMax.getUnitInc() << ")|";
+ });
+ }
+
+ // Give preference to a zero latency instruction if the dependent
+ // instruction is in the current packet.
+ if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
+ for (const SDep &PI : SU->Preds) {
+ if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
+ PI.getLatency() == 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
+ for (const SDep &SI : SU->Succs) {
+ if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
+ SI.getLatency() == 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ }
+
+ // If the instruction has a non-zero latency dependence with an instruction in
+  // the current packet, then it should not be scheduled yet. This case occurs
+ // when the dependent instruction is scheduled in a new packet, so the
+ // scheduler updates the current cycle and pending instructions become
+ // available.
+ if (CheckEarlyAvail) {
+ if (Q.getID() == TopQID) {
+ for (const auto &PI : SU->Preds) {
+ if (PI.getLatency() > 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ } else {
+ for (const auto &SI : SU->Succs) {
+ if (SI.getLatency() > 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "Total " << std::setw(4) << ResCount << ")";
+ dbgs() << dbgstr.str();
+ });
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult
+ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ ReadyQueue &Q = Zone.Available;
+ LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
+ readyQueueVerboseDump(RPTracker, Candidate, Q);
+ else Q.dump(););
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+  // Candidate.SU remains nullptr if no candidate beats the best existing one.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Choose node order for negative cost candidates. There is no good
+ // candidate in this case.
+ if (CurrentCost < 0 && Candidate.SCost < 0) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ }
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Choose an instruction that does not depend on an artificial edge.
+ unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
+ unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
+ if (CurrWeak != CandWeak) {
+ if (CurrWeak < CandWeak) {
+ LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = Weak;
+ }
+ continue;
+ }
+
+ if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
+ unsigned CurrSize, CandSize;
+ if (Q.getID() == TopQID) {
+ CurrSize = (*I)->Succs.size();
+ CandSize = Candidate.SU->Succs.size();
+ } else {
+ CurrSize = (*I)->Preds.size();
+ CandSize = Candidate.SU->Preds.size();
+ }
+ if (CurrSize > CandSize) {
+ LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ }
+      // Keep the old candidate if it is better; that is, don't fall through to
+      // the subsequent tie breaker.
+ if (CurrSize != CandSize)
+ continue;
+ }
+
+ // Tie breaker.
+ // To avoid scheduling indeterminism, we need a tie breaker
+ // for the case when cost is identical for two nodes.
+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Top\n");
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+    LLVM_DEBUG(dbgs() << "Preferred Bottom Node\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+    LLVM_DEBUG(dbgs() << "Preferred Top Node\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+    LLVM_DEBUG(dbgs() << "Preferred Bottom Node SingleMax\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+    LLVM_DEBUG(dbgs() << "Preferred Top Node SingleMax\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+    LLVM_DEBUG(dbgs() << "Preferred Top Node Cost\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+  LLVM_DEBUG(dbgs() << "Preferred Bottom in Node order\n");
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
+ << reportPackets() << ")\n";
+ DAG->dumpNode(*SU));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ Top.bumpNode(SU);
+ SU->TopReadyCycle = Top.CurrCycle;
+ } else {
+ Bot.bumpNode(SU);
+ SU->BotReadyCycle = Bot.CurrCycle;
+ }
+}
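Targets that need finer control are expected to subclass the strategy and the resource model rather than change this file. A hedged sketch of those extension points follows; the My* class names and the override bodies are hypothetical, and it assumes the hooks are virtual as declared in VLIWMachineScheduler.h.

// Sketch only: illustrates the intended extension points, not a real target.
#include "llvm/CodeGen/VLIWMachineScheduler.h"
using namespace llvm;

class MyVLIWResourceModel : public VLIWResourceModel {
public:
  using VLIWResourceModel::VLIWResourceModel;
  bool hasDependence(const SUnit *SUd, const SUnit *SUu) override {
    // A target could treat specific zero-latency pairs as packetizable here;
    // this placeholder just defers to the base implementation.
    return VLIWResourceModel::hasDependence(SUd, SUu);
  }
};

class MyConvergingVLIWScheduler : public ConvergingVLIWScheduler {
protected:
  VLIWResourceModel *
  createVLIWResourceModel(const TargetSubtargetInfo &STI,
                          const TargetSchedModel *SchedModel) const override {
    return new MyVLIWResourceModel(STI, SchedModel);
  }
};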
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
new file mode 100644
index 000000000000..d514e1642e29
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -0,0 +1,642 @@
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/WithColor.h"
+using namespace llvm;
+
+EVT EVT::changeExtendedTypeToInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ LLVMContext &Context = LLVMTy->getContext();
+ return getIntegerVT(Context, getSizeInBits());
+}
+
+EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ LLVMContext &Context = LLVMTy->getContext();
+ EVT IntTy = getIntegerVT(Context, getScalarSizeInBits());
+ return getVectorVT(Context, IntTy, getVectorElementCount());
+}
+
+EVT EVT::changeExtendedVectorElementType(EVT EltVT) const {
+ assert(isExtended() && "Type is not extended!");
+ LLVMContext &Context = LLVMTy->getContext();
+ return getVectorVT(Context, EltVT, getVectorElementCount());
+}
+
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements,
+ bool IsScalable) {
+ EVT ResultVT;
+ ResultVT.LLVMTy =
+ VectorType::get(VT.getTypeForEVT(Context), NumElements, IsScalable);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), EC);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool EVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVectorTy();
+}
+
+bool EVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVectorTy();
+}
+
+bool EVT::isExtendedScalarInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntegerTy();
+}
+
+bool EVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isVectorTy();
+}
+
+bool EVT::isExtended16BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 16;
+}
+
+bool EVT::isExtended32BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 32;
+}
+
+bool EVT::isExtended64BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 64;
+}
+
+bool EVT::isExtended128BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 128;
+}
+
+bool EVT::isExtended256BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 256;
+}
+
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 512;
+}
+
+bool EVT::isExtended1024BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 1024;
+}
+
+bool EVT::isExtended2048BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 2048;
+}
+
+bool EVT::isExtendedFixedLengthVector() const {
+ return isExtendedVector() && isa<FixedVectorType>(LLVMTy);
+}
+
+bool EVT::isExtendedScalableVector() const {
+ return isExtendedVector() && isa<ScalableVectorType>(LLVMTy);
+}
+
+EVT EVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned EVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount();
+ if (EC.isScalable()) {
+ WithColor::warning()
+ << "The code that requested the fixed number of elements has made the "
+ "assumption that this vector is not scalable. This assumption was "
+ "not correct, and this may lead to broken code\n";
+ }
+ return EC.getKnownMinValue();
+}
+
+ElementCount EVT::getExtendedVectorElementCount() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getElementCount();
+}
+
+TypeSize EVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return TypeSize::Fixed(ITy->getBitWidth());
+ if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getPrimitiveSizeInBits();
+ llvm_unreachable("Unrecognized extended type!");
+}
+
+/// getEVTString - This function returns the value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
+ default:
+ if (isVector())
+ return (isScalableVector() ? "nxv" : "v") +
+ utostr(getVectorElementCount().getKnownMinValue()) +
+ getVectorElementType().getEVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ if (isFloatingPoint())
+ return "f" + utostr(getSizeInBits());
+ llvm_unreachable("Invalid EVT!");
+ case MVT::bf16: return "bf16";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::x86amx: return "x86amx";
+ case MVT::i64x8: return "i64x8";
+ case MVT::Metadata: return "Metadata";
+ case MVT::Untyped: return "Untyped";
+ case MVT::funcref: return "funcref";
+ case MVT::externref: return "externref";
+ case MVT::aarch64svcount:
+ return "aarch64svcount";
+ case MVT::spirvbuiltin:
+ return "spirvbuiltin";
+ }
+}
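The returned strings follow the MVT naming scheme. As a small hedged illustration (the helper name is hypothetical and not part of this file), the snippet below prints both simple and extended type names:

// Sketch only: prints "i32", "v4f32" and "i37" (the last is an extended type).
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

static void printSomeEVTNames(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  outs() << EVT(MVT::i32).getEVTString() << '\n';
  outs() << EVT(MVT::v4f32).getEVTString() << '\n';
  outs() << EVT::getIntegerVT(Ctx, 37).getEVTString() << '\n';
}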
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void EVT::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ // clang-format off
+ switch (V.SimpleTy) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i2: return Type::getIntNTy(Context, 2);
+ case MVT::i4: return Type::getIntNTy(Context, 4);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f16: return Type::getHalfTy(Context);
+ case MVT::bf16: return Type::getBFloatTy(Context);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::aarch64svcount:
+ return TargetExtType::get(Context, "aarch64.svcount");
+ case MVT::x86amx: return Type::getX86_AMXTy(Context);
+ case MVT::i64x8: return IntegerType::get(Context, 512);
+ case MVT::externref: return Type::getWasm_ExternrefTy(Context);
+ case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
+ case MVT::v1i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 1);
+ case MVT::v2i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::v4i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::v8i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::v16i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::v32i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::v64i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v128i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 128);
+ case MVT::v256i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 256);
+ case MVT::v512i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 512);
+ case MVT::v1024i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
+ case MVT::v2048i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 2048);
+ case MVT::v128i2:
+ return FixedVectorType::get(Type::getIntNTy(Context, 2), 128);
+ case MVT::v256i2:
+ return FixedVectorType::get(Type::getIntNTy(Context, 2), 256);
+ case MVT::v64i4:
+ return FixedVectorType::get(Type::getIntNTy(Context, 4), 64);
+ case MVT::v128i4:
+ return FixedVectorType::get(Type::getIntNTy(Context, 4), 128);
+ case MVT::v1i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 1);
+ case MVT::v2i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v64i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::v128i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 128);
+ case MVT::v256i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 256);
+ case MVT::v512i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 512);
+ case MVT::v1024i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 1024);
+ case MVT::v1i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::v2i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v3i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 3);
+ case MVT::v4i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v32i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::v64i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 64);
+ case MVT::v128i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 128);
+ case MVT::v256i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 256);
+ case MVT::v512i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 512);
+ case MVT::v1i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::v2i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v3i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 3);
+ case MVT::v4i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v5i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 5);
+ case MVT::v6i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 6);
+ case MVT::v7i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 7);
+ case MVT::v8i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v9i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 9);
+ case MVT::v10i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 10);
+ case MVT::v11i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 11);
+ case MVT::v12i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 12);
+ case MVT::v16i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::v32i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 32);
+ case MVT::v64i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 64);
+ case MVT::v128i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 128);
+ case MVT::v256i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 256);
+ case MVT::v512i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 512);
+ case MVT::v1024i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 1024);
+ case MVT::v2048i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 2048);
+ case MVT::v1i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v3i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 3);
+ case MVT::v4i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v16i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v32i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::v64i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 64);
+ case MVT::v128i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 128);
+ case MVT::v256i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 256);
+ case MVT::v1i128:
+ return FixedVectorType::get(Type::getInt128Ty(Context), 1);
+ case MVT::v1f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 1);
+ case MVT::v2f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v3f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 3);
+ case MVT::v4f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 4);
+ case MVT::v8f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v16f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 16);
+ case MVT::v32f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 32);
+ case MVT::v64f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 64);
+ case MVT::v128f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 128);
+ case MVT::v256f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 256);
+ case MVT::v512f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 512);
+ case MVT::v2bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 2);
+ case MVT::v3bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 3);
+ case MVT::v4bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 4);
+ case MVT::v8bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::v16bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 16);
+ case MVT::v32bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 32);
+ case MVT::v64bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 64);
+ case MVT::v128bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 128);
+ case MVT::v1f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 1);
+ case MVT::v2f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v3f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 3);
+ case MVT::v4f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v5f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 5);
+ case MVT::v6f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 6);
+ case MVT::v7f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 7);
+ case MVT::v8f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v9f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 9);
+ case MVT::v10f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 10);
+ case MVT::v11f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 11);
+ case MVT::v12f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 12);
+ case MVT::v16f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v32f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 32);
+ case MVT::v64f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 64);
+ case MVT::v128f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 128);
+ case MVT::v256f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 256);
+ case MVT::v512f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 512);
+ case MVT::v1024f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 1024);
+ case MVT::v2048f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 2048);
+ case MVT::v1f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 1);
+ case MVT::v2f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v3f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 3);
+ case MVT::v4f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::v8f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::v16f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 16);
+ case MVT::v32f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 32);
+ case MVT::v64f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 64);
+ case MVT::v128f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 128);
+ case MVT::v256f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 256);
+ case MVT::nxv1i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 1);
+ case MVT::nxv2i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::nxv4i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::nxv8i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::nxv16i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::nxv32i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::nxv64i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::nxv1i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 1);
+ case MVT::nxv2i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::nxv4i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::nxv8i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::nxv16i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::nxv32i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::nxv64i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::nxv1i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::nxv2i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::nxv4i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::nxv8i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::nxv16i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::nxv32i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::nxv1i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::nxv2i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::nxv4i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::nxv8i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::nxv16i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::nxv32i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 32);
+ case MVT::nxv1i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::nxv2i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::nxv4i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::nxv8i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::nxv16i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::nxv32i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::nxv1f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 1);
+ case MVT::nxv2f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::nxv4f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 4);
+ case MVT::nxv8f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::nxv16f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 16);
+ case MVT::nxv32f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 32);
+ case MVT::nxv1bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 1);
+ case MVT::nxv2bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 2);
+ case MVT::nxv4bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 4);
+ case MVT::nxv8bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::nxv16bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 16);
+ case MVT::nxv32bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 32);
+ case MVT::nxv1f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 1);
+ case MVT::nxv2f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::nxv4f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::nxv8f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::nxv16f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::nxv1f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 1);
+ case MVT::nxv2f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::nxv4f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::nxv8f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
+ }
+ // clang-format on
+}
+
+/// Return the value type corresponding to the specified type. This returns all
+/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
+/// as Other, otherwise they are invalid.
+MVT MVT::getVT(Type *Ty, bool HandleUnknown) {
+ assert(Ty != nullptr && "Invalid type");
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
+ case Type::HalfTyID: return MVT(MVT::f16);
+ case Type::BFloatTyID: return MVT(MVT::bf16);
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::TargetExtTyID: {
+ TargetExtType *TargetExtTy = cast<TargetExtType>(Ty);
+ if (TargetExtTy->getName() == "aarch64.svcount")
+ return MVT(MVT::aarch64svcount);
+ else if (TargetExtTy->getName().starts_with("spirv."))
+ return MVT(MVT::spirvbuiltin);
+ if (HandleUnknown)
+ return MVT(MVT::Other);
+ llvm_unreachable("Unknown target ext type!");
+ }
+ case Type::X86_AMXTyID: return MVT(MVT::x86amx);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(
+ getVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+ VTy->getElementCount());
+ }
+ }
+}
+
+/// getEVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown) {
+ switch (Ty->getTypeID()) {
+ default:
+ return MVT::getVT(Ty, HandleUnknown);
+ case Type::IntegerTyID:
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(Ty->getContext(),
+ getEVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+ VTy->getElementCount());
+ }
+ }
+}
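As a quick check of how getEVT pairs with getTypeForEVT above, the sketch below (helper name hypothetical, not part of this file) round-trips a fixed vector type; since vector types are uniqued per LLVMContext, pointer equality holds on the way back.

// Sketch only: <4 x float> maps to MVT::v4f32 and back to the same IR type.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static bool roundTripsV4F32(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *Ty = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  EVT VT = EVT::getEVT(Ty, /*HandleUnknown=*/false);
  return VT.getSimpleVT() == MVT::v4f32 && VT.getTypeForEVT(Ctx) == Ty;
}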
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MVT::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
+void MVT::print(raw_ostream &OS) const {
+ OS << EVT(*this).getEVTString();
+}
+
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 000000000000..a816bd5b52de
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,647 @@
+//===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumSpillSlots, "Number of spill slots allocated");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getSubtarget().getInstrInfo();
+ TRI = mf.getSubtarget().getRegisterInfo();
+ MF = &mf;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2SplitMap.clear();
+ Virt2ShapeMap.clear();
+
+ grow();
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+}
+
+void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
+ assert(virtReg.isVirtual() && Register::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg.id()] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ assert(!getRegInfo().isReserved(physReg) &&
+ "Attempt to map virtReg to a reserved physReg");
+ Virt2PhysMap[virtReg.id()] = physReg;
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ unsigned Size = TRI->getSpillSize(*RC);
+ Align Alignment = TRI->getSpillAlign(*RC);
+ // Set preferred alignment if we are still able to realign the stack
+ auto &ST = MF->getSubtarget();
+ Align CurrentAlign = ST.getFrameLowering()->getStackAlign();
+ if (Alignment > CurrentAlign && !ST.getRegisterInfo()->canRealignStack(*MF)) {
+ Alignment = CurrentAlign;
+ }
+ int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Alignment);
+ ++NumSpillSlots;
+ return SS;
+}
+
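+// A worked example of the clamping above, with made-up numbers: if
+// getSpillAlign(*RC) == Align(32) but getStackAlign() == Align(16) and
+// canRealignStack(*MF) is false, the spill slot is created with Align(16);
+// otherwise the preferred Align(32) is kept.
+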
+bool VirtRegMap::hasPreferredPhys(Register VirtReg) const {
+ Register Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint.isValid())
+ return false;
+ if (Hint.isVirtual())
+ Hint = getPhys(Hint);
+ return Register(getPhys(VirtReg)) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(Register VirtReg) const {
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (Hint.second.isPhysical())
+ return true;
+ if (Hint.second.isVirtual())
+ return hasPhys(Hint.second);
+ return false;
+}
+
+int VirtRegMap::assignVirt2StackSlot(Register virtReg) {
+ assert(virtReg.isVirtual());
+ assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ return Virt2StackSlotMap[virtReg.id()] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) {
+ assert(virtReg.isVirtual());
+ assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo().getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg.id()] = SS;
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << printReg(Reg, TRI) << " -> "
+ << printReg(Virt2PhysMap[Reg], TRI) << "] "
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
+ }
+ }
+ OS << '\n';
+}
+
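+// For illustration, the output of print() looks roughly like the following
+// (register names and classes are hypothetical x86-64 examples):
+//
+//   ********** REGISTER MAP **********
+//   [%0 -> $rdi] GR64
+//   [%1 -> $eax] GR32
+//   [%2 -> fi#0] GR32
+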
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void VirtRegMap::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
+//
+namespace {
+
+class VirtRegRewriter : public MachineFunctionPass {
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ SlotIndexes *Indexes = nullptr;
+ LiveIntervals *LIS = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveDebugVariables *DebugVars = nullptr;
+ DenseSet<Register> RewriteRegs;
+ bool ClearVirtRegs;
+
+ void rewrite();
+ void addMBBLiveIns();
+ bool readsUndefSubreg(const MachineOperand &MO) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
+ void handleIdentityCopy(MachineInstr &MI);
+ void expandCopyBundle(MachineInstr &MI) const;
+ bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
+
+public:
+ static char ID;
+ VirtRegRewriter(bool ClearVirtRegs_ = true) :
+ MachineFunctionPass(ID),
+ ClearVirtRegs(ClearVirtRegs_) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ MachineFunctionProperties getSetProperties() const override {
+ if (ClearVirtRegs) {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ return MachineFunctionProperties();
+ }
+};
+
+} // end anonymous namespace
+
+char VirtRegRewriter::ID = 0;
+
+char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
+
+INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+
+void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+
+ if (!ClearVirtRegs)
+ AU.addPreserved<LiveDebugVariables>();
+
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ MRI = &MF->getRegInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+ DebugVars = getAnalysisIfAvailable<LiveDebugVariables>();
+ LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+ LLVM_DEBUG(VRM->dump());
+
+ // Add kill flags while we still have virtual registers.
+ LIS->addKillFlags(VRM);
+
+ // Live-in lists on basic blocks are required for physregs.
+ addMBBLiveIns();
+
+ // Rewrite virtual registers.
+ rewrite();
+
+ if (DebugVars && ClearVirtRegs) {
+ // Write out new DBG_VALUE instructions.
+
+ // We only do this if ClearVirtRegs is specified since this should be the
+ // final run of the pass and we don't want to emit them multiple times.
+ DebugVars->emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ }
+
+ return true;
+}
+
+void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
+ MCRegister PhysReg) const {
+ assert(!LI.empty());
+ assert(LI.hasSubRanges());
+
+ using SubRangeIteratorPair =
+ std::pair<const LiveInterval::SubRange *, LiveInterval::const_iterator>;
+
+ SmallVector<SubRangeIteratorPair, 4> SubRanges;
+ SlotIndex First;
+ SlotIndex Last;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRanges.push_back(std::make_pair(&SR, SR.begin()));
+ if (!First.isValid() || SR.segments.front().start < First)
+ First = SR.segments.front().start;
+ if (!Last.isValid() || SR.segments.back().end > Last)
+ Last = SR.segments.back().end;
+ }
+
+ // Check all mbb start positions between First and Last while
+ // simultaneously advancing an iterator for each subrange.
+ for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First);
+ MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) {
+ SlotIndex MBBBegin = MBBI->first;
+ // Advance all subrange iterators so that their end position is just
+ // behind MBBBegin (or the iterator is at the end).
+ LaneBitmask LaneMask;
+ for (auto &RangeIterPair : SubRanges) {
+ const LiveInterval::SubRange *SR = RangeIterPair.first;
+ LiveInterval::const_iterator &SRI = RangeIterPair.second;
+ while (SRI != SR->end() && SRI->end <= MBBBegin)
+ ++SRI;
+ if (SRI == SR->end())
+ continue;
+ if (SRI->start <= MBBBegin)
+ LaneMask |= SR->LaneMask;
+ }
+ if (LaneMask.none())
+ continue;
+ MachineBasicBlock *MBB = MBBI->second;
+ MBB->addLiveIn(PhysReg, LaneMask);
+ }
+}
+
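+// A sketch with hypothetical lane masks: if only the subrange covering the
+// low lanes (say LaneMask 0x3) is live across a block boundary while the high
+// lanes (LaneMask 0xC) are not, the loop above accumulates only 0x3 and the
+// block receives
+//   MBB->addLiveIn(PhysReg, LaneBitmask(0x3));
+// rather than a live-in covering all lanes of the register.
+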
+// Compute MBB live-in lists from virtual register live ranges and their
+// assignments.
+void VirtRegRewriter::addMBBLiveIns() {
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ Register VirtReg = Register::index2VirtReg(Idx);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (LI.empty() || LIS->intervalIsInOneMBB(LI))
+ continue;
+ // This is a virtual register that is live across basic blocks. Its
+ // assigned PhysReg must be marked as live-in to those blocks.
+ Register PhysReg = VRM->getPhys(VirtReg);
+ if (PhysReg == VirtRegMap::NO_PHYS_REG) {
+ // There may be no physical register assigned if only some register
+ // classes were already allocated.
+ assert(!ClearVirtRegs && "Unmapped virtual register");
+ continue;
+ }
+
+ if (LI.hasSubRanges()) {
+ addLiveInsForSubRanges(LI, PhysReg);
+ } else {
+ // Go over MBB begin positions and see if we have segments covering them.
+ // The following works because segments and the MBBIndex list are both
+ // sorted by slot indexes.
+ SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin();
+ for (const auto &Seg : LI) {
+ I = Indexes->advanceMBBIndex(I, Seg.start);
+ for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) {
+ MachineBasicBlock *MBB = I->second;
+ MBB->addLiveIn(PhysReg);
+ }
+ }
+ }
+ }
+
+ // Sort and unique MBB LiveIns as we've not checked if SubReg/PhysReg were in
+ // each MBB's LiveIns set before calling addLiveIn on them.
+ for (MachineBasicBlock &MBB : *MF)
+ MBB.sortUniqueLiveIns();
+}
+
+/// Returns true if the given machine operand \p MO only reads undefined lanes.
+/// The function only works for use operands with a subregister set.
+bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
+ // Shortcut if the operand is already marked undef.
+ if (MO.isUndef())
+ return true;
+
+ Register Reg = MO.getReg();
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex BaseIndex = LIS->getInstructionIndex(MI);
+ // This code is only meant to handle reading undefined subregisters which
+ // we couldn't properly detect before.
+ assert(LI.liveAt(BaseIndex) &&
+ "Reads of completely dead register should be marked undef already");
+ unsigned SubRegIdx = MO.getSubReg();
+ assert(SubRegIdx != 0 && LI.hasSubRanges());
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ // See if any of the relevant subregister liveranges is defined at this point.
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & UseMask).any() && SR.liveAt(BaseIndex))
+ return false;
+ }
+ return true;
+}
+
+void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
+ if (!MI.isIdentityCopy())
+ return;
+ LLVM_DEBUG(dbgs() << "Identity copy: " << MI);
+ ++NumIdCopies;
+
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // We may have deferred allocation of the virtual register, and the rewrite
+ // regs code doesn't handle the liveness update.
+ if (DstReg.isVirtual())
+ return;
+
+ RewriteRegs.insert(DstReg);
+
+ // Copies like:
+ // %r0 = COPY undef %r0
+ // %al = COPY %al, implicit-def %eax
+ // give us additional liveness information: The target (super-)register
+ // must not be valid before this point. Replace the COPY with a KILL
+ // instruction to maintain this information.
+ if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
+ MI.setDesc(TII->get(TargetOpcode::KILL));
+ LLVM_DEBUG(dbgs() << " replace by: " << MI);
+ return;
+ }
+
+ if (Indexes)
+ Indexes->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
+ LLVM_DEBUG(dbgs() << " deleted.\n");
+}
+
+/// The liverange splitting logic sometimes produces bundles of copies when
+/// subregisters are involved. Expand these into a sequence of copy instructions
+/// after processing the last in the bundle. Does not update LiveIntervals,
+/// which we shouldn't need for this instruction anymore.
+void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
+ if (!MI.isCopy() && !MI.isKill())
+ return;
+
+ if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+ SmallVector<MachineInstr *, 2> MIs({&MI});
+
+ // Only do this when the complete bundle is made out of COPYs and KILLs.
+ MachineBasicBlock &MBB = *MI.getParent();
+ for (MachineBasicBlock::reverse_instr_iterator I =
+ std::next(MI.getReverseIterator()), E = MBB.instr_rend();
+ I != E && I->isBundledWithSucc(); ++I) {
+ if (!I->isCopy() && !I->isKill())
+ return;
+ MIs.push_back(&*I);
+ }
+ MachineInstr *FirstMI = MIs.back();
+
+ auto anyRegsAlias = [](const MachineInstr *Dst,
+ ArrayRef<MachineInstr *> Srcs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineInstr *Src : Srcs)
+ if (Src != Dst)
+ if (TRI->regsOverlap(Dst->getOperand(0).getReg(),
+ Src->getOperand(1).getReg()))
+ return true;
+ return false;
+ };
+
+ // If any of the destination registers in the bundle of copies alias any of
+ // the source registers, try to schedule the instructions to avoid any
+ // clobbering.
+ for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) {
+ for (int I = E; I--; )
+ if (!anyRegsAlias(MIs[I], ArrayRef(MIs).take_front(E), TRI)) {
+ if (I + 1 != E)
+ std::swap(MIs[I], MIs[E - 1]);
+ --E;
+ }
+ if (PrevE == E) {
+ MF->getFunction().getContext().emitError(
+ "register rewriting failed: cycle in copy bundle");
+ break;
+ }
+ }
+
+ MachineInstr *BundleStart = FirstMI;
+ for (MachineInstr *BundledMI : llvm::reverse(MIs)) {
+ // If instruction is in the middle of the bundle, move it before the
+ // bundle starts, otherwise, just unbundle it. When we get to the last
+ // instruction, the bundle will have been completely undone.
+ if (BundledMI != BundleStart) {
+ BundledMI->removeFromBundle();
+ MBB.insert(BundleStart, BundledMI);
+ } else if (BundledMI->isBundledWithSucc()) {
+ BundledMI->unbundleFromSucc();
+ BundleStart = &*std::next(BundledMI->getIterator());
+ }
+
+ if (Indexes && BundledMI != FirstMI)
+ Indexes->insertMachineInstrInMaps(*BundledMI);
+ }
+ }
+}
+
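+// A sketch of the reordering above with hypothetical registers: for a bundle
+// of parallel copies
+//
+//   $r1 = COPY $r2
+//   $r0 = COPY $r1
+//
+// the destination of the first copy ($r1) aliases the source of the second,
+// so the scheduling loop emits "$r0 = COPY $r1" first and the expanded
+// sequence reads the old value of $r1 before it is overwritten. If every
+// remaining destination aliases some remaining source, the "cycle in copy
+// bundle" error above is reported instead.
+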
+/// Check whether (part of) \p SuperPhysReg is live through \p MI.
+/// \pre \p MI defines a subregister of a virtual register that
+/// has been assigned to \p SuperPhysReg.
+bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
+ MCRegister SuperPhysReg) const {
+ SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+ SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
+ SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
+ for (MCRegUnit Unit : TRI->regunits(SuperPhysReg)) {
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+ // If the regunit is live both before and after MI,
+ // we assume it is live through.
+ // Generally speaking, this is not true, because something like
+ // "RU = op RU" would match that description.
+ // However, we know that we are trying to assess whether
+ // a def of a virtual reg, vreg, is live at the same time of RU.
+ // If we are in the "RU = op RU" situation, that means that vreg
+ // is defined at the same time as RU (i.e., "vreg, RU = op RU").
+ // Thus, vreg and RU interfere and vreg cannot be assigned to
+ // SuperPhysReg. Therefore, this situation cannot happen.
+ if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses))
+ return true;
+ }
+ return false;
+}
+
+void VirtRegRewriter::rewrite() {
+ bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
+ SmallVector<Register, 8> SuperDeads;
+ SmallVector<Register, 8> SuperDefs;
+ SmallVector<Register, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+ for (MachineOperand &MO : MI.operands()) {
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ Register VirtReg = MO.getReg();
+ MCRegister PhysReg = VRM->getPhys(VirtReg);
+ if (PhysReg == VirtRegMap::NO_PHYS_REG)
+ continue;
+
+ assert(Register(PhysReg).isPhysical());
+
+ RewriteRegs.insert(PhysReg);
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+
+ // Preserve semantics of sub-register operands.
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add implicit killed operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
+ if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
+ (MO.isDef() && subRegLiveThrough(MI, PhysReg)))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // Also add implicit defs for the super-register.
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
+ } else {
+ if (MO.isUse()) {
+ if (readsUndefSubreg(MO))
+ // We need to add an <undef> flag if the subregister is
+ // completely undefined (and we are not adding super-register
+ // defs).
+ MO.setIsUndef(true);
+ } else if (!MO.isDead()) {
+ assert(MO.isDef());
+ }
+ }
+
+ // The def undef and def internal flags only make sense for
+ // sub-register defs, and we are substituting a full physreg. An
+ // implicit killed operand from the SuperKills list will represent the
+ // partial read of the super-register.
+ if (MO.isDef()) {
+ MO.setIsUndef(false);
+ MO.setIsInternalRead(false);
+ }
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
+ assert(PhysReg.isValid() && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(true);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ LLVM_DEBUG(dbgs() << "> " << MI);
+
+ expandCopyBundle(MI);
+
+ // We can remove identity copies right now.
+ handleIdentityCopy(MI);
+ }
+ }
+
+ if (LIS) {
+ // Don't bother maintaining accurate LiveIntervals for registers which were
+ // already allocated.
+ for (Register PhysReg : RewriteRegs) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LIS->removeRegUnit(Unit);
+ }
+ }
+ }
+
+ RewriteRegs.clear();
+}
+
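+// A sketch of the rewrite on hypothetical x86-64 code, with %0 (a 64-bit
+// vreg) assigned to $rax, %1 to $ecx, and %2 to $edx:
+//
+//   %1:gr32 = MOV32ri 1                ->  renamable $ecx = MOV32ri 1
+//   %2:gr32 = COPY killed %0.sub_32bit ->  renamable $edx = COPY killed $eax,
+//                                            implicit killed $rax
+//
+// For the sub-register use, the index is folded into the physical register
+// ($rax:sub_32bit becomes $eax) and, when sub-register liveness is not
+// tracked, an implicit operand on the super-register is added as described
+// in rewrite().
+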
+FunctionPass *llvm::createVirtRegRewriter(bool ClearVirtRegs) {
+ return new VirtRegRewriter(ClearVirtRegs);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
new file mode 100644
index 000000000000..cc04807e8455
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -0,0 +1,377 @@
+//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators that use the
+// WebAssembly exception handling scheme. It currently supports C++
+// exceptions.
+//
+// WebAssembly exception handling uses Windows exception IR for the middle level
+// representation. This pass does the following transformation for every
+// catchpad block:
+// (In C-style pseudocode)
+//
+// - Before:
+// catchpad ...
+// exn = wasm.get.exception();
+// selector = wasm.get.selector();
+// ...
+//
+// - After:
+// catchpad ...
+// exn = wasm.catch(WebAssembly::CPP_EXCEPTION);
+// // Only add below in case it's not a single catch (...)
+// wasm.landingpad.index(index);
+// __wasm_lpad_context.lpad_index = index;
+// __wasm_lpad_context.lsda = wasm.lsda();
+// _Unwind_CallPersonality(exn);
+// selector = __wasm_lpad_context.selector;
+// ...
+//
+//
+// * Background: Direct personality function call
+// In WebAssembly EH, the VM is responsible for unwinding the stack once an
+// exception is thrown. After the stack is unwound, the control flow is
+// transferred to the WebAssembly 'catch' instruction.
+//
+// Unwinding the stack is not done by libunwind but by the VM, so the personality
+// function in libcxxabi cannot be called from libunwind during the unwinding
+// process. So after a catch instruction, we insert a call to a wrapper function
+// in libunwind that in turn calls the real personality function.
+//
+// In Itanium EH, if the personality function decides there is no matching catch
+// clause in a call frame and no cleanup action to perform, the unwinder doesn't
+// stop there and continues unwinding. But in Wasm EH, the unwinder stops at
+// every call frame with a catch instruction, after which the personality
+// function is called from the compiler-generated user code here.
+//
+// In libunwind, we have this struct that serves as a communication channel
+// between the compiler-generated user code and the personality function in
+// libcxxabi.
+//
+// struct _Unwind_LandingPadContext {
+// uintptr_t lpad_index;
+// uintptr_t lsda;
+// uintptr_t selector;
+// };
+// struct _Unwind_LandingPadContext __wasm_lpad_context = ...;
+//
+// And this wrapper in libunwind calls the personality function.
+//
+// _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) {
+// struct _Unwind_Exception *exception_obj =
+// (struct _Unwind_Exception *)exception_ptr;
+// _Unwind_Reason_Code ret = __gxx_personality_v0(
+// 1, _UA_CLEANUP_PHASE, exception_obj->exception_class, exception_obj,
+// (struct _Unwind_Context *)__wasm_lpad_context);
+// return ret;
+// }
+//
+// We pass a landing pad index, and the address of LSDA for the current function
+// to the wrapper function _Unwind_CallPersonality in libunwind, and we retrieve
+// the selector after it returns.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasmehprepare"
+
+namespace {
+class WasmEHPrepare : public FunctionPass {
+ Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext'
+ GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context
+
+ // Field addresses of struct _Unwind_LandingPadContext
+ Value *LPadIndexField = nullptr; // lpad_index field
+ Value *LSDAField = nullptr; // lsda field
+ Value *SelectorField = nullptr; // selector
+
+ Function *ThrowF = nullptr; // wasm.throw() intrinsic
+ Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
+ Function *LSDAF = nullptr; // wasm.lsda() intrinsic
+ Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
+ Function *CatchF = nullptr; // wasm.catch() intrinsic
+ Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
+ FunctionCallee CallPersonalityF =
+ nullptr; // _Unwind_CallPersonality() wrapper
+
+ bool prepareThrows(Function &F);
+ bool prepareEHPads(Function &F);
+ void prepareEHPad(BasicBlock *BB, bool NeedPersonality, unsigned Index = 0);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ WasmEHPrepare() : FunctionPass(ID) {}
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "WebAssembly Exception handling preparation";
+ }
+};
+} // end anonymous namespace
+
+char WasmEHPrepare::ID = 0;
+INITIALIZE_PASS_BEGIN(WasmEHPrepare, DEBUG_TYPE,
+ "Prepare WebAssembly exceptions", false, false)
+INITIALIZE_PASS_END(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
+ false, false)
+
+FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }
+
+bool WasmEHPrepare::doInitialization(Module &M) {
+ IRBuilder<> IRB(M.getContext());
+ LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index
+ IRB.getInt8PtrTy(), // lsda
+ IRB.getInt32Ty() // selector
+ );
+ return false;
+}
+
+// Erase each of the specified BBs if it no longer has any predecessors, and
+// also erase all of its children that become dead as a result.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &BBs) {
+ SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
+ while (!WL.empty()) {
+ auto *BB = WL.pop_back_val();
+ if (!pred_empty(BB))
+ continue;
+ WL.append(succ_begin(BB), succ_end(BB));
+ DeleteDeadBlock(BB);
+ }
+}
+
+bool WasmEHPrepare::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= prepareThrows(F);
+ Changed |= prepareEHPads(F);
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareThrows(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+ bool Changed = false;
+
+ // wasm.throw() intrinsic, which will be lowered to the wasm 'throw' instruction.
+ ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
+ // Insert an unreachable instruction after a call to @llvm.wasm.throw and
+ // delete all following instructions within the BB, and delete all the dead
+ // children of the BB as well.
+ for (User *U : ThrowF->users()) {
+ // A call to @llvm.wasm.throw() is only generated from __cxa_throw()
+ // builtin call within libcxxabi, and cannot be an InvokeInst.
+ auto *ThrowI = cast<CallInst>(U);
+ if (ThrowI->getFunction() != &F)
+ continue;
+ Changed = true;
+ auto *BB = ThrowI->getParent();
+ SmallVector<BasicBlock *, 4> Succs(successors(BB));
+ BB->erase(std::next(BasicBlock::iterator(ThrowI)), BB->end());
+ IRB.SetInsertPoint(BB);
+ IRB.CreateUnreachable();
+ eraseDeadBBsAndChildren(Succs);
+ }
+
+ return Changed;
+}
+
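+// Schematically, in IR form (operands abbreviated), prepareThrows turns
+//
+//   call void @llvm.wasm.throw(i32 %tag, ptr %obj)
+//   br label %next
+//
+// into
+//
+//   call void @llvm.wasm.throw(i32 %tag, ptr %obj)
+//   unreachable
+//
+// and %next, along with any successors left without predecessors, is erased
+// by eraseDeadBBsAndChildren.
+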
+bool WasmEHPrepare::prepareEHPads(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+
+ SmallVector<BasicBlock *, 16> CatchPads;
+ SmallVector<BasicBlock *, 16> CleanupPads;
+ for (BasicBlock &BB : F) {
+ if (!BB.isEHPad())
+ continue;
+ auto *Pad = BB.getFirstNonPHI();
+ if (isa<CatchPadInst>(Pad))
+ CatchPads.push_back(&BB);
+ else if (isa<CleanupPadInst>(Pad))
+ CleanupPads.push_back(&BB);
+ }
+ if (CatchPads.empty() && CleanupPads.empty())
+ return false;
+
+ if (!F.hasPersonalityFn() ||
+ !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Function '" + F.getName() +
+ "' does not have a correct Wasm personality function "
+ "'__gxx_wasm_personality_v0'");
+ }
+ assert(F.hasPersonalityFn() && "Personality function not found");
+
+ // __wasm_lpad_context global variable.
+ // This variable should be thread local. If the target does not support TLS,
+ // we depend on CoalesceFeaturesAndStripAtomics to downgrade it to
+ // a non-thread-local one, in which case we don't allow this object to be
+ // linked with other objects using shared memory.
+ LPadContextGV = cast<GlobalVariable>(
+ M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
+ LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
+
+ LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
+ "lpad_index_gep");
+ LSDAField =
+ IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep");
+ SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
+ "selector_gep");
+
+ // wasm.landingpad.index() intrinsic, used to specify the landingpad index.
+ LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
+ // wasm.lsda() intrinsic. Returns the address of LSDA table for the current
+ // function.
+ LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda);
+ // wasm.get.exception() and wasm.get.ehselector() intrinsics. Calls to these
+ // are generated in clang.
+ GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
+ GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
+
+ // wasm.catch() will be lowered down to wasm 'catch' instruction in
+ // instruction selection.
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
+
+ // _Unwind_CallPersonality() wrapper, which calls the real personality function.
+ CallPersonalityF = M.getOrInsertFunction(
+ "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
+ if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
+ F->setDoesNotThrow();
+
+ unsigned Index = 0;
+ for (auto *BB : CatchPads) {
+ auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
+ // In case of a single catch (...), we don't need to emit a personality
+ // function call.
+ if (CPI->arg_size() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue())
+ prepareEHPad(BB, false);
+ else
+ prepareEHPad(BB, true, Index++);
+ }
+
+ // Cleanup pads don't need a personality function call.
+ for (auto *BB : CleanupPads)
+ prepareEHPad(BB, false);
+
+ return true;
+}
+
+// Prepare an EH pad for Wasm EH handling. If NeedPersonality is false, Index is
+// ignored.
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
+ unsigned Index) {
+ assert(BB->isEHPad() && "BB is not an EHPad!");
+ IRBuilder<> IRB(BB->getContext());
+ IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
+
+ auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
+ Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
+ for (auto &U : FPI->uses()) {
+ if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
+ if (CI->getCalledOperand() == GetExnF)
+ GetExnCI = CI;
+ if (CI->getCalledOperand() == GetSelectorF)
+ GetSelectorCI = CI;
+ }
+ }
+
+ // Cleanup pads do not have any wasm.get.exception() or
+ // wasm.get.ehselector() calls, so there is nothing to do.
+ if (!GetExnCI) {
+ assert(!GetSelectorCI &&
+ "wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
+ return;
+ }
+
+ // Replace wasm.get.exception intrinsic with wasm.catch intrinsic, which will
+ // be lowered to wasm 'catch' instruction. We do this mainly because
+ // instruction selection cannot handle wasm.get.exception intrinsic's token
+ // argument.
+ Instruction *CatchCI =
+ IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::CPP_EXCEPTION)}, "exn");
+ GetExnCI->replaceAllUsesWith(CatchCI);
+ GetExnCI->eraseFromParent();
+
+ // In case it is a catchpad with a single catch (...) or a cleanuppad, we don't
+ // need to call personality function because we don't need a selector.
+ if (!NeedPersonality) {
+ if (GetSelectorCI) {
+ assert(GetSelectorCI->use_empty() &&
+ "wasm.get.ehselector() still has uses!");
+ GetSelectorCI->eraseFromParent();
+ }
+ return;
+ }
+ IRB.SetInsertPoint(CatchCI->getNextNode());
+
+ // This is to create a map of <landingpad EH label, landingpad index> in
+ // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
+ // Pseudocode: wasm.landingpad.index(Index);
+ IRB.CreateCall(LPadIndexF, {FPI, IRB.getInt32(Index)});
+
+ // Pseudocode: __wasm_lpad_context.lpad_index = index;
+ IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
+
+ auto *CPI = cast<CatchPadInst>(FPI);
+ // TODO Sometimes storing the LSDA address every time is not necessary, in
+ // case it is already set in a dominating EH pad and there is no function call
+ // between that EH pad and here. Consider optimizing those cases.
+ // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
+ IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
+
+ // Pseudocode: _Unwind_CallPersonality(exn);
+ CallInst *PersCI = IRB.CreateCall(CallPersonalityF, CatchCI,
+ OperandBundleDef("funclet", CPI));
+ PersCI->setDoesNotThrow();
+
+ // Pseudocode: int selector = __wasm_lpad_context.selector;
+ Instruction *Selector =
+ IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
+
+ // Replace the return value from wasm.get.ehselector() with the selector value
+ // loaded from __wasm_lpad_context.selector.
+ assert(GetSelectorCI && "wasm.get.ehselector() call does not exist");
+ GetSelectorCI->replaceAllUsesWith(Selector);
+ GetSelectorCI->eraseFromParent();
+}
+
+void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+ // If an exception is not caught by a catchpad (i.e., it is a foreign
+ // exception), it will unwind to its parent catchswitch's unwind destination.
+ // We don't record an unwind destination for cleanuppads because every
+ // exception should be caught by them.
+ for (const auto &BB : *F) {
+ if (!BB.isEHPad())
+ continue;
+ const Instruction *Pad = BB.getFirstNonPHI();
+
+ if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
+ const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
+ if (!UnwindBB)
+ continue;
+ const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
+ if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
+ // Currently there should be only one handler per catchswitch.
+ EHInfo.setUnwindDest(&BB, *CatchSwitch->handlers().begin());
+ else // cleanuppad
+ EHInfo.setUnwindDest(&BB, UnwindBB);
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
new file mode 100644
index 000000000000..11597b119893
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -0,0 +1,1396 @@
+//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers LLVM IR exception handling into something closer to what the
+// backend wants for functions using a personality function from a runtime
+// provided by MSVC. Functions with other personality functions are left alone
+// and may be prepared by other passes. In particular, all supported MSVC
+// personality functions require cleanup code to be outlined, and the C++
+// personality requires catch handler code to be outlined.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/EHPersonalities.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "winehprepare"
+
+static cl::opt<bool> DisableDemotion(
+ "disable-demotion", cl::Hidden,
+ cl::desc(
+ "Clone multicolor basic blocks but do not demote cross scopes"),
+ cl::init(false));
+
+static cl::opt<bool> DisableCleanups(
+ "disable-cleanups", cl::Hidden,
+ cl::desc("Do not remove implausible terminators or other similar cleanups"),
+ cl::init(false));
+
+static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(
+ "demote-catchswitch-only", cl::Hidden,
+ cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));
+
+namespace {
+
+class WinEHPrepare : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false)
+ : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {}
+
+ bool runOnFunction(Function &Fn) override;
+
+ bool doFinalization(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ StringRef getPassName() const override {
+ return "Windows exception handling preparation";
+ }
+
+private:
+ void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot);
+ void
+ insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist);
+ AllocaInst *insertPHILoads(PHINode *PN, Function &F);
+ void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads, Function &F);
+ bool prepareExplicitEH(Function &F);
+ void colorFunclets(Function &F);
+
+ void demotePHIsOnFunclets(Function &F, bool DemoteCatchSwitchPHIOnly);
+ void cloneCommonBlocks(Function &F);
+ void removeImplausibleInstructions(Function &F);
+ void cleanupPreparedFunclets(Function &F);
+ void verifyPreparedFunclets(Function &F);
+
+ bool DemoteCatchSwitchPHIOnly;
+
+ // All fields are reset by runOnFunction.
+ EHPersonality Personality = EHPersonality::Unknown;
+
+ const DataLayout *DL = nullptr;
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+ MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
+};
+
+} // end anonymous namespace
+
+char WinEHPrepare::ID = 0;
+INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions",
+ false, false)
+
+FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) {
+ return new WinEHPrepare(DemoteCatchSwitchPHIOnly);
+}
+
+bool WinEHPrepare::runOnFunction(Function &Fn) {
+ if (!Fn.hasPersonalityFn())
+ return false;
+
+ // Classify the personality to see what kind of preparation we need.
+ Personality = classifyEHPersonality(Fn.getPersonalityFn());
+
+ // Do nothing if this is not a scope-based personality.
+ if (!isScopedEHPersonality(Personality))
+ return false;
+
+ DL = &Fn.getParent()->getDataLayout();
+ return prepareExplicitEH(Fn);
+}
+
+bool WinEHPrepare::doFinalization(Module &M) { return false; }
+
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
+
+static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
+ const BasicBlock *BB) {
+ CxxUnwindMapEntry UME;
+ UME.ToState = ToState;
+ UME.Cleanup = BB;
+ FuncInfo.CxxUnwindMap.push_back(UME);
+ return FuncInfo.getLastStateNumber();
+}
+
+static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
+ int TryHigh, int CatchHigh,
+ ArrayRef<const CatchPadInst *> Handlers) {
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ TBME.CatchHigh = CatchHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (const CatchPadInst *CPI : Handlers) {
+ WinEHHandlerType HT;
+ Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0));
+ if (TypeInfo->isNullValue())
+ HT.TypeDescriptor = nullptr;
+ else
+ HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
+ HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
+ HT.Handler = CPI->getParent();
+ if (auto *AI =
+ dyn_cast<AllocaInst>(CPI->getArgOperand(2)->stripPointerCasts()))
+ HT.CatchObj.Alloca = AI;
+ else
+ HT.CatchObj.Alloca = nullptr;
+ TBME.HandlerArray.push_back(HT);
+ }
+ FuncInfo.TryBlockMap.push_back(TBME);
+}
+
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) {
+ for (const User *U : CleanupPad->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ return CRI->getUnwindDest();
+ return nullptr;
+}
+
+static void calculateStateNumbersForInvokes(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ auto *F = const_cast<Function *>(Fn);
+ DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F);
+ for (BasicBlock &BB : *F) {
+ auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+
+ auto &BBColors = BlockColors[&BB];
+ assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+
+ BasicBlock *FuncletUnwindDest;
+ auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
+ assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock());
+ if (!FuncletPad)
+ FuncletUnwindDest = nullptr;
+ else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+ FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest();
+ else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad))
+ FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad);
+ else
+ llvm_unreachable("unexpected funclet pad!");
+
+ BasicBlock *InvokeUnwindDest = II->getUnwindDest();
+ int BaseState = -1;
+ if (FuncletUnwindDest == InvokeUnwindDest) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
+ }
+
+ if (BaseState != -1) {
+ FuncInfo.InvokeStateMap[II] = BaseState;
+ } else {
+ Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI();
+ assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!");
+ FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst];
+ }
+ }
+}
+
+// See comments below for calculateSEHStateForAsynchEH().
+// State - the incoming state along normal (non-exceptional) paths
+struct WorkItem {
+ const BasicBlock *Block;
+ int State;
+ WorkItem(const BasicBlock *BB, int St) {
+ Block = BB;
+ State = St;
+ }
+};
+void llvm::calculateCXXStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &EHInfo) {
+ SmallVector<struct WorkItem *, 8> WorkList;
+ struct WorkItem *WI = new WorkItem(BB, State);
+ WorkList.push_back(WI);
+
+ while (!WorkList.empty()) {
+ WI = WorkList.pop_back_val();
+ const BasicBlock *BB = WI->Block;
+ int State = WI->State;
+ delete WI;
+ if (EHInfo.BlockToStateMap.count(BB) && EHInfo.BlockToStateMap[BB] <= State)
+ continue; // skip blocks already visited by lower State
+
+ const llvm::Instruction *I = BB->getFirstNonPHI();
+ const llvm::Instruction *TI = BB->getTerminator();
+ if (I->isEHPad())
+ State = EHInfo.EHPadStateMap[I];
+ EHInfo.BlockToStateMap[BB] = State; // Record state, also flag visiting
+
+ if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) && State > 0) {
+ // Retrieve the new State
+ State = EHInfo.CxxUnwindMap[State].ToState; // Retrieve next State
+ } else if (isa<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(TI);
+ const Function *Fn = Call->getCalledFunction();
+ if (Fn && Fn->isIntrinsic() &&
+ (Fn->getIntrinsicID() == Intrinsic::seh_scope_begin ||
+ Fn->getIntrinsicID() == Intrinsic::seh_try_begin))
+ // Retrieve the new State from seh_scope_begin
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ else if (Fn && Fn->isIntrinsic() &&
+ (Fn->getIntrinsicID() == Intrinsic::seh_scope_end ||
+ Fn->getIntrinsicID() == Intrinsic::seh_try_end)) {
+ // In case of conditional ctor, let's retrieve State from Invoke
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ // end of the current state; retrieve the new state from the UnwindMap
+ State = EHInfo.CxxUnwindMap[State].ToState;
+ }
+ }
+ // Continue pushing successors onto the worklist
+ for (auto *SuccBB : successors(BB)) {
+ WI = new WorkItem(SuccBB, State);
+ WorkList.push_back(WI);
+ }
+ }
+}
+
+// The central theory of this routine is based on the following:
+// A _try scope is always a SEME (Single Entry Multiple Exits) region
+// as jumping into a _try is not allowed
+// The single entry must start with a seh_try_begin() invoke with a
+// correct State number that is the initial state of the SEME.
+// Through control-flow, state number is propagated into all blocks.
+// Side exits marked by seh_try_end() will unwind to parent state via
+// existing SEHUnwindMap[].
+// Side exits can ONLY jump into parent scopes (lower state number).
+// Thus, when a block inherits various states from its predecessors,
+// the lowest State trumps the others.
+// If some exits flow to unreachable, propagation on those paths terminates,
+// not affecting remaining blocks.
+void llvm::calculateSEHStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &EHInfo) {
+ SmallVector<struct WorkItem *, 8> WorkList;
+ struct WorkItem *WI = new WorkItem(BB, State);
+ WorkList.push_back(WI);
+
+ while (!WorkList.empty()) {
+ WI = WorkList.pop_back_val();
+ const BasicBlock *BB = WI->Block;
+ int State = WI->State;
+ delete WI;
+ if (EHInfo.BlockToStateMap.count(BB) && EHInfo.BlockToStateMap[BB] <= State)
+ continue; // skip blocks already visited by lower State
+
+ const llvm::Instruction *I = BB->getFirstNonPHI();
+ const llvm::Instruction *TI = BB->getTerminator();
+ if (I->isEHPad())
+ State = EHInfo.EHPadStateMap[I];
+ EHInfo.BlockToStateMap[BB] = State; // Record state
+
+ if (isa<CatchPadInst>(I) && isa<CatchReturnInst>(TI)) {
+ const Constant *FilterOrNull = cast<Constant>(
+ cast<CatchPadInst>(I)->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ if (!Filter || !Filter->getName().startswith("__IsLocalUnwind"))
+ State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next State
+ } else if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) &&
+ State > 0) {
+ // Retrieve the new State.
+ State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next State
+ } else if (isa<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(TI);
+ const Function *Fn = Call->getCalledFunction();
+ if (Fn && Fn->isIntrinsic() &&
+ Fn->getIntrinsicID() == Intrinsic::seh_try_begin)
+ // Retrieve the new State from seh_try_begin
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ else if (Fn && Fn->isIntrinsic() &&
+ Fn->getIntrinsicID() == Intrinsic::seh_try_end)
+ // end of the current state; retrieve the new state from the UnwindMap
+ State = EHInfo.SEHUnwindMap[State].ToState;
+ }
+ // Continue pushing successors onto the worklist
+ for (auto *SuccBB : successors(BB)) {
+ WI = new WorkItem(SuccBB, State);
+ WorkList.push_back(WI);
+ }
+ }
+}
+
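+// A worked sketch of the propagation above on a hypothetical function: the
+// entry block starts in state -1; an invoke of seh_try_begin looks up the
+// __try's state in InvokeStateMap (say 0), so blocks inside the __try are
+// recorded in BlockToStateMap with state 0; at seh_try_end (or a catchret
+// with State > 0) the state reverts to SEHUnwindMap[0].ToState, i.e. -1. A
+// block reachable with both states ends up with the lower one, per the
+// "lowest State trumps" rule described above.
+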
+// Given BB which ends in an unwind edge, return the EHPad that this BB belongs
+// to. If the unwind edge came from an invoke, return null.
+static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
+ Value *ParentPad) {
+ const Instruction *TI = BB->getTerminator();
+ if (isa<InvokeInst>(TI))
+ return nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CatchSwitch->getParentPad() != ParentPad)
+ return nullptr;
+ return BB;
+ }
+ assert(!TI->isEHPad() && "unexpected EHPad!");
+ auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad();
+ if (CleanupPad->getParentPad() != ParentPad)
+ return nullptr;
+ return CleanupPad->getParent();
+}
+
+// Starting from an EHPad, walk backward through the control-flow graph
+// to produce two primary outputs:
+// FuncInfo.EHPadStateMap[] and FuncInfo.CxxUnwindMap[]
+static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "not a funclet!");
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revisit catch funclets!");
+
+ SmallVector<const CatchPadInst *, 2> Handlers;
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+ Handlers.push_back(CatchPad);
+ }
+ int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryLow;
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryLow);
+ int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+
+ // catchpads are separate funclets in C++ EH due to the way rethrow works.
+ int TryHigh = CatchLow - 1;
+
+ // MSVC FrameHandler3/4 on x64&Arm64 expect Catch Handlers in $tryMap$
+ // stored in pre-order (outer first, inner next), not post-order.
+ // Add to the map here. Fix the CatchHigh after children are processed.
+ const Module *Mod = BB->getParent()->getParent();
+ bool IsPreOrder = Triple(Mod->getTargetTriple()).isArch64Bit();
+ if (IsPreOrder)
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchLow, Handlers);
+ unsigned TBMEIdx = FuncInfo.TryBlockMap.size() - 1;
+
+ for (const auto *CatchPad : Handlers) {
+ FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+ FuncInfo.EHPadStateMap[CatchPad] = CatchLow;
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
+ BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest();
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ }
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
+ BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
+ // If a nested cleanup pad reports a null unwind destination and the
+ // enclosing catch pad doesn't, it must be post-dominated by an
+ // unreachable instruction.
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ }
+ }
+ }
+ int CatchHigh = FuncInfo.getLastStateNumber();
+ // Now that child catches are processed, update CatchHigh
+ if (IsPreOrder)
+ FuncInfo.TryBlockMap[TBMEIdx].CatchHigh = CatchHigh;
+ else // PostOrder
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+
+ LLVM_DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+ LLVM_DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh
+ << '\n');
+ LLVM_DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+ << '\n');
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
+
+ int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB)) {
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CleanupPad->getParentPad()))) {
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
+ }
+ }
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the MSVC++ personality cannot "
+ "contain exceptional actions");
+ }
+ }
+}
+
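+// A worked sketch for a single top-level try/catch: the catchswitch gets
+//   TryLow   = addUnwindMapEntry(FuncInfo, -1, nullptr)   -> state 0
+//   CatchLow = addUnwindMapEntry(FuncInfo, -1, nullptr)   -> state 1
+// so TryHigh = CatchLow - 1 = 0, the lone catchpad is numbered 1, and
+// CatchHigh = getLastStateNumber() = 1. The resulting TryBlockMap entry is
+// {TryLow = 0, TryHigh = 0, CatchHigh = 1}, and invokes inside the try body
+// are numbered with state 0 by calculateStateNumbersForInvokes.
+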
+static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState,
+ const Function *Filter, const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = false;
+ Entry.Filter = Filter;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
+static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
+ const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = true;
+ Entry.Filter = nullptr;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
+// Starting from an EHPad, walk backward through the control-flow graph
+// to produce two primary outputs:
+// FuncInfo.EHPadStateMap[] and FuncInfo.SEHUnwindMap[]
+static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "not a funclet!");
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revisit catch funclets!");
+
+ // Extract the filter function and the __except basic block and create a
+ // state for them.
+ assert(CatchSwitch->getNumHandlers() == 1 &&
+ "SEH doesn't have multiple handlers per __try");
+ const auto *CatchPad =
+ cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI());
+ const BasicBlock *CatchPadBB = CatchPad->getParent();
+ const Constant *FilterOrNull =
+ cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ assert((Filter || FilterOrNull->isNullValue()) &&
+ "unexpected filter value");
+ int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB);
+
+ // Everything in the __try block uses TryState as its parent state.
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+ FuncInfo.EHPadStateMap[CatchPad] = TryState;
+ LLVM_DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+ << CatchPadBB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryState);
+
+ // Everything in the __except block unwinds to ParentState, just like code
+ // outside the __try.
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
+ BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest();
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ }
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
+ BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
+ // If a nested cleanup pad reports a null unwind destination and the
+ // enclosing catch pad doesn't, it must be post-dominated by an
+ // unreachable instruction.
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ }
+ }
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
+
+ int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock =
+ getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the SEH personality cannot "
+ "contain exceptional actions");
+ }
+ }
+}
+
+static bool isTopLevelPadForMSVC(const Instruction *EHPad) {
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad))
+ return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) &&
+ CatchSwitch->unwindsToCaller();
+ if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad))
+ return isa<ConstantTokenNone>(CleanupPad->getParentPad()) &&
+ getCleanupRetUnwindDest(CleanupPad) == nullptr;
+ if (isa<CatchPadInst>(EHPad))
+ return false;
+ llvm_unreachable("unexpected EHPad!");
+}
+
+void llvm::calculateSEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Don't compute state numbers twice.
+ if (!FuncInfo.SEHUnwindMap.empty())
+ return;
+
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
+ continue;
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
+ continue;
+ ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1);
+ }
+
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
+
+ bool IsEHa = Fn->getParent()->getModuleFlag("eh-asynch");
+ if (IsEHa) {
+ const BasicBlock *EntryBB = &(Fn->getEntryBlock());
+ calculateSEHStateForAsynchEH(EntryBB, -1, FuncInfo);
+ }
+}
+
+void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
+
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
+ continue;
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
+ continue;
+ calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1);
+ }
+
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
+
+ bool IsEHa = Fn->getParent()->getModuleFlag("eh-asynch");
+ if (IsEHa) {
+ const BasicBlock *EntryBB = &(Fn->getEntryBlock());
+ calculateCXXStateForAsynchEH(EntryBB, -1, FuncInfo);
+ }
+}
+
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
+ int TryParentState, ClrHandlerType HandlerType,
+ uint32_t TypeToken, const BasicBlock *Handler) {
+ ClrEHUnwindMapEntry Entry;
+ Entry.HandlerParentState = HandlerParentState;
+ Entry.TryParentState = TryParentState;
+ Entry.Handler = Handler;
+ Entry.HandlerType = HandlerType;
+ Entry.TypeToken = TypeToken;
+ FuncInfo.ClrEHUnwindMap.push_back(Entry);
+ return FuncInfo.ClrEHUnwindMap.size() - 1;
+}
+
+void llvm::calculateClrEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
+
+ // This numbering assigns one state number to each catchpad and cleanuppad.
+ // It also computes two tree-like relations over states:
+ // 1) Each state has a "HandlerParentState", which is the state of the next
+ // outer handler enclosing this state's handler (same as nearest ancestor
+ // per the ParentPad linkage on EH pads, but skipping over catchswitches).
+ // 2) Each state has a "TryParentState", which:
+ // a) for a catchpad that's not the last handler on its catchswitch, is
+ // the state of the next catchpad on that catchswitch
+ // b) for all other pads, is the state of the pad whose try region is the
+ // next outer try region enclosing this state's try region. The "try
+ // regions" are not present as such in the IR, but will be inferred
+ // based on the placement of invokes and pads which reach each other
+ // by exceptional exits.
+ // Catchswitches do not get their own states, but each gets mapped to the
+ // state of its first catchpad.
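+ //
+ // Illustrative sketch (not taken from this file): for a top-level
+ // catchswitch with two handlers C1 and C2, both catchpads get
+ // HandlerParentState = -1. Step one below walks the handlers in reverse,
+ // so C1 (not the last handler) has its TryParentState set to C2's state
+ // immediately, while C2's TryParentState is left at the sentinel -1 and
+ // filled in by step two from the catchswitch's unwind destination.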
+
+ // Step one: walk down from outermost to innermost funclets, assigning each
+ // catchpad and cleanuppad a state number. Add an entry to the
+ // ClrEHUnwindMap for each state, recording its HandlerParentState and
+ // handler attributes. Record the TryParentState as well for each catchpad
+ // that's not the last on its catchswitch, but initialize all other entries'
+ // TryParentStates to a sentinel -1 value that the next pass will update.
+
+ // Seed a worklist with pads that have no parent.
+ SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
+ for (const BasicBlock &BB : *Fn) {
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ const Value *ParentPad;
+ if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI))
+ ParentPad = CPI->getParentPad();
+ else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI))
+ ParentPad = CSI->getParentPad();
+ else
+ continue;
+ if (isa<ConstantTokenNone>(ParentPad))
+ Worklist.emplace_back(FirstNonPHI, -1);
+ }
+
+ // Use the worklist to visit all pads, from outer to inner. Record
+ // HandlerParentState for all pads. Record TryParentState only for catchpads
+ // that aren't the last on their catchswitch (setting all other entries'
+ // TryParentStates to an initial value of -1). This loop is also responsible
+ // for setting the EHPadStateMap entry for all catchpads, cleanuppads, and
+ // catchswitches.
+ while (!Worklist.empty()) {
+ const Instruction *Pad;
+ int HandlerParentState;
+ std::tie(Pad, HandlerParentState) = Worklist.pop_back_val();
+
+ if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+ // Create the entry for this cleanup with the appropriate handler
+ // properties. Finally and fault handlers are distinguished by arity.
+ ClrHandlerType HandlerType =
+ (Cleanup->arg_size() ? ClrHandlerType::Fault
+ : ClrHandlerType::Finally);
+ int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
+ HandlerType, 0, Pad->getParent());
+ // Queue any child EH pads on the worklist.
+ for (const User *U : Cleanup->users())
+ if (const auto *I = dyn_cast<Instruction>(U))
+ if (I->isEHPad())
+ Worklist.emplace_back(I, CleanupState);
+ // Remember this pad's state.
+ FuncInfo.EHPadStateMap[Cleanup] = CleanupState;
+ } else {
+ // Walk the handlers of this catchswitch in reverse order since all but
+ // the last need to set the following one as its TryParentState.
+ const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
+ int CatchState = -1, FollowerState = -1;
+ SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
+ for (const BasicBlock *CatchBlock : llvm::reverse(CatchBlocks)) {
+ // Create the entry for this catch with the appropriate handler
+ // properties.
+ const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
+ uint32_t TypeToken = static_cast<uint32_t>(
+ cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
+ CatchState =
+ addClrEHHandler(FuncInfo, HandlerParentState, FollowerState,
+ ClrHandlerType::Catch, TypeToken, CatchBlock);
+ // Queue any child EH pads on the worklist.
+ for (const User *U : Catch->users())
+ if (const auto *I = dyn_cast<Instruction>(U))
+ if (I->isEHPad())
+ Worklist.emplace_back(I, CatchState);
+ // Remember this catch's state.
+ FuncInfo.EHPadStateMap[Catch] = CatchState;
+ FollowerState = CatchState;
+ }
+ // Associate the catchswitch with the state of its first catch.
+ assert(CatchSwitch->getNumHandlers());
+ FuncInfo.EHPadStateMap[CatchSwitch] = CatchState;
+ }
+ }
+
+ // Step two: record the TryParentState of each state. For cleanuppads that
+ // don't have cleanuprets, we may need to infer this from their child pads,
+ // so visit pads in descendant-most to ancestor-most order.
+ for (ClrEHUnwindMapEntry &Entry : llvm::reverse(FuncInfo.ClrEHUnwindMap)) {
+ const Instruction *Pad =
+ cast<const BasicBlock *>(Entry.Handler)->getFirstNonPHI();
+ // For most pads, the TryParentState is the state associated with the
+ // unwind dest of exceptional exits from it.
+ const BasicBlock *UnwindDest;
+ if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) {
+ // If a catch is not the last in its catchswitch, its TryParentState is
+ // the state associated with the next catch in the switch, even though
+ // that's not the unwind dest of exceptions escaping the catch. Those
+ // cases were already assigned a TryParentState in the first pass, so
+ // skip them.
+ if (Entry.TryParentState != -1)
+ continue;
+ // Otherwise, get the unwind dest from the catchswitch.
+ UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
+ } else {
+ const auto *Cleanup = cast<CleanupPadInst>(Pad);
+ UnwindDest = nullptr;
+ for (const User *U : Cleanup->users()) {
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
+ // Common and unambiguous case -- cleanupret indicates cleanup's
+ // unwind dest.
+ UnwindDest = CleanupRet->getUnwindDest();
+ break;
+ }
+
+ // Get an unwind dest for the user
+ const BasicBlock *UserUnwindDest = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
+ UserUnwindDest = Invoke->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) {
+ UserUnwindDest = CatchSwitch->getUnwindDest();
+ } else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) {
+ int UserState = FuncInfo.EHPadStateMap[ChildCleanup];
+ int UserUnwindState =
+ FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
+ if (UserUnwindState != -1)
+ UserUnwindDest = cast<const BasicBlock *>(
+ FuncInfo.ClrEHUnwindMap[UserUnwindState].Handler);
+ }
+
+ // Not having an unwind dest for this user might indicate that it
+ // doesn't unwind, so can't be taken as proof that the cleanup itself
+ // may unwind to caller (see e.g. SimplifyUnreachable and
+ // RemoveUnwindEdge).
+ if (!UserUnwindDest)
+ continue;
+
+ // Now we have an unwind dest for the user, but we need to see if it
+ // unwinds all the way out of the cleanup or if it stays within it.
+ const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI();
+ const Value *UserUnwindParent;
+ if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad))
+ UserUnwindParent = CSI->getParentPad();
+ else
+ UserUnwindParent =
+ cast<CleanupPadInst>(UserUnwindPad)->getParentPad();
+
+ // The unwind stays within the cleanup iff it targets a child of the
+ // cleanup.
+ if (UserUnwindParent == Cleanup)
+ continue;
+
+ // This unwind exits the cleanup, so its dest is the cleanup's dest.
+ UnwindDest = UserUnwindDest;
+ break;
+ }
+ }
+
+ // Record the state of the unwind dest as the TryParentState.
+ int UnwindDestState;
+
+ // If UnwindDest is null at this point, either the pad in question can
+ // be exited by unwind to caller, or it cannot be exited by unwind. In
+ // either case, reporting such cases as unwinding to caller is correct.
+ // This can lead to EH tables that "look strange" -- if this pad is in
+ // a parent funclet which has other children that do unwind to an enclosing
+ // pad, the try region for this pad will be missing the "duplicate" EH
+ // clause entries that you'd expect to see covering the whole parent. That
+ // should be benign, since the unwind never actually happens. If it were
+ // an issue, we could add a subsequent pass that pushes unwind dests down
+ // from parents that have them to children that appear to unwind to caller.
+ if (!UnwindDest) {
+ UnwindDestState = -1;
+ } else {
+ UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
+ }
+
+ Entry.TryParentState = UnwindDestState;
+ }
+
+ // Step three: transfer information from pads to invokes.
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void WinEHPrepare::colorFunclets(Function &F) {
+ BlockColors = colorEHFunclets(F);
+
+ // Invert the map from BB to colors to color to BBs.
+ for (BasicBlock &BB : F) {
+ ColorVector &Colors = BlockColors[&BB];
+ for (BasicBlock *Color : Colors)
+ FuncletBlocks[Color].push_back(&BB);
+ }
+}
+
+void WinEHPrepare::demotePHIsOnFunclets(Function &F,
+ bool DemoteCatchSwitchPHIOnly) {
+ // Strip PHI nodes off of EH pads.
+ SmallVector<PHINode *, 16> PHINodes;
+ for (BasicBlock &BB : make_early_inc_range(F)) {
+ if (!BB.isEHPad())
+ continue;
+ if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB.getFirstNonPHI()))
+ continue;
+
+ for (Instruction &I : make_early_inc_range(BB)) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ // Stop at the first non-PHI.
+ if (!PN)
+ break;
+
+ AllocaInst *SpillSlot = insertPHILoads(PN, F);
+ if (SpillSlot)
+ insertPHIStores(PN, SpillSlot);
+
+ PHINodes.push_back(PN);
+ }
+ }
+
+ for (auto *PN : PHINodes) {
+ // There may be lingering uses on other EH PHIs being removed
+ PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+}
+
+void WinEHPrepare::cloneCommonBlocks(Function &F) {
+ // We need to clone all blocks which belong to multiple funclets. Values are
+ // remapped throughout the funclet to propagate both the new instructions
+ // *and* the new basic blocks themselves.
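+ //
+ // Illustrative sketch (not taken from this file): if a block %shared is
+ // colored with two funclet entries %cleanup and %catch, it is cloned
+ // (e.g. as %shared.for.catch) while visiting one of those funclets; the
+ // original keeps the other color, so afterwards every block belongs to
+ // exactly one funclet.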
+ for (auto &Funclets : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclets.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+ Value *FuncletToken;
+ if (FuncletPadBB == &F.getEntryBlock())
+ FuncletToken = ConstantTokenNone::get(F.getContext());
+ else
+ FuncletToken = FuncletPadBB->getFirstNonPHI();
+
+ std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
+ ValueToValueMapTy VMap;
+ for (BasicBlock *BB : BlocksInFunclet) {
+ ColorVector &ColorsForBB = BlockColors[BB];
+ // We don't need to do anything if the block is monochromatic.
+ size_t NumColorsForBB = ColorsForBB.size();
+ if (NumColorsForBB == 1)
+ continue;
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Cloning block \'" << BB->getName()
+ << "\' for funclet \'" << FuncletPadBB->getName()
+ << "\'.\n");
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB =
+ CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
+ // Insert the clone immediately after the original to ensure determinism
+ // and to keep the same relative ordering of any funclet's blocks.
+ CBB->insertInto(&F, BB->getNextNode());
+
+ // Add basic block mapping.
+ VMap[BB] = CBB;
+
+ // Record delta operations that we need to perform to our color mappings.
+ Orig2Clone.emplace_back(BB, CBB);
+ }
+
+ // If nothing was cloned, we're done cloning in this funclet.
+ if (Orig2Clone.empty())
+ continue;
+
+ // Update our color mappings to reflect that one block has lost a color and
+ // another has gained a color.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ BlocksInFunclet.push_back(NewBlock);
+ ColorVector &NewColors = BlockColors[NewBlock];
+ assert(NewColors.empty() && "A new block should only have one color!");
+ NewColors.push_back(FuncletPadBB);
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << FuncletPadBB->getName()
+ << "\' to block \'" << NewBlock->getName()
+ << "\'.\n");
+
+ llvm::erase_value(BlocksInFunclet, OldBlock);
+ ColorVector &OldColors = BlockColors[OldBlock];
+ llvm::erase_value(OldColors, FuncletPadBB);
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Removed color \'" << FuncletPadBB->getName()
+ << "\' from block \'" << OldBlock->getName()
+ << "\'.\n");
+ }
+
+ // Loop over all of the instructions in this funclet, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (BasicBlock *BB : BlocksInFunclet)
+ // Loop over all instructions, fixing each one as we find it...
+ for (Instruction &I : *BB)
+ RemapInstruction(&I, VMap,
+ RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
+
+ // Catchrets targeting cloned blocks need to be updated separately from
+ // the loop above because they are not in the current funclet.
+ SmallVector<CatchReturnInst *, 2> FixupCatchrets;
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ FixupCatchrets.clear();
+ for (BasicBlock *Pred : predecessors(OldBlock))
+ if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
+ if (CatchRet->getCatchSwitchParentPad() == FuncletToken)
+ FixupCatchrets.push_back(CatchRet);
+
+ for (CatchReturnInst *CatchRet : FixupCatchrets)
+ CatchRet->setSuccessor(NewBlock);
+ }
+
+ auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
+ unsigned NumPreds = PN->getNumIncomingValues();
+ for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
+ ++PredIdx) {
+ BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
+ bool EdgeTargetsFunclet;
+ if (auto *CRI =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken);
+ } else {
+ ColorVector &IncomingColors = BlockColors[IncomingBlock];
+ assert(!IncomingColors.empty() && "Block not colored!");
+ assert((IncomingColors.size() == 1 ||
+ !llvm::is_contained(IncomingColors, FuncletPadBB)) &&
+ "Cloning should leave this funclet's blocks monochromatic");
+ EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
+ }
+ if (IsForOldBlock != EdgeTargetsFunclet)
+ continue;
+ PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
+ // Revisit the next entry.
+ --PredIdx;
+ --PredEnd;
+ }
+ };
+
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (PHINode &OldPN : OldBlock->phis()) {
+ UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
+ }
+ for (PHINode &NewPN : NewBlock->phis()) {
+ UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
+ }
+ }
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
+ // the PHI nodes for NewBB now.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (BasicBlock *SuccBB : successors(NewBlock)) {
+ for (PHINode &SuccPN : SuccBB->phis()) {
+ // Ok, we have a PHI node. Figure out what the incoming value was for
+ // the OldBlock.
+ int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
+ if (OldBlockIdx == -1)
+ break;
+ Value *IV = SuccPN.getIncomingValue(OldBlockIdx);
+
+ // Remap the value if necessary.
+ if (auto *Inst = dyn_cast<Instruction>(IV)) {
+ ValueToValueMapTy::iterator I = VMap.find(Inst);
+ if (I != VMap.end())
+ IV = I->second;
+ }
+
+ SuccPN.addIncoming(IV, NewBlock);
+ }
+ }
+ }
+
+ for (ValueToValueMapTy::value_type VT : VMap) {
+ // If there were values defined in BB that are used outside the funclet,
+ // then we now have to update all uses of the value to use either the
+ // original value, the cloned value, or some PHI derived value. This can
+ // require arbitrary PHI insertion, which we are prepared to do; clean
+ // these up now.
+ SmallVector<Use *, 16> UsesToRename;
+
+ auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
+ if (!OldI)
+ continue;
+ auto *NewI = cast<Instruction>(VT.second);
+ // Scan all uses of this instruction to see if it is used outside of its
+ // funclet, and if so, record them in UsesToRename.
+ for (Use &U : OldI->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = UserI->getParent();
+ ColorVector &ColorsForUserBB = BlockColors[UserBB];
+ assert(!ColorsForUserBB.empty());
+ if (ColorsForUserBB.size() > 1 ||
+ *ColorsForUserBB.begin() != FuncletPadBB)
+ UsesToRename.push_back(&U);
+ }
+
+ // If there are no uses outside the block, we're done with this
+ // instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ // We found a use of OldI outside of the funclet. Rename all uses of OldI
+ // that are outside its funclet to be uses of the appropriate PHI node
+ // etc.
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(OldI->getType(), OldI->getName());
+ SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
+ SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
+ }
+ }
+}
+
+void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+ // Remove implausible terminators and replace them with UnreachableInst.
+ for (auto &Funclet : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclet.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
+ Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
+ auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
+ auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
+ auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);
+
+ for (BasicBlock *BB : BlocksInFunclet) {
+ for (Instruction &I : *BB) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ Value *FuncletBundleOperand = nullptr;
+ if (auto BU = CB->getOperandBundle(LLVMContext::OB_funclet))
+ FuncletBundleOperand = BU->Inputs.front();
+
+ if (FuncletBundleOperand == FuncletPad)
+ continue;
+
+ // Skip call sites which are nounwind intrinsics or inline asm.
+ auto *CalledFn =
+ dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
+ if (CalledFn && ((CalledFn->isIntrinsic() && CB->doesNotThrow()) ||
+ CB->isInlineAsm()))
+ continue;
+
+ // This call site was not part of this funclet, remove it.
+ if (isa<InvokeInst>(CB)) {
+ // Remove the unwind edge if it was an invoke.
+ removeUnwindEdge(BB);
+ // Get a pointer to the new call.
+ BasicBlock::iterator CallI =
+ std::prev(BB->getTerminator()->getIterator());
+ auto *CI = cast<CallInst>(&*CallI);
+ changeToUnreachable(CI);
+ } else {
+ changeToUnreachable(&I);
+ }
+
+ // There are no more instructions in the block (except for unreachable),
+ // so we are done.
+ break;
+ }
+
+ Instruction *TI = BB->getTerminator();
+ // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
+ bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
+ // The token consumed by a CatchReturnInst must match the funclet token.
+ bool IsUnreachableCatchret = false;
+ if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
+ IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
+ // The token consumed by a CleanupReturnInst must match the funclet token.
+ bool IsUnreachableCleanupret = false;
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
+ IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
+ if (IsUnreachableRet || IsUnreachableCatchret ||
+ IsUnreachableCleanupret) {
+ changeToUnreachable(TI);
+ } else if (isa<InvokeInst>(TI)) {
+ if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
+ // Invokes within a cleanuppad for the MSVC++ personality never
+ // transfer control to their unwind edge: the personality will
+ // terminate the program.
+ removeUnwindEdge(BB);
+ }
+ }
+ }
+ }
+}
+
+void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+ // Clean up some of the mess we made by removing useless PHI nodes, trivial
+ // branches, etc.
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+ SimplifyInstructionsInBlock(&BB);
+ ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true);
+ MergeBlockIntoPredecessor(&BB);
+ }
+
+ // We might have some unreachable blocks after cleaning up some impossible
+ // control flow.
+ removeUnreachableBlocks(F);
+}
+
+#ifndef NDEBUG
+void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+ for (BasicBlock &BB : F) {
+ size_t NumColors = BlockColors[&BB].size();
+ assert(NumColors == 1 && "Expected monochromatic BB!");
+ if (NumColors == 0)
+ report_fatal_error("Uncolored BB!");
+ if (NumColors > 1)
+ report_fatal_error("Multicolor BB!");
+ assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) &&
+ "EH Pad still has a PHI!");
+ }
+}
+#endif
+
+bool WinEHPrepare::prepareExplicitEH(Function &F) {
+ // Remove unreachable blocks. It is not valuable to assign them a color and
+ // their existence can trick us into thinking values are alive when they are
+ // not.
+ removeUnreachableBlocks(F);
+
+ // Determine which blocks are reachable from which funclet entries.
+ colorFunclets(F);
+
+ cloneCommonBlocks(F);
+
+ if (!DisableDemotion)
+ demotePHIsOnFunclets(F, DemoteCatchSwitchPHIOnly ||
+ DemoteCatchSwitchPHIOnlyOpt);
+
+ if (!DisableCleanups) {
+ assert(!verifyFunction(F, &dbgs()));
+ removeImplausibleInstructions(F);
+
+ assert(!verifyFunction(F, &dbgs()));
+ cleanupPreparedFunclets(F);
+ }
+
+ LLVM_DEBUG(verifyPreparedFunclets(F));
+ // Recolor the CFG to verify that all is well.
+ LLVM_DEBUG(colorFunclets(F));
+ LLVM_DEBUG(verifyPreparedFunclets(F));
+
+ BlockColors.clear();
+ FuncletBlocks.clear();
+
+ return true;
+}
+
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+ BasicBlock *PHIBlock = PN->getParent();
+ AllocaInst *SpillSlot = nullptr;
+ Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+ if (!EHPad->isTerminator()) {
+ // If the EHPad isn't a terminator, then we can insert a load in this block
+ // that will dominate all uses.
+ SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
+ Twine(PN->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+ Value *V = new LoadInst(PN->getType(), SpillSlot,
+ Twine(PN->getName(), ".wineh.reload"),
+ &*PHIBlock->getFirstInsertionPt());
+ PN->replaceAllUsesWith(V);
+ return SpillSlot;
+ }
+
+ // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+ // loads of the slot before every use.
+ DenseMap<BasicBlock *, Value *> Loads;
+ for (Use &U : llvm::make_early_inc_range(PN->uses())) {
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+ // Use is on an EH pad phi. Leave it alone; we'll insert loads and
+ // stores for it separately.
+ continue;
+ }
+ replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
+ }
+ return SpillSlot;
+}
+
+// TODO: improve store placement. Inserting at def is probably good, but need
+// to be careful not to introduce interfering stores (needs liveness analysis).
+// TODO: identify related phi nodes that can share spill slots, and share them
+// (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+ AllocaInst *SpillSlot) {
+ // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+ // stored to the spill slot by the end of the given Block.
+ SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
+
+ Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
+
+ while (!Worklist.empty()) {
+ BasicBlock *EHBlock;
+ Value *InVal;
+ std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+ PHINode *PN = dyn_cast<PHINode>(InVal);
+ if (PN && PN->getParent() == EHBlock) {
+ // The value is defined by another PHI we need to remove, with no room to
+ // insert a store after the PHI, so each predecessor needs to store its
+ // incoming value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ Value *PredVal = PN->getIncomingValue(i);
+
+ // Undef can safely be skipped.
+ if (isa<UndefValue>(PredVal))
+ continue;
+
+ insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
+ }
+ } else {
+ // We need to store InVal, which dominates EHBlock, but can't put a store
+ // in EHBlock, so need to put stores in each predecessor.
+ for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+ insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
+ }
+ }
+ }
+}
+
+void WinEHPrepare::insertPHIStore(
+ BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
+
+ if (PredBlock->isEHPad() && PredBlock->getFirstNonPHI()->isTerminator()) {
+ // Pred is unsplittable, so we need to queue it on the worklist.
+ Worklist.push_back({PredBlock, PredVal});
+ return;
+ }
+
+ // Otherwise, insert the store at the end of the basic block.
+ new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
+}
+
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads,
+ Function &F) {
+ // Lazily create the spill slot.
+ if (!SpillSlot)
+ SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr,
+ Twine(V->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+ // If this is a PHI node, we can't insert a load of the value before
+ // the use. Instead insert the load in the predecessor block
+ // corresponding to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this
+ // PHI node, we cannot have multiple loads. The problem is that
+ // the resulting PHI node will have multiple values (from each load)
+ // coming in from the same block, which is illegal SSA form.
+ // For this reason, we keep track of and reuse loads we insert.
+ BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+ if (auto *CatchRet =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ // Putting a load above a catchret and use on the phi would still leave
+ // a cross-funclet def/use. We need to split the edge, change the
+ // catchret to target the new block, and put the load there.
+ BasicBlock *PHIBlock = UsingInst->getParent();
+ BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+ // SplitEdge gives us:
+ // IncomingBlock:
+ // ...
+ // br label %NewBlock
+ // NewBlock:
+ // catchret label %PHIBlock
+ // But we need:
+ // IncomingBlock:
+ // ...
+ // catchret label %NewBlock
+ // NewBlock:
+ // br label %PHIBlock
+ // So move the terminators to each others' blocks and swap their
+ // successors.
+ BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+ Goto->removeFromParent();
+ CatchRet->removeFromParent();
+ CatchRet->insertInto(IncomingBlock, IncomingBlock->end());
+ Goto->insertInto(NewBlock, NewBlock->end());
+ Goto->setSuccessor(0, PHIBlock);
+ CatchRet->setSuccessor(NewBlock);
+ // Update the color mapping for the newly split edge.
+ // Grab a reference to the ColorVector to be inserted before getting the
+ // reference to the vector we are copying because inserting the new
+ // element in BlockColors might cause the map to be reallocated.
+ ColorVector &ColorsForNewBlock = BlockColors[NewBlock];
+ ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+ ColorsForNewBlock = ColorsForPHIBlock;
+ for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+ FuncletBlocks[FuncletPad].push_back(NewBlock);
+ // Treat the new block as incoming for load insertion.
+ IncomingBlock = NewBlock;
+ }
+ Value *&Load = Loads[IncomingBlock];
+ // Insert the load into the predecessor block
+ if (!Load)
+ Load = new LoadInst(V->getType(), SpillSlot,
+ Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, IncomingBlock->getTerminator());
+
+ U.set(Load);
+ } else {
+ // Reload right before the old use.
+ auto *Load = new LoadInst(V->getType(), SpillSlot,
+ Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, UsingInst);
+ U.set(Load);
+ }
+}
+
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+ MCSymbol *InvokeBegin,
+ MCSymbol *InvokeEnd) {
+ assert(InvokeStateMap.count(II) &&
+ "should get invoke with precomputed state");
+ LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
+}
+
+void WinEHFuncInfo::addIPToStateRange(int State, MCSymbol *InvokeBegin,
+ MCSymbol *InvokeEnd) {
+ LabelToStateMap[InvokeBegin] = std::make_pair(State, InvokeEnd);
+}
+
+WinEHFuncInfo::WinEHFuncInfo() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
new file mode 100644
index 000000000000..d40725838c94
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -0,0 +1,269 @@
+//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a MachineFunctionPass that inserts the appropriate
+// XRay instrumentation instructions. We look for XRay-specific attributes
+// on the function to determine whether we should insert the replacement
+// operations.
+//
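+// As an illustrative sketch (not taken from this file), a function carrying
+// the attribute "function-instrument"="xray-always" is always instrumented,
+// while one carrying only "xray-instruction-threshold"="200" is instrumented
+// only if it is large enough or contains a loop; see runOnMachineFunction
+// below for the exact rules.
+//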
+//===---------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+
+using namespace llvm;
+
+namespace {
+
+struct InstrumentationOptions {
+ // Whether to emit PATCHABLE_TAIL_CALL.
+ bool HandleTailcall;
+
+ // Whether to emit PATCHABLE_RET/PATCHABLE_FUNCTION_EXIT for all forms of
+ // return, e.g. conditional return.
+ bool HandleAllReturns;
+};
+
+struct XRayInstrumentation : public MachineFunctionPass {
+ static char ID;
+
+ XRayInstrumentation() : MachineFunctionPass(ID) {
+ initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ // Replace the original RET instruction with the exit sled code ("patchable
+ // ret" pseudo-instruction), so that at runtime XRay can replace the sled
+ // with a code jumping to XRay trampoline, which calls the tracing handler
+ // and, in the end, issues the RET instruction.
+ // This is the approach to take on CPUs which have a single RET instruction,
+ // like x86/x86_64.
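+ //
+ // Illustrative sketch (not taken from this file): a return such as RET64
+ // on x86_64 is rewritten into the pseudo
+ //   PATCHABLE_RET <original opcode>, <original operands>
+ // which is later lowered by the AsmPrinter into the XRay exit sled.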
+ void replaceRetWithPatchableRet(MachineFunction &MF,
+ const TargetInstrInfo *TII,
+ InstrumentationOptions);
+
+ // Prepend the original return instruction with the exit sled code ("patchable
+ // function exit" pseudo-instruction), preserving the original return
+ // instruction just after the exit sled code.
+ // This is the approach to take on CPUs which have multiple options for the
+ // return instruction, like ARM. For such CPUs we can't just jump into the
+ // XRay trampoline and issue a single return instruction there. We rather
+ // have to call the trampoline and return from it to the original return
+ // instruction of the function being instrumented.
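+ //
+ // Illustrative sketch (not taken from this file): on ARM a return such as
+ // BX_RET is kept, and the pseudo PATCHABLE_FUNCTION_EXIT is inserted
+ // immediately before it, so the sled can call into the XRay trampoline and
+ // then fall through to the original return.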
+ void prependRetWithPatchableExit(MachineFunction &MF,
+ const TargetInstrInfo *TII,
+ InstrumentationOptions);
+};
+
+} // end anonymous namespace
+
+void XRayInstrumentation::replaceRetWithPatchableRet(
+ MachineFunction &MF, const TargetInstrInfo *TII,
+ InstrumentationOptions op) {
+ // We look for *all* terminators and returns, then replace those with
+ // PATCHABLE_RET instructions.
+ SmallVector<MachineInstr *, 4> Terminators;
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ unsigned Opc = 0;
+ if (T.isReturn() &&
+ (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) {
+ // Replace return instructions with:
+ // PATCHABLE_RET <Opcode>, <Operand>...
+ Opc = TargetOpcode::PATCHABLE_RET;
+ }
+ if (TII->isTailCall(T) && op.HandleTailcall) {
+ // Treat the tail call as a return instruction, which has a
+ // different-looking sled than the normal return case.
+ Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
+ }
+ if (Opc != 0) {
+ auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
+ .addImm(T.getOpcode());
+ for (auto &MO : T.operands())
+ MIB.add(MO);
+ Terminators.push_back(&T);
+ if (T.shouldUpdateCallSiteInfo())
+ MF.eraseCallSiteInfo(&T);
+ }
+ }
+ }
+
+ for (auto &I : Terminators)
+ I->eraseFromParent();
+}
+
+void XRayInstrumentation::prependRetWithPatchableExit(
+ MachineFunction &MF, const TargetInstrInfo *TII,
+ InstrumentationOptions op) {
+ for (auto &MBB : MF)
+ for (auto &T : MBB.terminators()) {
+ unsigned Opc = 0;
+ if (T.isReturn() &&
+ (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) {
+ Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT;
+ }
+ if (TII->isTailCall(T) && op.HandleTailcall) {
+ Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
+ }
+ if (Opc != 0) {
+ // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or
+ // PATCHABLE_TAIL_CALL.
+ BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc));
+ }
+ }
+}
+
+bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
+ auto &F = MF.getFunction();
+ auto InstrAttr = F.getFnAttribute("function-instrument");
+ bool AlwaysInstrument = InstrAttr.isStringAttribute() &&
+ InstrAttr.getValueAsString() == "xray-always";
+ bool NeverInstrument = InstrAttr.isStringAttribute() &&
+ InstrAttr.getValueAsString() == "xray-never";
+ if (NeverInstrument && !AlwaysInstrument)
+ return false;
+ auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops");
+
+ uint64_t XRayThreshold = 0;
+ if (!AlwaysInstrument) {
+ bool IgnoreLoops = IgnoreLoopsAttr.isValid();
+ XRayThreshold = F.getFnAttributeAsParsedInteger(
+ "xray-instruction-threshold", std::numeric_limits<uint64_t>::max());
+ if (XRayThreshold == std::numeric_limits<uint64_t>::max())
+ return false;
+
+ // Count the number of MachineInstrs in the MachineFunction.
+ uint64_t MICount = 0;
+ for (const auto &MBB : MF)
+ MICount += MBB.size();
+
+ bool TooFewInstrs = MICount < XRayThreshold;
+
+ if (!IgnoreLoops) {
+ // Get MachineDominatorTree or compute it on the fly if it's unavailable
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineDominatorTree ComputedMDT;
+ if (!MDT) {
+ ComputedMDT.getBase().recalculate(MF);
+ MDT = &ComputedMDT;
+ }
+
+ // Get MachineLoopInfo or compute it on the fly if it's unavailable
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ MachineLoopInfo ComputedMLI;
+ if (!MLI) {
+ ComputedMLI.getBase().analyze(MDT->getBase());
+ MLI = &ComputedMLI;
+ }
+
+ // Check if we have a loop.
+ // FIXME: Maybe make this smarter, and see whether the loops are dependent
+ // on inputs or side-effects?
+ if (MLI->empty() && TooFewInstrs)
+ return false; // Function is too small and has no loops.
+ } else if (TooFewInstrs) {
+ // Function is too small
+ return false;
+ }
+ }
+
+ // We look for the first non-empty MachineBasicBlock, so that we can insert
+ // the function instrumentation in the appropriate place.
+ auto MBI = llvm::find_if(
+ MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
+ if (MBI == MF.end())
+ return false; // The function is empty.
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ auto &FirstMBB = *MBI;
+ auto &FirstMI = *FirstMBB.begin();
+
+ if (!MF.getSubtarget().isXRaySupported()) {
+ FirstMI.emitError("An attempt to perform XRay instrumentation for an"
+ " unsupported target.");
+ return false;
+ }
+
+ if (!F.hasFnAttribute("xray-skip-entry")) {
+ // First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the
+ // MachineFunction.
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+ }
+
+ if (!F.hasFnAttribute("xray-skip-exit")) {
+ switch (MF.getTarget().getTargetTriple().getArch()) {
+ case Triple::ArchType::arm:
+ case Triple::ArchType::thumb:
+ case Triple::ArchType::aarch64:
+ case Triple::ArchType::hexagon:
+ case Triple::ArchType::loongarch64:
+ case Triple::ArchType::mips:
+ case Triple::ArchType::mipsel:
+ case Triple::ArchType::mips64:
+ case Triple::ArchType::mips64el: {
+ // For the architectures which don't have a single return instruction
+ InstrumentationOptions op;
+ op.HandleTailcall = false;
+ op.HandleAllReturns = true;
+ prependRetWithPatchableExit(MF, TII, op);
+ break;
+ }
+ case Triple::ArchType::ppc64le: {
+ // PPC has conditional returns. Turn them into branch and plain returns.
+ InstrumentationOptions op;
+ op.HandleTailcall = false;
+ op.HandleAllReturns = true;
+ replaceRetWithPatchableRet(MF, TII, op);
+ break;
+ }
+ default: {
+ // For the architectures that have a single return instruction (such as
+ // RETQ on x86_64).
+ InstrumentationOptions op;
+ op.HandleTailcall = true;
+ op.HandleAllReturns = false;
+ replaceRetWithPatchableRet(MF, TII, op);
+ break;
+ }
+ }
+ }
+ return true;
+}
+
+char XRayInstrumentation::ID = 0;
+char &llvm::XRayInstrumentationID = XRayInstrumentation::ID;
+INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation",
+ "Insert XRay ops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation",
+ "Insert XRay ops", false, false)